diff options
author | Dave Airlie <airlied@redhat.com> | 2015-01-22 10:44:41 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2015-01-22 10:44:41 +1000 |
commit | 281d1bbd34b734e4f22b30b6f3b673dda46a7470 (patch) | |
tree | f67a2d45d248e9deaa1c68d3f8f33cdf476daacd /drivers/gpu | |
parent | bfa55bd4990815b055162b6d5d031f37a39942a5 (diff) | |
parent | b942c653ae265abbd31032f3b4f5f857e5c7c723 (diff) | |
download | op-kernel-dev-281d1bbd34b734e4f22b30b6f3b673dda46a7470.zip op-kernel-dev-281d1bbd34b734e4f22b30b6f3b673dda46a7470.tar.gz |
Merge remote-tracking branch 'origin/master' into drm-next
Backmerge Linus tree after rc5 + drm-fixes went in.
There were a few amdkfd conflicts I wanted to avoid,
and Ben requested this for nouveau also.
Conflicts:
drivers/gpu/drm/amd/amdkfd/Makefile
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/include/kgd_kfd_interface.h
drivers/gpu/drm/i915/intel_runtime_pm.c
drivers/gpu/drm/radeon/radeon_kfd.c
Diffstat (limited to 'drivers/gpu')
49 files changed, 614 insertions, 500 deletions
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 9d92ba3..cf0eed8 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -37,6 +37,7 @@ obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o obj-$(CONFIG_DRM_TTM) += ttm/ obj-$(CONFIG_DRM_TDFX) += tdfx/ obj-$(CONFIG_DRM_R128) += r128/ +obj-$(CONFIG_HSA_AMD) += amd/amdkfd/ obj-$(CONFIG_DRM_RADEON)+= radeon/ obj-$(CONFIG_DRM_MGA) += mga/ obj-$(CONFIG_DRM_I810) += i810/ @@ -68,4 +69,3 @@ obj-$(CONFIG_DRM_IMX) += imx/ obj-y += i2c/ obj-y += panel/ obj-y += bridge/ -obj-$(CONFIG_HSA_AMD) += amd/amdkfd/ diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index cd09c05..0f49601 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -12,6 +12,5 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ kfd_kernel_queue_vi.o kfd_packet_manager.o \ kfd_process_queue_manager.o kfd_device_queue_manager.o \ kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \ - kfd_interrupt.o obj-$(CONFIG_HSA_AMD) += amdkfd.o diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 38b6150..732087dc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -31,7 +31,6 @@ #include <uapi/linux/kfd_ioctl.h> #include <linux/time.h> #include <linux/mm.h> -#include <linux/uaccess.h> #include <uapi/asm-generic/mman-common.h> #include <asm/processor.h> #include "kfd_priv.h" @@ -127,17 +126,14 @@ static int kfd_open(struct inode *inode, struct file *filep) return 0; } -static long kfd_ioctl_get_version(struct file *filep, struct kfd_process *p, - void __user *arg) +static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p, + void *data) { - struct kfd_ioctl_get_version_args args; + struct kfd_ioctl_get_version_args *args = data; int err = 0; - args.major_version = KFD_IOCTL_MAJOR_VERSION; - args.minor_version = KFD_IOCTL_MINOR_VERSION; - - if (copy_to_user(arg, &args, sizeof(args))) - err = -EFAULT; + args->major_version = KFD_IOCTL_MAJOR_VERSION; + args->minor_version = KFD_IOCTL_MINOR_VERSION; return err; } @@ -249,10 +245,10 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, return 0; } -static long kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, - void __user *arg) +static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, + void *data) { - struct kfd_ioctl_create_queue_args args; + struct kfd_ioctl_create_queue_args *args = data; struct kfd_dev *dev; int err = 0; unsigned int queue_id; @@ -261,19 +257,16 @@ static long kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, memset(&q_properties, 0, sizeof(struct queue_properties)); - if (copy_from_user(&args, arg, sizeof(args))) - return -EFAULT; - pr_debug("kfd: creating queue ioctl\n"); - err = set_queue_properties_from_user(&q_properties, &args); + err = set_queue_properties_from_user(&q_properties, args); if (err) return err; - pr_debug("kfd: looking for gpu id 0x%x\n", args.gpu_id); - dev = kfd_device_by_id(args.gpu_id); + pr_debug("kfd: looking for gpu id 0x%x\n", args->gpu_id); + dev = kfd_device_by_id(args->gpu_id); if (dev == NULL) { - pr_debug("kfd: gpu id 0x%x was not found\n", args.gpu_id); + pr_debug("kfd: gpu id 0x%x was not found\n", args->gpu_id); return -EINVAL; } @@ -281,7 +274,7 @@ static long kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, pdd = kfd_bind_process_to_device(dev, p); if (IS_ERR(pdd)) { - err = PTR_ERR(pdd); + err = -ESRCH; goto err_bind_process; } @@ -294,33 +287,26 @@ static long kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, if (err != 0) goto err_create_queue; - args.queue_id = queue_id; + args->queue_id = queue_id; /* Return gpu_id as doorbell offset for mmap usage */ - args.doorbell_offset = args.gpu_id << PAGE_SHIFT; - - if (copy_to_user(arg, &args, sizeof(args))) { - err = -EFAULT; - goto err_copy_args_out; - } + args->doorbell_offset = args->gpu_id << PAGE_SHIFT; mutex_unlock(&p->mutex); - pr_debug("kfd: queue id %d was created successfully\n", args.queue_id); + pr_debug("kfd: queue id %d was created successfully\n", args->queue_id); pr_debug("ring buffer address == 0x%016llX\n", - args.ring_base_address); + args->ring_base_address); pr_debug("read ptr address == 0x%016llX\n", - args.read_pointer_address); + args->read_pointer_address); pr_debug("write ptr address == 0x%016llX\n", - args.write_pointer_address); + args->write_pointer_address); return 0; -err_copy_args_out: - pqm_destroy_queue(&p->pqm, queue_id); err_create_queue: err_bind_process: mutex_unlock(&p->mutex); @@ -328,99 +314,90 @@ err_bind_process: } static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p, - void __user *arg) + void *data) { int retval; - struct kfd_ioctl_destroy_queue_args args; - - if (copy_from_user(&args, arg, sizeof(args))) - return -EFAULT; + struct kfd_ioctl_destroy_queue_args *args = data; pr_debug("kfd: destroying queue id %d for PASID %d\n", - args.queue_id, + args->queue_id, p->pasid); mutex_lock(&p->mutex); - retval = pqm_destroy_queue(&p->pqm, args.queue_id); + retval = pqm_destroy_queue(&p->pqm, args->queue_id); mutex_unlock(&p->mutex); return retval; } static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, - void __user *arg) + void *data) { int retval; - struct kfd_ioctl_update_queue_args args; + struct kfd_ioctl_update_queue_args *args = data; struct queue_properties properties; - if (copy_from_user(&args, arg, sizeof(args))) - return -EFAULT; - - if (args.queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) { + if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) { pr_err("kfd: queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n"); return -EINVAL; } - if (args.queue_priority > KFD_MAX_QUEUE_PRIORITY) { + if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) { pr_err("kfd: queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n"); return -EINVAL; } - if ((args.ring_base_address) && + if ((args->ring_base_address) && (!access_ok(VERIFY_WRITE, - (const void __user *) args.ring_base_address, + (const void __user *) args->ring_base_address, sizeof(uint64_t)))) { pr_err("kfd: can't access ring base address\n"); return -EFAULT; } - if (!is_power_of_2(args.ring_size) && (args.ring_size != 0)) { + if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) { pr_err("kfd: ring size must be a power of 2 or 0\n"); return -EINVAL; } - properties.queue_address = args.ring_base_address; - properties.queue_size = args.ring_size; - properties.queue_percent = args.queue_percentage; - properties.priority = args.queue_priority; + properties.queue_address = args->ring_base_address; + properties.queue_size = args->ring_size; + properties.queue_percent = args->queue_percentage; + properties.priority = args->queue_priority; pr_debug("kfd: updating queue id %d for PASID %d\n", - args.queue_id, p->pasid); + args->queue_id, p->pasid); mutex_lock(&p->mutex); - retval = pqm_update_queue(&p->pqm, args.queue_id, &properties); + retval = pqm_update_queue(&p->pqm, args->queue_id, &properties); mutex_unlock(&p->mutex); return retval; } -static long kfd_ioctl_set_memory_policy(struct file *filep, - struct kfd_process *p, void __user *arg) +static int kfd_ioctl_set_memory_policy(struct file *filep, + struct kfd_process *p, void *data) { - struct kfd_ioctl_set_memory_policy_args args; + struct kfd_ioctl_set_memory_policy_args *args = data; struct kfd_dev *dev; int err = 0; struct kfd_process_device *pdd; enum cache_policy default_policy, alternate_policy; - if (copy_from_user(&args, arg, sizeof(args))) - return -EFAULT; - - if (args.default_policy != KFD_IOC_CACHE_POLICY_COHERENT - && args.default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) { + if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT + && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) { return -EINVAL; } - if (args.alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT - && args.alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) { + if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT + && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) { return -EINVAL; } - dev = kfd_device_by_id(args.gpu_id); + dev = kfd_device_by_id(args->gpu_id); if (dev == NULL) return -EINVAL; @@ -428,23 +405,23 @@ static long kfd_ioctl_set_memory_policy(struct file *filep, pdd = kfd_bind_process_to_device(dev, p); if (IS_ERR(pdd)) { - err = PTR_ERR(pdd); + err = -ESRCH; goto out; } - default_policy = (args.default_policy == KFD_IOC_CACHE_POLICY_COHERENT) + default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT) ? cache_policy_coherent : cache_policy_noncoherent; alternate_policy = - (args.alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT) + (args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT) ? cache_policy_coherent : cache_policy_noncoherent; if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm, &pdd->qpd, default_policy, alternate_policy, - (void __user *)args.alternate_aperture_base, - args.alternate_aperture_size)) + (void __user *)args->alternate_aperture_base, + args->alternate_aperture_size)) err = -EINVAL; out: @@ -453,53 +430,44 @@ out: return err; } -static long kfd_ioctl_get_clock_counters(struct file *filep, - struct kfd_process *p, void __user *arg) +static int kfd_ioctl_get_clock_counters(struct file *filep, + struct kfd_process *p, void *data) { - struct kfd_ioctl_get_clock_counters_args args; + struct kfd_ioctl_get_clock_counters_args *args = data; struct kfd_dev *dev; struct timespec time; - if (copy_from_user(&args, arg, sizeof(args))) - return -EFAULT; - - dev = kfd_device_by_id(args.gpu_id); + dev = kfd_device_by_id(args->gpu_id); if (dev == NULL) return -EINVAL; /* Reading GPU clock counter from KGD */ - args.gpu_clock_counter = kfd2kgd->get_gpu_clock_counter(dev->kgd); + args->gpu_clock_counter = kfd2kgd->get_gpu_clock_counter(dev->kgd); /* No access to rdtsc. Using raw monotonic time */ getrawmonotonic(&time); - args.cpu_clock_counter = (uint64_t)timespec_to_ns(&time); + args->cpu_clock_counter = (uint64_t)timespec_to_ns(&time); get_monotonic_boottime(&time); - args.system_clock_counter = (uint64_t)timespec_to_ns(&time); + args->system_clock_counter = (uint64_t)timespec_to_ns(&time); /* Since the counter is in nano-seconds we use 1GHz frequency */ - args.system_clock_freq = 1000000000; - - if (copy_to_user(arg, &args, sizeof(args))) - return -EFAULT; + args->system_clock_freq = 1000000000; return 0; } static int kfd_ioctl_get_process_apertures(struct file *filp, - struct kfd_process *p, void __user *arg) + struct kfd_process *p, void *data) { - struct kfd_ioctl_get_process_apertures_args args; + struct kfd_ioctl_get_process_apertures_args *args = data; struct kfd_process_device_apertures *pAperture; struct kfd_process_device *pdd; dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid); - if (copy_from_user(&args, arg, sizeof(args))) - return -EFAULT; - - args.num_of_nodes = 0; + args->num_of_nodes = 0; mutex_lock(&p->mutex); @@ -508,7 +476,8 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, /* Run over all pdd of the process */ pdd = kfd_get_first_process_device_data(p); do { - pAperture = &args.process_apertures[args.num_of_nodes]; + pAperture = + &args->process_apertures[args->num_of_nodes]; pAperture->gpu_id = pdd->dev->id; pAperture->lds_base = pdd->lds_base; pAperture->lds_limit = pdd->lds_limit; @@ -518,7 +487,7 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, pAperture->scratch_limit = pdd->scratch_limit; dev_dbg(kfd_device, - "node id %u\n", args.num_of_nodes); + "node id %u\n", args->num_of_nodes); dev_dbg(kfd_device, "gpu id %u\n", pdd->dev->id); dev_dbg(kfd_device, @@ -534,80 +503,131 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, dev_dbg(kfd_device, "scratch_limit %llX\n", pdd->scratch_limit); - args.num_of_nodes++; + args->num_of_nodes++; } while ((pdd = kfd_get_next_process_device_data(p, pdd)) != NULL && - (args.num_of_nodes < NUM_OF_SUPPORTED_GPUS)); + (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS)); } mutex_unlock(&p->mutex); - if (copy_to_user(arg, &args, sizeof(args))) - return -EFAULT; - return 0; } +#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ + [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0, .name = #ioctl} + +/** Ioctl table */ +static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { + AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION, + kfd_ioctl_get_version, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE, + kfd_ioctl_create_queue, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE, + kfd_ioctl_destroy_queue, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY, + kfd_ioctl_set_memory_policy, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS, + kfd_ioctl_get_clock_counters, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES, + kfd_ioctl_get_process_apertures, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE, + kfd_ioctl_update_queue, 0), +}; + +#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) + static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) { struct kfd_process *process; - long err = -EINVAL; + amdkfd_ioctl_t *func; + const struct amdkfd_ioctl_desc *ioctl = NULL; + unsigned int nr = _IOC_NR(cmd); + char stack_kdata[128]; + char *kdata = NULL; + unsigned int usize, asize; + int retcode = -EINVAL; - dev_dbg(kfd_device, - "ioctl cmd 0x%x (#%d), arg 0x%lx\n", - cmd, _IOC_NR(cmd), arg); + if (nr >= AMDKFD_CORE_IOCTL_COUNT) + goto err_i1; + + if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) { + u32 amdkfd_size; + + ioctl = &amdkfd_ioctls[nr]; + + amdkfd_size = _IOC_SIZE(ioctl->cmd); + usize = asize = _IOC_SIZE(cmd); + if (amdkfd_size > asize) + asize = amdkfd_size; + + cmd = ioctl->cmd; + } else + goto err_i1; + + dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg); process = kfd_get_process(current); - if (IS_ERR(process)) - return PTR_ERR(process); + if (IS_ERR(process)) { + dev_dbg(kfd_device, "no process\n"); + goto err_i1; + } - switch (cmd) { - case KFD_IOC_GET_VERSION: - err = kfd_ioctl_get_version(filep, process, (void __user *)arg); - break; - case KFD_IOC_CREATE_QUEUE: - err = kfd_ioctl_create_queue(filep, process, - (void __user *)arg); - break; - - case KFD_IOC_DESTROY_QUEUE: - err = kfd_ioctl_destroy_queue(filep, process, - (void __user *)arg); - break; - - case KFD_IOC_SET_MEMORY_POLICY: - err = kfd_ioctl_set_memory_policy(filep, process, - (void __user *)arg); - break; - - case KFD_IOC_GET_CLOCK_COUNTERS: - err = kfd_ioctl_get_clock_counters(filep, process, - (void __user *)arg); - break; - - case KFD_IOC_GET_PROCESS_APERTURES: - err = kfd_ioctl_get_process_apertures(filep, process, - (void __user *)arg); - break; - - case KFD_IOC_UPDATE_QUEUE: - err = kfd_ioctl_update_queue(filep, process, - (void __user *)arg); - break; - - default: - dev_err(kfd_device, - "unknown ioctl cmd 0x%x, arg 0x%lx)\n", - cmd, arg); - err = -EINVAL; - break; + /* Do not trust userspace, use our own definition */ + func = ioctl->func; + + if (unlikely(!func)) { + dev_dbg(kfd_device, "no function\n"); + retcode = -EINVAL; + goto err_i1; } - if (err < 0) - dev_err(kfd_device, - "ioctl error %ld for ioctl cmd 0x%x (#%d)\n", - err, cmd, _IOC_NR(cmd)); + if (cmd & (IOC_IN | IOC_OUT)) { + if (asize <= sizeof(stack_kdata)) { + kdata = stack_kdata; + } else { + kdata = kmalloc(asize, GFP_KERNEL); + if (!kdata) { + retcode = -ENOMEM; + goto err_i1; + } + } + if (asize > usize) + memset(kdata + usize, 0, asize - usize); + } - return err; + if (cmd & IOC_IN) { + if (copy_from_user(kdata, (void __user *)arg, usize) != 0) { + retcode = -EFAULT; + goto err_i1; + } + } else if (cmd & IOC_OUT) { + memset(kdata, 0, usize); + } + + retcode = func(filep, process, kdata); + + if (cmd & IOC_OUT) + if (copy_to_user((void __user *)arg, kdata, usize) != 0) + retcode = -EFAULT; + +err_i1: + if (!ioctl) + dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n", + task_pid_nr(current), cmd, nr); + + if (kdata != stack_kdata) + kfree(kdata); + + if (retcode) + dev_dbg(kfd_device, "ret = %d\n", retcode); + + return retcode; } static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index a770ec6..1ba8332 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -229,13 +229,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto kfd_topology_add_device_error; } - if (kfd_interrupt_init(kfd)) { - dev_err(kfd_device, - "Error initializing interrupts for device (%x:%x)\n", - kfd->pdev->vendor, kfd->pdev->device); - goto kfd_interrupt_error; - } - if (!device_iommu_pasid_init(kfd)) { dev_err(kfd_device, "Error initializing iommuv2 for device (%x:%x)\n", @@ -274,8 +267,6 @@ dqm_start_error: device_queue_manager_error: amd_iommu_free_device(kfd->pdev); device_iommu_pasid_error: - kfd_interrupt_exit(kfd); -kfd_interrupt_error: kfd_topology_remove_device(kfd); kfd_topology_add_device_error: kfd_gtt_sa_fini(kfd); @@ -293,7 +284,6 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) if (kfd->init_complete) { device_queue_manager_uninit(kfd->dqm); amd_iommu_free_device(kfd->pdev); - kfd_interrupt_exit(kfd); kfd_topology_remove_device(kfd); kfd_gtt_sa_fini(kfd); kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem); @@ -337,15 +327,7 @@ int kgd2kfd_resume(struct kfd_dev *kfd) /* This is called directly from KGD at ISR. */ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) { - if (kfd->init_complete) { - spin_lock(&kfd->interrupt_lock); - - if (kfd->interrupts_active - && enqueue_ih_ring_entry(kfd, ih_ring_entry)) - schedule_work(&kfd->interrupt_work); - - spin_unlock(&kfd->interrupt_lock); - } + /* Process interrupts / schedule work as necessary */ } static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index e804e87..a5c69e9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -138,6 +138,9 @@ static void deallocate_vmid(struct device_queue_manager *dqm, { int bit = qpd->vmid - KFD_VMID_START_OFFSET; + /* Release the vmid mapping */ + set_pasid_vmid_mapping(dqm, 0, qpd->vmid); + set_bit(bit, (unsigned long *)&dqm->vmid_bitmap); qpd->vmid = 0; q->properties.vmid = 0; @@ -253,6 +256,18 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, return retval; } + pr_debug("kfd: loading mqd to hqd on pipe (%d) queue (%d)\n", + q->pipe, + q->queue); + + retval = mqd->load_mqd(mqd, q->mqd, q->pipe, + q->queue, (uint32_t __user *) q->properties.write_ptr); + if (retval != 0) { + deallocate_hqd(dqm, q); + mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + return retval; + } + return 0; } @@ -311,6 +326,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) { int retval; struct mqd_manager *mqd; + bool prev_active = false; BUG_ON(!dqm || !q || !q->mqd); @@ -321,10 +337,18 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) return -ENOMEM; } - retval = mqd->update_mqd(mqd, q->mqd, &q->properties); if (q->properties.is_active == true) + prev_active = true; + + /* + * + * check active state vs. the previous state + * and modify counter accordingly + */ + retval = mqd->update_mqd(mqd, q->mqd, &q->properties); + if ((q->properties.is_active == true) && (prev_active == false)) dqm->queue_count++; - else + else if ((q->properties.is_active == false) && (prev_active == true)) dqm->queue_count--; if (sched_policy != KFD_SCHED_POLICY_NO_HWS) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c deleted file mode 100644 index 5b99909..0000000 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright 2014 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/* - * KFD Interrupts. - * - * AMD GPUs deliver interrupts by pushing an interrupt description onto the - * interrupt ring and then sending an interrupt. KGD receives the interrupt - * in ISR and sends us a pointer to each new entry on the interrupt ring. - * - * We generally can't process interrupt-signaled events from ISR, so we call - * out to each interrupt client module (currently only the scheduler) to ask if - * each interrupt is interesting. If they return true, then it requires further - * processing so we copy it to an internal interrupt ring and call each - * interrupt client again from a work-queue. - * - * There's no acknowledgment for the interrupts we use. The hardware simply - * queues a new interrupt each time without waiting. - * - * The fixed-size internal queue means that it's possible for us to lose - * interrupts because we have no back-pressure to the hardware. - */ - -#include <linux/slab.h> -#include <linux/device.h> -#include "kfd_priv.h" - -#define KFD_INTERRUPT_RING_SIZE 256 - -static void interrupt_wq(struct work_struct *); - -int kfd_interrupt_init(struct kfd_dev *kfd) -{ - void *interrupt_ring = kmalloc_array(KFD_INTERRUPT_RING_SIZE, - kfd->device_info->ih_ring_entry_size, - GFP_KERNEL); - if (!interrupt_ring) - return -ENOMEM; - - kfd->interrupt_ring = interrupt_ring; - kfd->interrupt_ring_size = - KFD_INTERRUPT_RING_SIZE * kfd->device_info->ih_ring_entry_size; - atomic_set(&kfd->interrupt_ring_wptr, 0); - atomic_set(&kfd->interrupt_ring_rptr, 0); - - spin_lock_init(&kfd->interrupt_lock); - - INIT_WORK(&kfd->interrupt_work, interrupt_wq); - - kfd->interrupts_active = true; - - /* - * After this function returns, the interrupt will be enabled. This - * barrier ensures that the interrupt running on a different processor - * sees all the above writes. - */ - smp_wmb(); - - return 0; -} - -void kfd_interrupt_exit(struct kfd_dev *kfd) -{ - /* - * Stop the interrupt handler from writing to the ring and scheduling - * workqueue items. The spinlock ensures that any interrupt running - * after we have unlocked sees interrupts_active = false. - */ - unsigned long flags; - - spin_lock_irqsave(&kfd->interrupt_lock, flags); - kfd->interrupts_active = false; - spin_unlock_irqrestore(&kfd->interrupt_lock, flags); - - /* - * Flush_scheduled_work ensures that there are no outstanding - * work-queue items that will access interrupt_ring. New work items - * can't be created because we stopped interrupt handling above. - */ - flush_scheduled_work(); - - kfree(kfd->interrupt_ring); -} - -/* - * This assumes that it can't be called concurrently with itself - * but only with dequeue_ih_ring_entry. - */ -bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry) -{ - unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr); - unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr); - - if ((rptr - wptr) % kfd->interrupt_ring_size == - kfd->device_info->ih_ring_entry_size) { - /* This is very bad, the system is likely to hang. */ - dev_err_ratelimited(kfd_chardev(), - "Interrupt ring overflow, dropping interrupt.\n"); - return false; - } - - memcpy(kfd->interrupt_ring + wptr, ih_ring_entry, - kfd->device_info->ih_ring_entry_size); - - wptr = (wptr + kfd->device_info->ih_ring_entry_size) % - kfd->interrupt_ring_size; - smp_wmb(); /* Ensure memcpy'd data is visible before wptr update. */ - atomic_set(&kfd->interrupt_ring_wptr, wptr); - - return true; -} - -/* - * This assumes that it can't be called concurrently with itself - * but only with enqueue_ih_ring_entry. - */ -static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry) -{ - /* - * Assume that wait queues have an implicit barrier, i.e. anything that - * happened in the ISR before it queued work is visible. - */ - - unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr); - unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr); - - if (rptr == wptr) - return false; - - memcpy(ih_ring_entry, kfd->interrupt_ring + rptr, - kfd->device_info->ih_ring_entry_size); - - rptr = (rptr + kfd->device_info->ih_ring_entry_size) % - kfd->interrupt_ring_size; - - /* - * Ensure the rptr write update is not visible until - * memcpy has finished reading. - */ - smp_mb(); - atomic_set(&kfd->interrupt_ring_rptr, rptr); - - return true; -} - -static void interrupt_wq(struct work_struct *work) -{ - struct kfd_dev *dev = container_of(work, struct kfd_dev, - interrupt_work); - - uint32_t ih_ring_entry[DIV_ROUND_UP( - dev->device_info->ih_ring_entry_size, - sizeof(uint32_t))]; - - while (dequeue_ih_ring_entry(dev, ih_ring_entry)) - ; -} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 4e582de..a318743 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -264,7 +264,7 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd, uint32_t queue_id) { - return kfd2kgd->hqd_is_occupies(mm->dev->kgd, queue_address, + return kfd2kgd->hqd_is_occupied(mm->dev->kgd, queue_address, pipe_id, queue_id); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c index 71699ad..4c25ef5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c @@ -32,7 +32,7 @@ int kfd_pasid_init(void) { pasid_limit = max_num_of_processes; - pasid_bitmap = kzalloc(BITS_TO_LONGS(pasid_limit), GFP_KERNEL); + pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long), GFP_KERNEL); if (!pasid_bitmap) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index bfcf45f..1b35a9c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -157,22 +157,10 @@ struct kfd_dev { unsigned int gtt_sa_chunk_size; unsigned int gtt_sa_num_of_chunks; - void *interrupt_ring; - size_t interrupt_ring_size; - atomic_t interrupt_ring_rptr; - atomic_t interrupt_ring_wptr; - struct work_struct interrupt_work; - spinlock_t interrupt_lock; - /* QCM Device instance */ struct device_queue_manager *dqm; bool init_complete; - /* - * Interrupts of interest to KFD are copied - * from the HW ring into a SW ring. - */ - bool interrupts_active; }; /* KGD2KFD callbacks */ @@ -490,6 +478,24 @@ struct kfd_process { bool is_32bit_user_mode; }; +/** + * Ioctl function type. + * + * \param filep pointer to file structure. + * \param p amdkfd process pointer. + * \param data pointer to arg that was copied from user. + */ +typedef int amdkfd_ioctl_t(struct file *filep, struct kfd_process *p, + void *data); + +struct amdkfd_ioctl_desc { + unsigned int cmd; + int flags; + amdkfd_ioctl_t *func; + unsigned int cmd_drv; + const char *name; +}; + void kfd_process_create_wq(void); void kfd_process_destroy_wq(void); struct kfd_process *kfd_create_process(const struct task_struct *); @@ -548,10 +554,7 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev); struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx); /* Interrupts */ -int kfd_interrupt_init(struct kfd_dev *dev); -void kfd_interrupt_exit(struct kfd_dev *dev); void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); -bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry); /* Power Management */ void kgd2kfd_suspend(struct kfd_dev *kfd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 4886dde..4983993 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -934,7 +934,7 @@ static int kfd_build_sysfs_node_tree(void) uint32_t i = 0; list_for_each_entry(dev, &topology_device_list, list) { - ret = kfd_build_sysfs_node_entry(dev, 0); + ret = kfd_build_sysfs_node_entry(dev, i); if (ret < 0) return ret; i++; diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 8834997..239bc16 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -180,7 +180,7 @@ struct kfd2kgd_calls { int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd); - bool (*hqd_is_occupies)(struct kgd_dev *kgd, uint64_t queue_address, + bool (*hqd_is_occupied)(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); int (*hqd_destroy)(struct kgd_dev *kgd, uint32_t reset_type, diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 52ce26d..cf775a4 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -741,7 +741,9 @@ int drm_fb_helper_setcmap(struct fb_cmap *cmap, struct fb_info *info) int i, j, rc = 0; int start; - drm_modeset_lock_all(dev); + if (__drm_modeset_lock_all(dev, !!oops_in_progress)) { + return -EBUSY; + } if (!drm_fb_helper_is_bound(fb_helper)) { drm_modeset_unlock_all(dev); return -EBUSY; @@ -915,7 +917,9 @@ int drm_fb_helper_pan_display(struct fb_var_screeninfo *var, int ret = 0; int i; - drm_modeset_lock_all(dev); + if (__drm_modeset_lock_all(dev, !!oops_in_progress)) { + return -EBUSY; + } if (!drm_fb_helper_is_bound(fb_helper)) { drm_modeset_unlock_all(dev); return -EBUSY; diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index 121470a..1bcbe07 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -645,18 +645,6 @@ static int exynos_drm_init(void) if (!is_exynos) return -ENODEV; - /* - * Register device object only in case of Exynos SoC. - * - * Below codes resolves temporarily infinite loop issue incurred - * by Exynos drm driver when using multi-platform kernel. - * So these codes will be replaced with more generic way later. - */ - if (!of_machine_is_compatible("samsung,exynos3") && - !of_machine_is_compatible("samsung,exynos4") && - !of_machine_is_compatible("samsung,exynos5")) - return -ENODEV; - exynos_drm_pdev = platform_device_register_simple("exynos-drm", -1, NULL, 0); if (IS_ERR(exynos_drm_pdev)) diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index 5765a16..98051e8 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -1669,7 +1669,6 @@ static void hdmi_mode_apply(struct hdmi_context *hdata) static void hdmiphy_conf_reset(struct hdmi_context *hdata) { - u8 buffer[2]; u32 reg; clk_disable_unprepare(hdata->res.sclk_hdmi); @@ -1677,11 +1676,8 @@ static void hdmiphy_conf_reset(struct hdmi_context *hdata) clk_prepare_enable(hdata->res.sclk_hdmi); /* operation mode */ - buffer[0] = 0x1f; - buffer[1] = 0x00; - - if (hdata->hdmiphy_port) - i2c_master_send(hdata->hdmiphy_port, buffer, 2); + hdmiphy_reg_writeb(hdata, HDMIPHY_MODE_SET_DONE, + HDMI_PHY_ENABLE_MODE_SET); if (hdata->type == HDMI_TYPE13) reg = HDMI_V13_PHY_RSTOUT; diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index 820b762..064ed65 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -1026,6 +1026,7 @@ static void mixer_win_disable(struct exynos_drm_manager *mgr, int zpos) static void mixer_wait_for_vblank(struct exynos_drm_manager *mgr) { struct mixer_context *mixer_ctx = mgr_to_mixer(mgr); + int err; mutex_lock(&mixer_ctx->mixer_mutex); if (!mixer_ctx->powered) { @@ -1034,7 +1035,11 @@ static void mixer_wait_for_vblank(struct exynos_drm_manager *mgr) } mutex_unlock(&mixer_ctx->mixer_mutex); - drm_vblank_get(mgr->crtc->dev, mixer_ctx->pipe); + err = drm_vblank_get(mgr->crtc->dev, mixer_ctx->pipe); + if (err < 0) { + DRM_DEBUG_KMS("failed to acquire vblank counter\n"); + return; + } atomic_set(&mixer_ctx->wait_vsync_event, 1); @@ -1262,8 +1267,6 @@ static int mixer_bind(struct device *dev, struct device *manager, void *data) return ret; } - pm_runtime_enable(dev); - return 0; } @@ -1272,8 +1275,6 @@ static void mixer_unbind(struct device *dev, struct device *master, void *data) struct mixer_context *ctx = dev_get_drvdata(dev); mixer_mgr_remove(&ctx->manager); - - pm_runtime_disable(dev); } static const struct component_ops mixer_component_ops = { diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index fd7a493..54f2a27 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1826,8 +1826,6 @@ struct drm_i915_private { */ struct workqueue_struct *dp_wq; - uint32_t bios_vgacntr; - /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ struct { int (*do_execbuf)(struct drm_device *dev, struct drm_file *file, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3044fb3..4e4d969 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1048,6 +1048,7 @@ int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_pwrite *args = data; struct drm_i915_gem_object *obj; int ret; @@ -1067,9 +1068,11 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, return -EFAULT; } + intel_runtime_pm_get(dev_priv); + ret = i915_mutex_lock_interruptible(dev); if (ret) - return ret; + goto put_rpm; obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); if (&obj->base == NULL) { @@ -1121,6 +1124,9 @@ out: drm_gem_object_unreference(&obj->base); unlock: mutex_unlock(&dev->struct_mutex); +put_rpm: + intel_runtime_pm_put(dev_priv); + return ret; } @@ -5090,7 +5096,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) if (!mutex_is_locked(mutex)) return false; -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES) +#if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES) return mutex->owner == task; #else /* Since UP may be pre-empted, we cannot assume that we own the lock */ diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 5d83773..818ab4e 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -296,6 +296,23 @@ void gen6_enable_rps_interrupts(struct drm_device *dev) spin_unlock_irq(&dev_priv->irq_lock); } +u32 gen6_sanitize_rps_pm_mask(struct drm_i915_private *dev_priv, u32 mask) +{ + /* + * SNB,IVB can while VLV,CHV may hard hang on looping batchbuffer + * if GEN6_PM_UP_EI_EXPIRED is masked. + * + * TODO: verify if this can be reproduced on VLV,CHV. + */ + if (INTEL_INFO(dev_priv)->gen <= 7 && !IS_HASWELL(dev_priv)) + mask &= ~GEN6_PM_RP_UP_EI_EXPIRED; + + if (INTEL_INFO(dev_priv)->gen >= 8) + mask &= ~GEN8_PMINTR_REDIRECT_TO_NON_DISP; + + return mask; +} + void gen6_disable_rps_interrupts(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -308,8 +325,7 @@ void gen6_disable_rps_interrupts(struct drm_device *dev) spin_lock_irq(&dev_priv->irq_lock); - I915_WRITE(GEN6_PMINTRMSK, INTEL_INFO(dev_priv)->gen >= 8 ? - ~GEN8_PMINTR_REDIRECT_TO_NON_DISP : ~0); + I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0)); __gen6_disable_pm_irq(dev_priv, dev_priv->pm_rps_events); I915_WRITE(gen6_pm_ier(dev_priv), I915_READ(gen6_pm_ier(dev_priv)) & @@ -3731,8 +3747,6 @@ static bool i8xx_handle_vblank(struct drm_device *dev, if ((iir & flip_pending) == 0) goto check_page_flip; - intel_prepare_page_flip(dev, plane); - /* We detect FlipDone by looking for the change in PendingFlip from '1' * to '0' on the following vblank, i.e. IIR has the Pendingflip * asserted following the MI_DISPLAY_FLIP, but ISR is deasserted, hence @@ -3742,6 +3756,7 @@ static bool i8xx_handle_vblank(struct drm_device *dev, if (I915_READ16(ISR) & flip_pending) goto check_page_flip; + intel_prepare_page_flip(dev, plane); intel_finish_page_flip(dev, pipe); return true; @@ -3913,8 +3928,6 @@ static bool i915_handle_vblank(struct drm_device *dev, if ((iir & flip_pending) == 0) goto check_page_flip; - intel_prepare_page_flip(dev, plane); - /* We detect FlipDone by looking for the change in PendingFlip from '1' * to '0' on the following vblank, i.e. IIR has the Pendingflip * asserted following the MI_DISPLAY_FLIP, but ISR is deasserted, hence @@ -3924,6 +3937,7 @@ static bool i915_handle_vblank(struct drm_device *dev, if (I915_READ(ISR) & flip_pending) goto check_page_flip; + intel_prepare_page_flip(dev, plane); intel_finish_page_flip(dev, pipe); return true; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index d01db1b..dc266e7 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9703,7 +9703,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, if (obj->tiling_mode != work->old_fb_obj->tiling_mode) /* vlv: DISPLAY_FLIP fails to change tiling */ ring = NULL; - } else if (IS_IVYBRIDGE(dev)) { + } else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) { ring = &dev_priv->ring[BCS]; } else if (INTEL_INFO(dev)->gen >= 7) { ring = i915_gem_request_get_ring(obj->last_read_req); @@ -12956,11 +12956,7 @@ static void i915_disable_vga(struct drm_device *dev) vga_put(dev->pdev, VGA_RSRC_LEGACY_IO); udelay(300); - /* - * Fujitsu-Siemens Lifebook S6010 (830) has problems resuming - * from S3 without preserving (some of?) the other bits. - */ - I915_WRITE(vga_reg, dev_priv->bios_vgacntr | VGA_DISP_DISABLE); + I915_WRITE(vga_reg, VGA_DISP_DISABLE); POSTING_READ(vga_reg); } @@ -13045,8 +13041,6 @@ void intel_modeset_init(struct drm_device *dev) intel_shared_dpll_init(dev); - /* save the BIOS value before clobbering it */ - dev_priv->bios_vgacntr = I915_READ(i915_vgacntrl_reg(dev)); /* Just disable it once at startup */ i915_disable_vga(dev); intel_setup_outputs(dev); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 588b618..bb871f3 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -795,6 +795,7 @@ void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask); void gen6_reset_rps_interrupts(struct drm_device *dev); void gen6_enable_rps_interrupts(struct drm_device *dev); void gen6_disable_rps_interrupts(struct drm_device *dev); +u32 gen6_sanitize_rps_pm_mask(struct drm_i915_private *dev_priv, u32 mask); void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv); void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv); static inline bool intel_irqs_enabled(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index a3ebaa8..7d99a9c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3745,16 +3745,7 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) mask |= dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED); mask &= dev_priv->pm_rps_events; - /* IVB and SNB hard hangs on looping batchbuffer - * if GEN6_PM_UP_EI_EXPIRED is masked. - */ - if (INTEL_INFO(dev_priv->dev)->gen <= 7 && !IS_HASWELL(dev_priv->dev)) - mask |= GEN6_PM_RP_UP_EI_EXPIRED; - - if (IS_GEN8(dev_priv->dev)) - mask |= GEN8_PMINTR_REDIRECT_TO_NON_DISP; - - return ~mask; + return gen6_sanitize_rps_pm_mask(dev_priv, ~mask); } /* gen6_set_rps is called to update the frequency request, but should also be @@ -3823,7 +3814,8 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) return; /* Mask turbo interrupt so that they will not come in between */ - I915_WRITE(GEN6_PMINTRMSK, 0xffffffff); + I915_WRITE(GEN6_PMINTRMSK, + gen6_sanitize_rps_pm_mask(dev_priv, ~0)); vlv_force_gfx_clock(dev_priv, true); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 6aa3a81..39e1b07 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -615,29 +615,6 @@ static void chv_pipe_power_well_disable(struct drm_i915_private *dev_priv, vlv_power_sequencer_reset(dev_priv); } -static void check_power_well_state(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - bool enabled = power_well->ops->is_enabled(dev_priv, power_well); - - if (power_well->always_on || !i915.disable_power_well) { - if (!enabled) - goto mismatch; - - return; - } - - if (enabled != (power_well->count > 0)) - goto mismatch; - - return; - -mismatch: - I915_STATE_WARN(1, "state mismatch for '%s' (always_on %d hw state %d use-count %d disable_power_well %d\n", - power_well->name, power_well->always_on, enabled, - power_well->count, i915.disable_power_well); -} - /** * intel_display_power_get - grab a power domain reference * @dev_priv: i915 device instance @@ -669,8 +646,6 @@ void intel_display_power_get(struct drm_i915_private *dev_priv, power_well->ops->enable(dev_priv, power_well); power_well->hw_enabled = true; } - - check_power_well_state(dev_priv, power_well); } power_domains->domain_use_count[domain]++; @@ -709,8 +684,6 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, power_well->hw_enabled = false; power_well->ops->disable(dev_priv, power_well); } - - check_power_well_state(dev_priv, power_well); } mutex_unlock(&power_domains->lock); diff --git a/drivers/gpu/drm/nouveau/core/core/event.c b/drivers/gpu/drm/nouveau/core/core/event.c index ff2b434..760947e 100644 --- a/drivers/gpu/drm/nouveau/core/core/event.c +++ b/drivers/gpu/drm/nouveau/core/core/event.c @@ -26,7 +26,7 @@ void nvkm_event_put(struct nvkm_event *event, u32 types, int index) { - BUG_ON(!spin_is_locked(&event->refs_lock)); + assert_spin_locked(&event->refs_lock); while (types) { int type = __ffs(types); types &= ~(1 << type); if (--event->refs[index * event->types_nr + type] == 0) { @@ -39,7 +39,7 @@ nvkm_event_put(struct nvkm_event *event, u32 types, int index) void nvkm_event_get(struct nvkm_event *event, u32 types, int index) { - BUG_ON(!spin_is_locked(&event->refs_lock)); + assert_spin_locked(&event->refs_lock); while (types) { int type = __ffs(types); types &= ~(1 << type); if (++event->refs[index * event->types_nr + type] == 1) { diff --git a/drivers/gpu/drm/nouveau/core/core/notify.c b/drivers/gpu/drm/nouveau/core/core/notify.c index d1bcde5..839a325 100644 --- a/drivers/gpu/drm/nouveau/core/core/notify.c +++ b/drivers/gpu/drm/nouveau/core/core/notify.c @@ -98,7 +98,7 @@ nvkm_notify_send(struct nvkm_notify *notify, void *data, u32 size) struct nvkm_event *event = notify->event; unsigned long flags; - BUG_ON(!spin_is_locked(&event->list_lock)); + assert_spin_locked(&event->list_lock); BUG_ON(size != notify->size); spin_lock_irqsave(&event->refs_lock, flags); diff --git a/drivers/gpu/drm/nouveau/core/engine/device/nve0.c b/drivers/gpu/drm/nouveau/core/engine/device/nve0.c index 674da1f..7329226 100644 --- a/drivers/gpu/drm/nouveau/core/engine/device/nve0.c +++ b/drivers/gpu/drm/nouveau/core/engine/device/nve0.c @@ -249,6 +249,39 @@ nve0_identify(struct nouveau_device *device) device->oclass[NVDEV_ENGINE_PPP ] = &nvc0_ppp_oclass; device->oclass[NVDEV_ENGINE_PERFMON] = &nvf0_perfmon_oclass; break; + case 0x106: + device->cname = "GK208B"; + device->oclass[NVDEV_SUBDEV_VBIOS ] = &nouveau_bios_oclass; + device->oclass[NVDEV_SUBDEV_GPIO ] = nve0_gpio_oclass; + device->oclass[NVDEV_SUBDEV_I2C ] = nve0_i2c_oclass; + device->oclass[NVDEV_SUBDEV_FUSE ] = &gf100_fuse_oclass; + device->oclass[NVDEV_SUBDEV_CLOCK ] = &nve0_clock_oclass; + device->oclass[NVDEV_SUBDEV_THERM ] = &nvd0_therm_oclass; + device->oclass[NVDEV_SUBDEV_MXM ] = &nv50_mxm_oclass; + device->oclass[NVDEV_SUBDEV_DEVINIT] = nvc0_devinit_oclass; + device->oclass[NVDEV_SUBDEV_MC ] = gk20a_mc_oclass; + device->oclass[NVDEV_SUBDEV_BUS ] = nvc0_bus_oclass; + device->oclass[NVDEV_SUBDEV_TIMER ] = &nv04_timer_oclass; + device->oclass[NVDEV_SUBDEV_FB ] = nve0_fb_oclass; + device->oclass[NVDEV_SUBDEV_LTC ] = gk104_ltc_oclass; + device->oclass[NVDEV_SUBDEV_IBUS ] = &nve0_ibus_oclass; + device->oclass[NVDEV_SUBDEV_INSTMEM] = nv50_instmem_oclass; + device->oclass[NVDEV_SUBDEV_VM ] = &nvc0_vmmgr_oclass; + device->oclass[NVDEV_SUBDEV_BAR ] = &nvc0_bar_oclass; + device->oclass[NVDEV_SUBDEV_PWR ] = nv108_pwr_oclass; + device->oclass[NVDEV_SUBDEV_VOLT ] = &nv40_volt_oclass; + device->oclass[NVDEV_ENGINE_DMAOBJ ] = nvd0_dmaeng_oclass; + device->oclass[NVDEV_ENGINE_FIFO ] = nv108_fifo_oclass; + device->oclass[NVDEV_ENGINE_SW ] = nvc0_software_oclass; + device->oclass[NVDEV_ENGINE_GR ] = nv108_graph_oclass; + device->oclass[NVDEV_ENGINE_DISP ] = nvf0_disp_oclass; + device->oclass[NVDEV_ENGINE_COPY0 ] = &nve0_copy0_oclass; + device->oclass[NVDEV_ENGINE_COPY1 ] = &nve0_copy1_oclass; + device->oclass[NVDEV_ENGINE_COPY2 ] = &nve0_copy2_oclass; + device->oclass[NVDEV_ENGINE_BSP ] = &nve0_bsp_oclass; + device->oclass[NVDEV_ENGINE_VP ] = &nve0_vp_oclass; + device->oclass[NVDEV_ENGINE_PPP ] = &nvc0_ppp_oclass; + break; case 0x108: device->cname = "GK208"; device->oclass[NVDEV_SUBDEV_VBIOS ] = &nouveau_bios_oclass; diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/shadowramin.c b/drivers/gpu/drm/nouveau/core/subdev/bios/shadowramin.c index 5e58bba..a7a890f 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/bios/shadowramin.c +++ b/drivers/gpu/drm/nouveau/core/subdev/bios/shadowramin.c @@ -44,8 +44,10 @@ static void pramin_fini(void *data) { struct priv *priv = data; - nv_wr32(priv->bios, 0x001700, priv->bar0); - kfree(priv); + if (priv) { + nv_wr32(priv->bios, 0x001700, priv->bar0); + kfree(priv); + } } static void * diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvaa.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvaa.c index 00f2ca7..033a8e9 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvaa.c +++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvaa.c @@ -24,34 +24,71 @@ #include "nv50.h" +struct nvaa_ram_priv { + struct nouveau_ram base; + u64 poller_base; +}; + static int nvaa_ram_ctor(struct nouveau_object *parent, struct nouveau_object *engine, struct nouveau_oclass *oclass, void *data, u32 datasize, struct nouveau_object **pobject) { - const u32 rsvd_head = ( 256 * 1024) >> 12; /* vga memory */ - const u32 rsvd_tail = (1024 * 1024) >> 12; /* vbios etc */ + u32 rsvd_head = ( 256 * 1024); /* vga memory */ + u32 rsvd_tail = (1024 * 1024); /* vbios etc */ struct nouveau_fb *pfb = nouveau_fb(parent); - struct nouveau_ram *ram; + struct nvaa_ram_priv *priv; int ret; - ret = nouveau_ram_create(parent, engine, oclass, &ram); - *pobject = nv_object(ram); + ret = nouveau_ram_create(parent, engine, oclass, &priv); + *pobject = nv_object(priv); if (ret) return ret; - ram->size = nv_rd32(pfb, 0x10020c); - ram->size = (ram->size & 0xffffff00) | ((ram->size & 0x000000ff) << 32); + priv->base.type = NV_MEM_TYPE_STOLEN; + priv->base.stolen = (u64)nv_rd32(pfb, 0x100e10) << 12; + priv->base.size = (u64)nv_rd32(pfb, 0x100e14) << 12; - ret = nouveau_mm_init(&pfb->vram, rsvd_head, (ram->size >> 12) - - (rsvd_head + rsvd_tail), 1); + rsvd_tail += 0x1000; + priv->poller_base = priv->base.size - rsvd_tail; + + ret = nouveau_mm_init(&pfb->vram, rsvd_head >> 12, + (priv->base.size - (rsvd_head + rsvd_tail)) >> 12, + 1); if (ret) return ret; - ram->type = NV_MEM_TYPE_STOLEN; - ram->stolen = (u64)nv_rd32(pfb, 0x100e10) << 12; - ram->get = nv50_ram_get; - ram->put = nv50_ram_put; + priv->base.get = nv50_ram_get; + priv->base.put = nv50_ram_put; + return 0; +} + +static int +nvaa_ram_init(struct nouveau_object *object) +{ + struct nouveau_fb *pfb = nouveau_fb(object); + struct nvaa_ram_priv *priv = (void *)object; + int ret; + u64 dniso, hostnb, flush; + + ret = nouveau_ram_init(&priv->base); + if (ret) + return ret; + + dniso = ((priv->base.size - (priv->poller_base + 0x00)) >> 5) - 1; + hostnb = ((priv->base.size - (priv->poller_base + 0x20)) >> 5) - 1; + flush = ((priv->base.size - (priv->poller_base + 0x40)) >> 5) - 1; + + /* Enable NISO poller for various clients and set their associated + * read address, only for MCP77/78 and MCP79/7A. (fd#25701) + */ + nv_wr32(pfb, 0x100c18, dniso); + nv_mask(pfb, 0x100c14, 0x00000000, 0x00000001); + nv_wr32(pfb, 0x100c1c, hostnb); + nv_mask(pfb, 0x100c14, 0x00000000, 0x00000002); + nv_wr32(pfb, 0x100c24, flush); + nv_mask(pfb, 0x100c14, 0x00000000, 0x00010000); + return 0; } @@ -60,7 +97,7 @@ nvaa_ram_oclass = { .ofuncs = &(struct nouveau_ofuncs) { .ctor = nvaa_ram_ctor, .dtor = _nouveau_ram_dtor, - .init = _nouveau_ram_init, + .init = nvaa_ram_init, .fini = _nouveau_ram_fini, }, }; diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nv4c.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nv4c.c index a75c35c..165401c 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/mc/nv4c.c +++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nv4c.c @@ -24,13 +24,6 @@ #include "nv04.h" -static void -nv4c_mc_msi_rearm(struct nouveau_mc *pmc) -{ - struct nv04_mc_priv *priv = (void *)pmc; - nv_wr08(priv, 0x088050, 0xff); -} - struct nouveau_oclass * nv4c_mc_oclass = &(struct nouveau_mc_oclass) { .base.handle = NV_SUBDEV(MC, 0x4c), @@ -41,5 +34,4 @@ nv4c_mc_oclass = &(struct nouveau_mc_oclass) { .fini = _nouveau_mc_fini, }, .intr = nv04_mc_intr, - .msi_rearm = nv4c_mc_msi_rearm, }.base; diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 21ec561..bba2960 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1572,8 +1572,10 @@ nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm) * so use the DMA API for them. */ if (!nv_device_is_cpu_coherent(device) && - ttm->caching_state == tt_uncached) + ttm->caching_state == tt_uncached) { ttm_dma_unpopulate(ttm_dma, dev->dev); + return; + } #if __OS_HAS_AGP if (drm->agp.stat == ENABLED) { diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 42c34ba..bf0f9e21d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -36,7 +36,14 @@ void nouveau_gem_object_del(struct drm_gem_object *gem) { struct nouveau_bo *nvbo = nouveau_gem_object(gem); + struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); struct ttm_buffer_object *bo = &nvbo->bo; + struct device *dev = drm->dev->dev; + int ret; + + ret = pm_runtime_get_sync(dev); + if (WARN_ON(ret < 0 && ret != -EACCES)) + return; if (gem->import_attach) drm_prime_gem_destroy(gem, nvbo->bo.sg); @@ -46,6 +53,9 @@ nouveau_gem_object_del(struct drm_gem_object *gem) /* reset filp so nouveau_bo_del_ttm() can test for it */ gem->filp = NULL; ttm_bo_unref(&bo); + + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); } int @@ -53,7 +63,9 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct drm_file *file_priv) { struct nouveau_cli *cli = nouveau_cli(file_priv); struct nouveau_bo *nvbo = nouveau_gem_object(gem); + struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); struct nouveau_vma *vma; + struct device *dev = drm->dev->dev; int ret; if (!cli->vm) @@ -71,11 +83,16 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct drm_file *file_priv) goto out; } + ret = pm_runtime_get_sync(dev); + if (ret < 0 && ret != -EACCES) + goto out; + ret = nouveau_bo_vma_add(nvbo, cli->vm, vma); - if (ret) { + if (ret) kfree(vma); - goto out; - } + + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); } else { vma->refcount++; } @@ -129,6 +146,8 @@ nouveau_gem_object_close(struct drm_gem_object *gem, struct drm_file *file_priv) { struct nouveau_cli *cli = nouveau_cli(file_priv); struct nouveau_bo *nvbo = nouveau_gem_object(gem); + struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); + struct device *dev = drm->dev->dev; struct nouveau_vma *vma; int ret; @@ -141,8 +160,14 @@ nouveau_gem_object_close(struct drm_gem_object *gem, struct drm_file *file_priv) vma = nouveau_bo_vma_find(nvbo, cli->vm); if (vma) { - if (--vma->refcount == 0) - nouveau_gem_object_unmap(nvbo, vma); + if (--vma->refcount == 0) { + ret = pm_runtime_get_sync(dev); + if (!WARN_ON(ret < 0 && ret != -EACCES)) { + nouveau_gem_object_unmap(nvbo, vma); + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + } + } } ttm_bo_unreserve(&nvbo->bo); } diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index d59ec49..ed644a4 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -1851,10 +1851,9 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc) return pll; } /* otherwise, pick one of the plls */ - if ((rdev->family == CHIP_KAVERI) || - (rdev->family == CHIP_KABINI) || + if ((rdev->family == CHIP_KABINI) || (rdev->family == CHIP_MULLINS)) { - /* KB/KV/ML has PPLL1 and PPLL2 */ + /* KB/ML has PPLL1 and PPLL2 */ pll_in_use = radeon_get_pll_use_mask(crtc); if (!(pll_in_use & (1 << ATOM_PPLL2))) return ATOM_PPLL2; @@ -1863,7 +1862,7 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc) DRM_ERROR("unable to allocate a PPLL\n"); return ATOM_PPLL_INVALID; } else { - /* CI has PPLL0, PPLL1, and PPLL2 */ + /* CI/KV has PPLL0, PPLL1, and PPLL2 */ pll_in_use = radeon_get_pll_use_mask(crtc); if (!(pll_in_use & (1 << ATOM_PPLL2))) return ATOM_PPLL2; @@ -2155,6 +2154,7 @@ static void atombios_crtc_disable(struct drm_crtc *crtc) case ATOM_PPLL0: /* disable the ppll */ if ((rdev->family == CHIP_ARUBA) || + (rdev->family == CHIP_KAVERI) || (rdev->family == CHIP_BONAIRE) || (rdev->family == CHIP_HAWAII)) atombios_crtc_program_pll(crtc, radeon_crtc->crtc_id, radeon_crtc->pll_id, diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 11ba9d2..db42a67 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -492,6 +492,10 @@ int radeon_dp_mode_valid_helper(struct drm_connector *connector, struct radeon_connector_atom_dig *dig_connector; int dp_clock; + if ((mode->clock > 340000) && + (!radeon_connector_is_dp12_capable(connector))) + return MODE_CLOCK_HIGH; + if (!radeon_connector->con_priv) return MODE_CLOCK_HIGH; dig_connector = radeon_connector->con_priv; diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 14d173e..ed336fb 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -6057,6 +6057,17 @@ void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, radeon_ring_write(ring, 0); radeon_ring_write(ring, 1 << vm_id); + /* wait for the invalidate to complete */ + radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */ + WAIT_REG_MEM_FUNCTION(0) | /* always */ + WAIT_REG_MEM_ENGINE(0))); /* me */ + radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); /* ref */ + radeon_ring_write(ring, 0); /* mask */ + radeon_ring_write(ring, 0x20); /* poll interval */ + /* compute doesn't have PFP */ if (usepfp) { /* sync PFP to ME, otherwise we might get invalid PFP reads */ diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index 1f4ded1..479519c 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -932,6 +932,9 @@ void cik_sdma_vm_pad_ib(struct radeon_ib *ib) void cik_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, unsigned vm_id, uint64_t pd_addr) { + u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) | + SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */ + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); if (vm_id < 8) { radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2); @@ -972,5 +975,12 @@ void cik_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); radeon_ring_write(ring, 1 << vm_id); + + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits)); + radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); /* reference */ + radeon_ring_write(ring, 0); /* mask */ + radeon_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */ } diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index ba85986..03003f8 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -2156,4 +2156,6 @@ #define ATC_VM_APERTURE1_HIGH_ADDR 0x330Cu #define ATC_VM_APERTURE1_LOW_ADDR 0x3304u +#define IH_VMID_0_LUT 0x3D40u + #endif diff --git a/drivers/gpu/drm/radeon/dce3_1_afmt.c b/drivers/gpu/drm/radeon/dce3_1_afmt.c index 2fe8cfc..bafdf92 100644 --- a/drivers/gpu/drm/radeon/dce3_1_afmt.c +++ b/drivers/gpu/drm/radeon/dce3_1_afmt.c @@ -103,7 +103,7 @@ static void dce3_2_afmt_write_sad_regs(struct drm_encoder *encoder) } sad_count = drm_edid_to_sad(radeon_connector->edid, &sads); - if (sad_count < 0) { + if (sad_count <= 0) { DRM_ERROR("Couldn't read SADs: %d\n", sad_count); return; } diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c index 9b42001..e3e9c10 100644 --- a/drivers/gpu/drm/radeon/kv_dpm.c +++ b/drivers/gpu/drm/radeon/kv_dpm.c @@ -2745,13 +2745,11 @@ int kv_dpm_init(struct radeon_device *rdev) pi->enable_auto_thermal_throttling = true; pi->disable_nb_ps3_in_battery = false; if (radeon_bapm == -1) { - /* There are stability issues reported on with - * bapm enabled on an asrock system. - */ - if (rdev->pdev->subsystem_vendor == 0x1849) - pi->bapm_enable = false; - else + /* only enable bapm on KB, ML by default */ + if (rdev->family == CHIP_KABINI || rdev->family == CHIP_MULLINS) pi->bapm_enable = true; + else + pi->bapm_enable = false; } else if (radeon_bapm == 0) { pi->bapm_enable = false; } else { diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 360de9f..aea48c8 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -2516,6 +2516,16 @@ void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0)); radeon_ring_write(ring, 1 << vm_id); + /* wait for the invalidate to complete */ + radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) | /* always */ + WAIT_REG_MEM_ENGINE(0))); /* me */ + radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); /* ref */ + radeon_ring_write(ring, 0); /* mask */ + radeon_ring_write(ring, 0x20); /* poll interval */ + /* sync PFP to ME, otherwise we might get invalid PFP reads */ radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); radeon_ring_write(ring, 0x0); diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c index 50f8861..4be2bb7 100644 --- a/drivers/gpu/drm/radeon/ni_dma.c +++ b/drivers/gpu/drm/radeon/ni_dma.c @@ -463,5 +463,11 @@ void cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2)); radeon_ring_write(ring, 1 << vm_id); + + /* wait for invalidate to complete */ + radeon_ring_write(ring, DMA_SRBM_READ_PACKET); + radeon_ring_write(ring, (0xff << 20) | (VM_INVALIDATE_REQUEST >> 2)); + radeon_ring_write(ring, 0); /* mask */ + radeon_ring_write(ring, 0); /* value */ } diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h index 2e12e4d..ad71254 100644 --- a/drivers/gpu/drm/radeon/nid.h +++ b/drivers/gpu/drm/radeon/nid.h @@ -1133,6 +1133,23 @@ #define PACKET3_MEM_SEMAPHORE 0x39 #define PACKET3_MPEG_INDEX 0x3A #define PACKET3_WAIT_REG_MEM 0x3C +#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0) + /* 0 - always + * 1 - < + * 2 - <= + * 3 - == + * 4 - != + * 5 - >= + * 6 - > + */ +#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4) + /* 0 - reg + * 1 - mem + */ +#define WAIT_REG_MEM_ENGINE(x) ((x) << 8) + /* 0 - me + * 1 - pfp + */ #define PACKET3_MEM_WRITE 0x3D #define PACKET3_PFP_SYNC_ME 0x42 #define PACKET3_SURFACE_SYNC 0x43 @@ -1272,6 +1289,13 @@ (1 << 21) | \ (((n) & 0xFFFFF) << 0)) +#define DMA_SRBM_POLL_PACKET ((9 << 28) | \ + (1 << 27) | \ + (1 << 26)) + +#define DMA_SRBM_READ_PACKET ((9 << 28) | \ + (1 << 27)) + /* async DMA Packet types */ #define DMA_PACKET_WRITE 0x2 #define DMA_PACKET_COPY 0x3 diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 850de57..121aff6 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -333,6 +333,20 @@ static struct radeon_asic_ring r300_gfx_ring = { .set_wptr = &r100_gfx_set_wptr, }; +static struct radeon_asic_ring rv515_gfx_ring = { + .ib_execute = &r100_ring_ib_execute, + .emit_fence = &r300_fence_ring_emit, + .emit_semaphore = &r100_semaphore_ring_emit, + .cs_parse = &r300_cs_parse, + .ring_start = &rv515_ring_start, + .ring_test = &r100_ring_test, + .ib_test = &r100_ib_test, + .is_lockup = &r100_gpu_is_lockup, + .get_rptr = &r100_gfx_get_rptr, + .get_wptr = &r100_gfx_get_wptr, + .set_wptr = &r100_gfx_set_wptr, +}; + static struct radeon_asic r300_asic = { .init = &r300_init, .fini = &r300_fini, @@ -748,7 +762,7 @@ static struct radeon_asic rv515_asic = { .set_page = &rv370_pcie_gart_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = &r300_gfx_ring + [RADEON_RING_TYPE_GFX_INDEX] = &rv515_gfx_ring }, .irq = { .set = &rs600_irq_set, @@ -814,7 +828,7 @@ static struct radeon_asic r520_asic = { .set_page = &rv370_pcie_gart_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = &r300_gfx_ring + [RADEON_RING_TYPE_GFX_INDEX] = &rv515_gfx_ring }, .irq = { .set = &rs600_irq_set, diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index a46f737..d0b4f7d 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -576,7 +576,7 @@ error_unreserve: error_free: drm_free_large(vm_bos); - if (r) + if (r && r != -ERESTARTSYS) DRM_ERROR("Couldn't update BO_VA (%d)\n", r); } diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c index ddbabab..7b27420 100644 --- a/drivers/gpu/drm/radeon/radeon_kfd.c +++ b/drivers/gpu/drm/radeon/radeon_kfd.c @@ -70,7 +70,7 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr); static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); -static bool kgd_hqd_is_occupies(struct kgd_dev *kgd, uint64_t queue_address, +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, @@ -91,7 +91,7 @@ static const struct kfd2kgd_calls kfd2kgd = { .init_pipeline = kgd_init_pipeline, .hqd_load = kgd_hqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, - .hqd_is_occupies = kgd_hqd_is_occupies, + .hqd_is_occupied = kgd_hqd_is_occupied, .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, .hqd_destroy = kgd_hqd_destroy, .hqd_sdma_destroy = kgd_hqd_sdma_destroy, @@ -102,6 +102,7 @@ static const struct kgd2kfd_calls *kgd2kfd; bool radeon_kfd_init(void) { +#if defined(CONFIG_HSA_AMD_MODULE) bool (*kgd2kfd_init_p)(unsigned, const struct kfd2kgd_calls*, const struct kgd2kfd_calls**); @@ -118,6 +119,17 @@ bool radeon_kfd_init(void) } return true; +#elif defined(CONFIG_HSA_AMD) + if (!kgd2kfd_init(KFD_INTERFACE_VERSION, &kfd2kgd, &kgd2kfd)) { + kgd2kfd = NULL; + + return false; + } + + return true; +#else + return false; +#endif } void radeon_kfd_fini(void) @@ -370,6 +382,10 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, cpu_relax(); write_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid); + /* Mapping vmid to pasid also for IH block */ + write_register(kgd, IH_VMID_0_LUT + vmid * sizeof(uint32_t), + pasid_mapping); + return 0; } @@ -529,7 +545,7 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) return 0; } -static bool kgd_hqd_is_occupies(struct kgd_dev *kgd, uint64_t queue_address, +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id) { uint32_t act; @@ -586,6 +602,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, if (timeout == 0) { pr_err("kfd: cp queue preemption time out (%dms)\n", temp); + release_queue(kgd); return -ETIME; } msleep(20); diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 32522cc..f7da8fe 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -1287,8 +1287,39 @@ dpm_failed: return ret; } +struct radeon_dpm_quirk { + u32 chip_vendor; + u32 chip_device; + u32 subsys_vendor; + u32 subsys_device; +}; + +/* cards with dpm stability problems */ +static struct radeon_dpm_quirk radeon_dpm_quirk_list[] = { + /* TURKS - https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1386534 */ + { PCI_VENDOR_ID_ATI, 0x6759, 0x1682, 0x3195 }, + /* TURKS - https://bugzilla.kernel.org/show_bug.cgi?id=83731 */ + { PCI_VENDOR_ID_ATI, 0x6840, 0x1179, 0xfb81 }, + { 0, 0, 0, 0 }, +}; + int radeon_pm_init(struct radeon_device *rdev) { + struct radeon_dpm_quirk *p = radeon_dpm_quirk_list; + bool disable_dpm = false; + + /* Apply dpm quirks */ + while (p && p->chip_device != 0) { + if (rdev->pdev->vendor == p->chip_vendor && + rdev->pdev->device == p->chip_device && + rdev->pdev->subsystem_vendor == p->subsys_vendor && + rdev->pdev->subsystem_device == p->subsys_device) { + disable_dpm = true; + break; + } + ++p; + } + /* enable dpm on rv6xx+ */ switch (rdev->family) { case CHIP_RV610: @@ -1344,6 +1375,8 @@ int radeon_pm_init(struct radeon_device *rdev) (!(rdev->flags & RADEON_IS_IGP)) && (!rdev->smc_fw)) rdev->pm.pm_method = PM_METHOD_PROFILE; + else if (disable_dpm && (radeon_dpm == -1)) + rdev->pm.pm_method = PM_METHOD_PROFILE; else if (radeon_dpm == 0) rdev->pm.pm_method = PM_METHOD_PROFILE; else diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c index 535403e..15aee72 100644 --- a/drivers/gpu/drm/radeon/radeon_state.c +++ b/drivers/gpu/drm/radeon/radeon_state.c @@ -1703,7 +1703,7 @@ static int radeon_cp_dispatch_texture(struct drm_device * dev, u32 format; u32 *buffer; const u8 __user *data; - int size, dwords, tex_width, blit_width, spitch; + unsigned int size, dwords, tex_width, blit_width, spitch; u32 height; int i; u32 texpitch, microtile; diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 60df444..5d89b87 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -5057,6 +5057,16 @@ void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, radeon_ring_write(ring, 0); radeon_ring_write(ring, 1 << vm_id); + /* wait for the invalidate to complete */ + radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) | /* always */ + WAIT_REG_MEM_ENGINE(0))); /* me */ + radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); /* ref */ + radeon_ring_write(ring, 0); /* mask */ + radeon_ring_write(ring, 0x20); /* poll interval */ + /* sync PFP to ME, otherwise we might get invalid PFP reads */ radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); radeon_ring_write(ring, 0x0); diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c index f5cc777..aa7b872 100644 --- a/drivers/gpu/drm/radeon/si_dma.c +++ b/drivers/gpu/drm/radeon/si_dma.c @@ -206,6 +206,14 @@ void si_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2)); radeon_ring_write(ring, 1 << vm_id); + + /* wait for invalidate to complete */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0)); + radeon_ring_write(ring, VM_INVALIDATE_REQUEST); + radeon_ring_write(ring, 0xff << 16); /* retry */ + radeon_ring_write(ring, 1 << vm_id); /* mask */ + radeon_ring_write(ring, 0); /* value */ + radeon_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */ } /** diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 32e354b..eff8a64 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2908,6 +2908,22 @@ static int si_init_smc_spll_table(struct radeon_device *rdev) return ret; } +struct si_dpm_quirk { + u32 chip_vendor; + u32 chip_device; + u32 subsys_vendor; + u32 subsys_device; + u32 max_sclk; + u32 max_mclk; +}; + +/* cards with dpm stability problems */ +static struct si_dpm_quirk si_dpm_quirk_list[] = { + /* PITCAIRN - https://bugs.freedesktop.org/show_bug.cgi?id=76490 */ + { PCI_VENDOR_ID_ATI, 0x6810, 0x1462, 0x3036, 0, 120000 }, + { 0, 0, 0, 0 }, +}; + static void si_apply_state_adjust_rules(struct radeon_device *rdev, struct radeon_ps *rps) { @@ -2918,7 +2934,22 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, u32 mclk, sclk; u16 vddc, vddci; u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc; + u32 max_sclk = 0, max_mclk = 0; int i; + struct si_dpm_quirk *p = si_dpm_quirk_list; + + /* Apply dpm quirks */ + while (p && p->chip_device != 0) { + if (rdev->pdev->vendor == p->chip_vendor && + rdev->pdev->device == p->chip_device && + rdev->pdev->subsystem_vendor == p->subsys_vendor && + rdev->pdev->subsystem_device == p->subsys_device) { + max_sclk = p->max_sclk; + max_mclk = p->max_mclk; + break; + } + ++p; + } if ((rdev->pm.dpm.new_active_crtc_count > 1) || ni_dpm_vblank_too_short(rdev)) @@ -2972,6 +3003,14 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, if (ps->performance_levels[i].mclk > max_mclk_vddc) ps->performance_levels[i].mclk = max_mclk_vddc; } + if (max_mclk) { + if (ps->performance_levels[i].mclk > max_mclk) + ps->performance_levels[i].mclk = max_mclk; + } + if (max_sclk) { + if (ps->performance_levels[i].sclk > max_sclk) + ps->performance_levels[i].sclk = max_sclk; + } } /* XXX validate the min clocks required for display */ diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index 4069be89..8499924 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -1632,6 +1632,23 @@ #define PACKET3_MPEG_INDEX 0x3A #define PACKET3_COPY_DW 0x3B #define PACKET3_WAIT_REG_MEM 0x3C +#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0) + /* 0 - always + * 1 - < + * 2 - <= + * 3 - == + * 4 - != + * 5 - >= + * 6 - > + */ +#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4) + /* 0 - reg + * 1 - mem + */ +#define WAIT_REG_MEM_ENGINE(x) ((x) << 8) + /* 0 - me + * 1 - pfp + */ #define PACKET3_MEM_WRITE 0x3D #define PACKET3_COPY_DATA 0x40 #define PACKET3_CP_DMA 0x41 @@ -1835,6 +1852,7 @@ #define DMA_PACKET_TRAP 0x7 #define DMA_PACKET_SRBM_WRITE 0x9 #define DMA_PACKET_CONSTANT_FILL 0xd +#define DMA_PACKET_POLL_REG_MEM 0xe #define DMA_PACKET_NOP 0xf #define VCE_STATUS 0x20004 |