summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915')
-rw-r--r--drivers/gpu/drm/i915/gvt/cmd_parser.c8
-rw-r--r--drivers/gpu/drm/i915/gvt/kvmgt.c51
-rw-r--r--drivers/gpu/drm/i915/gvt/mmio_context.c3
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.c71
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.h5
-rw-r--r--drivers/gpu/drm/i915/gvt/trace.h2
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c16
-rw-r--r--drivers/gpu/drm/i915/i915_sysfs.c10
-rw-r--r--drivers/gpu/drm/i915/intel_ddi.c7
-rw-r--r--drivers/gpu/drm/i915/intel_hangcheck.c4
10 files changed, 157 insertions, 20 deletions
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index c8454ac..db6b94d 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -471,6 +471,7 @@ struct parser_exec_state {
* used when ret from 2nd level batch buffer
*/
int saved_buf_addr_type;
+ bool is_ctx_wa;
struct cmd_info *info;
@@ -1715,6 +1716,11 @@ static int perform_bb_shadow(struct parser_exec_state *s)
bb->accessing = true;
bb->bb_start_cmd_va = s->ip_va;
+ if ((s->buf_type == BATCH_BUFFER_INSTRUCTION) && (!s->is_ctx_wa))
+ bb->bb_offset = s->ip_va - s->rb_va;
+ else
+ bb->bb_offset = 0;
+
/*
* ip_va saves the virtual address of the shadow batch buffer, while
* ip_gma saves the graphics address of the original batch buffer.
@@ -2571,6 +2577,7 @@ static int scan_workload(struct intel_vgpu_workload *workload)
s.ring_tail = gma_tail;
s.rb_va = workload->shadow_ring_buffer_va;
s.workload = workload;
+ s.is_ctx_wa = false;
if ((bypass_scan_mask & (1 << workload->ring_id)) ||
gma_head == gma_tail)
@@ -2624,6 +2631,7 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
s.ring_tail = gma_tail;
s.rb_va = wa_ctx->indirect_ctx.shadow_va;
s.workload = workload;
+ s.is_ctx_wa = true;
if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) {
ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 520fe3d..c16a492 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -750,6 +750,25 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
return ret == 0 ? count : ret;
}
+static bool gtt_entry(struct mdev_device *mdev, loff_t *ppos)
+{
+ struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
+ unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+ struct intel_gvt *gvt = vgpu->gvt;
+ int offset;
+
+ /* Only allow MMIO GGTT entry access */
+ if (index != PCI_BASE_ADDRESS_0)
+ return false;
+
+ offset = (u64)(*ppos & VFIO_PCI_OFFSET_MASK) -
+ intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_0);
+
+ return (offset >= gvt->device_info.gtt_start_offset &&
+ offset < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt)) ?
+ true : false;
+}
+
static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
size_t count, loff_t *ppos)
{
@@ -759,7 +778,21 @@ static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
while (count) {
size_t filled;
- if (count >= 4 && !(*ppos % 4)) {
+ /* Only support GGTT entry 8 bytes read */
+ if (count >= 8 && !(*ppos % 8) &&
+ gtt_entry(mdev, ppos)) {
+ u64 val;
+
+ ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
+ ppos, false);
+ if (ret <= 0)
+ goto read_err;
+
+ if (copy_to_user(buf, &val, sizeof(val)))
+ goto read_err;
+
+ filled = 8;
+ } else if (count >= 4 && !(*ppos % 4)) {
u32 val;
ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
@@ -819,7 +852,21 @@ static ssize_t intel_vgpu_write(struct mdev_device *mdev,
while (count) {
size_t filled;
- if (count >= 4 && !(*ppos % 4)) {
+ /* Only support GGTT entry 8 bytes write */
+ if (count >= 8 && !(*ppos % 8) &&
+ gtt_entry(mdev, ppos)) {
+ u64 val;
+
+ if (copy_from_user(&val, buf, sizeof(val)))
+ goto write_err;
+
+ ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
+ ppos, true);
+ if (ret <= 0)
+ goto write_err;
+
+ filled = 8;
+ } else if (count >= 4 && !(*ppos % 4)) {
u32 val;
if (copy_from_user(&val, buf, sizeof(val)))
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index 74a9c7b..a5bac83 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -120,6 +120,7 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = {
{RCS, HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */
{RCS, GEN9_HALF_SLICE_CHICKEN5, 0xffff, true}, /* 0xe188 */
{RCS, GEN9_HALF_SLICE_CHICKEN7, 0xffff, true}, /* 0xe194 */
+ {RCS, GEN8_ROW_CHICKEN, 0xffff, true}, /* 0xe4f0 */
{RCS, TRVATTL3PTRDW(0), 0, false}, /* 0x4de0 */
{RCS, TRVATTL3PTRDW(1), 0, false}, /* 0x4de4 */
{RCS, TRNULLDETCT, 0, false}, /* 0x4de8 */
@@ -557,9 +558,11 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre,
* performace for batch mmio read/write, so we need
* handle forcewake mannually.
*/
+ intel_runtime_pm_get(dev_priv);
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
switch_mmio(pre, next, ring_id);
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+ intel_runtime_pm_put(dev_priv);
}
/**
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index a55b497..638abe8 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -75,6 +75,54 @@ static void update_shadow_pdps(struct intel_vgpu_workload *workload)
kunmap(page);
}
+/*
+ * when populating shadow ctx from guest, we should not overrride oa related
+ * registers, so that they will not be overlapped by guest oa configs. Thus
+ * made it possible to capture oa data from host for both host and guests.
+ */
+static void sr_oa_regs(struct intel_vgpu_workload *workload,
+ u32 *reg_state, bool save)
+{
+ struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
+ u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset;
+ u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
+ int i = 0;
+ u32 flex_mmio[] = {
+ i915_mmio_reg_offset(EU_PERF_CNTL0),
+ i915_mmio_reg_offset(EU_PERF_CNTL1),
+ i915_mmio_reg_offset(EU_PERF_CNTL2),
+ i915_mmio_reg_offset(EU_PERF_CNTL3),
+ i915_mmio_reg_offset(EU_PERF_CNTL4),
+ i915_mmio_reg_offset(EU_PERF_CNTL5),
+ i915_mmio_reg_offset(EU_PERF_CNTL6),
+ };
+
+ if (!workload || !reg_state || workload->ring_id != RCS)
+ return;
+
+ if (save) {
+ workload->oactxctrl = reg_state[ctx_oactxctrl + 1];
+
+ for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
+ u32 state_offset = ctx_flexeu0 + i * 2;
+
+ workload->flex_mmio[i] = reg_state[state_offset + 1];
+ }
+ } else {
+ reg_state[ctx_oactxctrl] =
+ i915_mmio_reg_offset(GEN8_OACTXCONTROL);
+ reg_state[ctx_oactxctrl + 1] = workload->oactxctrl;
+
+ for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
+ u32 state_offset = ctx_flexeu0 + i * 2;
+ u32 mmio = flex_mmio[i];
+
+ reg_state[state_offset] = mmio;
+ reg_state[state_offset + 1] = workload->flex_mmio[i];
+ }
+ }
+}
+
static int populate_shadow_context(struct intel_vgpu_workload *workload)
{
struct intel_vgpu *vgpu = workload->vgpu;
@@ -121,6 +169,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
shadow_ring_context = kmap(page);
+ sr_oa_regs(workload, (u32 *)shadow_ring_context, true);
#define COPY_REG(name) \
intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \
+ RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4)
@@ -149,6 +198,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
sizeof(*shadow_ring_context),
I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
+ sr_oa_regs(workload, (u32 *)shadow_ring_context, false);
kunmap(page);
return 0;
}
@@ -408,6 +458,17 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
goto err;
}
+ /* For privilge batch buffer and not wa_ctx, the bb_start_cmd_va
+ * is only updated into ring_scan_buffer, not real ring address
+ * allocated in later copy_workload_to_ring_buffer. pls be noted
+ * shadow_ring_buffer_va is now pointed to real ring buffer va
+ * in copy_workload_to_ring_buffer.
+ */
+
+ if (bb->bb_offset)
+ bb->bb_start_cmd_va = workload->shadow_ring_buffer_va
+ + bb->bb_offset;
+
/* relocate shadow batch buffer */
bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma);
if (gmadr_bytes == 8)
@@ -1078,10 +1139,12 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES);
- s->workloads = kmem_cache_create("gvt-g_vgpu_workload",
- sizeof(struct intel_vgpu_workload), 0,
- SLAB_HWCACHE_ALIGN,
- NULL);
+ s->workloads = kmem_cache_create_usercopy("gvt-g_vgpu_workload",
+ sizeof(struct intel_vgpu_workload), 0,
+ SLAB_HWCACHE_ALIGN,
+ offsetof(struct intel_vgpu_workload, rb_tail),
+ sizeof_field(struct intel_vgpu_workload, rb_tail),
+ NULL);
if (!s->workloads) {
ret = -ENOMEM;
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h
index bab4097..486ed57 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.h
+++ b/drivers/gpu/drm/i915/gvt/scheduler.h
@@ -110,6 +110,10 @@ struct intel_vgpu_workload {
/* shadow batch buffer */
struct list_head shadow_bb;
struct intel_shadow_wa_ctx wa_ctx;
+
+ /* oa registers */
+ u32 oactxctrl;
+ u32 flex_mmio[7];
};
struct intel_vgpu_shadow_bb {
@@ -120,6 +124,7 @@ struct intel_vgpu_shadow_bb {
u32 *bb_start_cmd_va;
unsigned int clflush;
bool accessing;
+ unsigned long bb_offset;
};
#define workload_q_head(vgpu, ring_id) \
diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h
index fc7831a..82093f1 100644
--- a/drivers/gpu/drm/i915/gvt/trace.h
+++ b/drivers/gpu/drm/i915/gvt/trace.h
@@ -333,7 +333,7 @@ TRACE_EVENT(render_mmio,
TP_PROTO(int old_id, int new_id, char *action, unsigned int reg,
unsigned int old_val, unsigned int new_val),
- TP_ARGS(old_id, new_id, action, reg, new_val, old_val),
+ TP_ARGS(old_id, new_id, action, reg, old_val, new_val),
TP_STRUCT__entry(
__field(int, old_id)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0359d6f..7b5a9d7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -433,20 +433,28 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
dma_fence_put(shared[i]);
kfree(shared);
+ /*
+ * If both shared fences and an exclusive fence exist,
+ * then by construction the shared fences must be later
+ * than the exclusive fence. If we successfully wait for
+ * all the shared fences, we know that the exclusive fence
+ * must all be signaled. If all the shared fences are
+ * signaled, we can prune the array and recover the
+ * floating references on the fences/requests.
+ */
prune_fences = count && timeout >= 0;
} else {
excl = reservation_object_get_excl_rcu(resv);
}
- if (excl && timeout >= 0) {
+ if (excl && timeout >= 0)
timeout = i915_gem_object_wait_fence(excl, flags, timeout,
rps_client);
- prune_fences = timeout >= 0;
- }
dma_fence_put(excl);
- /* Oportunistically prune the fences iff we know they have *all* been
+ /*
+ * Opportunistically prune the fences iff we know they have *all* been
* signaled and that the reservation object has not been changed (i.e.
* no new fences have been added).
*/
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index b33d215..e5e6f6b 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -304,8 +304,9 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
{
struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
struct intel_rps *rps = &dev_priv->gt_pm.rps;
- u32 val;
+ bool boost = false;
ssize_t ret;
+ u32 val;
ret = kstrtou32(buf, 0, &val);
if (ret)
@@ -317,8 +318,13 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
return -EINVAL;
mutex_lock(&dev_priv->pcu_lock);
- rps->boost_freq = val;
+ if (val != rps->boost_freq) {
+ rps->boost_freq = val;
+ boost = atomic_read(&rps->num_waiters);
+ }
mutex_unlock(&dev_priv->pcu_lock);
+ if (boost)
+ schedule_work(&rps->work);
return count;
}
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index dbcf1a0..8c2d778 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -2205,8 +2205,7 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder,
intel_prepare_dp_ddi_buffers(encoder, crtc_state);
intel_ddi_init_dp_buf_reg(encoder);
- if (!is_mst)
- intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
+ intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
intel_dp_start_link_train(intel_dp);
if (port != PORT_A || INTEL_GEN(dev_priv) >= 9)
intel_dp_stop_link_train(intel_dp);
@@ -2304,14 +2303,12 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder,
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
struct intel_dp *intel_dp = &dig_port->dp;
- bool is_mst = intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST);
/*
* Power down sink before disabling the port, otherwise we end
* up getting interrupts from the sink on detecting link loss.
*/
- if (!is_mst)
- intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF);
+ intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF);
intel_disable_ddi_buf(encoder);
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
index 42e45ae..c8ea510 100644
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/intel_hangcheck.c
@@ -246,7 +246,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
*/
tmp = I915_READ_CTL(engine);
if (tmp & RING_WAIT) {
- i915_handle_error(dev_priv, 0,
+ i915_handle_error(dev_priv, BIT(engine->id),
"Kicking stuck wait on %s",
engine->name);
I915_WRITE_CTL(engine, tmp);
@@ -258,7 +258,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
default:
return ENGINE_DEAD;
case 1:
- i915_handle_error(dev_priv, 0,
+ i915_handle_error(dev_priv, ALL_ENGINES,
"Kicking stuck semaphore on %s",
engine->name);
I915_WRITE_CTL(engine, tmp);
OpenPOWER on IntegriCloud