diff options
Diffstat (limited to 'drivers/gpu/drm/i915/i915_perf.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_perf.c | 178 |
1 files changed, 38 insertions, 140 deletions
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 6bf10952c724..664b96bb65a3 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -210,6 +210,7 @@ #include "i915_oa_cflgt3.h" #include "i915_oa_cnl.h" #include "i915_oa_icl.h" +#include "intel_lrc_reg.h" /* HW requires this to be a power of two, between 128k and 16M, though driver * is currently generally designed assuming the largest 16M size is used such @@ -1338,14 +1339,12 @@ free_oa_buffer(struct drm_i915_private *i915) { mutex_lock(&i915->drm.struct_mutex); - i915_gem_object_unpin_map(i915->perf.oa.oa_buffer.vma->obj); - i915_vma_unpin(i915->perf.oa.oa_buffer.vma); - i915_gem_object_put(i915->perf.oa.oa_buffer.vma->obj); - - i915->perf.oa.oa_buffer.vma = NULL; - i915->perf.oa.oa_buffer.vaddr = NULL; + i915_vma_unpin_and_release(&i915->perf.oa.oa_buffer.vma, + I915_VMA_RELEASE_MAP); mutex_unlock(&i915->drm.struct_mutex); + + i915->perf.oa.oa_buffer.vaddr = NULL; } static void i915_oa_stream_destroy(struct i915_perf_stream *stream) @@ -1638,27 +1637,25 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset; u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset; /* The MMIO offsets for Flex EU registers aren't contiguous */ - u32 flex_mmio[] = { - i915_mmio_reg_offset(EU_PERF_CNTL0), - i915_mmio_reg_offset(EU_PERF_CNTL1), - i915_mmio_reg_offset(EU_PERF_CNTL2), - i915_mmio_reg_offset(EU_PERF_CNTL3), - i915_mmio_reg_offset(EU_PERF_CNTL4), - i915_mmio_reg_offset(EU_PERF_CNTL5), - i915_mmio_reg_offset(EU_PERF_CNTL6), + i915_reg_t flex_regs[] = { + EU_PERF_CNTL0, + EU_PERF_CNTL1, + EU_PERF_CNTL2, + EU_PERF_CNTL3, + EU_PERF_CNTL4, + EU_PERF_CNTL5, + EU_PERF_CNTL6, }; int i; - reg_state[ctx_oactxctrl] = i915_mmio_reg_offset(GEN8_OACTXCONTROL); - reg_state[ctx_oactxctrl+1] = (dev_priv->perf.oa.period_exponent << - GEN8_OA_TIMER_PERIOD_SHIFT) | - (dev_priv->perf.oa.periodic ? - GEN8_OA_TIMER_ENABLE : 0) | - GEN8_OA_COUNTER_RESUME; + CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL, + (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | + (dev_priv->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) | + GEN8_OA_COUNTER_RESUME); - for (i = 0; i < ARRAY_SIZE(flex_mmio); i++) { + for (i = 0; i < ARRAY_SIZE(flex_regs); i++) { u32 state_offset = ctx_flexeu0 + i * 2; - u32 mmio = flex_mmio[i]; + u32 mmio = i915_mmio_reg_offset(flex_regs[i]); /* * This arbitrary default will select the 'EU FPU0 Pipeline @@ -1678,110 +1675,8 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, } } - reg_state[state_offset] = mmio; - reg_state[state_offset+1] = value; - } -} - -/* - * Same as gen8_update_reg_state_unlocked only through the batchbuffer. This - * is only used by the kernel context. - */ -static int gen8_emit_oa_config(struct i915_request *rq, - const struct i915_oa_config *oa_config) -{ - struct drm_i915_private *dev_priv = rq->i915; - /* The MMIO offsets for Flex EU registers aren't contiguous */ - u32 flex_mmio[] = { - i915_mmio_reg_offset(EU_PERF_CNTL0), - i915_mmio_reg_offset(EU_PERF_CNTL1), - i915_mmio_reg_offset(EU_PERF_CNTL2), - i915_mmio_reg_offset(EU_PERF_CNTL3), - i915_mmio_reg_offset(EU_PERF_CNTL4), - i915_mmio_reg_offset(EU_PERF_CNTL5), - i915_mmio_reg_offset(EU_PERF_CNTL6), - }; - u32 *cs; - int i; - - cs = intel_ring_begin(rq, ARRAY_SIZE(flex_mmio) * 2 + 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(ARRAY_SIZE(flex_mmio) + 1); - - *cs++ = i915_mmio_reg_offset(GEN8_OACTXCONTROL); - *cs++ = (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | - (dev_priv->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) | - GEN8_OA_COUNTER_RESUME; - - for (i = 0; i < ARRAY_SIZE(flex_mmio); i++) { - u32 mmio = flex_mmio[i]; - - /* - * This arbitrary default will select the 'EU FPU0 Pipeline - * Active' event. In the future it's anticipated that there - * will be an explicit 'No Event' we can select, but not - * yet... - */ - u32 value = 0; - - if (oa_config) { - u32 j; - - for (j = 0; j < oa_config->flex_regs_len; j++) { - if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) { - value = oa_config->flex_regs[j].value; - break; - } - } - } - - *cs++ = mmio; - *cs++ = value; + CTX_REG(reg_state, state_offset, flex_regs[i], value); } - - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - return 0; -} - -static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_priv, - const struct i915_oa_config *oa_config) -{ - struct intel_engine_cs *engine = dev_priv->engine[RCS]; - struct i915_timeline *timeline; - struct i915_request *rq; - int ret; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - i915_retire_requests(dev_priv); - - rq = i915_request_alloc(engine, dev_priv->kernel_context); - if (IS_ERR(rq)) - return PTR_ERR(rq); - - ret = gen8_emit_oa_config(rq, oa_config); - if (ret) { - i915_request_add(rq); - return ret; - } - - /* Queue this switch after all other activity */ - list_for_each_entry(timeline, &dev_priv->gt.timelines, link) { - struct i915_request *prev; - - prev = i915_gem_active_raw(&timeline->last_request, - &dev_priv->drm.struct_mutex); - if (prev) - i915_request_await_dma_fence(rq, &prev->fence); - } - - i915_request_add(rq); - - return 0; } /* @@ -1812,17 +1707,13 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, const struct i915_oa_config *oa_config) { struct intel_engine_cs *engine = dev_priv->engine[RCS]; + unsigned int map_type = i915_coherent_map_type(dev_priv); struct i915_gem_context *ctx; + struct i915_request *rq; int ret; - unsigned int wait_flags = I915_WAIT_LOCKED; lockdep_assert_held(&dev_priv->drm.struct_mutex); - /* Switch away from any user context. */ - ret = gen8_switch_to_updated_kernel_context(dev_priv, oa_config); - if (ret) - goto out; - /* * The OA register config is setup through the context image. This image * might be written to by the GPU on context switch (in particular on @@ -1837,10 +1728,10 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, * the GPU from any submitted work. */ ret = i915_gem_wait_for_idle(dev_priv, - wait_flags, + I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT); if (ret) - goto out; + return ret; /* Update all contexts now that we've stalled the submission. */ list_for_each_entry(ctx, &dev_priv->contexts.list, link) { @@ -1851,11 +1742,9 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, if (!ce->state) continue; - regs = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB); - if (IS_ERR(regs)) { - ret = PTR_ERR(regs); - goto out; - } + regs = i915_gem_object_pin_map(ce->state->obj, map_type); + if (IS_ERR(regs)) + return PTR_ERR(regs); ce->state->obj->mm.dirty = true; regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs); @@ -1865,8 +1754,17 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, i915_gem_object_unpin_map(ce->state->obj); } - out: - return ret; + /* + * Apply the configuration by doing one context restore of the edited + * context image. + */ + rq = i915_request_alloc(engine, dev_priv->kernel_context); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_add(rq); + + return 0; } static int gen8_enable_metric_set(struct drm_i915_private *dev_priv, |