diff options
author | Dave Airlie <airlied@redhat.com> | 2017-12-08 08:39:14 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2017-12-08 08:41:22 +1000 |
commit | 3f1f0b1c57dd617e9b0ded50efb8d6c011b85b20 (patch) | |
tree | faed170d2c9f5e69ad4a63c15a331af7fde6fade | |
parent | 5c379b4f4fd0c97e7bd31b3523c7e5c2bdf4a9b6 (diff) | |
parent | d65efe7c951371fbad2c426b59bbac8bf2e60662 (diff) | |
download | lwn-3f1f0b1c57dd617e9b0ded50efb8d6c011b85b20.tar.gz lwn-3f1f0b1c57dd617e9b0ded50efb8d6c011b85b20.zip |
Merge tag 'drm-intel-next-2017-12-01' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
[airlied: fix conflict in intel_dsi.c]
drm-intel-next-2017-12-01:
- Init clock gate fix (Ville)
- Execlists event handling corrections (Chris, Michel)
- Improvements on GPU Cache invalidation and context switch (Chris)
- More perf OA changes (Lionel)
- More selftests improvements and fixes (Chris, Matthew)
- Clean-up of module parameters (Chris)
- Clean-up around old ringbuffer submission and hw semaphore on old platforms (Chris)
- More Cannonlake stabilization effort (David, James)
- Display planes clean-up and improvements (Ville)
- New PMU interface for perf queries... (Tvrtko)
- ... and other subsequent PMU changes and fixes (Tvrtko, Chris)
- Remove success dmesg noise from rotation (Chris)
- New DMC for Kabylake (Anusha)
- Fixes around atomic commits (Daniel)
- GuC updates and fixes (Sagar, Michal, Chris)
- Couple gmbus/i2c fixes (Ville)
- Use exponential backoff for all our wait_for() (Chris)
- Fixes for i915/fbdev (Chris)
- Backlight fixes (Arnd)
- Updates on shrinker (Chris)
- Make Hotplug enable more robust (Chris)
- Disable huge pages (TPH) on lack of a needed workaround (Joonas)
- New GuC images for SKL, KBL, BXT (Sagar)
- Add HW Workaround for Geminilake performance (Valtteri)
- Fixes for PPS timings (Imre)
- More IPS fixes (Maarten)
- Many fixes for Display Port on gen2-gen4 (Ville)
- Retry GPU reset, making recovery from a hang more robust (Chris)
* tag 'drm-intel-next-2017-12-01' of git://anongit.freedesktop.org/drm/drm-intel: (101 commits)
drm/i915: Update DRIVER_DATE to 20171201
drm/i915/cnl: Mask previous DDI - PLL mapping
drm/i915: Remove unsafe i915.enable_rc6
drm/i915: Sleep and retry a GPU reset if at first we don't succeed
drm/i915: Interlaced DP output doesn't work on VLV/CHV
drm/i915: Pass crtc state to intel_pipe_{enable,disable}()
drm/i915: Wait for pipe to start on i830 as well
drm/i915: Fix vblank timestamp/frame counter jumps on gen2
drm/i915: Fix deadlock in i830_disable_pipe()
drm/i915: Fix has_audio readout for DDI A
drm/i915: Don't add the "force audio" property to DP connectors that don't support audio
drm/i915: Disable DP audio for g4x
drm/i915/selftests: Wake the device before executing requests on the GPU
drm/i915: Set fake_vma.size as well as fake_vma.node.size for capture
drm/i915: Tidy up signed/unsigned comparison
drm/i915: Enable IPS with only sprite plane visible too, v4.
drm/i915: Make ips_enabled a property depending on whether IPS is enabled, v3.
drm/i915: Avoid PPS HW/SW state mismatch due to rounding
drm/i915: Skip switch-to-kernel-context on suspend when wedged
drm/i915/glk: Apply WaProgramL3SqcReg1DefaultForPerf for GLK too
...
61 files changed, 2839 insertions, 2057 deletions
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 9e53edbc713b..fa36491495b1 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -18,6 +18,7 @@ config DRM_I915_WERROR config DRM_I915_DEBUG bool "Enable additional driver debugging" depends on DRM_I915 + select DEBUG_FS select PREEMPT_COUNT select I2C_CHARDEV select DRM_DP_AUX_CHARDEV diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 49b9535e40d1..091aef281963 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -46,6 +46,7 @@ i915-y := i915_drv.o \ i915-$(CONFIG_COMPAT) += i915_ioc32.o i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o +i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o # GEM code i915-y += i915_cmd_parser.o \ diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index 0672178548ef..dac12c25f349 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -294,8 +294,7 @@ static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id) * write. 
*/ if (mmio->in_context && - ((ctx_ctrl & inhibit_mask) != inhibit_mask) && - i915_modparams.enable_execlists) + (ctx_ctrl & inhibit_mask) != inhibit_mask) continue; if (mmio->mask) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index b11629beeb63..ccb5ba043b63 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -26,6 +26,7 @@ */ #include "i915_drv.h" +#include "intel_ringbuffer.h" /** * DOC: batch buffer command parser @@ -940,7 +941,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) return; } - engine->needs_cmd_parser = true; + engine->flags |= I915_ENGINE_NEEDS_CMD_PARSER; } /** @@ -952,7 +953,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) */ void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine) { - if (!engine->needs_cmd_parser) + if (!intel_engine_needs_cmd_parser(engine)) return; fini_hash_table(engine); @@ -1350,7 +1351,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv) /* If the command parser is not enabled, report 0 - unsupported */ for_each_engine(engine, dev_priv, id) { - if (engine->needs_cmd_parser) { + if (intel_engine_needs_cmd_parser(engine)) { active = true; break; } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index df3852c02a35..28294470ae31 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1151,13 +1151,8 @@ static int i915_frequency_info(struct seq_file *m, void *unused) rpdownei = I915_READ(GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK; rpcurdown = I915_READ(GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK; rpprevdown = I915_READ(GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK; - if (INTEL_GEN(dev_priv) >= 9) - cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; - else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT; - else - cagf = (rpstat & 
GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; - cagf = intel_gpu_freq(dev_priv, cagf); + cagf = intel_gpu_freq(dev_priv, + intel_get_cagf(dev_priv, rpstat)); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); @@ -1989,75 +1984,6 @@ static int i915_context_status(struct seq_file *m, void *unused) return 0; } -static void i915_dump_lrc_obj(struct seq_file *m, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine) -{ - struct i915_vma *vma = ctx->engine[engine->id].state; - struct page *page; - int j; - - seq_printf(m, "CONTEXT: %s %u\n", engine->name, ctx->hw_id); - - if (!vma) { - seq_puts(m, "\tFake context\n"); - return; - } - - if (vma->flags & I915_VMA_GLOBAL_BIND) - seq_printf(m, "\tBound in GGTT at 0x%08x\n", - i915_ggtt_offset(vma)); - - if (i915_gem_object_pin_pages(vma->obj)) { - seq_puts(m, "\tFailed to get pages for context object\n\n"); - return; - } - - page = i915_gem_object_get_page(vma->obj, LRC_STATE_PN); - if (page) { - u32 *reg_state = kmap_atomic(page); - - for (j = 0; j < 0x600 / sizeof(u32) / 4; j += 4) { - seq_printf(m, - "\t[0x%04x] 0x%08x 0x%08x 0x%08x 0x%08x\n", - j * 4, - reg_state[j], reg_state[j + 1], - reg_state[j + 2], reg_state[j + 3]); - } - kunmap_atomic(reg_state); - } - - i915_gem_object_unpin_pages(vma->obj); - seq_putc(m, '\n'); -} - -static int i915_dump_lrc(struct seq_file *m, void *unused) -{ - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; - struct intel_engine_cs *engine; - struct i915_gem_context *ctx; - enum intel_engine_id id; - int ret; - - if (!i915_modparams.enable_execlists) { - seq_printf(m, "Logical Ring Contexts are disabled\n"); - return 0; - } - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - - list_for_each_entry(ctx, &dev_priv->contexts.list, link) - for_each_engine(engine, dev_priv, id) - i915_dump_lrc_obj(m, ctx, engine); - - mutex_unlock(&dev->struct_mutex); - - return 0; -} - static const char 
*swizzle_string(unsigned swizzle) { switch (swizzle) { @@ -3304,69 +3230,6 @@ static int i915_shrinker_info(struct seq_file *m, void *unused) return 0; } -static int i915_semaphore_status(struct seq_file *m, void *unused) -{ - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; - struct intel_engine_cs *engine; - int num_rings = INTEL_INFO(dev_priv)->num_rings; - enum intel_engine_id id; - int j, ret; - - if (!i915_modparams.semaphores) { - seq_puts(m, "Semaphores are disabled\n"); - return 0; - } - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - intel_runtime_pm_get(dev_priv); - - if (IS_BROADWELL(dev_priv)) { - struct page *page; - uint64_t *seqno; - - page = i915_gem_object_get_page(dev_priv->semaphore->obj, 0); - - seqno = (uint64_t *)kmap_atomic(page); - for_each_engine(engine, dev_priv, id) { - uint64_t offset; - - seq_printf(m, "%s\n", engine->name); - - seq_puts(m, " Last signal:"); - for (j = 0; j < num_rings; j++) { - offset = id * I915_NUM_ENGINES + j; - seq_printf(m, "0x%08llx (0x%02llx) ", - seqno[offset], offset * 8); - } - seq_putc(m, '\n'); - - seq_puts(m, " Last wait: "); - for (j = 0; j < num_rings; j++) { - offset = id + (j * I915_NUM_ENGINES); - seq_printf(m, "0x%08llx (0x%02llx) ", - seqno[offset], offset * 8); - } - seq_putc(m, '\n'); - - } - kunmap_atomic(seqno); - } else { - seq_puts(m, " Last signal:"); - for_each_engine(engine, dev_priv, id) - for (j = 0; j < num_rings; j++) - seq_printf(m, "0x%08x\n", - I915_READ(engine->semaphore.mbox.signal[j])); - seq_putc(m, '\n'); - } - - intel_runtime_pm_put(dev_priv); - mutex_unlock(&dev->struct_mutex); - return 0; -} - static int i915_shared_dplls_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -4833,7 +4696,6 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_vbt", i915_vbt, 0}, {"i915_gem_framebuffer", i915_gem_framebuffer_info, 0}, 
{"i915_context_status", i915_context_status, 0}, - {"i915_dump_lrc", i915_dump_lrc, 0}, {"i915_forcewake_domains", i915_forcewake_domains, 0}, {"i915_swizzle_info", i915_swizzle_info, 0}, {"i915_ppgtt_info", i915_ppgtt_info, 0}, @@ -4847,7 +4709,6 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_display_info", i915_display_info, 0}, {"i915_engine_info", i915_engine_info, 0}, {"i915_shrinker_info", i915_shrinker_info, 0}, - {"i915_semaphore_status", i915_semaphore_status, 0}, {"i915_shared_dplls_info", i915_shared_dplls_info, 0}, {"i915_dp_mst_info", i915_dp_mst_info, 0}, {"i915_wa_registers", i915_wa_registers, 0}, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 5170a8ea83d4..7faf20aff25a 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -48,6 +48,7 @@ #include "i915_drv.h" #include "i915_trace.h" +#include "i915_pmu.h" #include "i915_vgpu.h" #include "intel_drv.h" #include "intel_uc.h" @@ -321,7 +322,7 @@ static int i915_getparam(struct drm_device *dev, void *data, value = USES_PPGTT(dev_priv); break; case I915_PARAM_HAS_SEMAPHORES: - value = i915_modparams.semaphores; + value = HAS_LEGACY_SEMAPHORES(dev_priv); break; case I915_PARAM_HAS_SECURE_BATCHES: value = capable(CAP_SYS_ADMIN); @@ -371,9 +372,7 @@ static int i915_getparam(struct drm_device *dev, void *data, if (dev_priv->engine[RCS] && dev_priv->engine[RCS]->schedule) { value |= I915_SCHEDULER_CAP_ENABLED; value |= I915_SCHEDULER_CAP_PRIORITY; - - if (HAS_LOGICAL_RING_PREEMPTION(dev_priv) && - i915_modparams.enable_execlists) + if (HAS_LOGICAL_RING_PREEMPTION(dev_priv)) value |= I915_SCHEDULER_CAP_PREEMPTION; } break; @@ -694,8 +693,6 @@ static int i915_load_modeset_init(struct drm_device *dev) /* Only enable hotplug handling once the fbdev is fully set up. 
*/ intel_hpd_init(dev_priv); - drm_kms_helper_poll_init(dev); - return 0; cleanup_gem: @@ -936,8 +933,6 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, intel_detect_preproduction_hw(dev_priv); - i915_perf_init(dev_priv); - return 0; err_irq: @@ -954,7 +949,6 @@ err_engines: */ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv) { - i915_perf_fini(dev_priv); i915_gem_load_cleanup(dev_priv); intel_irq_fini(dev_priv); i915_workqueues_cleanup(dev_priv); @@ -1057,10 +1051,6 @@ static void i915_driver_cleanup_mmio(struct drm_i915_private *dev_priv) static void intel_sanitize_options(struct drm_i915_private *dev_priv) { - i915_modparams.enable_execlists = - intel_sanitize_enable_execlists(dev_priv, - i915_modparams.enable_execlists); - /* * i915.enable_ppgtt is read-only, so do an early pass to validate the * user's requested state against the hardware/driver capabilities. We @@ -1072,11 +1062,6 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv) i915_modparams.enable_ppgtt); DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915_modparams.enable_ppgtt); - i915_modparams.semaphores = - intel_sanitize_semaphores(dev_priv, i915_modparams.semaphores); - DRM_DEBUG_DRIVER("use GPU semaphores? 
%s\n", - yesno(i915_modparams.semaphores)); - intel_uc_sanitize_options(dev_priv); intel_gvt_sanitize_options(dev_priv); @@ -1101,6 +1086,8 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) intel_sanitize_options(dev_priv); + i915_perf_init(dev_priv); + ret = i915_ggtt_probe_hw(dev_priv); if (ret) return ret; @@ -1206,6 +1193,8 @@ static void i915_driver_cleanup_hw(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; + i915_perf_fini(dev_priv); + if (pdev->msi_enabled) pci_disable_msi(pdev); @@ -1224,7 +1213,8 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; - i915_gem_shrinker_init(dev_priv); + i915_gem_shrinker_register(dev_priv); + i915_pmu_register(dev_priv); /* * Notify a valid surface after modesetting, @@ -1263,6 +1253,13 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) * cannot run before the connectors are registered. */ intel_fbdev_initial_config_async(dev); + + /* + * We need to coordinate the hotplugs with the asynchronous fbdev + * configuration, for which we use the fbdev->async_cookie. + */ + if (INTEL_INFO(dev_priv)->num_pipes) + drm_kms_helper_poll_init(dev); } /** @@ -1274,17 +1271,25 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) intel_fbdev_unregister(dev_priv); intel_audio_deinit(dev_priv); + /* + * After flushing the fbdev (incl. a late async config which will + * have delayed queuing of a hotplug event), then flush the hotplug + * events. 
+ */ + drm_kms_helper_poll_fini(&dev_priv->drm); + intel_gpu_ips_teardown(); acpi_video_unregister(); intel_opregion_unregister(dev_priv); i915_perf_unregister(dev_priv); + i915_pmu_unregister(dev_priv); i915_teardown_sysfs(dev_priv); i915_guc_log_unregister(dev_priv); drm_dev_unregister(&dev_priv->drm); - i915_gem_shrinker_cleanup(dev_priv); + i915_gem_shrinker_unregister(dev_priv); } /** @@ -1872,7 +1877,9 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags) { struct i915_gpu_error *error = &i915->gpu_error; int ret; + int i; + might_sleep(); lockdep_assert_held(&i915->drm.struct_mutex); GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags)); @@ -1895,12 +1902,20 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags) goto error; } - ret = intel_gpu_reset(i915, ALL_ENGINES); + if (!intel_has_gpu_reset(i915)) { + DRM_DEBUG_DRIVER("GPU reset disabled\n"); + goto error; + } + + for (i = 0; i < 3; i++) { + ret = intel_gpu_reset(i915, ALL_ENGINES); + if (ret == 0) + break; + + msleep(100); + } if (ret) { - if (ret != -ENODEV) - DRM_ERROR("Failed to reset chip: %i\n", ret); - else - DRM_DEBUG_DRIVER("GPU reset disabled\n"); + dev_err(i915->drm.dev, "Failed to reset chip\n"); goto error; } @@ -2512,7 +2527,7 @@ static int intel_runtime_suspend(struct device *kdev) struct drm_i915_private *dev_priv = to_i915(dev); int ret; - if (WARN_ON_ONCE(!(dev_priv->gt_pm.rc6.enabled && intel_rc6_enabled()))) + if (WARN_ON_ONCE(!(dev_priv->gt_pm.rc6.enabled && HAS_RC6(dev_priv)))) return -ENODEV; if (WARN_ON_ONCE(!HAS_RUNTIME_PM(dev_priv))) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 36bb4927484a..594fd14e66c5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -40,6 +40,7 @@ #include <linux/hash.h> #include <linux/intel-iommu.h> #include <linux/kref.h> +#include <linux/perf_event.h> #include <linux/pm_qos.h> #include <linux/reservation.h> #include <linux/shmem_fs.h> @@ -79,8 
+80,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20171117" -#define DRIVER_TIMESTAMP 1510958822 +#define DRIVER_DATE "20171201" +#define DRIVER_TIMESTAMP 1512176839 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions @@ -304,9 +305,9 @@ static inline bool transcoder_is_dsi(enum transcoder transcoder) /* * Global legacy plane identifier. Valid only for primary/sprite - * planes on pre-g4x, and only for primary planes on g4x+. + * planes on pre-g4x, and only for primary planes on g4x-bdw. */ -enum plane { +enum i9xx_plane_id { PLANE_A, PLANE_B, PLANE_C, @@ -560,13 +561,13 @@ struct i915_hotplug { for_each_power_well_rev(__dev_priv, __power_well) \ for_each_if ((__power_well)->domains & (__domain_mask)) -#define for_each_intel_plane_in_state(__state, plane, plane_state, __i) \ +#define for_each_new_intel_plane_in_state(__state, plane, new_plane_state, __i) \ for ((__i) = 0; \ (__i) < (__state)->base.dev->mode_config.num_total_plane && \ ((plane) = to_intel_plane((__state)->base.planes[__i].ptr), \ - (plane_state) = to_intel_plane_state((__state)->base.planes[__i].state), 1); \ + (new_plane_state) = to_intel_plane_state((__state)->base.planes[__i].new_state), 1); \ (__i)++) \ - for_each_if (plane_state) + for_each_if (plane) #define for_each_new_intel_crtc_in_state(__state, crtc, new_crtc_state, __i) \ for ((__i) = 0; \ @@ -576,7 +577,6 @@ struct i915_hotplug { (__i)++) \ for_each_if (crtc) - #define for_each_oldnew_intel_plane_in_state(__state, plane, old_plane_state, new_plane_state, __i) \ for ((__i) = 0; \ (__i) < (__state)->base.dev->mode_config.num_total_plane && \ @@ -698,7 +698,8 @@ struct drm_i915_display_funcs { struct intel_cdclk_state *cdclk_state); void (*set_cdclk)(struct drm_i915_private *dev_priv, const struct intel_cdclk_state *cdclk_state); - int (*get_fifo_size)(struct drm_i915_private *dev_priv, int plane); 
+ int (*get_fifo_size)(struct drm_i915_private *dev_priv, + enum i9xx_plane_id i9xx_plane); int (*compute_pipe_wm)(struct intel_crtc_state *cstate); int (*compute_intermediate_wm)(struct drm_device *dev, struct intel_crtc *intel_crtc, @@ -942,7 +943,6 @@ struct i915_gpu_state { u64 fence[I915_MAX_NUM_FENCES]; struct intel_overlay_error_state *overlay; struct intel_display_error_state *display; - struct drm_i915_error_object *semaphore; struct drm_i915_error_engine { int engine_id; @@ -1009,6 +1009,7 @@ struct i915_gpu_state { long user_bo_count; struct drm_i915_error_object *wa_ctx; + struct drm_i915_error_object *default_state; struct drm_i915_error_request { long jiffies; @@ -1145,7 +1146,7 @@ struct intel_fbc { struct { enum pipe pipe; - enum plane plane; + enum i9xx_plane_id i9xx_plane; unsigned int fence_y_offset; } crtc; @@ -2291,7 +2292,8 @@ struct drm_i915_private { struct i915_gem_context *kernel_context; /* Context only to be used for injecting preemption commands */ struct i915_gem_context *preempt_context; - struct i915_vma *semaphore; + struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1] + [MAX_ENGINE_INSTANCE + 1]; struct drm_dma_handle *status_page_dmah; struct resource mch_res; @@ -2619,7 +2621,6 @@ struct drm_i915_private { bool periodic; int period_exponent; - int timestamp_frequency; struct i915_oa_config test_config; @@ -2764,6 +2765,8 @@ struct drm_i915_private { int irq; } lpe_audio; + struct i915_pmu pmu; + /* * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch * will be rejected. Instead look for a better place. 
@@ -3142,6 +3145,8 @@ intel_info(const struct drm_i915_private *dev_priv) #define HAS_BLT(dev_priv) HAS_ENGINE(dev_priv, BCS) #define HAS_VEBOX(dev_priv) HAS_ENGINE(dev_priv, VECS) +#define HAS_LEGACY_SEMAPHORES(dev_priv) IS_GEN7(dev_priv) + #define HAS_LLC(dev_priv) ((dev_priv)->info.has_llc) #define HAS_SNOOP(dev_priv) ((dev_priv)->info.has_snoop) #define HAS_EDRAM(dev_priv) (!!((dev_priv)->edram_cap & EDRAM_ENABLED)) @@ -3154,6 +3159,9 @@ intel_info(const struct drm_i915_private *dev_priv) ((dev_priv)->info.has_logical_ring_contexts) #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \ ((dev_priv)->info.has_logical_ring_preemption) + +#define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv) + #define USES_PPGTT(dev_priv) (i915_modparams.enable_ppgtt) #define USES_FULL_PPGTT(dev_priv) (i915_modparams.enable_ppgtt >= 2) #define USES_FULL_48BIT_PPGTT(dev_priv) (i915_modparams.enable_ppgtt == 3) @@ -3205,8 +3213,10 @@ intel_info(const struct drm_i915_private *dev_priv) #define HAS_DDI(dev_priv) ((dev_priv)->info.has_ddi) #define HAS_FPGA_DBG_UNCLAIMED(dev_priv) ((dev_priv)->info.has_fpga_dbg) #define HAS_PSR(dev_priv) ((dev_priv)->info.has_psr) + #define HAS_RC6(dev_priv) ((dev_priv)->info.has_rc6) #define HAS_RC6p(dev_priv) ((dev_priv)->info.has_rc6p) +#define HAS_RC6pp(dev_priv) (false) /* HW was never validated */ #define HAS_CSR(dev_priv) ((dev_priv)->info.has_csr) @@ -3302,8 +3312,6 @@ intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *dev_priv) int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, int enable_ppgtt); -bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value); - /* i915_drv.c */ void __printf(3, 4) __i915_printk(struct drm_i915_private *dev_priv, const char *level, @@ -3905,7 +3913,7 @@ i915_gem_object_create_internal(struct drm_i915_private *dev_priv, phys_addr_t size); /* i915_gem_shrinker.c */ -unsigned long i915_gem_shrink(struct drm_i915_private *dev_priv, +unsigned long i915_gem_shrink(struct 
drm_i915_private *i915, unsigned long target, unsigned long *nr_scanned, unsigned flags); @@ -3914,9 +3922,9 @@ unsigned long i915_gem_shrink(struct drm_i915_private *dev_priv, #define I915_SHRINK_BOUND 0x4 #define I915_SHRINK_ACTIVE 0x8 #define I915_SHRINK_VMAPS 0x10 -unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv); -void i915_gem_shrinker_init(struct drm_i915_private *dev_priv); -void i915_gem_shrinker_cleanup(struct drm_i915_private *dev_priv); +unsigned long i915_gem_shrink_all(struct drm_i915_private *i915); +void i915_gem_shrinker_register(struct drm_i915_private *i915); +void i915_gem_shrinker_unregister(struct drm_i915_private *i915); /* i915_gem_tiling.c */ @@ -4223,9 +4231,17 @@ void vlv_phy_reset_lanes(struct intel_encoder *encoder, int intel_gpu_freq(struct drm_i915_private *dev_priv, int val); int intel_freq_opcode(struct drm_i915_private *dev_priv, int val); -u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv, +u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv, const i915_reg_t reg); +u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat1); + +static inline u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv, + const i915_reg_t reg) +{ + return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(dev_priv, reg), 1000); +} + #define I915_READ8(reg) dev_priv->uncore.funcs.mmio_readb(dev_priv, (reg), true) #define I915_WRITE8(reg, val) dev_priv->uncore.funcs.mmio_writeb(dev_priv, (reg), (val), true) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 61ba321e9970..e083f242b8dc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3371,7 +3371,9 @@ i915_gem_idle_work_handler(struct work_struct *work) synchronize_irq(dev_priv->drm.irq); intel_engines_park(dev_priv); - i915_gem_timelines_mark_idle(dev_priv); + i915_gem_timelines_park(dev_priv); + + i915_pmu_gt_parked(dev_priv); GEM_BUG_ON(!dev_priv->gt.awake); dev_priv->gt.awake = false; @@ 
-4772,17 +4774,19 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) * state. Fortunately, the kernel_context is disposable and we do * not rely on its state. */ - ret = i915_gem_switch_to_kernel_context(dev_priv); - if (ret) - goto err_unlock; + if (!i915_terminally_wedged(&dev_priv->gpu_error)) { + ret = i915_gem_switch_to_kernel_context(dev_priv); + if (ret) + goto err_unlock; - ret = i915_gem_wait_for_idle(dev_priv, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED); - if (ret && ret != -EIO) - goto err_unlock; + ret = i915_gem_wait_for_idle(dev_priv, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED); + if (ret && ret != -EIO) + goto err_unlock; - assert_kernel_context_is_current(dev_priv); + assert_kernel_context_is_current(dev_priv); + } i915_gem_contexts_lost(dev_priv); mutex_unlock(&dev->struct_mutex); @@ -4997,25 +5001,6 @@ out: return ret; } -bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value) -{ - if (INTEL_INFO(dev_priv)->gen < 6) - return false; - - /* TODO: make semaphores and Execlists play nicely together */ - if (i915_modparams.enable_execlists) - return false; - - if (value >= 0) - return value; - - /* Enable semaphores on SNB when IO remapping is off */ - if (IS_GEN6(dev_priv) && intel_vtd_active()) - return false; - - return true; -} - static int __intel_engines_record_defaults(struct drm_i915_private *i915) { struct i915_gem_context *ctx; @@ -5045,7 +5030,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) goto out_ctx; } - err = i915_switch_context(rq); + err = 0; if (engine->init_context) err = engine->init_context(rq); @@ -5134,8 +5119,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv) { int ret; - mutex_lock(&dev_priv->drm.struct_mutex); - /* * We need to fallback to 4K pages since gvt gtt handling doesn't * support huge page entries - we will need to check either hypervisor @@ -5147,26 +5130,27 @@ int i915_gem_init(struct drm_i915_private *dev_priv) 
dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); - if (!i915_modparams.enable_execlists) { - dev_priv->gt.resume = intel_legacy_submission_resume; - dev_priv->gt.cleanup_engine = intel_engine_cleanup; - } else { + if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { dev_priv->gt.resume = intel_lr_context_resume; dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; + } else { + dev_priv->gt.resume = intel_legacy_submission_resume; + dev_priv->gt.cleanup_engine = intel_engine_cleanup; } + ret = i915_gem_init_userptr(dev_priv); + if (ret) + return ret; + /* This is just a security blanket to placate dragons. * On some systems, we very sporadically observe that the first TLBs * used by the CS may be stale, despite us poking the TLB reset. If * we hold the forcewake during initialisation these problems * just magically go away. */ + mutex_lock(&dev_priv->drm.struct_mutex); intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - ret = i915_gem_init_userptr(dev_priv); - if (ret) - goto out_unlock; - ret = i915_gem_init_ggtt(dev_priv); if (ret) goto out_unlock; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 2db040695035..ce3139e5ec4c 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -460,14 +460,6 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv) INIT_WORK(&dev_priv->contexts.free_work, contexts_free_worker); init_llist_head(&dev_priv->contexts.free_list); - if (intel_vgpu_active(dev_priv) && - HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { - if (!i915_modparams.enable_execlists) { - DRM_INFO("Only EXECLIST mode is supported in vgpu.\n"); - return -EINVAL; - } - } - /* Using the simple ida interface, the max is limited by sizeof(int) */ BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); ida_init(&dev_priv->contexts.hw_ida); @@ -515,6 +507,7 @@ void i915_gem_contexts_lost(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) { 
engine->legacy_active_context = NULL; + engine->legacy_active_ppgtt = NULL; if (!engine->last_retired_context) continue; @@ -574,300 +567,6 @@ void i915_gem_context_close(struct drm_file *file) idr_destroy(&file_priv->context_idr); } -static inline int -mi_set_context(struct drm_i915_gem_request *req, u32 flags) -{ - struct drm_i915_private *dev_priv = req->i915; - struct intel_engine_cs *engine = req->engine; - enum intel_engine_id id; - const int num_rings = - /* Use an extended w/a on gen7 if signalling from other rings */ - (i915_modparams.semaphores && INTEL_GEN(dev_priv) == 7) ? - INTEL_INFO(dev_priv)->num_rings - 1 : - 0; - int len; - u32 *cs; - - flags |= MI_MM_SPACE_GTT; - if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8) - /* These flags are for resource streamer on HSW+ */ - flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN; - else - flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN; - - len = 4; - if (INTEL_GEN(dev_priv) >= 7) - len += 2 + (num_rings ? 4*num_rings + 6 : 0); - - cs = intel_ring_begin(req, len); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ - if (INTEL_GEN(dev_priv) >= 7) { - *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; - if (num_rings) { - struct intel_engine_cs *signaller; - - *cs++ = MI_LOAD_REGISTER_IMM(num_rings); - for_each_engine(signaller, dev_priv, id) { - if (signaller == engine) - continue; - - *cs++ = i915_mmio_reg_offset( - RING_PSMI_CTL(signaller->mmio_base)); - *cs++ = _MASKED_BIT_ENABLE( - GEN6_PSMI_SLEEP_MSG_DISABLE); - } - } - } - - *cs++ = MI_NOOP; - *cs++ = MI_SET_CONTEXT; - *cs++ = i915_ggtt_offset(req->ctx->engine[RCS].state) | flags; - /* - * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP - * WaMiSetContext_Hang:snb,ivb,vlv - */ - *cs++ = MI_NOOP; - - if (INTEL_GEN(dev_priv) >= 7) { - if (num_rings) { - struct intel_engine_cs *signaller; - i915_reg_t last_reg = {}; /* keep gcc quiet */ - - *cs++ = MI_LOAD_REGISTER_IMM(num_rings); 
- for_each_engine(signaller, dev_priv, id) { - if (signaller == engine) - continue; - - last_reg = RING_PSMI_CTL(signaller->mmio_base); - *cs++ = i915_mmio_reg_offset(last_reg); - *cs++ = _MASKED_BIT_DISABLE( - GEN6_PSMI_SLEEP_MSG_DISABLE); - } - - /* Insert a delay before the next switch! */ - *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; - *cs++ = i915_mmio_reg_offset(last_reg); - *cs++ = i915_ggtt_offset(engine->scratch); - *cs++ = MI_NOOP; - } - *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - } - - intel_ring_advance(req, cs); - - return 0; -} - -static int remap_l3(struct drm_i915_gem_request *req, int slice) -{ - u32 *cs, *remap_info = req->i915->l3_parity.remap_info[slice]; - int i; - - if (!remap_info) - return 0; - - cs = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* - * Note: We do not worry about the concurrent register cacheline hang - * here because no other code should access these registers other than - * at initialization time. 
- */ - *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4); - for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) { - *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i)); - *cs++ = remap_info[i]; - } - *cs++ = MI_NOOP; - intel_ring_advance(req, cs); - - return 0; -} - -static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt, - struct intel_engine_cs *engine, - struct i915_gem_context *to) -{ - if (to->remap_slice) - return false; - - if (ppgtt && (intel_engine_flag(engine) & ppgtt->pd_dirty_rings)) - return false; - - return to == engine->legacy_active_context; -} - -static bool -needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt, struct intel_engine_cs *engine) -{ - struct i915_gem_context *from = engine->legacy_active_context; - - if (!ppgtt) - return false; - - /* Always load the ppgtt on first use */ - if (!from) - return true; - - /* Same context without new entries, skip */ - if ((!from->ppgtt || from->ppgtt == ppgtt) && - !(intel_engine_flag(engine) & ppgtt->pd_dirty_rings)) - return false; - - if (engine->id != RCS) - return true; - - if (INTEL_GEN(engine->i915) < 8) - return true; - - return false; -} - -static bool -needs_pd_load_post(struct i915_hw_ppgtt *ppgtt, - struct i915_gem_context *to, - u32 hw_flags) -{ - if (!ppgtt) - return false; - - if (!IS_GEN8(to->i915)) - return false; - - if (hw_flags & MI_RESTORE_INHIBIT) - return true; - - return false; -} - -static int do_rcs_switch(struct drm_i915_gem_request *req) -{ - struct i915_gem_context *to = req->ctx; - struct intel_engine_cs *engine = req->engine; - struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt; - struct i915_gem_context *from = engine->legacy_active_context; - u32 hw_flags; - int ret, i; - - GEM_BUG_ON(engine->id != RCS); - - if (skip_rcs_switch(ppgtt, engine, to)) - return 0; - - if (needs_pd_load_pre(ppgtt, engine)) { - /* Older GENs and non render rings still want the load first, - * "PP_DCLV followed by PP_DIR_BASE register through Load - * Register Immediate commands in Ring 
Buffer before submitting - * a context."*/ - trace_switch_mm(engine, to); - ret = ppgtt->switch_mm(ppgtt, req); - if (ret) - return ret; - } - - if (i915_gem_context_is_kernel(to)) - /* - * The kernel context(s) is treated as pure scratch and is not - * expected to retain any state (as we sacrifice it during - * suspend and on resume it may be corrupted). This is ok, - * as nothing actually executes using the kernel context; it - * is purely used for flushing user contexts. - */ - hw_flags = MI_RESTORE_INHIBIT; - else if (ppgtt && intel_engine_flag(engine) & ppgtt->pd_dirty_rings) - hw_flags = MI_FORCE_RESTORE; - else - hw_flags = 0; - - if (to != from || (hw_flags & MI_FORCE_RESTORE)) { - ret = mi_set_context(req, hw_flags); - if (ret) - return ret; - - engine->legacy_active_context = to; - } - - /* GEN8 does *not* require an explicit reload if the PDPs have been - * setup, and we do not wish to move them. - */ - if (needs_pd_load_post(ppgtt, to, hw_flags)) { - trace_switch_mm(engine, to); - ret = ppgtt->switch_mm(ppgtt, req); - /* The hardware context switch is emitted, but we haven't - * actually changed the state - so it's probably safe to bail - * here. Still, let the user know something dangerous has - * happened. - */ - if (ret) - return ret; - } - - if (ppgtt) - ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine); - - for (i = 0; i < MAX_L3_SLICES; i++) { - if (!(to->remap_slice & (1<<i))) - continue; - - ret = remap_l3(req, i); - if (ret) - return ret; - - to->remap_slice &= ~(1<<i); - } - - return 0; -} - -/** - * i915_switch_context() - perform a GPU context switch. - * @req: request for which we'll execute the context switch - * - * The context life cycle is simple. The context refcount is incremented and - * decremented by 1 and create and destroy. If the context is in use by the GPU, - * it will have a refcount > 1. This allows us to destroy the context abstract - * object while letting the normal object tracking destroy the backing BO. 
- * - * This function should not be used in execlists mode. Instead the context is - * switched by writing to the ELSP and requests keep a reference to their - * context. - */ -int i915_switch_context(struct drm_i915_gem_request *req) -{ - struct intel_engine_cs *engine = req->engine; - - lockdep_assert_held(&req->i915->drm.struct_mutex); - if (i915_modparams.enable_execlists) - return 0; - - if (!req->ctx->engine[engine->id].state) { - struct i915_gem_context *to = req->ctx; - struct i915_hw_ppgtt *ppgtt = - to->ppgtt ?: req->i915->mm.aliasing_ppgtt; - - if (needs_pd_load_pre(ppgtt, engine)) { - int ret; - - trace_switch_mm(engine, to); - ret = ppgtt->switch_mm(ppgtt, req); - if (ret) - return ret; - - ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine); - } - - engine->legacy_active_context = to; - return 0; - } - - return do_rcs_switch(req); -} - static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine) { struct i915_gem_timeline *timeline; @@ -899,7 +598,6 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) { struct drm_i915_gem_request *req; - int ret; if (engine_has_idle_kernel_context(engine)) continue; @@ -922,10 +620,14 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) GFP_KERNEL); } - ret = i915_switch_context(req); - i915_add_request(req); - if (ret) - return ret; + /* + * Force a flush after the switch to ensure that all rendering + * and operations prior to switching to the kernel context hits + * memory. This should be guaranteed by the previous request, + * but an extra layer of paranoia before we declare the system + * idle (on suspend etc) is advisable! 
+ */ + __i915_add_request(req, true); } return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 53ccb27bfe91..70ccd63cbf8e 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -271,7 +271,7 @@ static inline u64 gen8_noncanonical_addr(u64 address) static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) { - return eb->engine->needs_cmd_parser && eb->batch_len; + return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len; } static int eb_create(struct i915_execbuffer *eb) @@ -1111,14 +1111,6 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, if (err) goto err_request; - err = eb->engine->emit_flush(rq, EMIT_INVALIDATE); - if (err) - goto err_request; - - err = i915_switch_context(rq); - if (err) - goto err_request; - err = eb->engine->emit_bb_start(rq, batch->node.start, PAGE_SIZE, cache->gen > 5 ? 0 : I915_DISPATCH_SECURE); @@ -1818,8 +1810,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) /* Unconditionally flush any chipset caches (for streaming writes). */ i915_gem_chipset_flush(eb->i915); - /* Unconditionally invalidate GPU caches and TLBs. 
*/ - return eb->engine->emit_flush(eb->request, EMIT_INVALIDATE); + return 0; } static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) @@ -1965,10 +1956,6 @@ static int eb_submit(struct i915_execbuffer *eb) if (err) return err; - err = i915_switch_context(eb->request); - if (err) - return err; - if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) { err = i915_reset_gen7_sol_offsets(eb->request); if (err) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 64e8ae1fd832..f3c35e826321 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -178,7 +178,7 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, return 0; } - if (INTEL_GEN(dev_priv) >= 8 && i915_modparams.enable_execlists) { + if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { if (has_full_48bit_ppgtt) return 3; @@ -2162,7 +2162,7 @@ int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv) /* In the case of execlists, PPGTT is enabled by the context descriptor * and the PDPs are contained within the context itself. We don't * need to do anything here. */ - if (i915_modparams.enable_execlists) + if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) return 0; if (!USES_PPGTT(dev_priv)) @@ -3737,9 +3737,6 @@ intel_rotate_pages(struct intel_rotation_info *rot_info, rot_info->plane[i].stride, st, sg); } - DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages)\n", - obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); - kvfree(page_addr_list); return st; @@ -3749,8 +3746,8 @@ err_sg_alloc: err_st_alloc: kvfree(page_addr_list); - DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n", - obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); + DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! 
(%ux%u tiles, %u pages)\n", + obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index c2723a06fbb4..f7fc0df251ac 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -208,10 +208,6 @@ int i915_gem_render_state_emit(struct drm_i915_gem_request *rq) if (err) goto err_unpin; - err = engine->emit_flush(rq, EMIT_INVALIDATE); - if (err) - goto err_unpin; - err = engine->emit_bb_start(rq, so.batch_offset, so.batch_size, I915_DISPATCH_SECURE); diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index e0d6221022a8..a90bdd26571f 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -258,6 +258,7 @@ static void mark_busy(struct drm_i915_private *i915) i915_update_gfx_val(i915); if (INTEL_GEN(i915) >= 6) gen6_rps_busy(i915); + i915_pmu_gt_unparked(i915); intel_engines_unpark(i915); @@ -624,6 +625,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, if (ret) goto err_unpin; + ret = intel_ring_wait_for_space(ring, MIN_SPACE_FOR_ADD_REQUEST); + if (ret) + goto err_unreserve; + /* Move the oldest request to the slab-cache (if not in use!) */ req = list_first_entry_or_null(&engine->timeline->requests, typeof(*req), link); @@ -703,22 +708,30 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; GEM_BUG_ON(req->reserved_space < engine->emit_breadcrumb_sz); - ret = engine->request_alloc(req); - if (ret) - goto err_ctx; - - /* Record the position of the start of the request so that + /* + * Record the position of the start of the request so that * should we detect the updated seqno part-way through the * GPU processing the request, we never over-estimate the * position of the head. 
*/ req->head = req->ring->emit; + /* Unconditionally invalidate GPU caches and TLBs. */ + ret = engine->emit_flush(req, EMIT_INVALIDATE); + if (ret) + goto err_unwind; + + ret = engine->request_alloc(req); + if (ret) + goto err_unwind; + /* Check that we didn't interrupt ourselves with a new request */ GEM_BUG_ON(req->timeline->seqno != req->fence.seqno); return req; -err_ctx: +err_unwind: + req->ring->emit = req->head; + /* Make sure we didn't add ourselves to external state before freeing */ GEM_BUG_ON(!list_empty(&req->active_list)); GEM_BUG_ON(!list_empty(&req->priotree.signalers_list)); diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 3770e3323fc8..9029ed04879c 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -35,9 +35,9 @@ #include "i915_drv.h" #include "i915_trace.h" -static bool shrinker_lock(struct drm_i915_private *dev_priv, bool *unlock) +static bool shrinker_lock(struct drm_i915_private *i915, bool *unlock) { - switch (mutex_trylock_recursive(&dev_priv->drm.struct_mutex)) { + switch (mutex_trylock_recursive(&i915->drm.struct_mutex)) { case MUTEX_TRYLOCK_RECURSIVE: *unlock = false; return true; @@ -47,7 +47,7 @@ static bool shrinker_lock(struct drm_i915_private *dev_priv, bool *unlock) preempt_disable(); do { cpu_relax(); - if (mutex_trylock(&dev_priv->drm.struct_mutex)) { + if (mutex_trylock(&i915->drm.struct_mutex)) { *unlock = true; break; } @@ -63,12 +63,12 @@ static bool shrinker_lock(struct drm_i915_private *dev_priv, bool *unlock) BUG(); } -static void shrinker_unlock(struct drm_i915_private *dev_priv, bool unlock) +static void shrinker_unlock(struct drm_i915_private *i915, bool unlock) { if (!unlock) return; - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&i915->drm.struct_mutex); } static bool swap_available(void) @@ -118,7 +118,7 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj) /** * i915_gem_shrink - Shrink 
buffer object caches - * @dev_priv: i915 device + * @i915: i915 device * @target: amount of memory to make available, in pages * @nr_scanned: optional output for number of pages scanned (incremental) * @flags: control flags for selecting cache types @@ -142,7 +142,7 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj) * The number of pages of backing storage actually released. */ unsigned long -i915_gem_shrink(struct drm_i915_private *dev_priv, +i915_gem_shrink(struct drm_i915_private *i915, unsigned long target, unsigned long *nr_scanned, unsigned flags) @@ -151,15 +151,15 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, struct list_head *list; unsigned int bit; } phases[] = { - { &dev_priv->mm.unbound_list, I915_SHRINK_UNBOUND }, - { &dev_priv->mm.bound_list, I915_SHRINK_BOUND }, + { &i915->mm.unbound_list, I915_SHRINK_UNBOUND }, + { &i915->mm.bound_list, I915_SHRINK_BOUND }, { NULL, 0 }, }, *phase; unsigned long count = 0; unsigned long scanned = 0; bool unlock; - if (!shrinker_lock(dev_priv, &unlock)) + if (!shrinker_lock(i915, &unlock)) return 0; /* @@ -172,10 +172,10 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, * we will free as much as we can and hope to get a second chance. */ if (flags & I915_SHRINK_ACTIVE) - i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED); + i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); - trace_i915_gem_shrink(dev_priv, target, flags); - i915_gem_retire_requests(dev_priv); + trace_i915_gem_shrink(i915, target, flags); + i915_gem_retire_requests(i915); /* * Unbinding of objects will require HW access; Let us not wake the @@ -183,7 +183,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, * we will force the wake during oom-notifier. 
*/ if ((flags & I915_SHRINK_BOUND) && - !intel_runtime_pm_get_if_in_use(dev_priv)) + !intel_runtime_pm_get_if_in_use(i915)) flags &= ~I915_SHRINK_BOUND; /* @@ -221,7 +221,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, * to be able to shrink their pages, so they remain on * the unbound/bound list until actually freed. */ - spin_lock(&dev_priv->mm.obj_lock); + spin_lock(&i915->mm.obj_lock); while (count < target && (obj = list_first_entry_or_null(phase->list, typeof(*obj), @@ -244,7 +244,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, if (!can_release_pages(obj)) continue; - spin_unlock(&dev_priv->mm.obj_lock); + spin_unlock(&i915->mm.obj_lock); if (unsafe_drop_pages(obj)) { /* May arrive from get_pages on another bo */ @@ -258,18 +258,18 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, } scanned += obj->base.size >> PAGE_SHIFT; - spin_lock(&dev_priv->mm.obj_lock); + spin_lock(&i915->mm.obj_lock); } list_splice_tail(&still_in_list, phase->list); - spin_unlock(&dev_priv->mm.obj_lock); + spin_unlock(&i915->mm.obj_lock); } if (flags & I915_SHRINK_BOUND) - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(i915); - i915_gem_retire_requests(dev_priv); + i915_gem_retire_requests(i915); - shrinker_unlock(dev_priv, unlock); + shrinker_unlock(i915, unlock); if (nr_scanned) *nr_scanned += scanned; @@ -278,7 +278,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, /** * i915_gem_shrink_all - Shrink buffer object caches completely - * @dev_priv: i915 device + * @i915: i915 device * * This is a simple wraper around i915_gem_shrink() to aggressively shrink all * caches completely. It also first waits for and retires all outstanding @@ -290,16 +290,16 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, * Returns: * The number of pages of backing storage actually released. 
*/ -unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv) +unsigned long i915_gem_shrink_all(struct drm_i915_private *i915) { unsigned long freed; - intel_runtime_pm_get(dev_priv); - freed = i915_gem_shrink(dev_priv, -1UL, NULL, + intel_runtime_pm_get(i915); + freed = i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_ACTIVE); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(i915); return freed; } @@ -347,53 +347,53 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) static unsigned long i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) { - struct drm_i915_private *dev_priv = + struct drm_i915_private *i915 = container_of(shrinker, struct drm_i915_private, mm.shrinker); unsigned long freed; bool unlock; sc->nr_scanned = 0; - if (!shrinker_lock(dev_priv, &unlock)) + if (!shrinker_lock(i915, &unlock)) return SHRINK_STOP; - freed = i915_gem_shrink(dev_priv, + freed = i915_gem_shrink(i915, sc->nr_to_scan, &sc->nr_scanned, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE); if (freed < sc->nr_to_scan) - freed += i915_gem_shrink(dev_priv, + freed += i915_gem_shrink(i915, sc->nr_to_scan - sc->nr_scanned, &sc->nr_scanned, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND); if (freed < sc->nr_to_scan && current_is_kswapd()) { - intel_runtime_pm_get(dev_priv); - freed += i915_gem_shrink(dev_priv, + intel_runtime_pm_get(i915); + freed += i915_gem_shrink(i915, sc->nr_to_scan - sc->nr_scanned, &sc->nr_scanned, I915_SHRINK_ACTIVE | I915_SHRINK_BOUND | I915_SHRINK_UNBOUND); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(i915); } - shrinker_unlock(dev_priv, unlock); + shrinker_unlock(i915, unlock); return sc->nr_scanned ? 
freed : SHRINK_STOP; } static bool -shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv, bool *unlock, +shrinker_lock_uninterruptible(struct drm_i915_private *i915, bool *unlock, int timeout_ms) { unsigned long timeout = jiffies + msecs_to_jiffies_timeout(timeout_ms); do { - if (i915_gem_wait_for_idle(dev_priv, 0) == 0 && - shrinker_lock(dev_priv, unlock)) + if (i915_gem_wait_for_idle(i915, 0) == 0 && + shrinker_lock(i915, unlock)) break; schedule_timeout_killable(1); @@ -412,32 +412,32 @@ shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv, bool *unlock, static int i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) { - struct drm_i915_private *dev_priv = + struct drm_i915_private *i915 = container_of(nb, struct drm_i915_private, mm.oom_notifier); struct drm_i915_gem_object *obj; unsigned long unevictable, bound, unbound, freed_pages; - freed_pages = i915_gem_shrink_all(dev_priv); + freed_pages = i915_gem_shrink_all(i915); /* Because we may be allocating inside our own driver, we cannot * assert that there are no objects with pinned pages that are not * being pointed to by hardware. 
*/ unbound = bound = unevictable = 0; - spin_lock(&dev_priv->mm.obj_lock); - list_for_each_entry(obj, &dev_priv->mm.unbound_list, mm.link) { + spin_lock(&i915->mm.obj_lock); + list_for_each_entry(obj, &i915->mm.unbound_list, mm.link) { if (!can_release_pages(obj)) unevictable += obj->base.size >> PAGE_SHIFT; else unbound += obj->base.size >> PAGE_SHIFT; } - list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) { + list_for_each_entry(obj, &i915->mm.bound_list, mm.link) { if (!can_release_pages(obj)) unevictable += obj->base.size >> PAGE_SHIFT; else bound += obj->base.size >> PAGE_SHIFT; } - spin_unlock(&dev_priv->mm.obj_lock); + spin_unlock(&i915->mm.obj_lock); if (freed_pages || unbound || bound) pr_info("Purging GPU memory, %lu pages freed, " @@ -455,74 +455,74 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) static int i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr) { - struct drm_i915_private *dev_priv = + struct drm_i915_private *i915 = container_of(nb, struct drm_i915_private, mm.vmap_notifier); struct i915_vma *vma, *next; unsigned long freed_pages = 0; bool unlock; int ret; - if (!shrinker_lock_uninterruptible(dev_priv, &unlock, 5000)) + if (!shrinker_lock_uninterruptible(i915, &unlock, 5000)) return NOTIFY_DONE; /* Force everything onto the inactive lists */ - ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED); + ret = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); if (ret) goto out; - intel_runtime_pm_get(dev_priv); - freed_pages += i915_gem_shrink(dev_priv, -1UL, NULL, + intel_runtime_pm_get(i915); + freed_pages += i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_ACTIVE | I915_SHRINK_VMAPS); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(i915); /* We also want to clear any cached iomaps as they wrap vmap */ list_for_each_entry_safe(vma, next, - &dev_priv->ggtt.base.inactive_list, vm_link) { + &i915->ggtt.base.inactive_list, 
vm_link) { unsigned long count = vma->node.size >> PAGE_SHIFT; if (vma->iomap && i915_vma_unbind(vma) == 0) freed_pages += count; } out: - shrinker_unlock(dev_priv, unlock); + shrinker_unlock(i915, unlock); *(unsigned long *)ptr += freed_pages; return NOTIFY_DONE; } /** - * i915_gem_shrinker_init - Initialize i915 shrinker - * @dev_priv: i915 device + * i915_gem_shrinker_register - Register the i915 shrinker + * @i915: i915 device * * This function registers and sets up the i915 shrinker and OOM handler. */ -void i915_gem_shrinker_init(struct drm_i915_private *dev_priv) +void i915_gem_shrinker_register(struct drm_i915_private *i915) { - dev_priv->mm.shrinker.scan_objects = i915_gem_shrinker_scan; - dev_priv->mm.shrinker.count_objects = i915_gem_shrinker_count; - dev_priv->mm.shrinker.seeks = DEFAULT_SEEKS; - dev_priv->mm.shrinker.batch = 4096; - WARN_ON(register_shrinker(&dev_priv->mm.shrinker)); + i915->mm.shrinker.scan_objects = i915_gem_shrinker_scan; + i915->mm.shrinker.count_objects = i915_gem_shrinker_count; + i915->mm.shrinker.seeks = DEFAULT_SEEKS; + i915->mm.shrinker.batch = 4096; + WARN_ON(register_shrinker(&i915->mm.shrinker)); - dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom; - WARN_ON(register_oom_notifier(&dev_priv->mm.oom_notifier)); + i915->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom; + WARN_ON(register_oom_notifier(&i915->mm.oom_notifier)); - dev_priv->mm.vmap_notifier.notifier_call = i915_gem_shrinker_vmap; - WARN_ON(register_vmap_purge_notifier(&dev_priv->mm.vmap_notifier)); + i915->mm.vmap_notifier.notifier_call = i915_gem_shrinker_vmap; + WARN_ON(register_vmap_purge_notifier(&i915->mm.vmap_notifier)); } /** - * i915_gem_shrinker_cleanup - Clean up i915 shrinker - * @dev_priv: i915 device + * i915_gem_shrinker_unregister - Unregisters the i915 shrinker + * @i915: i915 device * * This function unregisters the i915 shrinker and OOM handler. 
*/ -void i915_gem_shrinker_cleanup(struct drm_i915_private *dev_priv) +void i915_gem_shrinker_unregister(struct drm_i915_private *i915) { - WARN_ON(unregister_vmap_purge_notifier(&dev_priv->mm.vmap_notifier)); - WARN_ON(unregister_oom_notifier(&dev_priv->mm.oom_notifier)); - unregister_shrinker(&dev_priv->mm.shrinker); + WARN_ON(unregister_vmap_purge_notifier(&i915->mm.vmap_notifier)); + WARN_ON(unregister_oom_notifier(&i915->mm.oom_notifier)); + unregister_shrinker(&i915->mm.shrinker); } diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c index c597ce277a04..c01905d6450c 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.c +++ b/drivers/gpu/drm/i915/i915_gem_timeline.c @@ -107,8 +107,8 @@ int i915_gem_timeline_init__global(struct drm_i915_private *i915) } /** - * i915_gem_timelines_mark_idle -- called when the driver idles - * @i915 - the drm_i915_private device + * i915_gem_timelines_park - called when the driver idles + * @i915: the drm_i915_private device * * When the driver is completely idle, we know that all of our sync points * have been signaled and our tracking is then entirely redundant. Any request @@ -116,7 +116,7 @@ int i915_gem_timeline_init__global(struct drm_i915_private *i915) * the fence is signaled and therefore we will not even look them up in the * sync point map. 
*/ -void i915_gem_timelines_mark_idle(struct drm_i915_private *i915) +void i915_gem_timelines_park(struct drm_i915_private *i915) { struct i915_gem_timeline *timeline; int i; diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h index bfb5eb94c64d..b5a22400a01f 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h @@ -93,7 +93,7 @@ int i915_gem_timeline_init(struct drm_i915_private *i915, struct i915_gem_timeline *tl, const char *name); int i915_gem_timeline_init__global(struct drm_i915_private *i915); -void i915_gem_timelines_mark_idle(struct drm_i915_private *i915); +void i915_gem_timelines_park(struct drm_i915_private *i915); void i915_gem_timeline_fini(struct i915_gem_timeline *tl); static inline int __intel_timeline_sync_set(struct intel_timeline *tl, diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 7481c8e1b5a8..48418fb81066 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -791,9 +791,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, print_error_obj(m, dev_priv->engine[i], "WA batchbuffer", ee->wa_batchbuffer); - } - print_error_obj(m, NULL, "Semaphores", error->semaphore); + print_error_obj(m, dev_priv->engine[i], + "NULL context", ee->default_state); + } if (error->overlay) intel_overlay_print_error_state(m, error->overlay); @@ -903,8 +904,6 @@ void __i915_gpu_state_free(struct kref *error_ref) kfree(ee->waiters); } - i915_error_object_free(error->semaphore); - for (i = 0; i < ARRAY_SIZE(error->active_bo); i++) kfree(error->active_bo[i]); kfree(error->pinned_bo); @@ -1116,34 +1115,6 @@ gen8_engine_sync_index(struct intel_engine_cs *engine, return idx; } -static void gen8_record_semaphore_state(struct i915_gpu_state *error, - struct intel_engine_cs *engine, - struct drm_i915_error_engine *ee) -{ - struct drm_i915_private *dev_priv = engine->i915; - struct 
intel_engine_cs *to; - enum intel_engine_id id; - - if (!error->semaphore) - return; - - for_each_engine(to, dev_priv, id) { - int idx; - u16 signal_offset; - u32 *tmp; - - if (engine == to) - continue; - - signal_offset = - (GEN8_SIGNAL_OFFSET(engine, id) & (PAGE_SIZE - 1)) / 4; - tmp = error->semaphore->pages[0]; - idx = gen8_engine_sync_index(engine, to); - - ee->semaphore_mboxes[idx] = tmp[signal_offset]; - } -} - static void gen6_record_semaphore_state(struct intel_engine_cs *engine, struct drm_i915_error_engine *ee) { @@ -1218,7 +1189,6 @@ static void error_record_engine_registers(struct i915_gpu_state *error, if (INTEL_GEN(dev_priv) >= 6) { ee->rc_psmi = I915_READ(RING_PSMI_CTL(engine->mmio_base)); if (INTEL_GEN(dev_priv) >= 8) { - gen8_record_semaphore_state(error, engine, ee); ee->fault_reg = I915_READ(GEN8_RING_FAULT_REG); } else { gen6_record_semaphore_state(engine, ee); @@ -1447,15 +1417,30 @@ static void request_record_user_bo(struct drm_i915_gem_request *request, ee->user_bo_count = count; } +static struct drm_i915_error_object * +capture_object(struct drm_i915_private *dev_priv, + struct drm_i915_gem_object *obj) +{ + if (obj && i915_gem_object_has_pages(obj)) { + struct i915_vma fake = { + .node = { .start = U64_MAX, .size = obj->base.size }, + .size = obj->base.size, + .pages = obj->mm.pages, + .obj = obj, + }; + + return i915_error_object_create(dev_priv, &fake); + } else { + return NULL; + } +} + static void i915_gem_record_rings(struct drm_i915_private *dev_priv, struct i915_gpu_state *error) { struct i915_ggtt *ggtt = &dev_priv->ggtt; int i; - error->semaphore = - i915_error_object_create(dev_priv, dev_priv->semaphore); - for (i = 0; i < I915_NUM_ENGINES; i++) { struct intel_engine_cs *engine = dev_priv->engine[i]; struct drm_i915_error_engine *ee = &error->engine[i]; @@ -1521,6 +1506,9 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, ee->wa_ctx = i915_error_object_create(dev_priv, engine->wa_ctx.vma); + + 
ee->default_state = + capture_object(dev_priv, engine->default_state); } } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 4fb183ae7a07..7cac07db89b9 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3068,7 +3068,7 @@ static void vlv_display_irq_reset(struct drm_i915_private *dev_priv) i9xx_pipestat_irq_reset(dev_priv); GEN3_IRQ_RESET(VLV_); - dev_priv->irq_mask = ~0; + dev_priv->irq_mask = ~0u; } static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv) @@ -3093,7 +3093,7 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv) enable_mask |= I915_DISPLAY_PIPE_C_EVENT_INTERRUPT | I915_LPE_PIPE_C_INTERRUPT; - WARN_ON(dev_priv->irq_mask != ~0); + WARN_ON(dev_priv->irq_mask != ~0u); dev_priv->irq_mask = ~enable_mask; diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index b4faeb6aa2bd..7bc538687871 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -46,17 +46,6 @@ i915_param_named_unsafe(panel_ignore_lid, int, 0600, "Override lid status (0=autodetect, 1=autodetect disabled [default], " "-1=force lid closed, -2=force lid open)"); -i915_param_named_unsafe(semaphores, int, 0400, - "Use semaphores for inter-ring sync " - "(default: -1 (use per-chip defaults))"); - -i915_param_named_unsafe(enable_rc6, int, 0400, - "Enable power-saving render C-state 6. " - "Different stages can be selected via bitmask values " - "(0 = disable; 1 = enable rc6; 2 = enable deep rc6; 4 = enable deepest rc6). " - "For example, 3 would enable rc6 and deep rc6, and 7 would enable everything. " - "default: -1 (use per-chip default)"); - i915_param_named_unsafe(enable_dc, int, 0400, "Enable power-saving display C-states. " "(-1=auto [default]; 0=disable; 1=up to DC5; 2=up to DC6)"); @@ -99,10 +88,6 @@ i915_param_named_unsafe(enable_ppgtt, int, 0400, "Override PPGTT usage. 
" "(-1=auto [default], 0=disabled, 1=aliasing, 2=full, 3=full with extended address space)"); -i915_param_named_unsafe(enable_execlists, int, 0400, - "Override execlists usage. " - "(-1=auto [default], 0=disabled, 1=enabled)"); - i915_param_named_unsafe(enable_psr, int, 0600, "Enable PSR " "(0=disabled, 1=enabled - link mode chosen per-platform, 2=force link-standby mode, 3=force link-off mode) " diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index c7292268ed43..c48c88bb95e8 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -31,15 +31,12 @@ param(char *, vbt_firmware, NULL) \ param(int, modeset, -1) \ param(int, panel_ignore_lid, 1) \ - param(int, semaphores, -1) \ param(int, lvds_channel_mode, 0) \ param(int, panel_use_ssc, -1) \ param(int, vbt_sdvo_panel_type, -1) \ - param(int, enable_rc6, -1) \ param(int, enable_dc, -1) \ param(int, enable_fbc, -1) \ param(int, enable_ppgtt, -1) \ - param(int, enable_execlists, -1) \ param(int, enable_psr, -1) \ param(int, disable_power_well, -1) \ param(int, enable_ips, 1) \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 6458c309c039..fa67d3dde20e 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -209,6 +209,8 @@ static const struct intel_device_info intel_gm45_info __initconst = { .has_hotplug = 1, \ .ring_mask = RENDER_RING | BSD_RING, \ .has_snoop = true, \ + /* ilk does support rc6, but we do not implement [power] contexts */ \ + .has_rc6 = 0, \ GEN_DEFAULT_PIPEOFFSETS, \ GEN_DEFAULT_PAGE_SIZES, \ CURSOR_OFFSETS diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 00be015e01df..f0cfdece14ae 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1216,9 +1216,9 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; - if 
(i915_modparams.enable_execlists) + if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { dev_priv->perf.oa.specific_ctx_id = stream->ctx->hw_id; - else { + } else { struct intel_engine_cs *engine = dev_priv->engine[RCS]; struct intel_ring *ring; int ret; @@ -1262,7 +1262,7 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; - if (i915_modparams.enable_execlists) { + if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; } else { struct intel_engine_cs *engine = dev_priv->engine[RCS]; @@ -1726,10 +1726,9 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr GFP_KERNEL); } - ret = i915_switch_context(req); i915_add_request(req); - return ret; + return 0; } /* @@ -2691,8 +2690,8 @@ err: static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) { - return div_u64(1000000000ULL * (2ULL << exponent), - dev_priv->perf.oa.timestamp_frequency); + return div64_u64(1000000000ULL * (2ULL << exponent), + 1000ULL * INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz); } /** @@ -3007,7 +3006,7 @@ static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr) int i; for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) { - if (flex_eu_regs[i].reg == addr) + if (i915_mmio_reg_offset(flex_eu_regs[i]) == addr) return true; } return false; @@ -3015,38 +3014,47 @@ static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr) static bool gen7_is_valid_b_counter_addr(struct drm_i915_private *dev_priv, u32 addr) { - return (addr >= OASTARTTRIG1.reg && addr <= OASTARTTRIG8.reg) || - (addr >= OAREPORTTRIG1.reg && addr <= OAREPORTTRIG8.reg) || - (addr >= OACEC0_0.reg && addr <= OACEC7_1.reg); + return (addr >= i915_mmio_reg_offset(OASTARTTRIG1) && + addr <= i915_mmio_reg_offset(OASTARTTRIG8)) || + (addr >= i915_mmio_reg_offset(OAREPORTTRIG1) && + addr <= i915_mmio_reg_offset(OAREPORTTRIG8)) || + (addr >= 
i915_mmio_reg_offset(OACEC0_0) && + addr <= i915_mmio_reg_offset(OACEC7_1)); } static bool gen7_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) { - return addr == HALF_SLICE_CHICKEN2.reg || - (addr >= MICRO_BP0_0.reg && addr <= NOA_WRITE.reg) || - (addr >= OA_PERFCNT1_LO.reg && addr <= OA_PERFCNT2_HI.reg) || - (addr >= OA_PERFMATRIX_LO.reg && addr <= OA_PERFMATRIX_HI.reg); + return addr == i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) || + (addr >= i915_mmio_reg_offset(MICRO_BP0_0) && + addr <= i915_mmio_reg_offset(NOA_WRITE)) || + (addr >= i915_mmio_reg_offset(OA_PERFCNT1_LO) && + addr <= i915_mmio_reg_offset(OA_PERFCNT2_HI)) || + (addr >= i915_mmio_reg_offset(OA_PERFMATRIX_LO) && + addr <= i915_mmio_reg_offset(OA_PERFMATRIX_HI)); } static bool gen8_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) { return gen7_is_valid_mux_addr(dev_priv, addr) || - addr == WAIT_FOR_RC6_EXIT.reg || - (addr >= RPM_CONFIG0.reg && addr <= NOA_CONFIG(8).reg); + addr == i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) || + (addr >= i915_mmio_reg_offset(RPM_CONFIG0) && + addr <= i915_mmio_reg_offset(NOA_CONFIG(8))); } static bool gen10_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) { return gen8_is_valid_mux_addr(dev_priv, addr) || - (addr >= OA_PERFCNT3_LO.reg && addr <= OA_PERFCNT4_HI.reg); + (addr >= i915_mmio_reg_offset(OA_PERFCNT3_LO) && + addr <= i915_mmio_reg_offset(OA_PERFCNT4_HI)); } static bool hsw_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) { return gen7_is_valid_mux_addr(dev_priv, addr) || (addr >= 0x25100 && addr <= 0x2FF90) || - (addr >= HSW_MBVID2_NOA0.reg && addr <= HSW_MBVID2_NOA9.reg) || - addr == HSW_MBVID2_MISR0.reg; + (addr >= i915_mmio_reg_offset(HSW_MBVID2_NOA0) && + addr <= i915_mmio_reg_offset(HSW_MBVID2_NOA9)) || + addr == i915_mmio_reg_offset(HSW_MBVID2_MISR0); } static bool chv_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) @@ -3061,14 +3069,14 @@ static uint32_t mask_reg_value(u32 reg, u32 
val) * WaDisableSTUnitPowerOptimization workaround. Make sure the value * programmed by userspace doesn't change this. */ - if (HALF_SLICE_CHICKEN2.reg == reg) + if (i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) == reg) val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE); /* WAIT_FOR_RC6_EXIT has only one bit fullfilling the function * indicated by its name and a bunch of selection fields used by OA * configs. */ - if (WAIT_FOR_RC6_EXIT.reg == reg) + if (i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) == reg) val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE); return val; @@ -3415,8 +3423,6 @@ static struct ctl_table dev_root[] = { */ void i915_perf_init(struct drm_i915_private *dev_priv) { - dev_priv->perf.oa.timestamp_frequency = 0; - if (IS_HASWELL(dev_priv)) { dev_priv->perf.oa.ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; @@ -3432,10 +3438,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) dev_priv->perf.oa.ops.oa_hw_tail_read = gen7_oa_hw_tail_read; - dev_priv->perf.oa.timestamp_frequency = 12500000; - dev_priv->perf.oa.oa_formats = hsw_oa_formats; - } else if (i915_modparams.enable_execlists) { + } else if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { /* Note: that although we could theoretically also support the * legacy ringbuffer mode on BDW (and earlier iterations of * this driver, before upstreaming did this) it didn't seem @@ -3477,23 +3481,6 @@ void i915_perf_init(struct drm_i915_private *dev_priv) dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); } - - switch (dev_priv->info.platform) { - case INTEL_BROADWELL: - dev_priv->perf.oa.timestamp_frequency = 12500000; - break; - case INTEL_BROXTON: - case INTEL_GEMINILAKE: - dev_priv->perf.oa.timestamp_frequency = 19200000; - break; - case INTEL_SKYLAKE: - case INTEL_KABYLAKE: - case INTEL_COFFEELAKE: - dev_priv->perf.oa.timestamp_frequency = 12000000; - break; - default: - break; - } } else if (IS_GEN10(dev_priv)) { dev_priv->perf.oa.ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; 
@@ -3509,15 +3496,10 @@ void i915_perf_init(struct drm_i915_private *dev_priv) dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); - - /* Default frequency, although we need to read it from - * the register as it might vary between parts. - */ - dev_priv->perf.oa.timestamp_frequency = 12000000; } } - if (dev_priv->perf.oa.timestamp_frequency) { + if (dev_priv->perf.oa.ops.enable_metric_set) { hrtimer_init(&dev_priv->perf.oa.poll_check_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); dev_priv->perf.oa.poll_check_timer.function = oa_poll_check_timer_cb; @@ -3527,8 +3509,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) mutex_init(&dev_priv->perf.lock); spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock); - oa_sample_rate_hard_limit = - dev_priv->perf.oa.timestamp_frequency / 2; + oa_sample_rate_hard_limit = 1000 * + (INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz / 2); dev_priv->perf.sysctl_header = register_sysctl_table(dev_root); mutex_init(&dev_priv->perf.metrics_lock); diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c new file mode 100644 index 000000000000..55a8a1e29424 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -0,0 +1,865 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/perf_event.h> +#include <linux/pm_runtime.h> + +#include "i915_drv.h" +#include "i915_pmu.h" +#include "intel_ringbuffer.h" + +/* Frequency for the sampling timer for events which need it. */ +#define FREQUENCY 200 +#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY) + +#define ENGINE_SAMPLE_MASK \ + (BIT(I915_SAMPLE_BUSY) | \ + BIT(I915_SAMPLE_WAIT) | \ + BIT(I915_SAMPLE_SEMA)) + +#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS) + +static cpumask_t i915_pmu_cpumask; + +static u8 engine_config_sample(u64 config) +{ + return config & I915_PMU_SAMPLE_MASK; +} + +static u8 engine_event_sample(struct perf_event *event) +{ + return engine_config_sample(event->attr.config); +} + +static u8 engine_event_class(struct perf_event *event) +{ + return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff; +} + +static u8 engine_event_instance(struct perf_event *event) +{ + return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff; +} + +static bool is_engine_config(u64 config) +{ + return config < __I915_PMU_OTHER(0); +} + +static unsigned int config_enabled_bit(u64 config) +{ + if (is_engine_config(config)) + return engine_config_sample(config); + else + return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0)); +} + +static u64 config_enabled_mask(u64 config) +{ + return BIT_ULL(config_enabled_bit(config)); +} + +static bool is_engine_event(struct perf_event *event) +{ + return is_engine_config(event->attr.config); +} + +static unsigned int 
event_enabled_bit(struct perf_event *event) +{ + return config_enabled_bit(event->attr.config); +} + +static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active) +{ + u64 enable; + + /* + * Only some counters need the sampling timer. + * + * We start with a bitmask of all currently enabled events. + */ + enable = i915->pmu.enable; + + /* + * Mask out all the ones which do not need the timer, or in + * other words keep all the ones that could need the timer. + */ + enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) | + config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) | + ENGINE_SAMPLE_MASK; + + /* + * When the GPU is idle per-engine counters do not need to be + * running so clear those bits out. + */ + if (!gpu_active) + enable &= ~ENGINE_SAMPLE_MASK; + /* + * Also, if software busyness tracking is available, we do not + * need the timer for the I915_SAMPLE_BUSY counter. + * + * Use RCS as proxy for all engines. + */ + else if (intel_engine_supports_stats(i915->engine[RCS])) + enable &= ~BIT(I915_SAMPLE_BUSY); + + /* + * If some bits remain it means we need the sampling timer running. + */ + return enable; +} + +void i915_pmu_gt_parked(struct drm_i915_private *i915) +{ + if (!i915->pmu.base.event_init) + return; + + spin_lock_irq(&i915->pmu.lock); + /* + * Signal sampling timer to stop if only engine events are enabled and + * GPU went idle. + */ + i915->pmu.timer_enabled = pmu_needs_timer(i915, false); + spin_unlock_irq(&i915->pmu.lock); +} + +static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915) +{ + if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) { + i915->pmu.timer_enabled = true; + hrtimer_start_range_ns(&i915->pmu.timer, + ns_to_ktime(PERIOD), 0, + HRTIMER_MODE_REL_PINNED); + } +} + +void i915_pmu_gt_unparked(struct drm_i915_private *i915) +{ + if (!i915->pmu.base.event_init) + return; + + spin_lock_irq(&i915->pmu.lock); + /* + * Re-enable sampling timer when GPU goes active.
+ */ + __i915_pmu_maybe_start_timer(i915); + spin_unlock_irq(&i915->pmu.lock); +} + +static bool grab_forcewake(struct drm_i915_private *i915, bool fw) +{ + if (!fw) + intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); + + return true; +} + +static void +update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val) +{ + sample->cur += mul_u32_u32(val, unit); +} + +static void engines_sample(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + bool fw = false; + + if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0) + return; + + if (!dev_priv->gt.awake) + return; + + if (!intel_runtime_pm_get_if_in_use(dev_priv)) + return; + + for_each_engine(engine, dev_priv, id) { + u32 current_seqno = intel_engine_get_seqno(engine); + u32 last_seqno = intel_engine_last_submit(engine); + u32 val; + + val = !i915_seqno_passed(current_seqno, last_seqno); + + update_sample(&engine->pmu.sample[I915_SAMPLE_BUSY], + PERIOD, val); + + if (val && (engine->pmu.enable & + (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) { + fw = grab_forcewake(dev_priv, fw); + + val = I915_READ_FW(RING_CTL(engine->mmio_base)); + } else { + val = 0; + } + + update_sample(&engine->pmu.sample[I915_SAMPLE_WAIT], + PERIOD, !!(val & RING_WAIT)); + + update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA], + PERIOD, !!(val & RING_WAIT_SEMAPHORE)); + } + + if (fw) + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + + intel_runtime_pm_put(dev_priv); +} + +static void frequency_sample(struct drm_i915_private *dev_priv) +{ + if (dev_priv->pmu.enable & + config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) { + u32 val; + + val = dev_priv->gt_pm.rps.cur_freq; + if (dev_priv->gt.awake && + intel_runtime_pm_get_if_in_use(dev_priv)) { + val = intel_get_cagf(dev_priv, + I915_READ_NOTRACE(GEN6_RPSTAT1)); + intel_runtime_pm_put(dev_priv); + } + + update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT], + 1, intel_gpu_freq(dev_priv, val)); + } + + if (dev_priv->pmu.enable & + 
config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) { + update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ], 1, + intel_gpu_freq(dev_priv, + dev_priv->gt_pm.rps.cur_freq)); + } +} + +static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer) +{ + struct drm_i915_private *i915 = + container_of(hrtimer, struct drm_i915_private, pmu.timer); + + if (!READ_ONCE(i915->pmu.timer_enabled)) + return HRTIMER_NORESTART; + + engines_sample(i915); + frequency_sample(i915); + + hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD)); + return HRTIMER_RESTART; +} + +static u64 count_interrupts(struct drm_i915_private *i915) +{ + /* open-coded kstat_irqs() */ + struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq); + u64 sum = 0; + int cpu; + + if (!desc || !desc->kstat_irqs) + return 0; + + for_each_possible_cpu(cpu) + sum += *per_cpu_ptr(desc->kstat_irqs, cpu); + + return sum; +} + +static void i915_pmu_event_destroy(struct perf_event *event) +{ + WARN_ON(event->parent); +} + +static int engine_event_init(struct perf_event *event) +{ + struct drm_i915_private *i915 = + container_of(event->pmu, typeof(*i915), pmu.base); + + if (!intel_engine_lookup_user(i915, engine_event_class(event), + engine_event_instance(event))) + return -ENODEV; + + switch (engine_event_sample(event)) { + case I915_SAMPLE_BUSY: + case I915_SAMPLE_WAIT: + break; + case I915_SAMPLE_SEMA: + if (INTEL_GEN(i915) < 6) + return -ENODEV; + break; + default: + return -ENOENT; + } + + return 0; +} + +static int i915_pmu_event_init(struct perf_event *event) +{ + struct drm_i915_private *i915 = + container_of(event->pmu, typeof(*i915), pmu.base); + int ret; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* unsupported modes and filters */ + if (event->attr.sample_period) /* no sampling */ + return -EINVAL; + + if (has_branch_stack(event)) + return -EOPNOTSUPP; + + if (event->cpu < 0) + return -EINVAL; + + /* only allow running on one cpu at a time */ + if (!cpumask_test_cpu(event->cpu, 
&i915_pmu_cpumask)) + return -EINVAL; + + if (is_engine_event(event)) { + ret = engine_event_init(event); + } else { + ret = 0; + switch (event->attr.config) { + case I915_PMU_ACTUAL_FREQUENCY: + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + /* Requires a mutex for sampling! */ + ret = -ENODEV; + case I915_PMU_REQUESTED_FREQUENCY: + if (INTEL_GEN(i915) < 6) + ret = -ENODEV; + break; + case I915_PMU_INTERRUPTS: + break; + case I915_PMU_RC6_RESIDENCY: + if (!HAS_RC6(i915)) + ret = -ENODEV; + break; + default: + ret = -ENOENT; + break; + } + } + if (ret) + return ret; + + if (!event->parent) + event->destroy = i915_pmu_event_destroy; + + return 0; +} + +static u64 __i915_pmu_event_read(struct perf_event *event) +{ + struct drm_i915_private *i915 = + container_of(event->pmu, typeof(*i915), pmu.base); + u64 val = 0; + + if (is_engine_event(event)) { + u8 sample = engine_event_sample(event); + struct intel_engine_cs *engine; + + engine = intel_engine_lookup_user(i915, + engine_event_class(event), + engine_event_instance(event)); + + if (WARN_ON_ONCE(!engine)) { + /* Do nothing */ + } else if (sample == I915_SAMPLE_BUSY && + engine->pmu.busy_stats) { + val = ktime_to_ns(intel_engine_get_busy_time(engine)); + } else { + val = engine->pmu.sample[sample].cur; + } + } else { + switch (event->attr.config) { + case I915_PMU_ACTUAL_FREQUENCY: + val = + div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur, + FREQUENCY); + break; + case I915_PMU_REQUESTED_FREQUENCY: + val = + div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur, + FREQUENCY); + break; + case I915_PMU_INTERRUPTS: + val = count_interrupts(i915); + break; + case I915_PMU_RC6_RESIDENCY: + intel_runtime_pm_get(i915); + val = intel_rc6_residency_ns(i915, + IS_VALLEYVIEW(i915) ? 
+ VLV_GT_RENDER_RC6 : + GEN6_GT_GFX_RC6); + if (HAS_RC6p(i915)) + val += intel_rc6_residency_ns(i915, + GEN6_GT_GFX_RC6p); + if (HAS_RC6pp(i915)) + val += intel_rc6_residency_ns(i915, + GEN6_GT_GFX_RC6pp); + intel_runtime_pm_put(i915); + break; + } + } + + return val; +} + +static void i915_pmu_event_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 prev, new; + +again: + prev = local64_read(&hwc->prev_count); + new = __i915_pmu_event_read(event); + + if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev) + goto again; + + local64_add(new - prev, &event->count); +} + +static bool engine_needs_busy_stats(struct intel_engine_cs *engine) +{ + return intel_engine_supports_stats(engine) && + (engine->pmu.enable & BIT(I915_SAMPLE_BUSY)); +} + +static void i915_pmu_enable(struct perf_event *event) +{ + struct drm_i915_private *i915 = + container_of(event->pmu, typeof(*i915), pmu.base); + unsigned int bit = event_enabled_bit(event); + unsigned long flags; + + spin_lock_irqsave(&i915->pmu.lock, flags); + + /* + * Update the bitmask of enabled events and increment + * the event reference counter. + */ + GEM_BUG_ON(bit >= I915_PMU_MASK_BITS); + GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0); + i915->pmu.enable |= BIT_ULL(bit); + i915->pmu.enable_count[bit]++; + + /* + * Start the sampling timer if needed and not already enabled. + */ + __i915_pmu_maybe_start_timer(i915); + + /* + * For per-engine events the bitmask and reference counting + * is stored per engine. 
+ */ + if (is_engine_event(event)) { + u8 sample = engine_event_sample(event); + struct intel_engine_cs *engine; + + engine = intel_engine_lookup_user(i915, + engine_event_class(event), + engine_event_instance(event)); + GEM_BUG_ON(!engine); + engine->pmu.enable |= BIT(sample); + + GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS); + GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0); + if (engine->pmu.enable_count[sample]++ == 0) { + /* + * Enable engine busy stats tracking if needed or + * alternatively cancel the scheduled disable. + * + * If the delayed disable was pending, cancel it and + * in this case do not enable since it already is. + */ + if (engine_needs_busy_stats(engine) && + !engine->pmu.busy_stats) { + engine->pmu.busy_stats = true; + if (!cancel_delayed_work(&engine->pmu.disable_busy_stats)) + intel_enable_engine_stats(engine); + } + } + } + + /* + * Store the current counter value so we can report the correct delta + * for all listeners. Even when the event was already enabled and has + * an existing non-zero value. 
+ */ + local64_set(&event->hw.prev_count, __i915_pmu_event_read(event)); + + spin_unlock_irqrestore(&i915->pmu.lock, flags); +} + +static void __disable_busy_stats(struct work_struct *work) +{ + struct intel_engine_cs *engine = + container_of(work, typeof(*engine), pmu.disable_busy_stats.work); + + intel_disable_engine_stats(engine); +} + +static void i915_pmu_disable(struct perf_event *event) +{ + struct drm_i915_private *i915 = + container_of(event->pmu, typeof(*i915), pmu.base); + unsigned int bit = event_enabled_bit(event); + unsigned long flags; + + spin_lock_irqsave(&i915->pmu.lock, flags); + + if (is_engine_event(event)) { + u8 sample = engine_event_sample(event); + struct intel_engine_cs *engine; + + engine = intel_engine_lookup_user(i915, + engine_event_class(event), + engine_event_instance(event)); + GEM_BUG_ON(!engine); + GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS); + GEM_BUG_ON(engine->pmu.enable_count[sample] == 0); + /* + * Decrement the reference count and clear the enabled + * bitmask when the last listener on an event goes away. + */ + if (--engine->pmu.enable_count[sample] == 0) { + engine->pmu.enable &= ~BIT(sample); + if (!engine_needs_busy_stats(engine) && + engine->pmu.busy_stats) { + engine->pmu.busy_stats = false; + /* + * We request a delayed disable to handle the + * rapid on/off cycles on events, which can + * happen when tools like perf stat start, in a + * nicer way. + * + * In addition, this also helps with busy stats + * accuracy with background CPU offline/online + * migration events. + */ + queue_delayed_work(system_wq, + &engine->pmu.disable_busy_stats, + round_jiffies_up_relative(HZ)); + } + } + } + + GEM_BUG_ON(bit >= I915_PMU_MASK_BITS); + GEM_BUG_ON(i915->pmu.enable_count[bit] == 0); + /* + * Decrement the reference count and clear the enabled + * bitmask when the last listener on an event goes away. 
+ */ + if (--i915->pmu.enable_count[bit] == 0) { + i915->pmu.enable &= ~BIT_ULL(bit); + i915->pmu.timer_enabled &= pmu_needs_timer(i915, true); + } + + spin_unlock_irqrestore(&i915->pmu.lock, flags); +} + +static void i915_pmu_event_start(struct perf_event *event, int flags) +{ + i915_pmu_enable(event); + event->hw.state = 0; +} + +static void i915_pmu_event_stop(struct perf_event *event, int flags) +{ + if (flags & PERF_EF_UPDATE) + i915_pmu_event_read(event); + i915_pmu_disable(event); + event->hw.state = PERF_HES_STOPPED; +} + +static int i915_pmu_event_add(struct perf_event *event, int flags) +{ + if (flags & PERF_EF_START) + i915_pmu_event_start(event, flags); + + return 0; +} + +static void i915_pmu_event_del(struct perf_event *event, int flags) +{ + i915_pmu_event_stop(event, PERF_EF_UPDATE); +} + +static int i915_pmu_event_event_idx(struct perf_event *event) +{ + return 0; +} + +struct i915_str_attribute { + struct device_attribute attr; + const char *str; +}; + +static ssize_t i915_pmu_format_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i915_str_attribute *eattr; + + eattr = container_of(attr, struct i915_str_attribute, attr); + return sprintf(buf, "%s\n", eattr->str); +} + +#define I915_PMU_FORMAT_ATTR(_name, _config) \ + (&((struct i915_str_attribute[]) { \ + { .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \ + .str = _config, } \ + })[0].attr.attr) + +static struct attribute *i915_pmu_format_attrs[] = { + I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"), + NULL, +}; + +static const struct attribute_group i915_pmu_format_attr_group = { + .name = "format", + .attrs = i915_pmu_format_attrs, +}; + +struct i915_ext_attribute { + struct device_attribute attr; + unsigned long val; +}; + +static ssize_t i915_pmu_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i915_ext_attribute *eattr; + + eattr = container_of(attr, struct i915_ext_attribute, attr); + return sprintf(buf, 
"config=0x%lx\n", eattr->val); +} + +#define I915_EVENT_ATTR(_name, _config) \ + (&((struct i915_ext_attribute[]) { \ + { .attr = __ATTR(_name, 0444, i915_pmu_event_show, NULL), \ + .val = _config, } \ + })[0].attr.attr) + +#define I915_EVENT_STR(_name, _str) \ + (&((struct perf_pmu_events_attr[]) { \ + { .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \ + .id = 0, \ + .event_str = _str, } \ + })[0].attr.attr) + +#define I915_EVENT(_name, _config, _unit) \ + I915_EVENT_ATTR(_name, _config), \ + I915_EVENT_STR(_name.unit, _unit) + +#define I915_ENGINE_EVENT(_name, _class, _instance, _sample) \ + I915_EVENT_ATTR(_name, __I915_PMU_ENGINE(_class, _instance, _sample)), \ + I915_EVENT_STR(_name.unit, "ns") + +#define I915_ENGINE_EVENTS(_name, _class, _instance) \ + I915_ENGINE_EVENT(_name##_instance-busy, _class, _instance, I915_SAMPLE_BUSY), \ + I915_ENGINE_EVENT(_name##_instance-sema, _class, _instance, I915_SAMPLE_SEMA), \ + I915_ENGINE_EVENT(_name##_instance-wait, _class, _instance, I915_SAMPLE_WAIT) + +static struct attribute *i915_pmu_events_attrs[] = { + I915_ENGINE_EVENTS(rcs, I915_ENGINE_CLASS_RENDER, 0), + I915_ENGINE_EVENTS(bcs, I915_ENGINE_CLASS_COPY, 0), + I915_ENGINE_EVENTS(vcs, I915_ENGINE_CLASS_VIDEO, 0), + I915_ENGINE_EVENTS(vcs, I915_ENGINE_CLASS_VIDEO, 1), + I915_ENGINE_EVENTS(vecs, I915_ENGINE_CLASS_VIDEO_ENHANCE, 0), + + I915_EVENT(actual-frequency, I915_PMU_ACTUAL_FREQUENCY, "MHz"), + I915_EVENT(requested-frequency, I915_PMU_REQUESTED_FREQUENCY, "MHz"), + + I915_EVENT_ATTR(interrupts, I915_PMU_INTERRUPTS), + + I915_EVENT(rc6-residency, I915_PMU_RC6_RESIDENCY, "ns"), + + NULL, +}; + +static const struct attribute_group i915_pmu_events_attr_group = { + .name = "events", + .attrs = i915_pmu_events_attrs, +}; + +static ssize_t +i915_pmu_get_attr_cpumask(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask); +} + +static DEVICE_ATTR(cpumask, 0444, 
i915_pmu_get_attr_cpumask, NULL); + +static struct attribute *i915_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group i915_pmu_cpumask_attr_group = { + .attrs = i915_cpumask_attrs, +}; + +static const struct attribute_group *i915_pmu_attr_groups[] = { + &i915_pmu_format_attr_group, + &i915_pmu_events_attr_group, + &i915_pmu_cpumask_attr_group, + NULL +}; + +static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) +{ + struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node); + + GEM_BUG_ON(!pmu->base.event_init); + + /* Select the first online CPU as a designated reader. */ + if (!cpumask_weight(&i915_pmu_cpumask)) + cpumask_set_cpu(cpu, &i915_pmu_cpumask); + + return 0; +} + +static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) +{ + struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node); + unsigned int target; + + GEM_BUG_ON(!pmu->base.event_init); + + if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) { + target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu); + /* Migrate events if there is a valid target */ + if (target < nr_cpu_ids) { + cpumask_set_cpu(target, &i915_pmu_cpumask); + perf_pmu_migrate_context(&pmu->base, cpu, target); + } + } + + return 0; +} + +static enum cpuhp_state cpuhp_slot = CPUHP_INVALID; + +static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915) +{ + enum cpuhp_state slot; + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "perf/x86/intel/i915:online", + i915_pmu_cpu_online, + i915_pmu_cpu_offline); + if (ret < 0) + return ret; + + slot = ret; + ret = cpuhp_state_add_instance(slot, &i915->pmu.node); + if (ret) { + cpuhp_remove_multi_state(slot); + return ret; + } + + cpuhp_slot = slot; + return 0; +} + +static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915) +{ + WARN_ON(cpuhp_slot == CPUHP_INVALID); + WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node)); + 
cpuhp_remove_multi_state(cpuhp_slot); +} + +void i915_pmu_register(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + int ret; + + if (INTEL_GEN(i915) <= 2) { + DRM_INFO("PMU not supported for this GPU."); + return; + } + + i915->pmu.base.attr_groups = i915_pmu_attr_groups; + i915->pmu.base.task_ctx_nr = perf_invalid_context; + i915->pmu.base.event_init = i915_pmu_event_init; + i915->pmu.base.add = i915_pmu_event_add; + i915->pmu.base.del = i915_pmu_event_del; + i915->pmu.base.start = i915_pmu_event_start; + i915->pmu.base.stop = i915_pmu_event_stop; + i915->pmu.base.read = i915_pmu_event_read; + i915->pmu.base.event_idx = i915_pmu_event_event_idx; + + spin_lock_init(&i915->pmu.lock); + hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + i915->pmu.timer.function = i915_sample; + + for_each_engine(engine, i915, id) + INIT_DELAYED_WORK(&engine->pmu.disable_busy_stats, + __disable_busy_stats); + + ret = perf_pmu_register(&i915->pmu.base, "i915", -1); + if (ret) + goto err; + + ret = i915_pmu_register_cpuhp_state(i915); + if (ret) + goto err_unreg; + + return; + +err_unreg: + perf_pmu_unregister(&i915->pmu.base); +err: + i915->pmu.base.event_init = NULL; + DRM_NOTE("Failed to register PMU! 
(err=%d)\n", ret); +} + +void i915_pmu_unregister(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + if (!i915->pmu.base.event_init) + return; + + WARN_ON(i915->pmu.enable); + + hrtimer_cancel(&i915->pmu.timer); + + for_each_engine(engine, i915, id) { + GEM_BUG_ON(engine->pmu.busy_stats); + flush_delayed_work(&engine->pmu.disable_busy_stats); + } + + i915_pmu_unregister_cpuhp_state(i915); + + perf_pmu_unregister(&i915->pmu.base); + i915->pmu.base.event_init = NULL; +} diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h new file mode 100644 index 000000000000..40c154d13565 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -0,0 +1,111 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ +#ifndef __I915_PMU_H__ +#define __I915_PMU_H__ + +enum { + __I915_SAMPLE_FREQ_ACT = 0, + __I915_SAMPLE_FREQ_REQ, + __I915_NUM_PMU_SAMPLERS +}; + +/** + * How many different events we track in the global PMU mask. + * + * It is also used to know the needed number of event reference counters. + */ +#define I915_PMU_MASK_BITS \ + ((1 << I915_PMU_SAMPLE_BITS) + \ + (I915_PMU_LAST + 1 - __I915_PMU_OTHER(0))) + +struct i915_pmu_sample { + u64 cur; +}; + +struct i915_pmu { + /** + * @node: List node for CPU hotplug handling. + */ + struct hlist_node node; + /** + * @base: PMU base. + */ + struct pmu base; + /** + * @lock: Lock protecting enable mask and ref count handling. + */ + spinlock_t lock; + /** + * @timer: Timer for internal i915 PMU sampling. + */ + struct hrtimer timer; + /** + * @enable: Bitmask of all currently enabled events. + * + * Bits are derived from uAPI event numbers in a way that low 16 bits + * correspond to engine event _sample_ _type_ (I915_SAMPLE_QUEUED is + * bit 0), and higher bits correspond to other events (for instance + * I915_PMU_ACTUAL_FREQUENCY is bit 16 etc). + * + * In other words, low 16 bits are not per engine but per engine + * sampler type, while the upper bits are directly mapped to other + * event types. + */ + u64 enable; + /** + * @enable_count: Reference counts for the enabled events. + * + * Array indices are mapped in the same way as bits in the @enable field + * and they are used to control sampling on/off when multiple clients + * are using the PMU API. + */ + unsigned int enable_count[I915_PMU_MASK_BITS]; + /** + * @timer_enabled: Should the internal sampling timer be running. + */ + bool timer_enabled; + /** + * @sample: Current (raw) counters for sampling events. + * + * These counters are updated from the i915 PMU sampling timer. + * + * Only global counters are held here, while the per-engine ones are in + * struct intel_engine_cs.
+ */ + struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS]; +}; + +#ifdef CONFIG_PERF_EVENTS +void i915_pmu_register(struct drm_i915_private *i915); +void i915_pmu_unregister(struct drm_i915_private *i915); +void i915_pmu_gt_parked(struct drm_i915_private *i915); +void i915_pmu_gt_unparked(struct drm_i915_private *i915); +#else +static inline void i915_pmu_register(struct drm_i915_private *i915) {} +static inline void i915_pmu_unregister(struct drm_i915_private *i915) {} +static inline void i915_pmu_gt_parked(struct drm_i915_private *i915) {} +static inline void i915_pmu_gt_unparked(struct drm_i915_private *i915) {} +#endif + +#endif diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 96c80fa0fcac..09bf043c1c2e 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -186,6 +186,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define VIDEO_ENHANCEMENT_CLASS 2 #define COPY_ENGINE_CLASS 3 #define OTHER_CLASS 4 +#define MAX_ENGINE_CLASS 4 + +#define MAX_ENGINE_INSTANCE 1 /* PCI config space */ diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 791759f632e1..c74a20b80182 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -42,14 +42,30 @@ static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev) static u32 calc_residency(struct drm_i915_private *dev_priv, i915_reg_t reg) { - return DIV_ROUND_CLOSEST_ULL(intel_rc6_residency_us(dev_priv, reg), - 1000); + u64 res; + + intel_runtime_pm_get(dev_priv); + res = intel_rc6_residency_us(dev_priv, reg); + intel_runtime_pm_put(dev_priv); + + return DIV_ROUND_CLOSEST_ULL(res, 1000); } static ssize_t show_rc6_mask(struct device *kdev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%x\n", intel_rc6_enabled()); + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + unsigned int mask; + + mask = 0; + if (HAS_RC6(dev_priv)) 
+ mask |= BIT(0); + if (HAS_RC6p(dev_priv)) + mask |= BIT(1); + if (HAS_RC6pp(dev_priv)) + mask |= BIT(2); + + return snprintf(buf, PAGE_SIZE, "%x\n", mask); } static ssize_t @@ -252,14 +268,9 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, freq = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); ret = intel_gpu_freq(dev_priv, (freq >> 8) & 0xff); } else { - u32 rpstat = I915_READ(GEN6_RPSTAT1); - if (INTEL_GEN(dev_priv) >= 9) - ret = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; - else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - ret = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT; - else - ret = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; - ret = intel_gpu_freq(dev_priv, ret); + ret = intel_gpu_freq(dev_priv, + intel_get_cagf(dev_priv, + I915_READ(GEN6_RPSTAT1))); } mutex_unlock(&dev_priv->pcu_lock); diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index e8884c2ade98..9c5ceb98d48f 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -1896,7 +1896,7 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) min_cdclk = intel_pixel_rate_to_cdclk(dev_priv, crtc_state->pixel_rate); /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ - if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled) + if (IS_BROADWELL(dev_priv) && hsw_crtc_state_ips_capable(crtc_state)) min_cdclk = DIV_ROUND_UP(min_cdclk * 100, 95); /* BSpec says "Do not use DisplayPort with CDCLK less than 432 MHz, diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 77d8b3d483ca..07e4f7bc4412 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -40,9 +40,9 @@ #define I915_CSR_CNL "i915/cnl_dmc_ver1_06.bin" #define CNL_CSR_VERSION_REQUIRED CSR_VERSION(1, 6) -#define I915_CSR_KBL "i915/kbl_dmc_ver1_01.bin" +#define I915_CSR_KBL "i915/kbl_dmc_ver1_04.bin" MODULE_FIRMWARE(I915_CSR_KBL); -#define KBL_CSR_VERSION_REQUIRED 
CSR_VERSION(1, 1) +#define KBL_CSR_VERSION_REQUIRED CSR_VERSION(1, 4) #define I915_CSR_SKL "i915/skl_dmc_ver1_26.bin" MODULE_FIRMWARE(I915_CSR_SKL); diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index eff3b51872eb..369f780588fb 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2098,6 +2098,7 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, if (IS_CANNONLAKE(dev_priv)) { /* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. */ val = I915_READ(DPCLKA_CFGCR0); + val &= ~DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port); val |= DPCLKA_CFGCR0_DDI_CLK_SEL(pll->id, port); I915_WRITE(DPCLKA_CFGCR0, val); @@ -2513,17 +2514,17 @@ void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp) udelay(600); } -bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv, - struct intel_crtc *intel_crtc) +static bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv, + enum transcoder cpu_transcoder) { - u32 temp; + if (cpu_transcoder == TRANSCODER_EDP) + return false; - if (intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_AUDIO)) { - temp = I915_READ(HSW_AUD_PIN_ELD_CP_VLD); - if (temp & AUDIO_OUTPUT_ENABLE(intel_crtc->pipe)) - return true; - } - return false; + if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_AUDIO)) + return false; + + return I915_READ(HSW_AUD_PIN_ELD_CP_VLD) & + AUDIO_OUTPUT_ENABLE(cpu_transcoder); } void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv, @@ -2616,7 +2617,7 @@ void intel_ddi_get_config(struct intel_encoder *encoder, } pipe_config->has_audio = - intel_ddi_is_audio_enabled(dev_priv, intel_crtc); + intel_ddi_is_audio_enabled(dev_priv, cpu_transcoder); if (encoder->type == INTEL_OUTPUT_EDP && dev_priv->vbt.edp.bpp && pipe_config->pipe_bpp > dev_priv->vbt.edp.bpp) { diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 2007c69468b9..1f7e312d0d0d 100644 --- 
a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -489,7 +489,7 @@ static const struct intel_limit intel_limits_bxt = { }; static bool -needs_modeset(struct drm_crtc_state *state) +needs_modeset(const struct drm_crtc_state *state) { return drm_atomic_crtc_needs_modeset(state); } @@ -998,7 +998,8 @@ enum transcoder intel_pipe_to_cpu_transcoder(struct drm_i915_private *dev_priv, return crtc->config->cpu_transcoder; } -static bool pipe_dsl_stopped(struct drm_i915_private *dev_priv, enum pipe pipe) +static bool pipe_scanline_is_moving(struct drm_i915_private *dev_priv, + enum pipe pipe) { i915_reg_t reg = PIPEDSL(pipe); u32 line1, line2; @@ -1013,32 +1014,38 @@ static bool pipe_dsl_stopped(struct drm_i915_private *dev_priv, enum pipe pipe) msleep(5); line2 = I915_READ(reg) & line_mask; - return line1 == line2; + return line1 != line2; } -/* - * intel_wait_for_pipe_off - wait for pipe to turn off - * @crtc: crtc whose pipe to wait for - * - * After disabling a pipe, we can't wait for vblank in the usual way, - * spinning on the vblank interrupt status bit, since we won't actually - * see an interrupt when the pipe is disabled. - * - * On Gen4 and above: - * wait for the pipe register state bit to turn off - * - * Otherwise: - * wait for the display line value to settle (it usually - * ends up stopping at the start of the next frame). 
- * - */ -static void intel_wait_for_pipe_off(struct intel_crtc *crtc) +static void wait_for_pipe_scanline_moving(struct intel_crtc *crtc, bool state) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; enum pipe pipe = crtc->pipe; + /* Wait for the display line to settle/start moving */ + if (wait_for(pipe_scanline_is_moving(dev_priv, pipe) == state, 100)) + DRM_ERROR("pipe %c scanline %s wait timed out\n", + pipe_name(pipe), onoff(state)); +} + +static void intel_wait_for_pipe_scanline_stopped(struct intel_crtc *crtc) +{ + wait_for_pipe_scanline_moving(crtc, false); +} + +static void intel_wait_for_pipe_scanline_moving(struct intel_crtc *crtc) +{ + wait_for_pipe_scanline_moving(crtc, true); +} + +static void +intel_wait_for_pipe_off(const struct intel_crtc_state *old_crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + if (INTEL_GEN(dev_priv) >= 4) { + enum transcoder cpu_transcoder = old_crtc_state->cpu_transcoder; i915_reg_t reg = PIPECONF(cpu_transcoder); /* Wait for the Pipe State to go off */ @@ -1047,9 +1054,7 @@ static void intel_wait_for_pipe_off(struct intel_crtc *crtc) 100)) WARN(1, "pipe_off wait timed out\n"); } else { - /* Wait for the display line to settle */ - if (wait_for(pipe_dsl_stopped(dev_priv, pipe), 100)) - WARN(1, "pipe_off wait timed out\n"); + intel_wait_for_pipe_scanline_stopped(crtc); } } @@ -1190,23 +1195,6 @@ void assert_panel_unlocked(struct drm_i915_private *dev_priv, enum pipe pipe) pipe_name(pipe)); } -static void assert_cursor(struct drm_i915_private *dev_priv, - enum pipe pipe, bool state) -{ - bool cur_state; - - if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) - cur_state = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE; - else - cur_state = I915_READ(CURCNTR(pipe)) & CURSOR_MODE; - - I915_STATE_WARN(cur_state != state, - "cursor on pipe %c assertion failure 
(expected %s, current %s)\n", - pipe_name(pipe), onoff(state), onoff(cur_state)); -} -#define assert_cursor_enabled(d, p) assert_cursor(d, p, true) -#define assert_cursor_disabled(d, p) assert_cursor(d, p, false) - void assert_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, bool state) { @@ -1234,77 +1222,25 @@ void assert_pipe(struct drm_i915_private *dev_priv, pipe_name(pipe), onoff(state), onoff(cur_state)); } -static void assert_plane(struct drm_i915_private *dev_priv, - enum plane plane, bool state) +static void assert_plane(struct intel_plane *plane, bool state) { - u32 val; - bool cur_state; + bool cur_state = plane->get_hw_state(plane); - val = I915_READ(DSPCNTR(plane)); - cur_state = !!(val & DISPLAY_PLANE_ENABLE); I915_STATE_WARN(cur_state != state, - "plane %c assertion failure (expected %s, current %s)\n", - plane_name(plane), onoff(state), onoff(cur_state)); + "%s assertion failure (expected %s, current %s)\n", + plane->base.name, onoff(state), onoff(cur_state)); } -#define assert_plane_enabled(d, p) assert_plane(d, p, true) -#define assert_plane_disabled(d, p) assert_plane(d, p, false) +#define assert_plane_enabled(p) assert_plane(p, true) +#define assert_plane_disabled(p) assert_plane(p, false) -static void assert_planes_disabled(struct drm_i915_private *dev_priv, - enum pipe pipe) +static void assert_planes_disabled(struct intel_crtc *crtc) { - int i; - - /* Primary planes are fixed to pipes on gen4+ */ - if (INTEL_GEN(dev_priv) >= 4) { - u32 val = I915_READ(DSPCNTR(pipe)); - I915_STATE_WARN(val & DISPLAY_PLANE_ENABLE, - "plane %c assertion failure, should be disabled but not\n", - plane_name(pipe)); - return; - } - - /* Need to check both planes against the pipe */ - for_each_pipe(dev_priv, i) { - u32 val = I915_READ(DSPCNTR(i)); - enum pipe cur_pipe = (val & DISPPLANE_SEL_PIPE_MASK) >> - DISPPLANE_SEL_PIPE_SHIFT; - I915_STATE_WARN((val & DISPLAY_PLANE_ENABLE) && pipe == cur_pipe, - "plane %c assertion failure, should be off on pipe %c but 
is still active\n", - plane_name(i), pipe_name(pipe)); - } -} - -static void assert_sprites_disabled(struct drm_i915_private *dev_priv, - enum pipe pipe) -{ - int sprite; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_plane *plane; - if (INTEL_GEN(dev_priv) >= 9) { - for_each_sprite(dev_priv, pipe, sprite) { - u32 val = I915_READ(PLANE_CTL(pipe, sprite)); - I915_STATE_WARN(val & PLANE_CTL_ENABLE, - "plane %d assertion failure, should be off on pipe %c but is still active\n", - sprite, pipe_name(pipe)); - } - } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - for_each_sprite(dev_priv, pipe, sprite) { - u32 val = I915_READ(SPCNTR(pipe, PLANE_SPRITE0 + sprite)); - I915_STATE_WARN(val & SP_ENABLE, - "sprite %c assertion failure, should be off on pipe %c but is still active\n", - sprite_name(pipe, sprite), pipe_name(pipe)); - } - } else if (INTEL_GEN(dev_priv) >= 7) { - u32 val = I915_READ(SPRCTL(pipe)); - I915_STATE_WARN(val & SPRITE_ENABLE, - "sprite %c assertion failure, should be off on pipe %c but is still active\n", - plane_name(pipe), pipe_name(pipe)); - } else if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) { - u32 val = I915_READ(DVSCNTR(pipe)); - I915_STATE_WARN(val & DVS_ENABLE, - "sprite %c assertion failure, should be off on pipe %c but is still active\n", - plane_name(pipe), pipe_name(pipe)); - } + for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) + assert_plane_disabled(plane); } static void assert_vblank_disabled(struct drm_crtc *crtc) @@ -1878,27 +1814,18 @@ enum pipe intel_crtc_pch_transcoder(struct intel_crtc *crtc) return crtc->pipe; } -/** - * intel_enable_pipe - enable a pipe, asserting requirements - * @crtc: crtc responsible for the pipe - * - * Enable @crtc's pipe, making sure that various hardware specific requirements - * are met, if applicable, e.g. PLL enabled, LVDS pairs enabled, etc. 
- */ -static void intel_enable_pipe(struct intel_crtc *crtc) +static void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state) { - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum transcoder cpu_transcoder = new_crtc_state->cpu_transcoder; enum pipe pipe = crtc->pipe; - enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; i915_reg_t reg; u32 val; DRM_DEBUG_KMS("enabling pipe %c\n", pipe_name(pipe)); - assert_planes_disabled(dev_priv, pipe); - assert_cursor_disabled(dev_priv, pipe); - assert_sprites_disabled(dev_priv, pipe); + assert_planes_disabled(crtc); /* * A pipe without a PLL won't actually be able to drive bits from @@ -1906,12 +1833,12 @@ static void intel_enable_pipe(struct intel_crtc *crtc) * need the check. */ if (HAS_GMCH_DISPLAY(dev_priv)) { - if (intel_crtc_has_type(crtc->config, INTEL_OUTPUT_DSI)) + if (intel_crtc_has_type(new_crtc_state, INTEL_OUTPUT_DSI)) assert_dsi_pll_enabled(dev_priv); else assert_pll_enabled(dev_priv, pipe); } else { - if (crtc->config->has_pch_encoder) { + if (new_crtc_state->has_pch_encoder) { /* if driving the PCH, we need FDI enabled */ assert_fdi_rx_pll_enabled(dev_priv, intel_crtc_pch_transcoder(crtc)); @@ -1933,31 +1860,21 @@ static void intel_enable_pipe(struct intel_crtc *crtc) POSTING_READ(reg); /* - * Until the pipe starts DSL will read as 0, which would cause - * an apparent vblank timestamp jump, which messes up also the - * frame count when it's derived from the timestamps. So let's - * wait for the pipe to start properly before we call - * drm_crtc_vblank_on() + * Until the pipe starts PIPEDSL reads will return a stale value, + * which causes an apparent vblank timestamp jump when PIPEDSL + * resets to its proper value. That also messes up the frame count + * when it's derived from the timestamps. 
So let's wait for the + * pipe to start properly before we call drm_crtc_vblank_on() */ - if (dev->max_vblank_count == 0 && - wait_for(intel_get_crtc_scanline(crtc) != crtc->scanline_offset, 50)) - DRM_ERROR("pipe %c didn't start\n", pipe_name(pipe)); + if (dev_priv->drm.max_vblank_count == 0) + intel_wait_for_pipe_scanline_moving(crtc); } -/** - * intel_disable_pipe - disable a pipe, asserting requirements - * @crtc: crtc whose pipes is to be disabled - * - * Disable the pipe of @crtc, making sure that various hardware - * specific requirements are met, if applicable, e.g. plane - * disabled, panel fitter off, etc. - * - * Will wait until the pipe has shut down before returning. - */ -static void intel_disable_pipe(struct intel_crtc *crtc) +static void intel_disable_pipe(const struct intel_crtc_state *old_crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; + enum transcoder cpu_transcoder = old_crtc_state->cpu_transcoder; enum pipe pipe = crtc->pipe; i915_reg_t reg; u32 val; @@ -1968,9 +1885,7 @@ static void intel_disable_pipe(struct intel_crtc *crtc) * Make sure planes won't keep trying to pump pixels to us, * or we might hang the display. */ - assert_planes_disabled(dev_priv, pipe); - assert_cursor_disabled(dev_priv, pipe); - assert_sprites_disabled(dev_priv, pipe); + assert_planes_disabled(crtc); reg = PIPECONF(cpu_transcoder); val = I915_READ(reg); @@ -1981,7 +1896,7 @@ static void intel_disable_pipe(struct intel_crtc *crtc) * Double wide has implications for planes * so best keep it disabled when not needed. 
*/ - if (crtc->config->double_wide) + if (old_crtc_state->double_wide) val &= ~PIPECONF_DOUBLE_WIDE; /* Don't disable pipe or pipe PLLs if needed */ @@ -1990,7 +1905,7 @@ static void intel_disable_pipe(struct intel_crtc *crtc) I915_WRITE(reg, val); if ((val & PIPECONF_ENABLE) == 0) - intel_wait_for_pipe_off(crtc); + intel_wait_for_pipe_off(old_crtc_state); } static unsigned int intel_tile_size(const struct drm_i915_private *dev_priv) @@ -2799,6 +2714,23 @@ intel_set_plane_visible(struct intel_crtc_state *crtc_state, crtc_state->active_planes); } +static void intel_plane_disable_noatomic(struct intel_crtc *crtc, + struct intel_plane *plane) +{ + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + + intel_set_plane_visible(crtc_state, plane_state, false); + + if (plane->id == PLANE_PRIMARY) + intel_pre_disable_primary_noatomic(&crtc->base); + + trace_intel_disable_plane(&plane->base, crtc); + plane->disable_plane(plane, crtc); +} + static void intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, struct intel_initial_plane_config *plane_config) @@ -2856,12 +2788,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, * simplest solution is to just disable the primary plane now and * pretend the BIOS never had it enabled. 
*/ - intel_set_plane_visible(to_intel_crtc_state(crtc_state), - to_intel_plane_state(plane_state), - false); - intel_pre_disable_primary_noatomic(&intel_crtc->base); - trace_intel_disable_plane(primary, intel_crtc); - intel_plane->disable_plane(intel_plane, intel_crtc); + intel_plane_disable_noatomic(intel_crtc, intel_plane); return; @@ -3281,16 +3208,16 @@ int i9xx_check_plane_surface(struct intel_plane_state *plane_state) return 0; } -static void i9xx_update_primary_plane(struct intel_plane *primary, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static void i9xx_update_plane(struct intel_plane *plane, + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) { - struct drm_i915_private *dev_priv = to_i915(primary->base.dev); + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); const struct drm_framebuffer *fb = plane_state->base.fb; - enum plane plane = primary->plane; + enum i9xx_plane_id i9xx_plane = plane->i9xx_plane; u32 linear_offset; u32 dspcntr = plane_state->ctl; - i915_reg_t reg = DSPCNTR(plane); + i915_reg_t reg = DSPCNTR(i9xx_plane); int x = plane_state->main.x; int y = plane_state->main.y; unsigned long irqflags; @@ -3309,34 +3236,34 @@ static void i9xx_update_primary_plane(struct intel_plane *primary, /* pipesrc and dspsize control the size that is scaled from, * which should always be the user's requested size. 
*/ - I915_WRITE_FW(DSPSIZE(plane), + I915_WRITE_FW(DSPSIZE(i9xx_plane), ((crtc_state->pipe_src_h - 1) << 16) | (crtc_state->pipe_src_w - 1)); - I915_WRITE_FW(DSPPOS(plane), 0); - } else if (IS_CHERRYVIEW(dev_priv) && plane == PLANE_B) { - I915_WRITE_FW(PRIMSIZE(plane), + I915_WRITE_FW(DSPPOS(i9xx_plane), 0); + } else if (IS_CHERRYVIEW(dev_priv) && i9xx_plane == PLANE_B) { + I915_WRITE_FW(PRIMSIZE(i9xx_plane), ((crtc_state->pipe_src_h - 1) << 16) | (crtc_state->pipe_src_w - 1)); - I915_WRITE_FW(PRIMPOS(plane), 0); - I915_WRITE_FW(PRIMCNSTALPHA(plane), 0); + I915_WRITE_FW(PRIMPOS(i9xx_plane), 0); + I915_WRITE_FW(PRIMCNSTALPHA(i9xx_plane), 0); } I915_WRITE_FW(reg, dspcntr); - I915_WRITE_FW(DSPSTRIDE(plane), fb->pitches[0]); + I915_WRITE_FW(DSPSTRIDE(i9xx_plane), fb->pitches[0]); if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { - I915_WRITE_FW(DSPSURF(plane), + I915_WRITE_FW(DSPSURF(i9xx_plane), intel_plane_ggtt_offset(plane_state) + dspaddr_offset); - I915_WRITE_FW(DSPOFFSET(plane), (y << 16) | x); + I915_WRITE_FW(DSPOFFSET(i9xx_plane), (y << 16) | x); } else if (INTEL_GEN(dev_priv) >= 4) { - I915_WRITE_FW(DSPSURF(plane), + I915_WRITE_FW(DSPSURF(i9xx_plane), intel_plane_ggtt_offset(plane_state) + dspaddr_offset); - I915_WRITE_FW(DSPTILEOFF(plane), (y << 16) | x); - I915_WRITE_FW(DSPLINOFF(plane), linear_offset); + I915_WRITE_FW(DSPTILEOFF(i9xx_plane), (y << 16) | x); + I915_WRITE_FW(DSPLINOFF(i9xx_plane), linear_offset); } else { - I915_WRITE_FW(DSPADDR(plane), + I915_WRITE_FW(DSPADDR(i9xx_plane), intel_plane_ggtt_offset(plane_state) + dspaddr_offset); } @@ -3345,25 +3272,49 @@ static void i9xx_update_primary_plane(struct intel_plane *primary, spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } -static void i9xx_disable_primary_plane(struct intel_plane *primary, - struct intel_crtc *crtc) +static void i9xx_disable_plane(struct intel_plane *plane, + struct intel_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(primary->base.dev); - enum plane 
plane = primary->plane; + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum i9xx_plane_id i9xx_plane = plane->i9xx_plane; unsigned long irqflags; spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - I915_WRITE_FW(DSPCNTR(plane), 0); - if (INTEL_INFO(dev_priv)->gen >= 4) - I915_WRITE_FW(DSPSURF(plane), 0); + I915_WRITE_FW(DSPCNTR(i9xx_plane), 0); + if (INTEL_GEN(dev_priv) >= 4) + I915_WRITE_FW(DSPSURF(i9xx_plane), 0); else - I915_WRITE_FW(DSPADDR(plane), 0); - POSTING_READ_FW(DSPCNTR(plane)); + I915_WRITE_FW(DSPADDR(i9xx_plane), 0); + POSTING_READ_FW(DSPCNTR(i9xx_plane)); spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } +static bool i9xx_plane_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + enum i9xx_plane_id i9xx_plane = plane->i9xx_plane; + enum pipe pipe = plane->pipe; + bool ret; + + /* + * Not 100% correct for planes that can move between pipes, + * but that's only the case for gen2-4 which don't have any + * display power wells. + */ + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(DSPCNTR(i9xx_plane)) & DISPLAY_PLANE_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static u32 intel_fb_stride_alignment(const struct drm_framebuffer *fb, int plane) { @@ -4870,7 +4821,7 @@ void hsw_enable_ips(const struct intel_crtc_state *crtc_state) struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - if (!crtc->config->ips_enabled) + if (!crtc_state->ips_enabled) return; /* @@ -4878,8 +4829,8 @@ void hsw_enable_ips(const struct intel_crtc_state *crtc_state) * This function is called from post_plane_update, which is run after * a vblank wait. 
*/ + WARN_ON(!(crtc_state->active_planes & ~BIT(PLANE_CURSOR))); - assert_plane_enabled(dev_priv, crtc->plane); if (IS_BROADWELL(dev_priv)) { mutex_lock(&dev_priv->pcu_lock); WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, @@ -4913,7 +4864,6 @@ void hsw_disable_ips(const struct intel_crtc_state *crtc_state) if (!crtc_state->ips_enabled) return; - assert_plane_enabled(dev_priv, crtc->plane); if (IS_BROADWELL(dev_priv)) { mutex_lock(&dev_priv->pcu_lock); WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, 0)); @@ -4967,14 +4917,6 @@ intel_post_enable_primary(struct drm_crtc *crtc, int pipe = intel_crtc->pipe; /* - * FIXME IPS should be fine as long as one plane is - * enabled, but in practice it seems to have problems - * when going from primary only to sprite only and vice - * versa. - */ - hsw_enable_ips(new_crtc_state); - - /* * Gen2 reports pipe underruns whenever all planes are disabled. * So don't enable underrun reporting before at least some planes * are enabled. @@ -4989,10 +4931,9 @@ intel_post_enable_primary(struct drm_crtc *crtc, intel_check_pch_fifo_underruns(dev_priv); } -/* FIXME move all this to pre_plane_update() with proper state tracking */ +/* FIXME get rid of this and use pre_plane_update */ static void -intel_pre_disable_primary(struct drm_crtc *crtc, - const struct intel_crtc_state *old_crtc_state) +intel_pre_disable_primary_noatomic(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -5001,32 +4942,12 @@ intel_pre_disable_primary(struct drm_crtc *crtc, /* * Gen2 reports pipe underruns whenever all planes are disabled. - * So diasble underrun reporting before all the planes get disabled. - * FIXME: Need to fix the logic to work when we turn off all planes - * but leave the pipe running. + * So disable underrun reporting before all the planes get disabled. 
*/ if (IS_GEN2(dev_priv)) intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); - /* - * FIXME IPS should be fine as long as one plane is - * enabled, but in practice it seems to have problems - * when going from primary only to sprite only and vice - * versa. - */ - hsw_disable_ips(old_crtc_state); -} - -/* FIXME get rid of this and use pre_plane_update */ -static void -intel_pre_disable_primary_noatomic(struct drm_crtc *crtc) -{ - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; - - intel_pre_disable_primary(crtc, to_intel_crtc_state(crtc->state)); + hsw_disable_ips(to_intel_crtc_state(crtc->state)); /* * Vblank time updates from the shadow to live plane control register @@ -5042,6 +4963,38 @@ intel_pre_disable_primary_noatomic(struct drm_crtc *crtc) intel_wait_for_vblank(dev_priv, pipe); } +static bool hsw_pre_update_disable_ips(const struct intel_crtc_state *old_crtc_state, + const struct intel_crtc_state *new_crtc_state) +{ + if (!old_crtc_state->ips_enabled) + return false; + + if (needs_modeset(&new_crtc_state->base)) + return true; + + return !new_crtc_state->ips_enabled; +} + +static bool hsw_post_update_enable_ips(const struct intel_crtc_state *old_crtc_state, + const struct intel_crtc_state *new_crtc_state) +{ + if (!new_crtc_state->ips_enabled) + return false; + + if (needs_modeset(&new_crtc_state->base)) + return true; + + /* + * We can't read out IPS on broadwell, assume the worst and + * forcibly enable IPS on the first fastset. 
+ */ + if (new_crtc_state->update_pipe && + old_crtc_state->base.adjusted_mode.private_flags & I915_MODE_FLAG_INHERITED) + return true; + + return !old_crtc_state->ips_enabled; +} + static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) { struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); @@ -5058,6 +5011,9 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) if (pipe_config->update_wm_post && pipe_config->base.active) intel_update_watermarks(crtc); + if (hsw_post_update_enable_ips(old_crtc_state, pipe_config)) + hsw_enable_ips(pipe_config); + if (old_pri_state) { struct intel_plane_state *primary_state = intel_atomic_get_new_plane_state(to_intel_atomic_state(old_state), @@ -5088,6 +5044,9 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state, struct intel_atomic_state *old_intel_state = to_intel_atomic_state(old_state); + if (hsw_pre_update_disable_ips(old_crtc_state, pipe_config)) + hsw_disable_ips(old_crtc_state); + if (old_pri_state) { struct intel_plane_state *primary_state = intel_atomic_get_new_plane_state(old_intel_state, @@ -5096,10 +5055,13 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state, to_intel_plane_state(old_pri_state); intel_fbc_pre_update(crtc, pipe_config, primary_state); - - if (old_primary_state->base.visible && + /* + * Gen2 reports pipe underruns whenever all planes are disabled. + * So disable underrun reporting before all the planes get disabled. 
+ */ + if (IS_GEN2(dev_priv) && old_primary_state->base.visible && (modeset || !primary_state->base.visible)) - intel_pre_disable_primary(&crtc->base, old_crtc_state); + intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false); } /* @@ -5362,7 +5324,7 @@ static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config, if (dev_priv->display.initial_watermarks != NULL) dev_priv->display.initial_watermarks(old_intel_state, intel_crtc->config); - intel_enable_pipe(intel_crtc); + intel_enable_pipe(pipe_config); if (intel_crtc->config->has_pch_encoder) ironlake_pch_enable(pipe_config); @@ -5481,7 +5443,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, /* XXX: Do the pipe assertions at the right place for BXT DSI. */ if (!transcoder_is_dsi(cpu_transcoder)) - intel_enable_pipe(intel_crtc); + intel_enable_pipe(pipe_config); if (intel_crtc->config->has_pch_encoder) lpt_pch_enable(pipe_config); @@ -5547,7 +5509,7 @@ static void ironlake_crtc_disable(struct intel_crtc_state *old_crtc_state, drm_crtc_vblank_off(crtc); assert_vblank_disabled(crtc); - intel_disable_pipe(intel_crtc); + intel_disable_pipe(old_crtc_state); ironlake_pfit_disable(intel_crtc, false); @@ -5599,7 +5561,7 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, /* XXX: Do the pipe assertions at the right place for BXT DSI. 
*/ if (!transcoder_is_dsi(cpu_transcoder)) - intel_disable_pipe(intel_crtc); + intel_disable_pipe(old_crtc_state); if (intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DP_MST)) intel_ddi_set_vc_payload_alloc(intel_crtc->config, false); @@ -5777,7 +5739,7 @@ static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, dev_priv->display.initial_watermarks(old_intel_state, pipe_config); - intel_enable_pipe(intel_crtc); + intel_enable_pipe(pipe_config); assert_vblank_disabled(crtc); drm_crtc_vblank_on(crtc); @@ -5836,7 +5798,7 @@ static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config, intel_crtc->config); else intel_update_watermarks(intel_crtc); - intel_enable_pipe(intel_crtc); + intel_enable_pipe(pipe_config); assert_vblank_disabled(crtc); drm_crtc_vblank_on(crtc); @@ -5880,7 +5842,7 @@ static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state, drm_crtc_vblank_off(crtc); assert_vblank_disabled(crtc); - intel_disable_pipe(intel_crtc); + intel_disable_pipe(old_crtc_state); i9xx_pfit_disable(intel_crtc); @@ -5915,6 +5877,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc, struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct drm_i915_private *dev_priv = to_i915(crtc->dev); enum intel_display_power_domain domain; + struct intel_plane *plane; u64 domains; struct drm_atomic_state *state; struct intel_crtc_state *crtc_state; @@ -5923,11 +5886,12 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc, if (!intel_crtc->active) return; - if (crtc->primary->state->visible) { - intel_pre_disable_primary_noatomic(crtc); + for_each_intel_plane_on_crtc(&dev_priv->drm, intel_crtc, plane) { + const struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); - intel_crtc_disable_planes(crtc, 1 << drm_plane_index(crtc->primary)); - crtc->primary->state->visible = false; + if (plane_state->base.visible) + intel_plane_disable_noatomic(intel_crtc, plane); } state = 
drm_atomic_state_alloc(crtc->dev); @@ -6228,18 +6192,20 @@ retry: return ret; } -static bool pipe_config_supports_ips(struct drm_i915_private *dev_priv, - struct intel_crtc_state *pipe_config) +bool hsw_crtc_state_ips_capable(const struct intel_crtc_state *crtc_state) { - if (pipe_config->ips_force_disable) + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + + /* IPS only exists on ULT machines and is tied to pipe A. */ + if (!hsw_crtc_supports_ips(crtc)) return false; - if (pipe_config->pipe_bpp > 24) + if (!i915_modparams.enable_ips) return false; - /* HSW can handle pixel rate up to cdclk? */ - if (IS_HASWELL(dev_priv)) - return true; + if (crtc_state->pipe_bpp > 24) + return false; /* * We compare against max which means we must take @@ -6248,19 +6214,36 @@ static bool pipe_config_supports_ips(struct drm_i915_private *dev_priv, * * Should measure whether using a lower cdclk w/o IPS */ - return pipe_config->pixel_rate <= - dev_priv->max_cdclk_freq * 95 / 100; + if (IS_BROADWELL(dev_priv) && + crtc_state->pixel_rate > dev_priv->max_cdclk_freq * 95 / 100) + return false; + + return true; } -static void hsw_compute_ips_config(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config) +static bool hsw_compute_ips_config(struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = + to_i915(crtc_state->base.crtc->dev); + struct intel_atomic_state *intel_state = + to_intel_atomic_state(crtc_state->base.state); + + if (!hsw_crtc_state_ips_capable(crtc_state)) + return false; - pipe_config->ips_enabled = i915_modparams.enable_ips && - hsw_crtc_supports_ips(crtc) && - pipe_config_supports_ips(dev_priv, pipe_config); + if (crtc_state->ips_force_disable) + return false; + + /* IPS should be fine as long as at least one plane is enabled. 
*/ + if (!(crtc_state->active_planes & ~BIT(PLANE_CURSOR))) + return false; + + /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ + if (IS_BROADWELL(dev_priv) && + crtc_state->pixel_rate > intel_state->cdclk.logical.cdclk * 95 / 100) + return false; + + return true; } static bool intel_crtc_supports_double_wide(const struct intel_crtc *crtc) @@ -6378,9 +6361,6 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc, intel_crtc_compute_pixel_rate(pipe_config); - if (HAS_IPS(dev_priv)) - hsw_compute_ips_config(crtc, pipe_config); - if (pipe_config->has_pch_encoder) return ironlake_fdi_compute_config(crtc, pipe_config); @@ -7437,15 +7417,16 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_plane *plane = to_intel_plane(crtc->base.primary); + enum i9xx_plane_id i9xx_plane = plane->i9xx_plane; + enum pipe pipe = crtc->pipe; u32 val, base, offset; - int pipe = crtc->pipe, plane = crtc->plane; int fourcc, pixel_format; unsigned int aligned_height; struct drm_framebuffer *fb; struct intel_framebuffer *intel_fb; - val = I915_READ(DSPCNTR(plane)); - if (!(val & DISPLAY_PLANE_ENABLE)) + if (!plane->get_hw_state(plane)) return; intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL); @@ -7458,6 +7439,8 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc, fb->dev = dev; + val = I915_READ(DSPCNTR(i9xx_plane)); + if (INTEL_GEN(dev_priv) >= 4) { if (val & DISPPLANE_TILED) { plane_config->tiling = I915_TILING_X; @@ -7469,14 +7452,17 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc, fourcc = i9xx_format_to_fourcc(pixel_format); fb->format = drm_format_info(fourcc); - if (INTEL_GEN(dev_priv) >= 4) { + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { + offset = I915_READ(DSPOFFSET(i9xx_plane)); + base = I915_READ(DSPSURF(i9xx_plane)) & 0xfffff000; + } else if (INTEL_GEN(dev_priv) >= 4) { if (plane_config->tiling) - offset = 
I915_READ(DSPTILEOFF(plane)); + offset = I915_READ(DSPTILEOFF(i9xx_plane)); else - offset = I915_READ(DSPLINOFF(plane)); - base = I915_READ(DSPSURF(plane)) & 0xfffff000; + offset = I915_READ(DSPLINOFF(i9xx_plane)); + base = I915_READ(DSPSURF(i9xx_plane)) & 0xfffff000; } else { - base = I915_READ(DSPADDR(plane)); + base = I915_READ(DSPADDR(i9xx_plane)); } plane_config->base = base; @@ -7484,15 +7470,15 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc, fb->width = ((val >> 16) & 0xfff) + 1; fb->height = ((val >> 0) & 0xfff) + 1; - val = I915_READ(DSPSTRIDE(pipe)); + val = I915_READ(DSPSTRIDE(i9xx_plane)); fb->pitches[0] = val & 0xffffffc0; aligned_height = intel_fb_align_height(fb, 0, fb->height); plane_config->size = fb->pitches[0] * aligned_height; - DRM_DEBUG_KMS("pipe/plane %c/%d with fb: size=%dx%d@%d, offset=%x, pitch %d, size 0x%x\n", - pipe_name(pipe), plane, fb->width, fb->height, + DRM_DEBUG_KMS("%s/%s with fb: size=%dx%d@%d, offset=%x, pitch %d, size 0x%x\n", + crtc->base.name, plane->base.name, fb->width, fb->height, fb->format->cpp[0] * 8, base, fb->pitches[0], plane_config->size); @@ -8461,13 +8447,18 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_plane *plane = to_intel_plane(crtc->base.primary); + enum plane_id plane_id = plane->id; + enum pipe pipe = crtc->pipe; u32 val, base, offset, stride_mult, tiling, alpha; - int pipe = crtc->pipe; int fourcc, pixel_format; unsigned int aligned_height; struct drm_framebuffer *fb; struct intel_framebuffer *intel_fb; + if (!plane->get_hw_state(plane)) + return; + intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL); if (!intel_fb) { DRM_DEBUG_KMS("failed to alloc fb\n"); @@ -8478,14 +8469,12 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, fb->dev = dev; - val = I915_READ(PLANE_CTL(pipe, 0)); - if (!(val & PLANE_CTL_ENABLE)) - goto error; + val = I915_READ(PLANE_CTL(pipe, 
plane_id)); pixel_format = val & PLANE_CTL_FORMAT_MASK; if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { - alpha = I915_READ(PLANE_COLOR_CTL(pipe, 0)); + alpha = I915_READ(PLANE_COLOR_CTL(pipe, plane_id)); alpha &= PLANE_COLOR_ALPHA_MASK; } else { alpha = val & PLANE_CTL_ALPHA_MASK; @@ -8521,16 +8510,16 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, goto error; } - base = I915_READ(PLANE_SURF(pipe, 0)) & 0xfffff000; + base = I915_READ(PLANE_SURF(pipe, plane_id)) & 0xfffff000; plane_config->base = base; - offset = I915_READ(PLANE_OFFSET(pipe, 0)); + offset = I915_READ(PLANE_OFFSET(pipe, plane_id)); - val = I915_READ(PLANE_SIZE(pipe, 0)); + val = I915_READ(PLANE_SIZE(pipe, plane_id)); fb->height = ((val >> 16) & 0xfff) + 1; fb->width = ((val >> 0) & 0x1fff) + 1; - val = I915_READ(PLANE_STRIDE(pipe, 0)); + val = I915_READ(PLANE_STRIDE(pipe, plane_id)); stride_mult = intel_fb_stride_alignment(fb, 0); fb->pitches[0] = (val & 0x3ff) * stride_mult; @@ -8538,8 +8527,8 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, plane_config->size = fb->pitches[0] * aligned_height; - DRM_DEBUG_KMS("pipe %c with fb: size=%dx%d@%d, offset=%x, pitch %d, size 0x%x\n", - pipe_name(pipe), fb->width, fb->height, + DRM_DEBUG_KMS("%s/%s with fb: size=%dx%d@%d, offset=%x, pitch %d, size 0x%x\n", + crtc->base.name, plane->base.name, fb->width, fb->height, fb->format->cpp[0] * 8, base, fb->pitches[0], plane_config->size); @@ -8574,74 +8563,6 @@ static void ironlake_get_pfit_config(struct intel_crtc *crtc, } } -static void -ironlake_get_initial_plane_config(struct intel_crtc *crtc, - struct intel_initial_plane_config *plane_config) -{ - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - u32 val, base, offset; - int pipe = crtc->pipe; - int fourcc, pixel_format; - unsigned int aligned_height; - struct drm_framebuffer *fb; - struct intel_framebuffer *intel_fb; - - val = I915_READ(DSPCNTR(pipe)); - if (!(val & 
DISPLAY_PLANE_ENABLE)) - return; - - intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL); - if (!intel_fb) { - DRM_DEBUG_KMS("failed to alloc fb\n"); - return; - } - - fb = &intel_fb->base; - - fb->dev = dev; - - if (INTEL_GEN(dev_priv) >= 4) { - if (val & DISPPLANE_TILED) { - plane_config->tiling = I915_TILING_X; - fb->modifier = I915_FORMAT_MOD_X_TILED; - } - } - - pixel_format = val & DISPPLANE_PIXFORMAT_MASK; - fourcc = i9xx_format_to_fourcc(pixel_format); - fb->format = drm_format_info(fourcc); - - base = I915_READ(DSPSURF(pipe)) & 0xfffff000; - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { - offset = I915_READ(DSPOFFSET(pipe)); - } else { - if (plane_config->tiling) - offset = I915_READ(DSPTILEOFF(pipe)); - else - offset = I915_READ(DSPLINOFF(pipe)); - } - plane_config->base = base; - - val = I915_READ(PIPESRC(pipe)); - fb->width = ((val >> 16) & 0xfff) + 1; - fb->height = ((val >> 0) & 0xfff) + 1; - - val = I915_READ(DSPSTRIDE(pipe)); - fb->pitches[0] = val & 0xffffffc0; - - aligned_height = intel_fb_align_height(fb, 0, fb->height); - - plane_config->size = fb->pitches[0] * aligned_height; - - DRM_DEBUG_KMS("pipe %c with fb: size=%dx%d@%d, offset=%x, pitch %d, size 0x%x\n", - pipe_name(pipe), fb->width, fb->height, - fb->format->cpp[0] * 8, base, fb->pitches[0], - plane_config->size); - - plane_config->fb = intel_fb; -} - static bool ironlake_get_pipe_config(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config) { @@ -9275,6 +9196,19 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, ironlake_get_pfit_config(crtc, pipe_config); } + if (hsw_crtc_supports_ips(crtc)) { + if (IS_HASWELL(dev_priv)) + pipe_config->ips_enabled = I915_READ(IPS_CTL) & IPS_ENABLE; + else { + /* + * We cannot readout IPS state on broadwell, set to + * true so we can set it to a defined state on first + * commit. 
+ */ + pipe_config->ips_enabled = true; + } + } + if (pipe_config->cpu_transcoder != TRANSCODER_EDP && !transcoder_is_dsi(pipe_config->cpu_transcoder)) { pipe_config->pixel_multiplier = @@ -9500,6 +9434,23 @@ static void i845_disable_cursor(struct intel_plane *plane, i845_update_cursor(plane, NULL, NULL); } +static bool i845_cursor_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + bool ret; + + power_domain = POWER_DOMAIN_PIPE(PIPE_A); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -9693,6 +9644,28 @@ static void i9xx_disable_cursor(struct intel_plane *plane, i9xx_update_cursor(plane, NULL, NULL); } +static bool i9xx_cursor_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + enum pipe pipe = plane->pipe; + bool ret; + + /* + * Not 100% correct for planes that can move between pipes, + * but that's only the case for gen2-3 which don't have any + * display power wells. 
+ */ + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(CURCNTR(pipe)) & CURSOR_MODE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} /* VESA 640x480x72Hz mode to set on the pipe */ static const struct drm_display_mode load_detect_mode = { @@ -10489,6 +10462,9 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc, pipe_config); } + if (HAS_IPS(dev_priv)) + pipe_config->ips_enabled = hsw_compute_ips_config(pipe_config); + return ret; } @@ -11655,6 +11631,18 @@ verify_crtc_state(struct drm_crtc *crtc, } static void +intel_verify_planes(struct intel_atomic_state *state) +{ + struct intel_plane *plane; + const struct intel_plane_state *plane_state; + int i; + + for_each_new_intel_plane_in_state(state, plane, + plane_state, i) + assert_plane(plane, plane_state->base.visible); +} + +static void verify_single_dpll_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct drm_crtc *crtc, @@ -12136,7 +12124,7 @@ static int intel_atomic_check(struct drm_device *dev, if (ret) return ret; - intel_fbc_choose_crtc(dev_priv, state); + intel_fbc_choose_crtc(dev_priv, intel_state); return calc_watermark_data(state); } @@ -12447,6 +12435,9 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) intel_modeset_verify_crtc(crtc, state, old_crtc_state, new_crtc_state); } + if (intel_state->modeset) + intel_verify_planes(intel_state); + if (intel_state->modeset && intel_can_enable_sagv(state)) intel_enable_sagv(dev_priv); @@ -13267,9 +13258,9 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) * port is hooked to pipe B. Hence we want plane A feeding pipe B. 
*/ if (HAS_FBC(dev_priv) && INTEL_GEN(dev_priv) < 4) - primary->plane = (enum plane) !pipe; + primary->i9xx_plane = (enum i9xx_plane_id) !pipe; else - primary->plane = (enum plane) pipe; + primary->i9xx_plane = (enum i9xx_plane_id) pipe; primary->id = PLANE_PRIMARY; primary->frontbuffer_bit = INTEL_FRONTBUFFER_PRIMARY(pipe); primary->check_plane = intel_check_primary_plane; @@ -13281,6 +13272,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) primary->update_plane = skl_update_plane; primary->disable_plane = skl_disable_plane; + primary->get_hw_state = skl_plane_get_hw_state; } else if (INTEL_GEN(dev_priv) >= 9) { intel_primary_formats = skl_primary_formats; num_formats = ARRAY_SIZE(skl_primary_formats); @@ -13291,20 +13283,23 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) primary->update_plane = skl_update_plane; primary->disable_plane = skl_disable_plane; + primary->get_hw_state = skl_plane_get_hw_state; } else if (INTEL_GEN(dev_priv) >= 4) { intel_primary_formats = i965_primary_formats; num_formats = ARRAY_SIZE(i965_primary_formats); modifiers = i9xx_format_modifiers; - primary->update_plane = i9xx_update_primary_plane; - primary->disable_plane = i9xx_disable_primary_plane; + primary->update_plane = i9xx_update_plane; + primary->disable_plane = i9xx_disable_plane; + primary->get_hw_state = i9xx_plane_get_hw_state; } else { intel_primary_formats = i8xx_primary_formats; num_formats = ARRAY_SIZE(i8xx_primary_formats); modifiers = i9xx_format_modifiers; - primary->update_plane = i9xx_update_primary_plane; - primary->disable_plane = i9xx_disable_primary_plane; + primary->update_plane = i9xx_update_plane; + primary->disable_plane = i9xx_disable_plane; + primary->get_hw_state = i9xx_plane_get_hw_state; } if (INTEL_GEN(dev_priv) >= 9) @@ -13327,7 +13322,8 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) intel_primary_formats, num_formats, modifiers, DRM_PLANE_TYPE_PRIMARY, 
- "plane %c", plane_name(primary->plane)); + "plane %c", + plane_name(primary->i9xx_plane)); if (ret) goto fail; @@ -13387,17 +13383,19 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv, cursor->can_scale = false; cursor->max_downscale = 1; cursor->pipe = pipe; - cursor->plane = pipe; + cursor->i9xx_plane = (enum i9xx_plane_id) pipe; cursor->id = PLANE_CURSOR; cursor->frontbuffer_bit = INTEL_FRONTBUFFER_CURSOR(pipe); if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) { cursor->update_plane = i845_update_cursor; cursor->disable_plane = i845_disable_cursor; + cursor->get_hw_state = i845_cursor_get_hw_state; cursor->check_plane = i845_check_cursor; } else { cursor->update_plane = i9xx_update_cursor; cursor->disable_plane = i9xx_disable_cursor; + cursor->get_hw_state = i9xx_cursor_get_hw_state; cursor->check_plane = i9xx_check_cursor; } @@ -13513,14 +13511,13 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe) goto fail; intel_crtc->pipe = pipe; - intel_crtc->plane = primary->plane; /* initialize shared scalers */ intel_crtc_init_scalers(intel_crtc, crtc_state); BUG_ON(pipe >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) || - dev_priv->plane_to_crtc_mapping[intel_crtc->plane] != NULL); - dev_priv->plane_to_crtc_mapping[intel_crtc->plane] = intel_crtc; + dev_priv->plane_to_crtc_mapping[primary->i9xx_plane] != NULL); + dev_priv->plane_to_crtc_mapping[primary->i9xx_plane] = intel_crtc; dev_priv->pipe_to_crtc_mapping[intel_crtc->pipe] = intel_crtc; drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs); @@ -14200,7 +14197,7 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) } else if (HAS_DDI(dev_priv)) { dev_priv->display.get_pipe_config = haswell_get_pipe_config; dev_priv->display.get_initial_plane_config = - ironlake_get_initial_plane_config; + i9xx_get_initial_plane_config; dev_priv->display.crtc_compute_clock = haswell_crtc_compute_clock; dev_priv->display.crtc_enable = haswell_crtc_enable; @@ -14208,7 +14205,7 
@@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) } else if (HAS_PCH_SPLIT(dev_priv)) { dev_priv->display.get_pipe_config = ironlake_get_pipe_config; dev_priv->display.get_initial_plane_config = - ironlake_get_initial_plane_config; + i9xx_get_initial_plane_config; dev_priv->display.crtc_compute_clock = ironlake_crtc_compute_clock; dev_priv->display.crtc_enable = ironlake_crtc_enable; @@ -14693,6 +14690,7 @@ int intel_modeset_init(struct drm_device *dev) void i830_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) { + struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); /* 640x480@60Hz, ~25175 kHz */ struct dpll clock = { .m1 = 18, @@ -14756,42 +14754,62 @@ void i830_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) I915_WRITE(PIPECONF(pipe), PIPECONF_ENABLE | PIPECONF_PROGRESSIVE); POSTING_READ(PIPECONF(pipe)); + + intel_wait_for_pipe_scanline_moving(crtc); } void i830_disable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) { + struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); + DRM_DEBUG_KMS("disabling pipe %c due to force quirk\n", pipe_name(pipe)); - assert_plane_disabled(dev_priv, PLANE_A); - assert_plane_disabled(dev_priv, PLANE_B); + WARN_ON(I915_READ(DSPCNTR(PLANE_A)) & DISPLAY_PLANE_ENABLE); + WARN_ON(I915_READ(DSPCNTR(PLANE_B)) & DISPLAY_PLANE_ENABLE); + WARN_ON(I915_READ(DSPCNTR(PLANE_C)) & DISPLAY_PLANE_ENABLE); + WARN_ON(I915_READ(CURCNTR(PIPE_A)) & CURSOR_MODE); + WARN_ON(I915_READ(CURCNTR(PIPE_B)) & CURSOR_MODE); I915_WRITE(PIPECONF(pipe), 0); POSTING_READ(PIPECONF(pipe)); - if (wait_for(pipe_dsl_stopped(dev_priv, pipe), 100)) - DRM_ERROR("pipe %c off wait timed out\n", pipe_name(pipe)); + intel_wait_for_pipe_scanline_stopped(crtc); I915_WRITE(DPLL(pipe), DPLL_VGA_MODE_DIS); POSTING_READ(DPLL(pipe)); } -static bool -intel_check_plane_mapping(struct intel_crtc *crtc) +static bool intel_plane_mapping_ok(struct intel_crtc *crtc, + struct intel_plane *plane) { struct 
drm_i915_private *dev_priv = to_i915(crtc->base.dev); - u32 val; + enum i9xx_plane_id i9xx_plane = plane->i9xx_plane; + u32 val = I915_READ(DSPCNTR(i9xx_plane)); - if (INTEL_INFO(dev_priv)->num_pipes == 1) - return true; + return (val & DISPLAY_PLANE_ENABLE) == 0 || + (val & DISPPLANE_SEL_PIPE_MASK) == DISPPLANE_SEL_PIPE(crtc->pipe); +} - val = I915_READ(DSPCNTR(!crtc->plane)); +static void +intel_sanitize_plane_mapping(struct drm_i915_private *dev_priv) +{ + struct intel_crtc *crtc; - if ((val & DISPLAY_PLANE_ENABLE) && - (!!(val & DISPPLANE_SEL_PIPE_MASK) == crtc->pipe)) - return false; + if (INTEL_GEN(dev_priv) >= 4) + return; - return true; + for_each_intel_crtc(&dev_priv->drm, crtc) { + struct intel_plane *plane = + to_intel_plane(crtc->base.primary); + + if (intel_plane_mapping_ok(crtc, plane)) + continue; + + DRM_DEBUG_KMS("%s attached to the wrong pipe, disabling plane\n", + plane->base.name); + intel_plane_disable_noatomic(crtc, plane); + } } static bool intel_crtc_has_encoders(struct intel_crtc *crtc) @@ -14847,33 +14865,15 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, /* Disable everything but the primary plane */ for_each_intel_plane_on_crtc(dev, crtc, plane) { - if (plane->base.type == DRM_PLANE_TYPE_PRIMARY) - continue; + const struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); - trace_intel_disable_plane(&plane->base, crtc); - plane->disable_plane(plane, crtc); + if (plane_state->base.visible && + plane->base.type != DRM_PLANE_TYPE_PRIMARY) + intel_plane_disable_noatomic(crtc, plane); } } - /* We need to sanitize the plane -> pipe mapping first because this will - * disable the crtc (and hence change the state) if it is wrong. Note - * that gen4+ has a fixed plane -> pipe mapping. 
*/ - if (INTEL_GEN(dev_priv) < 4 && !intel_check_plane_mapping(crtc)) { - bool plane; - - DRM_DEBUG_KMS("[CRTC:%d:%s] wrong plane connection detected!\n", - crtc->base.base.id, crtc->base.name); - - /* Pipe has the wrong plane attached and the plane is active. - * Temporarily change the plane mapping and disable everything - * ... */ - plane = crtc->plane; - crtc->base.primary->state->visible = true; - crtc->plane = !plane; - intel_crtc_disable_noatomic(&crtc->base, ctx); - crtc->plane = plane; - } - /* Adjust the state of the output pipe according to whether we * have active connectors/encoders. */ if (crtc->active && !intel_crtc_has_encoders(crtc)) @@ -14947,8 +14947,6 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) connector->base.dpms = DRM_MODE_DPMS_OFF; connector->base.encoder = NULL; } - /* Enabled encoders without active connectors will be fixed in - * the crtc fixup. */ } void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv) @@ -14978,24 +14976,21 @@ void i915_redisable_vga(struct drm_i915_private *dev_priv) intel_display_power_put(dev_priv, POWER_DOMAIN_VGA); } -static bool primary_get_hw_state(struct intel_plane *plane) -{ - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - - return I915_READ(DSPCNTR(plane->plane)) & DISPLAY_PLANE_ENABLE; -} - /* FIXME read out full plane state for all planes */ static void readout_plane_state(struct intel_crtc *crtc) { - struct intel_plane *primary = to_intel_plane(crtc->base.primary); - bool visible; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct intel_plane *plane; - visible = crtc->active && primary_get_hw_state(primary); + for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) { + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + bool visible = plane->get_hw_state(plane); - 
intel_set_plane_visible(to_intel_crtc_state(crtc->base.state), - to_intel_plane_state(primary->base.state), - visible); + intel_set_plane_visible(crtc_state, plane_state, visible); + } } static void intel_modeset_readout_hw_state(struct drm_device *dev) @@ -15203,6 +15198,8 @@ intel_modeset_setup_hw_state(struct drm_device *dev, /* HW state is read out, now we need to sanitize this mess. */ get_encoder_power_domains(dev_priv); + intel_sanitize_plane_mapping(dev_priv); + for_each_intel_encoder(dev, encoder) { intel_sanitize_encoder(encoder); } @@ -15317,10 +15314,7 @@ static void intel_hpd_poll_fini(struct drm_device *dev) struct intel_connector *connector; struct drm_connector_list_iter conn_iter; - /* First disable polling... */ - drm_kms_helper_poll_fini(dev); - - /* Then kill the work that may have been queued by hpd. */ + /* Kill all the work that may have been queued by hpd. */ drm_connector_list_iter_begin(dev, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { if (connector->modeset_retry_work.func) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index bbf2256ba574..35c5299feab6 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1643,7 +1643,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, pipe_config->has_pch_encoder = true; pipe_config->has_drrs = false; - if (port == PORT_A) + if (IS_G4X(dev_priv) || port == PORT_A) pipe_config->has_audio = false; else if (intel_conn_state->force_audio == HDMI_AUDIO_AUTO) pipe_config->has_audio = intel_dp->has_audio; @@ -1677,6 +1677,10 @@ intel_dp_compute_config(struct intel_encoder *encoder, conn_state->scaling_mode); } + if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && + adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) + return false; + if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) return false; @@ -4277,6 +4281,8 @@ intel_dp_check_link_status(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = 
to_i915(intel_dp_to_dev(intel_dp)); struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)->base; + struct drm_connector_state *conn_state = + intel_dp->attached_connector->base.state; u8 link_status[DP_LINK_STATUS_SIZE]; WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); @@ -4286,10 +4292,16 @@ intel_dp_check_link_status(struct intel_dp *intel_dp) return; } - if (!intel_encoder->base.crtc) + if (!conn_state->crtc) + return; + + WARN_ON(!drm_modeset_is_locked(&conn_state->crtc->mutex)); + + if (!conn_state->crtc->state->active) return; - if (!to_intel_crtc(intel_encoder->base.crtc)->active) + if (conn_state->commit && + !try_wait_for_completion(&conn_state->commit->hw_done)) return; /* @@ -4364,9 +4376,8 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) DRM_DEBUG_DRIVER("CP or sink specific irq unhandled\n"); } - drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, NULL); intel_dp_check_link_status(intel_dp); - drm_modeset_unlock(&dev_priv->drm.mode_config.connection_mutex); + if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { DRM_DEBUG_KMS("Link Training Compliance Test requested\n"); /* Send a Hotplug Uevent to userspace to start modeset */ @@ -4814,8 +4825,19 @@ intel_dp_detect(struct drm_connector *connector, connector->base.id, connector->name); /* If full detect is not performed yet, do a full detect */ - if (!intel_dp->detect_done) + if (!intel_dp->detect_done) { + struct drm_crtc *crtc; + int ret; + + crtc = connector->state->crtc; + if (crtc) { + ret = drm_modeset_lock(&crtc->mutex, ctx); + if (ret) + return ret; + } + status = intel_dp_long_pulse(intel_dp->attached_connector); + } intel_dp->detect_done = false; @@ -5097,7 +5119,38 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) } if (!intel_dp->is_mst) { - if (!intel_dp_short_pulse(intel_dp)) { + struct drm_modeset_acquire_ctx ctx; + struct drm_connector *connector = &intel_dp->attached_connector->base; + struct 
drm_crtc *crtc; + int iret; + bool handled = false; + + drm_modeset_acquire_init(&ctx, 0); +retry: + iret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, &ctx); + if (iret) + goto err; + + crtc = connector->state->crtc; + if (crtc) { + iret = drm_modeset_lock(&crtc->mutex, &ctx); + if (iret) + goto err; + } + + handled = intel_dp_short_pulse(intel_dp); + +err: + if (iret == -EDEADLK) { + drm_modeset_backoff(&ctx); + goto retry; + } + + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); + WARN(iret, "Acquiring modeset locks failed with %i\n", iret); + + if (!handled) { intel_dp->detect_done = false; goto put_power; } @@ -5131,8 +5184,11 @@ static void intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connector) { struct drm_i915_private *dev_priv = to_i915(connector->dev); + enum port port = dp_to_dig_port(intel_dp)->base.port; + + if (!IS_G4X(dev_priv) && port != PORT_A) + intel_attach_force_audio_property(connector); - intel_attach_force_audio_property(connector); intel_attach_broadcast_rgb_property(connector); if (intel_dp_is_edp(intel_dp)) { @@ -5306,6 +5362,12 @@ intel_dp_init_panel_power_sequencer(struct intel_dp *intel_dp) */ final->t8 = 1; final->t9 = 1; + + /* + * HW has only a 100msec granularity for t11_t12 so round it up + * accordingly. 
+ */ + final->t11_t12 = roundup(final->t11_t12, 100 * 10); } static void @@ -6034,7 +6096,8 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, drm_connector_init(dev, connector, &intel_dp_connector_funcs, type); drm_connector_helper_add(connector, &intel_dp_connector_helper_funcs); - connector->interlace_allowed = true; + if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv)) + connector->interlace_allowed = true; connector->doublescan_allowed = 0; intel_dp_init_connector_port_info(intel_dig_port); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index bf8b057f72a6..30f791f89d64 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -48,8 +48,9 @@ * having timed out, since the timeout could be due to preemption or similar and * we've never had a chance to check the condition before the timeout. */ -#define _wait_for(COND, US, W) ({ \ +#define _wait_for(COND, US, Wmin, Wmax) ({ \ unsigned long timeout__ = jiffies + usecs_to_jiffies(US) + 1; \ + long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \ int ret__; \ might_sleep(); \ for (;;) { \ @@ -62,12 +63,14 @@ ret__ = -ETIMEDOUT; \ break; \ } \ - usleep_range((W), (W) * 2); \ + usleep_range(wait__, wait__ * 2); \ + if (wait__ < (Wmax)) \ + wait__ <<= 1; \ } \ ret__; \ }) -#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 1000) +#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) /* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. 
*/ #if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) @@ -116,7 +119,7 @@ int ret__; \ BUILD_BUG_ON(!__builtin_constant_p(US)); \ if ((US) > 10) \ - ret__ = _wait_for((COND), (US), 10); \ + ret__ = _wait_for((COND), (US), 10, 10); \ else \ ret__ = _wait_for_atomic((COND), (US), 0); \ ret__; \ @@ -799,7 +802,6 @@ struct intel_crtc_state { struct intel_crtc { struct drm_crtc base; enum pipe pipe; - enum plane plane; /* * Whether the crtc and the connected output pipeline is active. Implies * that crtc->enabled is set, i.e. the current mode configuration has @@ -844,7 +846,7 @@ struct intel_crtc { struct intel_plane { struct drm_plane base; - u8 plane; + enum i9xx_plane_id i9xx_plane; enum plane_id id; enum pipe pipe; bool can_scale; @@ -866,6 +868,7 @@ struct intel_plane { const struct intel_plane_state *plane_state); void (*disable_plane)(struct intel_plane *plane, struct intel_crtc *crtc); + bool (*get_hw_state)(struct intel_plane *plane); int (*check_plane)(struct intel_plane *plane, struct intel_crtc_state *crtc_state, struct intel_plane_state *state); @@ -1129,7 +1132,7 @@ intel_get_crtc_for_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) } static inline struct intel_crtc * -intel_get_crtc_for_plane(struct drm_i915_private *dev_priv, enum plane plane) +intel_get_crtc_for_plane(struct drm_i915_private *dev_priv, enum i9xx_plane_id plane) { return dev_priv->plane_to_crtc_mapping[plane]; } @@ -1285,8 +1288,6 @@ intel_ddi_get_crtc_new_encoder(struct intel_crtc_state *crtc_state); void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state); void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp); bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector); -bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv, - struct intel_crtc *intel_crtc); void intel_ddi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config); @@ -1485,6 +1486,7 @@ bool 
bxt_find_best_dpll(struct intel_crtc_state *crtc_state, int target_clock, int chv_calc_dpll_params(int refclk, struct dpll *pll_clock); bool intel_crtc_active(struct intel_crtc *crtc); +bool hsw_crtc_state_ips_capable(const struct intel_crtc_state *crtc_state); void hsw_enable_ips(const struct intel_crtc_state *crtc_state); void hsw_disable_ips(const struct intel_crtc_state *crtc_state); enum intel_display_power_domain intel_port_to_power_domain(enum port port); @@ -1651,7 +1653,7 @@ static inline void intel_fbdev_restore_mode(struct drm_device *dev) /* intel_fbc.c */ void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv, - struct drm_atomic_state *state); + struct intel_atomic_state *state); bool intel_fbc_is_active(struct drm_i915_private *dev_priv); void intel_fbc_pre_update(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, @@ -1906,15 +1908,10 @@ bool skl_ddb_allocation_overlaps(struct drm_i915_private *dev_priv, const struct skl_ddb_entry *ddb, int ignore); bool ilk_disable_lp_wm(struct drm_device *dev); -int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6); int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, struct intel_crtc_state *cstate); void intel_init_ipc(struct drm_i915_private *dev_priv); void intel_enable_ipc(struct drm_i915_private *dev_priv); -static inline int intel_rc6_enabled(void) -{ - return i915_modparams.enable_rc6; -} /* intel_sdvo.c */ bool intel_sdvo_init(struct drm_i915_private *dev_priv, @@ -1934,6 +1931,7 @@ void skl_update_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); void skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc); +bool skl_plane_get_hw_state(struct intel_plane *plane); /* intel_tv.c */ void intel_tv_init(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 1b60df3c14a0..f67d321376e4 100644 --- 
a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -1670,7 +1670,7 @@ static int intel_dsi_get_panel_orientation(struct intel_connector *connector) { struct drm_i915_private *dev_priv = to_i915(connector->base.dev); int orientation = DRM_MODE_PANEL_ORIENTATION_NORMAL; - enum plane plane; + enum i9xx_plane_id plane; u32 val; if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 9897c7f78c51..86d4c85c8725 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -37,8 +37,6 @@ * Resource Streamer, is 66944 bytes, which rounds to 17 pages. */ #define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE) -/* Same as Haswell, but 72064 bytes now. */ -#define GEN8_CXT_TOTAL_SIZE (18 * PAGE_SIZE) #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) #define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) @@ -164,9 +162,7 @@ __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) case 9: return GEN9_LR_CONTEXT_RENDER_SIZE; case 8: - return i915_modparams.enable_execlists ? 
- GEN8_LR_CONTEXT_RENDER_SIZE : - GEN8_CXT_TOTAL_SIZE; + return GEN8_LR_CONTEXT_RENDER_SIZE; case 7: if (IS_HASWELL(dev_priv)) return HSW_CXT_TOTAL_SIZE; @@ -209,6 +205,15 @@ intel_engine_setup(struct drm_i915_private *dev_priv, GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes)); class_info = &intel_engine_classes[info->class]; + if (GEM_WARN_ON(info->class > MAX_ENGINE_CLASS)) + return -EINVAL; + + if (GEM_WARN_ON(info->instance > MAX_ENGINE_INSTANCE)) + return -EINVAL; + + if (GEM_WARN_ON(dev_priv->engine_class[info->class][info->instance])) + return -EINVAL; + GEM_BUG_ON(dev_priv->engine[id]); engine = kzalloc(sizeof(*engine), GFP_KERNEL); if (!engine) @@ -236,8 +241,11 @@ intel_engine_setup(struct drm_i915_private *dev_priv, /* Nothing to do here, execute in order of dependencies */ engine->schedule = NULL; + spin_lock_init(&engine->stats.lock); + ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); + dev_priv->engine_class[info->class][info->instance] = engine; dev_priv->engine[id] = engine; return 0; } @@ -316,7 +324,7 @@ int intel_engines_init(struct drm_i915_private *dev_priv) &intel_engine_classes[engine->class]; int (*init)(struct intel_engine_cs *engine); - if (i915_modparams.enable_execlists) + if (HAS_EXECLISTS(dev_priv)) init = class_info->init_execlists; else init = class_info->init_legacy; @@ -366,18 +374,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) if (HAS_VEBOX(dev_priv)) I915_WRITE(RING_SYNC_2(engine->mmio_base), 0); } - if (dev_priv->semaphore) { - struct page *page = i915_vma_first_page(dev_priv->semaphore); - void *semaphores; - - /* Semaphores are in noncoherent memory, flush to be safe */ - semaphores = kmap_atomic(page); - memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0), - 0, I915_NUM_ENGINES * gen8_semaphore_seqno_size); - drm_clflush_virt_range(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0), - I915_NUM_ENGINES * gen8_semaphore_seqno_size); - kunmap_atomic(semaphores); - } 
intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); @@ -1071,6 +1067,15 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */ WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); + /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */ + if (IS_GEN9_LP(dev_priv)) { + u32 val = I915_READ(GEN8_L3SQCREG1); + + val &= ~L3_PRIO_CREDITS_MASK; + val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2); + I915_WRITE(GEN8_L3SQCREG1, val); + } + /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */ I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) | GEN8_LQSC_FLUSH_COHERENT_LINES)); @@ -1188,7 +1193,6 @@ static int skl_init_workarounds(struct intel_engine_cs *engine) static int bxt_init_workarounds(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - u32 val; int ret; ret = gen9_init_workarounds(engine); @@ -1203,12 +1207,6 @@ static int bxt_init_workarounds(struct intel_engine_cs *engine) I915_WRITE(FF_SLICE_CS_CHICKEN2, _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE)); - /* WaProgramL3SqcReg1DefaultForPerf:bxt */ - val = I915_READ(GEN8_L3SQCREG1); - val &= ~L3_PRIO_CREDITS_MASK; - val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2); - I915_WRITE(GEN8_L3SQCREG1, val); - /* WaToEnableHwFixForPushConstHWBug:bxt */ WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); @@ -1729,6 +1727,15 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m) I915_READ(RING_MI_MODE(engine->mmio_base)), I915_READ(RING_MI_MODE(engine->mmio_base)) & (MODE_IDLE) ? 
" [idle]" : ""); } + if (HAS_LEGACY_SEMAPHORES(dev_priv)) { + drm_printf(m, "\tSYNC_0: 0x%08x\n", + I915_READ(RING_SYNC_0(engine->mmio_base))); + drm_printf(m, "\tSYNC_1: 0x%08x\n", + I915_READ(RING_SYNC_1(engine->mmio_base))); + if (HAS_VEBOX(dev_priv)) + drm_printf(m, "\tSYNC_2: 0x%08x\n", + I915_READ(RING_SYNC_2(engine->mmio_base))); + } rcu_read_unlock(); @@ -1739,7 +1746,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m) drm_printf(m, "\tBBADDR: 0x%08x_%08x\n", upper_32_bits(addr), lower_32_bits(addr)); - if (i915_modparams.enable_execlists) { + if (HAS_EXECLISTS(dev_priv)) { const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; u32 ptr, read, write; unsigned int idx; @@ -1823,6 +1830,114 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m) drm_printf(m, "\n"); } +static u8 user_class_map[] = { + [I915_ENGINE_CLASS_RENDER] = RENDER_CLASS, + [I915_ENGINE_CLASS_COPY] = COPY_ENGINE_CLASS, + [I915_ENGINE_CLASS_VIDEO] = VIDEO_DECODE_CLASS, + [I915_ENGINE_CLASS_VIDEO_ENHANCE] = VIDEO_ENHANCEMENT_CLASS, +}; + +struct intel_engine_cs * +intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance) +{ + if (class >= ARRAY_SIZE(user_class_map)) + return NULL; + + class = user_class_map[class]; + + GEM_BUG_ON(class > MAX_ENGINE_CLASS); + + if (instance > MAX_ENGINE_INSTANCE) + return NULL; + + return i915->engine_class[class][instance]; +} + +/** + * intel_enable_engine_stats() - Enable engine busy tracking on engine + * @engine: engine to enable stats collection + * + * Start collecting the engine busyness data for @engine. + * + * Returns 0 on success or a negative error code. 
+ */ +int intel_enable_engine_stats(struct intel_engine_cs *engine) +{ + unsigned long flags; + + if (!intel_engine_supports_stats(engine)) + return -ENODEV; + + spin_lock_irqsave(&engine->stats.lock, flags); + if (engine->stats.enabled == ~0) + goto busy; + if (engine->stats.enabled++ == 0) + engine->stats.enabled_at = ktime_get(); + spin_unlock_irqrestore(&engine->stats.lock, flags); + + return 0; + +busy: + spin_unlock_irqrestore(&engine->stats.lock, flags); + + return -EBUSY; +} + +static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine) +{ + ktime_t total = engine->stats.total; + + /* + * If the engine is executing something at the moment + * add it to the total. + */ + if (engine->stats.active) + total = ktime_add(total, + ktime_sub(ktime_get(), engine->stats.start)); + + return total; +} + +/** + * intel_engine_get_busy_time() - Return current accumulated engine busyness + * @engine: engine to report on + * + * Returns accumulated time @engine was busy since engine stats were enabled. + */ +ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine) +{ + ktime_t total; + unsigned long flags; + + spin_lock_irqsave(&engine->stats.lock, flags); + total = __intel_engine_get_busy_time(engine); + spin_unlock_irqrestore(&engine->stats.lock, flags); + + return total; +} + +/** + * intel_disable_engine_stats() - Disable engine busy tracking on engine + * @engine: engine to disable stats collection + * + * Stops collecting the engine busyness data for @engine. 
+ */ +void intel_disable_engine_stats(struct intel_engine_cs *engine) +{ + unsigned long flags; + + if (!intel_engine_supports_stats(engine)) + return; + + spin_lock_irqsave(&engine->stats.lock, flags); + WARN_ON_ONCE(engine->stats.enabled == 0); + if (--engine->stats.enabled == 0) { + engine->stats.total = __intel_engine_get_busy_time(engine); + engine->stats.active = 0; + } + spin_unlock_irqrestore(&engine->stats.lock, flags); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_engine.c" #endif diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 1a0f5e0c8d10..4aefc658a5cf 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -151,7 +151,7 @@ static void i8xx_fbc_activate(struct drm_i915_private *dev_priv) /* Set it up... */ fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM | FBC_CTL_CPU_FENCE; - fbc_ctl2 |= FBC_CTL_PLANE(params->crtc.plane); + fbc_ctl2 |= FBC_CTL_PLANE(params->crtc.i9xx_plane); I915_WRITE(FBC_CONTROL2, fbc_ctl2); I915_WRITE(FBC_FENCE_OFF, params->crtc.fence_y_offset); } @@ -177,7 +177,7 @@ static void g4x_fbc_activate(struct drm_i915_private *dev_priv) struct intel_fbc_reg_params *params = &dev_priv->fbc.params; u32 dpfc_ctl; - dpfc_ctl = DPFC_CTL_PLANE(params->crtc.plane) | DPFC_SR_EN; + dpfc_ctl = DPFC_CTL_PLANE(params->crtc.i9xx_plane) | DPFC_SR_EN; if (params->fb.format->cpp[0] == 2) dpfc_ctl |= DPFC_CTL_LIMIT_2X; else @@ -224,7 +224,7 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv) u32 dpfc_ctl; int threshold = dev_priv->fbc.threshold; - dpfc_ctl = DPFC_CTL_PLANE(params->crtc.plane); + dpfc_ctl = DPFC_CTL_PLANE(params->crtc.i9xx_plane); if (params->fb.format->cpp[0] == 2) threshold++; @@ -306,7 +306,7 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv) dpfc_ctl = 0; if (IS_IVYBRIDGE(dev_priv)) - dpfc_ctl |= IVB_DPFC_CTL_PLANE(params->crtc.plane); + dpfc_ctl |= IVB_DPFC_CTL_PLANE(params->crtc.i9xx_plane); if 
(params->fb.format->cpp[0] == 2) threshold++; @@ -890,7 +890,7 @@ static void intel_fbc_get_reg_params(struct intel_crtc *crtc, params->vma = cache->vma; params->crtc.pipe = crtc->pipe; - params->crtc.plane = crtc->plane; + params->crtc.i9xx_plane = to_intel_plane(crtc->base.primary)->i9xx_plane; params->crtc.fence_y_offset = get_crtc_fence_y_offset(fbc); params->fb.format = cache->fb.format; @@ -1054,11 +1054,11 @@ out: * enable FBC for the chosen CRTC. If it does, it will set dev_priv->fbc.crtc. */ void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv, - struct drm_atomic_state *state) + struct intel_atomic_state *state) { struct intel_fbc *fbc = &dev_priv->fbc; - struct drm_plane *plane; - struct drm_plane_state *plane_state; + struct intel_plane *plane; + struct intel_plane_state *plane_state; bool crtc_chosen = false; int i; @@ -1066,7 +1066,7 @@ void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv, /* Does this atomic commit involve the CRTC currently tied to FBC? */ if (fbc->crtc && - !drm_atomic_get_existing_crtc_state(state, &fbc->crtc->base)) + !intel_atomic_get_new_crtc_state(state, fbc->crtc)) goto out; if (!intel_fbc_can_enable(dev_priv)) @@ -1076,25 +1076,22 @@ void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv, * plane. We could go for fancier schemes such as checking the plane * size, but this would just affect the few platforms that don't tie FBC * to pipe or plane A. 
*/ - for_each_new_plane_in_state(state, plane, plane_state, i) { - struct intel_plane_state *intel_plane_state = - to_intel_plane_state(plane_state); - struct intel_crtc_state *intel_crtc_state; - struct intel_crtc *crtc = to_intel_crtc(plane_state->crtc); + for_each_new_intel_plane_in_state(state, plane, plane_state, i) { + struct intel_crtc_state *crtc_state; + struct intel_crtc *crtc = to_intel_crtc(plane_state->base.crtc); - if (!intel_plane_state->base.visible) + if (!plane_state->base.visible) continue; if (fbc_on_pipe_a_only(dev_priv) && crtc->pipe != PIPE_A) continue; - if (fbc_on_plane_a_only(dev_priv) && crtc->plane != PLANE_A) + if (fbc_on_plane_a_only(dev_priv) && plane->i9xx_plane != PLANE_A) continue; - intel_crtc_state = to_intel_crtc_state( - drm_atomic_get_existing_crtc_state(state, &crtc->base)); + crtc_state = intel_atomic_get_new_crtc_state(state, crtc); - intel_crtc_state->enable_fbc = true; + crtc_state->enable_fbc = true; crtc_chosen = true; break; } diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index 823d0c2e9ad2..d08e760252d4 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -231,8 +231,7 @@ int intel_guc_sample_forcewake(struct intel_guc *guc) action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE; /* WaRsDisableCoarsePowerGating:skl,bxt */ - if (!intel_rc6_enabled() || - NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) + if (!HAS_RC6(dev_priv) || NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) action[1] = 0; else /* bit 0 and 1 are for Render and Media domain separately */ diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 75c4cfefdaff..59856726d2bc 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -30,8 +30,8 @@ #include "intel_guc_fwif.h" #include "intel_guc_ct.h" #include "intel_guc_log.h" +#include "intel_guc_reg.h" #include "intel_uc_fw.h" -#include "i915_guc_reg.h" #include "i915_vma.h" struct 
guc_preempt_work { diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c b/drivers/gpu/drm/i915/intel_guc_fw.c index 69ba01599575..89862fa8ab42 100644 --- a/drivers/gpu/drm/i915/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/intel_guc_fw.c @@ -30,14 +30,14 @@ #include "intel_guc_fw.h" #include "i915_drv.h" -#define SKL_FW_MAJOR 6 -#define SKL_FW_MINOR 1 +#define SKL_FW_MAJOR 9 +#define SKL_FW_MINOR 33 -#define BXT_FW_MAJOR 8 -#define BXT_FW_MINOR 7 +#define BXT_FW_MAJOR 9 +#define BXT_FW_MINOR 29 #define KBL_FW_MAJOR 9 -#define KBL_FW_MINOR 14 +#define KBL_FW_MINOR 39 #define GLK_FW_MAJOR 10 #define GLK_FW_MINOR 56 @@ -130,14 +130,14 @@ static int guc_xfer_rsa(struct intel_guc *guc, struct i915_vma *vma) struct drm_i915_private *dev_priv = guc_to_i915(guc); struct intel_uc_fw *guc_fw = &guc->fw; struct sg_table *sg = vma->pages; - u32 rsa[UOS_RSA_SCRATCH_MAX_COUNT]; + u32 rsa[UOS_RSA_SCRATCH_COUNT]; int i; if (sg_pcopy_to_buffer(sg->sgl, sg->nents, rsa, sizeof(rsa), guc_fw->rsa_offset) != sizeof(rsa)) return -EINVAL; - for (i = 0; i < UOS_RSA_SCRATCH_MAX_COUNT; i++) + for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++) I915_WRITE(UOS_RSA_SCRATCH(i), rsa[i]); return 0; diff --git a/drivers/gpu/drm/i915/i915_guc_reg.h b/drivers/gpu/drm/i915/intel_guc_reg.h index bc1ae7d8f424..19a9247c5664 100644 --- a/drivers/gpu/drm/i915/i915_guc_reg.h +++ b/drivers/gpu/drm/i915/intel_guc_reg.h @@ -21,8 +21,8 @@ * IN THE SOFTWARE. 
* */ -#ifndef _I915_GUC_REG_H_ -#define _I915_GUC_REG_H_ +#ifndef _INTEL_GUC_REG_H_ +#define _INTEL_GUC_REG_H_ /* Definitions of GuC H/W registers, bits, etc */ @@ -52,7 +52,8 @@ #define SOFT_SCRATCH_COUNT 16 #define UOS_RSA_SCRATCH(i) _MMIO(0xc200 + (i) * 4) -#define UOS_RSA_SCRATCH_MAX_COUNT 64 +#define UOS_RSA_SCRATCH_COUNT 64 + #define DMA_ADDR_0_LOW _MMIO(0xc300) #define DMA_ADDR_0_HIGH _MMIO(0xc304) #define DMA_ADDR_1_LOW _MMIO(0xc308) diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index cbf5a96f5806..912ff143d531 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -718,12 +718,9 @@ static void guc_submit(struct intel_engine_cs *engine) static void port_assign(struct execlist_port *port, struct drm_i915_gem_request *rq) { - GEM_BUG_ON(rq == port_request(port)); + GEM_BUG_ON(port_isset(port)); - if (port_isset(port)) - i915_gem_request_put(port_request(port)); - - port_set(port, port_pack(i915_gem_request_get(rq), port_count(port))); + port_set(port, i915_gem_request_get(rq)); } static void guc_dequeue(struct intel_engine_cs *engine) @@ -743,23 +740,26 @@ static void guc_dequeue(struct intel_engine_cs *engine) if (!rb) goto unlock; - if (HAS_LOGICAL_RING_PREEMPTION(engine->i915) && port_isset(port)) { - struct guc_preempt_work *preempt_work = - &engine->i915->guc.preempt_work[engine->id]; - - if (rb_entry(rb, struct i915_priolist, node)->priority > - max(port_request(port)->priotree.priority, 0)) { - execlists_set_active(execlists, - EXECLISTS_ACTIVE_PREEMPT); - queue_work(engine->i915->guc.preempt_wq, - &preempt_work->work); - goto unlock; - } else if (port_isset(last_port)) { - goto unlock; + if (port_isset(port)) { + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) { + struct guc_preempt_work *preempt_work = + &engine->i915->guc.preempt_work[engine->id]; + + if (rb_entry(rb, struct i915_priolist, node)->priority > + 
max(port_request(port)->priotree.priority, 0)) { + execlists_set_active(execlists, + EXECLISTS_ACTIVE_PREEMPT); + queue_work(engine->i915->guc.preempt_wq, + &preempt_work->work); + goto unlock; + } } port++; + if (port_isset(port)) + goto unlock; } + GEM_BUG_ON(port_isset(port)); do { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); @@ -1453,6 +1453,8 @@ int intel_guc_submission_enable(struct intel_guc *guc) execlists->tasklet.func = guc_submission_tasklet; engine->park = guc_submission_park; engine->unpark = guc_submission_unpark; + + engine->flags &= ~I915_ENGINE_SUPPORTS_STATS; } return 0; diff --git a/drivers/gpu/drm/i915/intel_guc_submission.h b/drivers/gpu/drm/i915/intel_guc_submission.h index e901192ee469..021fe85c8f71 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.h +++ b/drivers/gpu/drm/i915/intel_guc_submission.h @@ -22,8 +22,8 @@ * */ -#ifndef _I915_GUC_SUBMISSION_H_ -#define _I915_GUC_SUBMISSION_H_ +#ifndef _INTEL_GUC_SUBMISSION_H_ +#define _INTEL_GUC_SUBMISSION_H_ #include <linux/spinlock.h> diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c index b4a7f31f0214..126f7c769c69 100644 --- a/drivers/gpu/drm/i915/intel_gvt.c +++ b/drivers/gpu/drm/i915/intel_gvt.c @@ -95,11 +95,6 @@ int intel_gvt_init(struct drm_i915_private *dev_priv) return 0; } - if (!i915_modparams.enable_execlists) { - DRM_ERROR("i915 GVT-g loading failed due to disabled execlists mode\n"); - return -EIO; - } - if (i915_modparams.enable_guc_submission) { DRM_ERROR("i915 GVT-g loading failed due to Graphics virtualization is not yet supported with GuC submission\n"); return -EIO; diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index 12ac270a5f93..95bbb5a79c4f 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -27,13 +27,9 @@ static bool ipehr_is_semaphore_wait(struct intel_engine_cs *engine, u32 ipehr) { - if (INTEL_GEN(engine->i915) >= 8) { - return 
(ipehr >> 23) == 0x1c; - } else { - ipehr &= ~MI_SEMAPHORE_SYNC_MASK; - return ipehr == (MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | - MI_SEMAPHORE_REGISTER); - } + ipehr &= ~MI_SEMAPHORE_SYNC_MASK; + return ipehr == (MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | + MI_SEMAPHORE_REGISTER); } static struct intel_engine_cs * @@ -41,31 +37,20 @@ semaphore_wait_to_signaller_ring(struct intel_engine_cs *engine, u32 ipehr, u64 offset) { struct drm_i915_private *dev_priv = engine->i915; + u32 sync_bits = ipehr & MI_SEMAPHORE_SYNC_MASK; struct intel_engine_cs *signaller; enum intel_engine_id id; - if (INTEL_GEN(dev_priv) >= 8) { - for_each_engine(signaller, dev_priv, id) { - if (engine == signaller) - continue; - - if (offset == signaller->semaphore.signal_ggtt[engine->hw_id]) - return signaller; - } - } else { - u32 sync_bits = ipehr & MI_SEMAPHORE_SYNC_MASK; - - for_each_engine(signaller, dev_priv, id) { - if(engine == signaller) - continue; + for_each_engine(signaller, dev_priv, id) { + if (engine == signaller) + continue; - if (sync_bits == signaller->semaphore.mbox.wait[engine->hw_id]) - return signaller; - } + if (sync_bits == signaller->semaphore.mbox.wait[engine->hw_id]) + return signaller; } - DRM_DEBUG_DRIVER("No signaller ring found for %s, ipehr 0x%08x, offset 0x%016llx\n", - engine->name, ipehr, offset); + DRM_DEBUG_DRIVER("No signaller ring found for %s, ipehr 0x%08x\n", + engine->name, ipehr); return ERR_PTR(-ENODEV); } @@ -135,11 +120,6 @@ semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno) return NULL; *seqno = ioread32(vaddr + head + 4) + 1; - if (INTEL_GEN(dev_priv) >= 8) { - offset = ioread32(vaddr + head + 12); - offset <<= 32; - offset |= ioread32(vaddr + head + 8); - } return semaphore_wait_to_signaller_ring(engine, ipehr, offset); } @@ -273,7 +253,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) return ENGINE_WAIT_KICK; } - if (INTEL_GEN(dev_priv) >= 6 && tmp & RING_WAIT_SEMAPHORE) { + if (IS_GEN(dev_priv, 6, 7) && tmp & 
RING_WAIT_SEMAPHORE) { switch (semaphore_passed(engine)) { default: return ENGINE_DEAD; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index be6c39adebdf..2a8160f603ab 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -154,9 +154,7 @@ #define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15) #define GEN8_CTX_STATUS_COMPLETED_MASK \ - (GEN8_CTX_STATUS_ACTIVE_IDLE | \ - GEN8_CTX_STATUS_PREEMPTED | \ - GEN8_CTX_STATUS_ELEMENT_SWITCH) + (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED) #define CTX_LRI_HEADER_0 0x01 #define CTX_CONTEXT_CONTROL 0x02 @@ -221,37 +219,6 @@ static void execlists_init_reg_state(u32 *reg_state, struct intel_ring *ring); /** - * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists - * @dev_priv: i915 device private - * @enable_execlists: value of i915.enable_execlists module parameter. - * - * Only certain platforms support Execlists (the prerequisites being - * support for Logical Ring Contexts and Aliasing PPGTT or better). - * - * Return: 1 if Execlists is supported and has to be enabled. - */ -int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enable_execlists) -{ - /* On platforms with execlist available, vGPU will only - * support execlist mode, no ring buffer mode. 
- */ - if (HAS_LOGICAL_RING_CONTEXTS(dev_priv) && intel_vgpu_active(dev_priv)) - return 1; - - if (INTEL_GEN(dev_priv) >= 9) - return 1; - - if (enable_execlists == 0) - return 0; - - if (HAS_LOGICAL_RING_CONTEXTS(dev_priv) && - USES_PPGTT(dev_priv)) - return 1; - - return 0; -} - -/** * intel_lr_context_descriptor_update() - calculate & cache the descriptor * descriptor for a pinned context * @ctx: Context to work on @@ -412,6 +379,20 @@ execlists_context_status_change(struct drm_i915_gem_request *rq, status, rq); } +static inline void +execlists_context_schedule_in(struct drm_i915_gem_request *rq) +{ + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); + intel_engine_context_in(rq->engine); +} + +static inline void +execlists_context_schedule_out(struct drm_i915_gem_request *rq) +{ + intel_engine_context_out(rq->engine); + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); +} + static void execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state) { @@ -463,7 +444,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) if (rq) { GEM_BUG_ON(count > !n); if (!count++) - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); + execlists_context_schedule_in(rq); port_set(&port[n], port_pack(rq, count)); desc = execlists_update_context(rq); GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); @@ -479,6 +460,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) elsp_write(desc, elsp); } + execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK); } static bool ctx_single_port_submission(const struct i915_gem_context *ctx) @@ -531,6 +513,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine) elsp_write(0, elsp); elsp_write(ce->lrc_desc, elsp); + execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK); } static void execlists_dequeue(struct intel_engine_cs *engine) @@ -577,9 +560,20 @@ static void execlists_dequeue(struct intel_engine_cs 
*engine) * know the next preemption status we see corresponds * to this ELSP update. */ + GEM_BUG_ON(!port_count(&port[0])); if (port_count(&port[0]) > 1) goto unlock; + /* + * If we write to ELSP a second time before the HW has had + * a chance to respond to the previous write, we can confuse + * the HW and hit "undefined behaviour". After writing to ELSP, + * we must then wait until we see a context-switch event from + * the HW to indicate that it has had a chance to respond. + */ + if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK)) + goto unlock; + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915) && rb_entry(rb, struct i915_priolist, node)->priority > max(last->priotree.priority, 0)) { @@ -713,6 +707,7 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) struct drm_i915_gem_request *rq = port_request(port); GEM_BUG_ON(!execlists->active); + intel_engine_context_out(rq->engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_PREEMPTED); i915_gem_request_put(rq); @@ -873,10 +868,22 @@ static void execlists_submission_tasklet(unsigned long data) GEM_TRACE("%s csb[%dd]: status=0x%08x:0x%08x\n", engine->name, head, status, buf[2*head + 1]); + + if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE | + GEN8_CTX_STATUS_PREEMPTED)) + execlists_set_active(execlists, + EXECLISTS_ACTIVE_HWACK); + if (status & GEN8_CTX_STATUS_ACTIVE_IDLE) + execlists_clear_active(execlists, + EXECLISTS_ACTIVE_HWACK); + if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) continue; - if (status & GEN8_CTX_STATUS_ACTIVE_IDLE && + /* We should never get a COMPLETED | IDLE_ACTIVE! 
*/ + GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE); + + if (status & GEN8_CTX_STATUS_COMPLETE && buf[2*head + 1] == PREEMPT_ID) { execlists_cancel_port_requests(execlists); execlists_unwind_incomplete_requests(execlists); @@ -907,9 +914,10 @@ static void execlists_submission_tasklet(unsigned long data) GEM_BUG_ON(count == 0); if (--count == 0) { GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); + GEM_BUG_ON(port_isset(&port[1]) && + !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH)); GEM_BUG_ON(!i915_gem_request_completed(rq)); - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); - + execlists_context_schedule_out(rq); trace_i915_gem_request_out(rq); i915_gem_request_put(rq); @@ -1911,6 +1919,8 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine) engine->park = NULL; engine->unpark = NULL; + + engine->flags |= I915_ENGINE_SUPPORTS_STATS; } static void diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 17182ce29674..6d4f9b995a11 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -107,8 +107,4 @@ intel_lr_context_descriptor(struct i915_gem_context *ctx, return ctx->engine[engine->id].lrc_desc; } -/* Execlists */ -int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, - int enable_execlists); - #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index adc51e452e3e..fa6831f8c004 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -543,25 +543,6 @@ static u32 pwm_get_backlight(struct intel_connector *connector) return DIV_ROUND_UP(duty_ns * 100, CRC_PMIC_PWM_PERIOD_NS); } -static u32 intel_panel_get_backlight(struct intel_connector *connector) -{ - struct drm_i915_private *dev_priv = to_i915(connector->base.dev); - struct intel_panel *panel = &connector->panel; - u32 val = 0; - - mutex_lock(&dev_priv->backlight_lock); - - if 
(panel->backlight.enabled) { - val = panel->backlight.get(connector); - val = intel_panel_compute_brightness(connector, val); - } - - mutex_unlock(&dev_priv->backlight_lock); - - DRM_DEBUG_DRIVER("get backlight PWM = %d\n", val); - return val; -} - static void lpt_set_backlight(const struct drm_connector_state *conn_state, u32 level) { struct intel_connector *connector = to_intel_connector(conn_state->connector); @@ -649,31 +630,6 @@ intel_panel_actually_set_backlight(const struct drm_connector_state *conn_state, panel->backlight.set(conn_state, level); } -/* set backlight brightness to level in range [0..max], scaling wrt hw min */ -static void intel_panel_set_backlight(const struct drm_connector_state *conn_state, - u32 user_level, u32 user_max) -{ - struct intel_connector *connector = to_intel_connector(conn_state->connector); - struct drm_i915_private *dev_priv = to_i915(connector->base.dev); - struct intel_panel *panel = &connector->panel; - u32 hw_level; - - if (!panel->backlight.present) - return; - - mutex_lock(&dev_priv->backlight_lock); - - WARN_ON(panel->backlight.max == 0); - - hw_level = scale_user_to_hw(connector, user_level, user_max); - panel->backlight.level = hw_level; - - if (panel->backlight.enabled) - intel_panel_actually_set_backlight(conn_state, hw_level); - - mutex_unlock(&dev_priv->backlight_lock); -} - /* set backlight brightness to level in range [0..max], assuming hw min is * respected. 
*/ @@ -1182,6 +1138,50 @@ void intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state, } #if IS_ENABLED(CONFIG_BACKLIGHT_CLASS_DEVICE) +static u32 intel_panel_get_backlight(struct intel_connector *connector) +{ + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_panel *panel = &connector->panel; + u32 val = 0; + + mutex_lock(&dev_priv->backlight_lock); + + if (panel->backlight.enabled) { + val = panel->backlight.get(connector); + val = intel_panel_compute_brightness(connector, val); + } + + mutex_unlock(&dev_priv->backlight_lock); + + DRM_DEBUG_DRIVER("get backlight PWM = %d\n", val); + return val; +} + +/* set backlight brightness to level in range [0..max], scaling wrt hw min */ +static void intel_panel_set_backlight(const struct drm_connector_state *conn_state, + u32 user_level, u32 user_max) +{ + struct intel_connector *connector = to_intel_connector(conn_state->connector); + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_panel *panel = &connector->panel; + u32 hw_level; + + if (!panel->backlight.present) + return; + + mutex_lock(&dev_priv->backlight_lock); + + WARN_ON(panel->backlight.max == 0); + + hw_level = scale_user_to_hw(connector, user_level, user_max); + panel->backlight.level = hw_level; + + if (panel->backlight.enabled) + intel_panel_actually_set_backlight(conn_state, hw_level); + + mutex_unlock(&dev_priv->backlight_lock); +} + static int intel_backlight_device_update_status(struct backlight_device *bd) { struct intel_connector *connector = bl_get_data(bd); diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c index 61641d479b93..1f5cd572a7ff 100644 --- a/drivers/gpu/drm/i915/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/intel_pipe_crc.c @@ -541,8 +541,6 @@ retry: * completely disable it. 
*/ pipe_config->ips_force_disable = enable; - if (pipe_config->ips_enabled == enable) - pipe_config->base.connectors_changed = true; } if (IS_HASWELL(dev_priv)) { diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 4d2cd432f739..67f326230a7e 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -52,9 +52,6 @@ * which brings the most power savings; deeper states save more power, but * require higher latency to switch to and wake up. */ -#define INTEL_RC6_ENABLE (1<<0) -#define INTEL_RC6p_ENABLE (1<<1) -#define INTEL_RC6pp_ENABLE (1<<2) static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) { @@ -512,38 +509,41 @@ static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state) fifo_state->plane[PLANE_CURSOR] = 63; } -static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv, int plane) +static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv, + enum i9xx_plane_id i9xx_plane) { uint32_t dsparb = I915_READ(DSPARB); int size; size = dsparb & 0x7f; - if (plane) + if (i9xx_plane == PLANE_B) size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size; - DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb, - plane ? "B" : "A", size); + DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n", + dsparb, plane_name(i9xx_plane), size); return size; } -static int i830_get_fifo_size(struct drm_i915_private *dev_priv, int plane) +static int i830_get_fifo_size(struct drm_i915_private *dev_priv, + enum i9xx_plane_id i9xx_plane) { uint32_t dsparb = I915_READ(DSPARB); int size; size = dsparb & 0x1ff; - if (plane) + if (i9xx_plane == PLANE_B) size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size; size >>= 1; /* Convert to cachelines */ - DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb, - plane ? 
"B" : "A", size); + DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n", + dsparb, plane_name(i9xx_plane), size); return size; } -static int i845_get_fifo_size(struct drm_i915_private *dev_priv, int plane) +static int i845_get_fifo_size(struct drm_i915_private *dev_priv, + enum i9xx_plane_id i9xx_plane) { uint32_t dsparb = I915_READ(DSPARB); int size; @@ -551,9 +551,8 @@ static int i845_get_fifo_size(struct drm_i915_private *dev_priv, int plane) size = dsparb & 0x7f; size >>= 2; /* Convert to cachelines */ - DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb, - plane ? "B" : "A", - size); + DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n", + dsparb, plane_name(i9xx_plane), size); return size; } @@ -2277,8 +2276,8 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) else wm_info = &i830_a_wm_info; - fifo_size = dev_priv->display.get_fifo_size(dev_priv, 0); - crtc = intel_get_crtc_for_plane(dev_priv, 0); + fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_A); + crtc = intel_get_crtc_for_plane(dev_priv, PLANE_A); if (intel_crtc_active(crtc)) { const struct drm_display_mode *adjusted_mode = &crtc->config->base.adjusted_mode; @@ -2304,8 +2303,8 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) if (IS_GEN2(dev_priv)) wm_info = &i830_bc_wm_info; - fifo_size = dev_priv->display.get_fifo_size(dev_priv, 1); - crtc = intel_get_crtc_for_plane(dev_priv, 1); + fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_B); + crtc = intel_get_crtc_for_plane(dev_priv, PLANE_B); if (intel_crtc_active(crtc)) { const struct drm_display_mode *adjusted_mode = &crtc->config->base.adjusted_mode; @@ -2417,7 +2416,7 @@ static void i845_update_wm(struct intel_crtc *unused_crtc) adjusted_mode = &crtc->config->base.adjusted_mode; planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock, &i845_wm_info, - dev_priv->display.get_fifo_size(dev_priv, 0), + dev_priv->display.get_fifo_size(dev_priv, PLANE_A), 4, pessimal_latency_ns); fwater_lo = I915_READ(FW_BLC) & ~0xfff; 
fwater_lo |= (3<<8) | planea_wm; @@ -6415,26 +6414,6 @@ static void valleyview_disable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RP_CONTROL, 0); } -static void intel_print_rc6_info(struct drm_i915_private *dev_priv, u32 mode) -{ - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1))) - mode = GEN6_RC_CTL_RC6_ENABLE; - else - mode = 0; - } - if (HAS_RC6p(dev_priv)) - DRM_DEBUG_DRIVER("Enabling RC6 states: " - "RC6 %s RC6p %s RC6pp %s\n", - onoff(mode & GEN6_RC_CTL_RC6_ENABLE), - onoff(mode & GEN6_RC_CTL_RC6p_ENABLE), - onoff(mode & GEN6_RC_CTL_RC6pp_ENABLE)); - - else - DRM_DEBUG_DRIVER("Enabling RC6 states: RC6 %s\n", - onoff(mode & GEN6_RC_CTL_RC6_ENABLE)); -} - static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv) { struct i915_ggtt *ggtt = &dev_priv->ggtt; @@ -6497,42 +6476,30 @@ static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv) return enable_rc6; } -int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6) +static bool sanitize_rc6(struct drm_i915_private *i915) { - /* No RC6 before Ironlake and code is gone for ilk. 
*/ - if (INTEL_INFO(dev_priv)->gen < 6) - return 0; + struct intel_device_info *info = mkwrite_device_info(i915); - if (!enable_rc6) - return 0; + /* Powersaving is controlled by the host when inside a VM */ + if (intel_vgpu_active(i915)) + info->has_rc6 = 0; - if (IS_GEN9_LP(dev_priv) && !bxt_check_bios_rc6_setup(dev_priv)) { + if (info->has_rc6 && + IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) { DRM_INFO("RC6 disabled by BIOS\n"); - return 0; + info->has_rc6 = 0; } - /* Respect the kernel parameter if it is set */ - if (enable_rc6 >= 0) { - int mask; - - if (HAS_RC6p(dev_priv)) - mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE | - INTEL_RC6pp_ENABLE; - else - mask = INTEL_RC6_ENABLE; - - if ((enable_rc6 & mask) != enable_rc6) - DRM_DEBUG_DRIVER("Adjusting RC6 mask to %d " - "(requested %d, valid %d)\n", - enable_rc6 & mask, enable_rc6, mask); - - return enable_rc6 & mask; - } - - if (IS_IVYBRIDGE(dev_priv)) - return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE); + /* + * We assume that we do not have any deep rc6 levels if we don't have + * have the previous rc6 level supported, i.e. we use HAS_RC6() + * as the initial coarse check for rc6 in general, moving on to + * progressively finer/deeper levels. 
+ */ + if (!info->has_rc6 && info->has_rc6p) + info->has_rc6p = 0; - return INTEL_RC6_ENABLE; + return info->has_rc6; } static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) @@ -6602,9 +6569,10 @@ static void gen9_enable_rps(struct drm_i915_private *dev_priv) { intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - /* Program defaults and thresholds for RPS*/ - I915_WRITE(GEN6_RC_VIDEO_FREQ, - GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq)); + /* Program defaults and thresholds for RPS */ + if (IS_GEN9(dev_priv)) + I915_WRITE(GEN6_RC_VIDEO_FREQ, + GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq)); /* 1 second timeout*/ I915_WRITE(GEN6_RP_DOWN_TIMEOUT, @@ -6624,7 +6592,7 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; - u32 rc6_mode, rc6_mask = 0; + u32 rc6_mode; /* 1a: Software RC state - RC0 */ I915_WRITE(GEN6_RC_STATE, 0); @@ -6665,9 +6633,6 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25); /* 3a: Enable RC6 */ - if (intel_rc6_enabled() & INTEL_RC6_ENABLE) - rc6_mask = GEN6_RC_CTL_RC6_ENABLE; - DRM_INFO("RC6 %s\n", onoff(rc6_mask & GEN6_RC_CTL_RC6_ENABLE)); I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ /* WaRsUseTimeoutMode:cnl (pre-prod) */ @@ -6677,7 +6642,9 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) rc6_mode = GEN6_RC_CTL_EI_MODE(1); I915_WRITE(GEN6_RC_CONTROL, - GEN6_RC_CTL_HW_ENABLE | rc6_mode | rc6_mask); + GEN6_RC_CTL_HW_ENABLE | + GEN6_RC_CTL_RC6_ENABLE | + rc6_mode); /* * 3b: Enable Coarse Power Gating only when RC6 is enabled. @@ -6686,8 +6653,8 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) I915_WRITE(GEN9_PG_ENABLE, 0); else - I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? 
- (GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE) : 0); + I915_WRITE(GEN9_PG_ENABLE, + GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } @@ -6696,7 +6663,6 @@ static void gen8_enable_rc6(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; - uint32_t rc6_mask = 0; /* 1a: Software RC state - RC0 */ I915_WRITE(GEN6_RC_STATE, 0); @@ -6718,13 +6684,11 @@ static void gen8_enable_rc6(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ /* 3: Enable RC6 */ - if (intel_rc6_enabled() & INTEL_RC6_ENABLE) - rc6_mask = GEN6_RC_CTL_RC6_ENABLE; - intel_print_rc6_info(dev_priv, rc6_mask); - I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | - GEN7_RC_CTL_TO_MODE | - rc6_mask); + I915_WRITE(GEN6_RC_CONTROL, + GEN6_RC_CTL_HW_ENABLE | + GEN7_RC_CTL_TO_MODE | + GEN6_RC_CTL_RC6_ENABLE); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } @@ -6773,9 +6737,8 @@ static void gen6_enable_rc6(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; - u32 rc6vids, rc6_mask = 0; + u32 rc6vids, rc6_mask; u32 gtfifodbg; - int rc6_mode; int ret; I915_WRITE(GEN6_RC_STATE, 0); @@ -6810,22 +6773,12 @@ static void gen6_enable_rc6(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RC6p_THRESHOLD, 150000); I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ - /* Check if we are enabling RC6 */ - rc6_mode = intel_rc6_enabled(); - if (rc6_mode & INTEL_RC6_ENABLE) - rc6_mask |= GEN6_RC_CTL_RC6_ENABLE; - /* We don't use those on Haswell */ - if (!IS_HASWELL(dev_priv)) { - if (rc6_mode & INTEL_RC6p_ENABLE) - rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; - - if (rc6_mode & INTEL_RC6pp_ENABLE) - rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; - } - - intel_print_rc6_info(dev_priv, rc6_mask); - + rc6_mask = GEN6_RC_CTL_RC6_ENABLE; + if (HAS_RC6p(dev_priv)) + rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; + if (HAS_RC6pp(dev_priv)) + rc6_mask |= 
GEN6_RC_CTL_RC6pp_ENABLE; I915_WRITE(GEN6_RC_CONTROL, rc6_mask | GEN6_RC_CTL_EI_MODE(1) | @@ -7268,7 +7221,7 @@ static void cherryview_enable_rc6(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; - u32 gtfifodbg, rc6_mode = 0, pcbr; + u32 gtfifodbg, rc6_mode, pcbr; gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV | GT_FIFO_FREE_ENTRIES_CHV); @@ -7309,10 +7262,9 @@ static void cherryview_enable_rc6(struct drm_i915_private *dev_priv) pcbr = I915_READ(VLV_PCBR); /* 3: Enable RC6 */ - if ((intel_rc6_enabled() & INTEL_RC6_ENABLE) && - (pcbr >> VLV_PCBR_ADDR_SHIFT)) + rc6_mode = 0; + if (pcbr >> VLV_PCBR_ADDR_SHIFT) rc6_mode = GEN7_RC_CTL_TO_MODE; - I915_WRITE(GEN6_RC_CONTROL, rc6_mode); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); @@ -7364,7 +7316,7 @@ static void valleyview_enable_rc6(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; - u32 gtfifodbg, rc6_mode = 0; + u32 gtfifodbg; valleyview_check_pctx(dev_priv); @@ -7397,12 +7349,8 @@ static void valleyview_enable_rc6(struct drm_i915_private *dev_priv) VLV_MEDIA_RC6_COUNT_EN | VLV_RENDER_RC6_COUNT_EN)); - if (intel_rc6_enabled() & INTEL_RC6_ENABLE) - rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; - - intel_print_rc6_info(dev_priv, rc6_mode); - - I915_WRITE(GEN6_RC_CONTROL, rc6_mode); + I915_WRITE(GEN6_RC_CONTROL, + GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } @@ -7929,7 +7877,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) * RPM depends on RC6 to save restore the GT HW context, so make RC6 a * requirement. 
*/ - if (!i915_modparams.enable_rc6) { + if (!sanitize_rc6(dev_priv)) { DRM_INFO("RC6 disabled, disabling runtime PM support\n"); intel_runtime_pm_get(dev_priv); } @@ -7982,7 +7930,7 @@ void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) if (IS_VALLEYVIEW(dev_priv)) valleyview_cleanup_gt_powersave(dev_priv); - if (!i915_modparams.enable_rc6) + if (!HAS_RC6(dev_priv)) intel_runtime_pm_put(dev_priv); } @@ -8149,7 +8097,8 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->pcu_lock); - intel_enable_rc6(dev_priv); + if (HAS_RC6(dev_priv)) + intel_enable_rc6(dev_priv); intel_enable_rps(dev_priv); if (HAS_LLC(dev_priv)) intel_enable_llc_pstate(dev_priv); @@ -9291,7 +9240,7 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, ret = 0; goto out; } - ret = _wait_for(COND, timeout_base_ms * 1000, 10); + ret = _wait_for(COND, timeout_base_ms * 1000, 10, 10); if (!ret) goto out; @@ -9393,12 +9342,13 @@ static u64 vlv_residency_raw(struct drm_i915_private *dev_priv, const i915_reg_t reg) { u32 lower, upper, tmp; + unsigned long flags; int loop = 2; /* The register accessed do not need forcewake. We borrow * uncore lock to prevent concurrent access to range reg. */ - spin_lock_irq(&dev_priv->uncore.lock); + spin_lock_irqsave(&dev_priv->uncore.lock, flags); /* vlv and chv residency counters are 40 bits in width. * With a control bit, we can choose between upper or lower @@ -9429,39 +9379,51 @@ static u64 vlv_residency_raw(struct drm_i915_private *dev_priv, * now. 
*/ - spin_unlock_irq(&dev_priv->uncore.lock); + spin_unlock_irqrestore(&dev_priv->uncore.lock, flags); return lower | (u64)upper << 8; } -u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv, +u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv, const i915_reg_t reg) { - u64 time_hw, units, div; + u64 time_hw; + u32 mul, div; - if (!intel_rc6_enabled()) + if (!HAS_RC6(dev_priv)) return 0; - intel_runtime_pm_get(dev_priv); - /* On VLV and CHV, residency time is in CZ units rather than 1.28us */ if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - units = 1000; + mul = 1000000; div = dev_priv->czclk_freq; - time_hw = vlv_residency_raw(dev_priv, reg); - } else if (IS_GEN9_LP(dev_priv)) { - units = 1000; - div = 1200; /* 833.33ns */ - - time_hw = I915_READ(reg); } else { - units = 128000; /* 1.28us */ - div = 100000; + /* 833.33ns units on Gen9LP, 1.28us elsewhere. */ + if (IS_GEN9_LP(dev_priv)) { + mul = 10000; + div = 12; + } else { + mul = 1280; + div = 1; + } time_hw = I915_READ(reg); } - intel_runtime_pm_put(dev_priv); - return DIV_ROUND_UP_ULL(time_hw * units, div); + return DIV_ROUND_UP_ULL(time_hw * mul, div); +} + +u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat) +{ + u32 cagf; + + if (INTEL_GEN(dev_priv) >= 9) + cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; + else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT; + else + cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; + + return cagf; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 12e734b29463..e2085820b586 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -340,50 +340,6 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) return 0; } -static int -gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) -{ - u32 flags; - u32 *cs; - - cs = intel_ring_begin(req, mode & 
EMIT_INVALIDATE ? 12 : 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - flags = PIPE_CONTROL_CS_STALL; - - if (mode & EMIT_FLUSH) { - flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; - flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; - flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; - flags |= PIPE_CONTROL_FLUSH_ENABLE; - } - if (mode & EMIT_INVALIDATE) { - flags |= PIPE_CONTROL_TLB_INVALIDATE; - flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_QW_WRITE; - flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; - - /* WaCsStallBeforeStateCacheInvalidate:bdw,chv */ - cs = gen8_emit_pipe_control(cs, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_STALL_AT_SCOREBOARD, - 0); - } - - cs = gen8_emit_pipe_control(cs, flags, - i915_ggtt_offset(req->engine->scratch) + - 2 * CACHELINE_BYTES); - - intel_ring_advance(req, cs); - - return 0; -} - static void ring_setup_phys_status_page(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -427,7 +383,6 @@ static void intel_ring_setup_status_page(struct intel_engine_cs *engine) } else if (IS_GEN6(dev_priv)) { mmio = RING_HWS_PGA_GEN6(engine->mmio_base); } else { - /* XXX: gen8 returns to sanity */ mmio = RING_HWS_PGA(engine->mmio_base); } @@ -437,13 +392,7 @@ static void intel_ring_setup_status_page(struct intel_engine_cs *engine) I915_WRITE(mmio, engine->status_page.ggtt_offset); POSTING_READ(mmio); - /* - * Flush the TLB for this page - * - * FIXME: These two bits have disappeared on gen8, so a question - * arises: do we still need this and if so how should we go about - * invalidating the TLB? 
- */ + /* Flush the TLB for this page */ if (IS_GEN(dev_priv, 6, 7)) { i915_reg_t reg = RING_INSTPM(engine->mmio_base); @@ -611,8 +560,6 @@ static void reset_ring_common(struct intel_engine_cs *engine, struct intel_context *ce = &request->ctx->engine[engine->id]; struct i915_hw_ppgtt *ppgtt; - /* FIXME consider gen8 reset */ - if (ce->state) { I915_WRITE(CCID, i915_ggtt_offset(ce->state) | @@ -644,6 +591,7 @@ static void reset_ring_common(struct intel_engine_cs *engine, request->ring->head = request->postfix; } else { engine->legacy_active_context = NULL; + engine->legacy_active_ppgtt = NULL; } } @@ -713,62 +661,6 @@ static int init_render_ring(struct intel_engine_cs *engine) return init_workarounds_ring(engine); } -static void render_ring_cleanup(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - i915_vma_unpin_and_release(&dev_priv->semaphore); -} - -static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *cs) -{ - struct drm_i915_private *dev_priv = req->i915; - struct intel_engine_cs *waiter; - enum intel_engine_id id; - - for_each_engine(waiter, dev_priv, id) { - u64 gtt_offset = req->engine->semaphore.signal_ggtt[id]; - if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) - continue; - - *cs++ = GFX_OP_PIPE_CONTROL(6); - *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_CS_STALL; - *cs++ = lower_32_bits(gtt_offset); - *cs++ = upper_32_bits(gtt_offset); - *cs++ = req->global_seqno; - *cs++ = 0; - *cs++ = MI_SEMAPHORE_SIGNAL | - MI_SEMAPHORE_TARGET(waiter->hw_id); - *cs++ = 0; - } - - return cs; -} - -static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *cs) -{ - struct drm_i915_private *dev_priv = req->i915; - struct intel_engine_cs *waiter; - enum intel_engine_id id; - - for_each_engine(waiter, dev_priv, id) { - u64 gtt_offset = req->engine->semaphore.signal_ggtt[id]; - if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) - continue; - - *cs++ = (MI_FLUSH_DW + 1) | 
MI_FLUSH_DW_OP_STOREDW; - *cs++ = lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT; - *cs++ = upper_32_bits(gtt_offset); - *cs++ = req->global_seqno; - *cs++ = MI_SEMAPHORE_SIGNAL | - MI_SEMAPHORE_TARGET(waiter->hw_id); - *cs++ = 0; - } - - return cs; -} - static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs) { struct drm_i915_private *dev_priv = req->i915; @@ -851,70 +743,6 @@ static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) req->engine->semaphore.signal(req, cs)); } -static void gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req, - u32 *cs) -{ - struct intel_engine_cs *engine = req->engine; - - if (engine->semaphore.signal) - cs = engine->semaphore.signal(req, cs); - - *cs++ = GFX_OP_PIPE_CONTROL(6); - *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE; - *cs++ = intel_hws_seqno_address(engine); - *cs++ = 0; - *cs++ = req->global_seqno; - /* We're thrashing one dword of HWS. */ - *cs++ = 0; - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; - - req->tail = intel_ring_offset(req, cs); - assert_ring_tail_valid(req->ring, req->tail); -} - -static const int gen8_render_emit_breadcrumb_sz = 8; - -/** - * intel_ring_sync - sync the waiter to the signaller on seqno - * - * @waiter - ring that is waiting - * @signaller - ring which has, or will signal - * @seqno - seqno which the waiter will block on - */ - -static int -gen8_ring_sync_to(struct drm_i915_gem_request *req, - struct drm_i915_gem_request *signal) -{ - struct drm_i915_private *dev_priv = req->i915; - u64 offset = GEN8_WAIT_OFFSET(req->engine, signal->engine->id); - struct i915_hw_ppgtt *ppgtt; - u32 *cs; - - cs = intel_ring_begin(req, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_GLOBAL_GTT | - MI_SEMAPHORE_SAD_GTE_SDD; - *cs++ = signal->global_seqno; - *cs++ = lower_32_bits(offset); - *cs++ = upper_32_bits(offset); - intel_ring_advance(req, cs); - - /* When the !RCS engines idle 
waiting upon a semaphore, they lose their - * pagetables and we must reload them before executing the batch. - * We do this on the i915_switch_context() following the wait and - * before the dispatch. - */ - ppgtt = req->ctx->ppgtt; - if (ppgtt && req->engine->id != RCS) - ppgtt->pd_dirty_rings |= intel_engine_flag(req->engine); - return 0; -} - static int gen6_ring_sync_to(struct drm_i915_gem_request *req, struct drm_i915_gem_request *signal) @@ -1090,25 +918,6 @@ hsw_vebox_irq_disable(struct intel_engine_cs *engine) gen6_mask_pm_irq(dev_priv, engine->irq_enable_mask); } -static void -gen8_irq_enable(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - I915_WRITE_IMR(engine, - ~(engine->irq_enable_mask | - engine->irq_keep_mask)); - POSTING_READ_FW(RING_IMR(engine->mmio_base)); -} - -static void -gen8_irq_disable(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - I915_WRITE_IMR(engine, ~engine->irq_keep_mask); -} - static int i965_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 length, @@ -1576,6 +1385,190 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv) intel_ring_reset(engine->buffer, 0); } +static inline int mi_set_context(struct drm_i915_gem_request *rq, u32 flags) +{ + struct drm_i915_private *i915 = rq->i915; + struct intel_engine_cs *engine = rq->engine; + enum intel_engine_id id; + const int num_rings = + /* Use an extended w/a on gen7 if signalling from other rings */ + (HAS_LEGACY_SEMAPHORES(i915) && IS_GEN7(i915)) ? + INTEL_INFO(i915)->num_rings - 1 : + 0; + int len; + u32 *cs; + + flags |= MI_MM_SPACE_GTT; + if (IS_HASWELL(i915)) + /* These flags are for resource streamer on HSW+ */ + flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN; + else + flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN; + + len = 4; + if (IS_GEN7(i915)) + len += 2 + (num_rings ? 
4*num_rings + 6 : 0); + + cs = intel_ring_begin(rq, len); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ + if (IS_GEN7(i915)) { + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + if (num_rings) { + struct intel_engine_cs *signaller; + + *cs++ = MI_LOAD_REGISTER_IMM(num_rings); + for_each_engine(signaller, i915, id) { + if (signaller == engine) + continue; + + *cs++ = i915_mmio_reg_offset( + RING_PSMI_CTL(signaller->mmio_base)); + *cs++ = _MASKED_BIT_ENABLE( + GEN6_PSMI_SLEEP_MSG_DISABLE); + } + } + } + + *cs++ = MI_NOOP; + *cs++ = MI_SET_CONTEXT; + *cs++ = i915_ggtt_offset(rq->ctx->engine[RCS].state) | flags; + /* + * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP + * WaMiSetContext_Hang:snb,ivb,vlv + */ + *cs++ = MI_NOOP; + + if (IS_GEN7(i915)) { + if (num_rings) { + struct intel_engine_cs *signaller; + i915_reg_t last_reg = {}; /* keep gcc quiet */ + + *cs++ = MI_LOAD_REGISTER_IMM(num_rings); + for_each_engine(signaller, i915, id) { + if (signaller == engine) + continue; + + last_reg = RING_PSMI_CTL(signaller->mmio_base); + *cs++ = i915_mmio_reg_offset(last_reg); + *cs++ = _MASKED_BIT_DISABLE( + GEN6_PSMI_SLEEP_MSG_DISABLE); + } + + /* Insert a delay before the next switch! */ + *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + *cs++ = i915_mmio_reg_offset(last_reg); + *cs++ = i915_ggtt_offset(engine->scratch); + *cs++ = MI_NOOP; + } + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + } + + intel_ring_advance(rq, cs); + + return 0; +} + +static int remap_l3(struct drm_i915_gem_request *rq, int slice) +{ + u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice]; + int i; + + if (!remap_info) + return 0; + + cs = intel_ring_begin(rq, GEN7_L3LOG_SIZE/4 * 2 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* + * Note: We do not worry about the concurrent register cacheline hang + * here because no other code should access these registers other than + * at initialization time. 
+ */ + *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4); + for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) { + *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i)); + *cs++ = remap_info[i]; + } + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + return 0; +} + +static int switch_context(struct drm_i915_gem_request *rq) +{ + struct intel_engine_cs *engine = rq->engine; + struct i915_gem_context *to_ctx = rq->ctx; + struct i915_hw_ppgtt *to_mm = + to_ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; + struct i915_gem_context *from_ctx = engine->legacy_active_context; + struct i915_hw_ppgtt *from_mm = engine->legacy_active_ppgtt; + u32 hw_flags = 0; + int ret, i; + + lockdep_assert_held(&rq->i915->drm.struct_mutex); + GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); + + if (to_mm != from_mm || + (to_mm && intel_engine_flag(engine) & to_mm->pd_dirty_rings)) { + trace_switch_mm(engine, to_ctx); + ret = to_mm->switch_mm(to_mm, rq); + if (ret) + goto err; + + to_mm->pd_dirty_rings &= ~intel_engine_flag(engine); + engine->legacy_active_ppgtt = to_mm; + hw_flags = MI_FORCE_RESTORE; + } + + if (to_ctx->engine[engine->id].state && + (to_ctx != from_ctx || hw_flags & MI_FORCE_RESTORE)) { + GEM_BUG_ON(engine->id != RCS); + + /* + * The kernel context(s) is treated as pure scratch and is not + * expected to retain any state (as we sacrifice it during + * suspend and on resume it may be corrupted). This is ok, + * as nothing actually executes using the kernel context; it + * is purely used for flushing user contexts. 
+ */ + if (i915_gem_context_is_kernel(to_ctx)) + hw_flags = MI_RESTORE_INHIBIT; + + ret = mi_set_context(rq, hw_flags); + if (ret) + goto err_mm; + + engine->legacy_active_context = to_ctx; + } + + if (to_ctx->remap_slice) { + for (i = 0; i < MAX_L3_SLICES; i++) { + if (!(to_ctx->remap_slice & BIT(i))) + continue; + + ret = remap_l3(rq, i); + if (ret) + goto err_ctx; + } + + to_ctx->remap_slice = 0; + } + + return 0; + +err_ctx: + engine->legacy_active_context = from_ctx; +err_mm: + engine->legacy_active_ppgtt = from_mm; +err: + return ret; +} + static int ring_request_alloc(struct drm_i915_gem_request *request) { int ret; @@ -1592,6 +1585,10 @@ static int ring_request_alloc(struct drm_i915_gem_request *request) if (ret) return ret; + ret = switch_context(request); + if (ret) + return ret; + request->reserved_space -= LEGACY_REQUEST_SIZE; return 0; } @@ -1792,8 +1789,6 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) return PTR_ERR(cs); cmd = MI_FLUSH_DW; - if (INTEL_GEN(req->i915) >= 8) - cmd += 1; /* We always require a command barrier so that subsequent * commands, such as breadcrumb interrupts, are strictly ordered @@ -1813,38 +1808,9 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) *cs++ = cmd; *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; - if (INTEL_GEN(req->i915) >= 8) { - *cs++ = 0; /* upper addr */ - *cs++ = 0; /* value */ - } else { - *cs++ = 0; - *cs++ = MI_NOOP; - } - intel_ring_advance(req, cs); - return 0; -} - -static int -gen8_emit_bb_start(struct drm_i915_gem_request *req, - u64 offset, u32 len, - unsigned int dispatch_flags) -{ - bool ppgtt = USES_PPGTT(req->i915) && - !(dispatch_flags & I915_DISPATCH_SECURE); - u32 *cs; - - cs = intel_ring_begin(req, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* FIXME(BDW): Address space and security selectors. */ - *cs++ = MI_BATCH_BUFFER_START_GEN8 | (ppgtt << 8) | (dispatch_flags & - I915_DISPATCH_RS ? 
MI_BATCH_RESOURCE_STREAMER : 0); - *cs++ = lower_32_bits(offset); - *cs++ = upper_32_bits(offset); + *cs++ = 0; *cs++ = MI_NOOP; intel_ring_advance(req, cs); - return 0; } @@ -1901,8 +1867,6 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode) return PTR_ERR(cs); cmd = MI_FLUSH_DW; - if (INTEL_GEN(req->i915) >= 8) - cmd += 1; /* We always require a command barrier so that subsequent * commands, such as breadcrumb interrupts, are strictly ordered @@ -1921,13 +1885,8 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode) cmd |= MI_INVALIDATE_TLB; *cs++ = cmd; *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; - if (INTEL_GEN(req->i915) >= 8) { - *cs++ = 0; /* upper addr */ - *cs++ = 0; /* value */ - } else { - *cs++ = 0; - *cs++ = MI_NOOP; - } + *cs++ = 0; + *cs++ = MI_NOOP; intel_ring_advance(req, cs); return 0; @@ -1936,110 +1895,61 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode) static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine) { - struct drm_i915_gem_object *obj; - int ret, i; + int i; - if (!i915_modparams.semaphores) + if (!HAS_LEGACY_SEMAPHORES(dev_priv)) return; - if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore) { - struct i915_vma *vma; - - obj = i915_gem_object_create(dev_priv, PAGE_SIZE); - if (IS_ERR(obj)) - goto err; - - vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); - if (IS_ERR(vma)) - goto err_obj; - - ret = i915_gem_object_set_to_gtt_domain(obj, false); - if (ret) - goto err_obj; - - ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); - if (ret) - goto err_obj; - - dev_priv->semaphore = vma; - } - - if (INTEL_GEN(dev_priv) >= 8) { - u32 offset = i915_ggtt_offset(dev_priv->semaphore); + GEM_BUG_ON(INTEL_GEN(dev_priv) < 6); + engine->semaphore.sync_to = gen6_ring_sync_to; + engine->semaphore.signal = gen6_signal; - engine->semaphore.sync_to = gen8_ring_sync_to; - engine->semaphore.signal = gen8_xcs_signal; - 
- for (i = 0; i < I915_NUM_ENGINES; i++) { - u32 ring_offset; - - if (i != engine->id) - ring_offset = offset + GEN8_SEMAPHORE_OFFSET(engine->id, i); - else - ring_offset = MI_SEMAPHORE_SYNC_INVALID; - - engine->semaphore.signal_ggtt[i] = ring_offset; - } - } else if (INTEL_GEN(dev_priv) >= 6) { - engine->semaphore.sync_to = gen6_ring_sync_to; - engine->semaphore.signal = gen6_signal; - - /* - * The current semaphore is only applied on pre-gen8 - * platform. And there is no VCS2 ring on the pre-gen8 - * platform. So the semaphore between RCS and VCS2 is - * initialized as INVALID. Gen8 will initialize the - * sema between VCS2 and RCS later. - */ - for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) { - static const struct { - u32 wait_mbox; - i915_reg_t mbox_reg; - } sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = { - [RCS_HW] = { - [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC }, - [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC }, - [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC }, - }, - [VCS_HW] = { - [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC }, - [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC }, - [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC }, - }, - [BCS_HW] = { - [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC }, - [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC }, - [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC }, - }, - [VECS_HW] = { - [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC }, - [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC }, - [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC }, - }, - }; + /* + * The current semaphore is only applied on pre-gen8 + * platform. And there is no VCS2 ring on the pre-gen8 + * platform. 
So the semaphore between RCS and VCS2 is + * initialized as INVALID. + */ + for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) { + static const struct { u32 wait_mbox; i915_reg_t mbox_reg; + } sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = { + [RCS_HW] = { + [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC }, + [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC }, + [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC }, + }, + [VCS_HW] = { + [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC }, + [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC }, + [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC }, + }, + [BCS_HW] = { + [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC }, + [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC }, + [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC }, + }, + [VECS_HW] = { + [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC }, + [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC }, + [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC }, + }, + }; + u32 wait_mbox; + i915_reg_t mbox_reg; - if (i == engine->hw_id) { - wait_mbox = MI_SEMAPHORE_SYNC_INVALID; - mbox_reg = GEN6_NOSYNC; - } else { - wait_mbox = sem_data[engine->hw_id][i].wait_mbox; - mbox_reg = sem_data[engine->hw_id][i].mbox_reg; - } - - engine->semaphore.mbox.wait[i] = wait_mbox; - engine->semaphore.mbox.signal[i] = mbox_reg; + if (i == engine->hw_id) { + wait_mbox = MI_SEMAPHORE_SYNC_INVALID; + mbox_reg = GEN6_NOSYNC; + } else { + wait_mbox = sem_data[engine->hw_id][i].wait_mbox; + mbox_reg = sem_data[engine->hw_id][i].mbox_reg; } - } - return; - -err_obj: - i915_gem_object_put(obj); -err: - DRM_DEBUG_DRIVER("Failed to allocate space for semaphores, disabling\n"); - i915_modparams.semaphores = 0; + 
engine->semaphore.mbox.wait[i] = wait_mbox; + engine->semaphore.mbox.signal[i] = mbox_reg; + } } static void intel_ring_init_irq(struct drm_i915_private *dev_priv, @@ -2047,11 +1957,7 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv, { engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << engine->irq_shift; - if (INTEL_GEN(dev_priv) >= 8) { - engine->irq_enable = gen8_irq_enable; - engine->irq_disable = gen8_irq_disable; - engine->irq_seqno_barrier = gen6_seqno_barrier; - } else if (INTEL_GEN(dev_priv) >= 6) { + if (INTEL_GEN(dev_priv) >= 6) { engine->irq_enable = gen6_irq_enable; engine->irq_disable = gen6_irq_disable; engine->irq_seqno_barrier = gen6_seqno_barrier; @@ -2086,6 +1992,9 @@ static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine) static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine) { + /* gen8+ are only supported with execlists */ + GEM_BUG_ON(INTEL_GEN(dev_priv) >= 8); + intel_ring_init_irq(dev_priv, engine); intel_ring_init_semaphores(dev_priv, engine); @@ -2099,26 +2008,20 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->emit_breadcrumb = i9xx_emit_breadcrumb; engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz; - if (i915_modparams.semaphores) { + if (HAS_LEGACY_SEMAPHORES(dev_priv)) { int num_rings; engine->emit_breadcrumb = gen6_sema_emit_breadcrumb; num_rings = INTEL_INFO(dev_priv)->num_rings - 1; - if (INTEL_GEN(dev_priv) >= 8) { - engine->emit_breadcrumb_sz += num_rings * 6; - } else { - engine->emit_breadcrumb_sz += num_rings * 3; - if (num_rings & 1) - engine->emit_breadcrumb_sz++; - } + engine->emit_breadcrumb_sz += num_rings * 3; + if (num_rings & 1) + engine->emit_breadcrumb_sz++; } engine->set_default_submission = i9xx_set_default_submission; - if (INTEL_GEN(dev_priv) >= 8) - engine->emit_bb_start = gen8_emit_bb_start; - else if (INTEL_GEN(dev_priv) >= 6) + if (INTEL_GEN(dev_priv) >= 6) engine->emit_bb_start = 
gen6_emit_bb_start; else if (INTEL_GEN(dev_priv) >= 4) engine->emit_bb_start = i965_emit_bb_start; @@ -2138,20 +2041,7 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) if (HAS_L3_DPF(dev_priv)) engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT; - if (INTEL_GEN(dev_priv) >= 8) { - engine->init_context = intel_rcs_ctx_init; - engine->emit_breadcrumb = gen8_render_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen8_render_emit_breadcrumb_sz; - engine->emit_flush = gen8_render_ring_flush; - if (i915_modparams.semaphores) { - int num_rings; - - engine->semaphore.signal = gen8_rcs_signal; - - num_rings = INTEL_INFO(dev_priv)->num_rings - 1; - engine->emit_breadcrumb_sz += num_rings * 8; - } - } else if (INTEL_GEN(dev_priv) >= 6) { + if (INTEL_GEN(dev_priv) >= 6) { engine->init_context = intel_rcs_ctx_init; engine->emit_flush = gen7_render_ring_flush; if (IS_GEN6(dev_priv)) @@ -2170,7 +2060,6 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) engine->emit_bb_start = hsw_emit_bb_start; engine->init_hw = init_render_ring; - engine->cleanup = render_ring_cleanup; ret = intel_init_ring_buffer(engine); if (ret) @@ -2200,8 +2089,7 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) if (IS_GEN6(dev_priv)) engine->set_default_submission = gen6_bsd_set_default_submission; engine->emit_flush = gen6_bsd_ring_flush; - if (INTEL_GEN(dev_priv) < 8) - engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; + engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; } else { engine->mmio_base = BSD_RING_BASE; engine->emit_flush = bsd_ring_flush; @@ -2221,8 +2109,7 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) intel_ring_default_vfuncs(dev_priv, engine); engine->emit_flush = gen6_ring_flush; - if (INTEL_GEN(dev_priv) < 8) - engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; + engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; return intel_init_ring_buffer(engine); } @@ -2234,12 +2121,9 @@ int 
intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) intel_ring_default_vfuncs(dev_priv, engine); engine->emit_flush = gen6_ring_flush; - - if (INTEL_GEN(dev_priv) < 8) { - engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; - engine->irq_enable = hsw_vebox_irq_enable; - engine->irq_disable = hsw_vebox_irq_disable; - } + engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; + engine->irq_enable = hsw_vebox_irq_enable; + engine->irq_disable = hsw_vebox_irq_disable; return intel_init_ring_buffer(engine); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index f867aa6c31fc..c68ab3ead83c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -6,6 +6,7 @@ #include "i915_gem_batch_pool.h" #include "i915_gem_request.h" #include "i915_gem_timeline.h" +#include "i915_pmu.h" #include "i915_selftest.h" struct drm_printer; @@ -47,16 +48,6 @@ struct intel_hw_status_page { /* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to * do the writes, and that must have qw aligned offsets, simply pretend it's 8b. 
*/ -#define gen8_semaphore_seqno_size sizeof(uint64_t) -#define GEN8_SEMAPHORE_OFFSET(__from, __to) \ - (((__from) * I915_NUM_ENGINES + (__to)) * gen8_semaphore_seqno_size) -#define GEN8_SIGNAL_OFFSET(__ring, to) \ - (dev_priv->semaphore->node.start + \ - GEN8_SEMAPHORE_OFFSET((__ring)->id, (to))) -#define GEN8_WAIT_OFFSET(__ring, from) \ - (dev_priv->semaphore->node.start + \ - GEN8_SEMAPHORE_OFFSET(from, (__ring)->id)) - enum intel_engine_hangcheck_action { ENGINE_IDLE = 0, ENGINE_WAIT, @@ -252,6 +243,7 @@ struct intel_engine_execlists { unsigned int active; #define EXECLISTS_ACTIVE_USER 0 #define EXECLISTS_ACTIVE_PREEMPT 1 +#define EXECLISTS_ACTIVE_HWACK 2 /** * @port_mask: number of execlist ports - 1 @@ -348,6 +340,43 @@ struct intel_engine_cs { I915_SELFTEST_DECLARE(bool mock : 1); } breadcrumbs; + struct { + /** + * @enable: Bitmask of enable sample events on this engine. + * + * Bits correspond to sample event types, for instance + * I915_SAMPLE_QUEUED is bit 0 etc. + */ + u32 enable; + /** + * @enable_count: Reference count for the enabled samplers. + * + * Index number corresponds to the bit number from @enable. + */ + unsigned int enable_count[I915_PMU_SAMPLE_BITS]; + /** + * @sample: Counter values for sampling events. + * + * Our internal timer stores the current counters in this field. + */ +#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1) + struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX]; + /** + * @busy_stats: Has enablement of engine stats tracking been + * requested. + */ + bool busy_stats; + /** + * @disable_busy_stats: Work item for busy stats disabling. + * + * Same as with @enable_busy_stats action, with the difference + * that we delay it in case there are rapid enable-disable + * actions, which can happen during tool startup (like perf + * stat). + */ + struct delayed_work disable_busy_stats; + } pmu; + /* * A pool of objects to use as shadow copies of client batch buffers * when the command parser is enabled. 
Prevents the client from @@ -467,18 +496,15 @@ struct intel_engine_cs { * ie. transpose of f(x, y) */ struct { - union { #define GEN6_SEMAPHORE_LAST VECS_HW #define GEN6_NUM_SEMAPHORES (GEN6_SEMAPHORE_LAST + 1) #define GEN6_SEMAPHORES_MASK GENMASK(GEN6_SEMAPHORE_LAST, 0) - struct { - /* our mbox written by others */ - u32 wait[GEN6_NUM_SEMAPHORES]; - /* mboxes this ring signals to */ - i915_reg_t signal[GEN6_NUM_SEMAPHORES]; - } mbox; - u64 signal_ggtt[I915_NUM_ENGINES]; - }; + struct { + /* our mbox written by others */ + u32 wait[GEN6_NUM_SEMAPHORES]; + /* mboxes this ring signals to */ + i915_reg_t signal[GEN6_NUM_SEMAPHORES]; + } mbox; /* AKA wait() */ int (*sync_to)(struct drm_i915_gem_request *req, @@ -506,13 +532,16 @@ struct intel_engine_cs { * stream (ring). */ struct i915_gem_context *legacy_active_context; + struct i915_hw_ppgtt *legacy_active_ppgtt; /* status_notifier: list of callbacks for context-switch changes */ struct atomic_notifier_head context_status_notifier; struct intel_engine_hangcheck hangcheck; - bool needs_cmd_parser; +#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) +#define I915_ENGINE_SUPPORTS_STATS BIT(1) + unsigned int flags; /* * Table of commands the command parser needs to know about @@ -537,8 +566,50 @@ struct intel_engine_cs { * certain bits to encode the command length in the header). */ u32 (*get_cmd_length_mask)(u32 cmd_header); + + struct { + /** + * @lock: Lock protecting the below fields. + */ + spinlock_t lock; + /** + * @enabled: Reference count indicating number of listeners. + */ + unsigned int enabled; + /** + * @active: Number of contexts currently scheduled in. + */ + unsigned int active; + /** + * @enabled_at: Timestamp when busy stats were enabled. + */ + ktime_t enabled_at; + /** + * @start: Timestamp of the last idle to active transition. + * + * Idle is defined as active == 0, active is active > 0. + */ + ktime_t start; + /** + * @total: Total time this engine was busy. 
+ * + * Accumulated time not counting the most recent block in cases + * where engine is currently busy (active > 0). + */ + ktime_t total; + } stats; }; +static inline bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER; +} + +static inline bool intel_engine_supports_stats(struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_SUPPORTS_STATS; +} + static inline void execlists_set_active(struct intel_engine_execlists *execlists, unsigned int bit) @@ -939,4 +1010,67 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine); void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *p); +struct intel_engine_cs * +intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance); + +static inline void intel_engine_context_in(struct intel_engine_cs *engine) +{ + unsigned long flags; + + if (READ_ONCE(engine->stats.enabled) == 0) + return; + + spin_lock_irqsave(&engine->stats.lock, flags); + + if (engine->stats.enabled > 0) { + if (engine->stats.active++ == 0) + engine->stats.start = ktime_get(); + GEM_BUG_ON(engine->stats.active == 0); + } + + spin_unlock_irqrestore(&engine->stats.lock, flags); +} + +static inline void intel_engine_context_out(struct intel_engine_cs *engine) +{ + unsigned long flags; + + if (READ_ONCE(engine->stats.enabled) == 0) + return; + + spin_lock_irqsave(&engine->stats.lock, flags); + + if (engine->stats.enabled > 0) { + ktime_t last; + + if (engine->stats.active && --engine->stats.active == 0) { + /* + * Decrement the active context count and in case GPU + * is now idle add up to the running total. + */ + last = ktime_sub(ktime_get(), engine->stats.start); + + engine->stats.total = ktime_add(engine->stats.total, + last); + } else if (engine->stats.active == 0) { + /* + * After turning on engine stats, context out might be + * the first event in which case we account from the + * time stats gathering was turned on. 
+ */ + last = ktime_sub(ktime_get(), engine->stats.enabled_at); + + engine->stats.total = ktime_add(engine->stats.total, + last); + } + } + + spin_unlock_irqrestore(&engine->stats.lock, flags); +} + +int intel_enable_engine_stats(struct intel_engine_cs *engine); +void intel_disable_engine_stats(struct intel_engine_cs *engine); + +ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine); + #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index ce615704982a..dd485f59eb1d 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -325,6 +325,26 @@ skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } +bool +skl_plane_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + enum plane_id plane_id = plane->id; + enum pipe pipe = plane->pipe; + bool ret; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(PLANE_CTL(pipe, plane_id)) & PLANE_CTL_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static void chv_update_csc(struct intel_plane *plane, uint32_t format) { @@ -502,6 +522,26 @@ vlv_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } +static bool +vlv_plane_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + enum plane_id plane_id = plane->id; + enum pipe pipe = plane->pipe; + bool ret; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(SPCNTR(pipe, plane_id)) & SP_ENABLE; + + 
intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -642,6 +682,25 @@ ivb_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } +static bool +ivb_plane_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + enum pipe pipe = plane->pipe; + bool ret; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(SPRCTL(pipe)) & SPRITE_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -773,6 +832,25 @@ g4x_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } +static bool +g4x_plane_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + enum pipe pipe = plane->pipe; + bool ret; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(DVSCNTR(pipe)) & DVS_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static int intel_check_sprite_plane(struct intel_plane *plane, struct intel_crtc_state *crtc_state, @@ -1231,6 +1309,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->update_plane = skl_update_plane; intel_plane->disable_plane = skl_disable_plane; + intel_plane->get_hw_state = skl_plane_get_hw_state; plane_formats = skl_plane_formats; num_plane_formats = ARRAY_SIZE(skl_plane_formats); @@ -1241,6 +1320,7 @@ 
intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->update_plane = skl_update_plane; intel_plane->disable_plane = skl_disable_plane; + intel_plane->get_hw_state = skl_plane_get_hw_state; plane_formats = skl_plane_formats; num_plane_formats = ARRAY_SIZE(skl_plane_formats); @@ -1251,6 +1331,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->update_plane = vlv_update_plane; intel_plane->disable_plane = vlv_disable_plane; + intel_plane->get_hw_state = vlv_plane_get_hw_state; plane_formats = vlv_plane_formats; num_plane_formats = ARRAY_SIZE(vlv_plane_formats); @@ -1266,6 +1347,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->update_plane = ivb_update_plane; intel_plane->disable_plane = ivb_disable_plane; + intel_plane->get_hw_state = ivb_plane_get_hw_state; plane_formats = snb_plane_formats; num_plane_formats = ARRAY_SIZE(snb_plane_formats); @@ -1276,6 +1358,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->update_plane = g4x_update_plane; intel_plane->disable_plane = g4x_disable_plane; + intel_plane->get_hw_state = g4x_plane_get_hw_state; modifiers = i9xx_plane_format_modifiers; if (IS_GEN6(dev_priv)) { @@ -1301,7 +1384,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, } intel_plane->pipe = pipe; - intel_plane->plane = plane; + intel_plane->i9xx_plane = plane; intel_plane->id = PLANE_SPRITE0 + plane; intel_plane->frontbuffer_bit = INTEL_FRONTBUFFER_SPRITE(pipe, plane); intel_plane->check_plane = intel_check_sprite_plane; diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c index 4bc82d3005ff..b376dd3b28cc 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/intel_uc_fw.c @@ -105,7 +105,7 @@ void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, } /* now RSA */ - if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) { + if (css->key_size_dw != UOS_RSA_SCRATCH_COUNT) { DRM_WARN("%s: Mismatched 
firmware RSA key size (%u)\n", intel_uc_fw_type_repr(uc_fw->type), css->key_size_dw); err = -ENOEXEC; diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index b4621271e7a2..89547b614aa6 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -565,9 +565,6 @@ void intel_uncore_runtime_resume(struct drm_i915_private *dev_priv) void intel_uncore_sanitize(struct drm_i915_private *dev_priv) { - i915_modparams.enable_rc6 = - sanitize_rc6_option(dev_priv, i915_modparams.enable_rc6); - /* BIOS often leaves RC6 enabled, but disable it for hw init */ intel_sanitize_gt_powersave(dev_priv); } diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index 01af540b6ef9..e6b31041cc88 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -27,6 +27,7 @@ #include <linux/prime_numbers.h> #include "mock_drm.h" +#include "i915_random.h" static const unsigned int page_sizes[] = { I915_GTT_PAGE_SIZE_2M, @@ -989,17 +990,9 @@ static int gpu_write(struct i915_vma *vma, i915_vma_unpin(batch); i915_vma_close(batch); - err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); - if (err) - goto err_request; - - err = i915_switch_context(rq); - if (err) - goto err_request; - - err = rq->engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - flags); + err = engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + flags); if (err) goto err_request; @@ -1047,19 +1040,78 @@ static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) return err; } +static int __igt_write_huge(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct drm_i915_gem_object *obj, + u64 size, u64 offset, + u32 dword, u32 val) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_address_space *vm = ctx->ppgtt ? 
&ctx->ppgtt->base : &i915->ggtt.base; + unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; + struct i915_vma *vma; + int err; + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_vma_unbind(vma); + if (err) + goto out_vma_close; + + err = i915_vma_pin(vma, size, 0, flags | offset); + if (err) { + /* + * The ggtt may have some pages reserved so + * refrain from erroring out. + */ + if (err == -ENOSPC && i915_is_ggtt(vm)) + err = 0; + + goto out_vma_close; + } + + err = igt_check_page_sizes(vma); + if (err) + goto out_vma_unpin; + + err = gpu_write(vma, ctx, engine, dword, val); + if (err) { + pr_err("gpu-write failed at offset=%llx\n", offset); + goto out_vma_unpin; + } + + err = cpu_check(obj, dword, val); + if (err) { + pr_err("cpu-check failed at offset=%llx\n", offset); + goto out_vma_unpin; + } + +out_vma_unpin: + i915_vma_unpin(vma); +out_vma_close: + i915_vma_close(vma); + + return err; +} + static int igt_write_huge(struct i915_gem_context *ctx, struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_address_space *vm = ctx->ppgtt ? 
&ctx->ppgtt->base : &i915->ggtt.base; + static struct intel_engine_cs *engines[I915_NUM_ENGINES]; struct intel_engine_cs *engine; - struct i915_vma *vma; - unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; + I915_RND_STATE(prng); + IGT_TIMEOUT(end_time); unsigned int max_page_size; unsigned int id; u64 max; u64 num; u64 size; + int *order; + int i, n; int err = 0; GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); @@ -1071,78 +1123,56 @@ static int igt_write_huge(struct i915_gem_context *ctx, max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg); max = div_u64((vm->total - size), max_page_size); - vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(vma)) - return PTR_ERR(vma); - + n = 0; for_each_engine(engine, i915, id) { - IGT_TIMEOUT(end_time); - if (!intel_engine_can_store_dword(engine)) { - pr_info("store-dword-imm not supported on engine=%u\n", - id); + pr_info("store-dword-imm not supported on engine=%u\n", id); continue; } + engines[n++] = engine; + } - /* - * Try various offsets until we timeout -- we want to avoid - * issues hidden by effectively always using offset = 0. - */ - for_each_prime_number_from(num, 0, max) { - u64 offset = num * max_page_size; - u32 dword; - - err = i915_vma_unbind(vma); - if (err) - goto out_vma_close; - - err = i915_vma_pin(vma, size, max_page_size, flags | offset); - if (err) { - /* - * The ggtt may have some pages reserved so - * refrain from erroring out. - */ - if (err == -ENOSPC && i915_is_ggtt(vm)) { - err = 0; - continue; - } - - goto out_vma_close; - } + if (!n) + return 0; - err = igt_check_page_sizes(vma); - if (err) - goto out_vma_unpin; + /* + * To keep things interesting when alternating between engines in our + * randomized order, lets also make feeding to the same engine a few + * times in succession a possibility by enlarging the permutation array. 
+ */ + order = i915_random_order(n * I915_NUM_ENGINES, &prng); + if (!order) + return -ENOMEM; - dword = offset_in_page(num) / 4; + /* + * Try various offsets in an ascending/descending fashion until we + * timeout -- we want to avoid issues hidden by effectively always using + * offset = 0. + */ + i = 0; + for_each_prime_number_from(num, 0, max) { + u64 offset_low = num * max_page_size; + u64 offset_high = (max - num) * max_page_size; + u32 dword = offset_in_page(num) / 4; - err = gpu_write(vma, ctx, engine, dword, num + 1); - if (err) { - pr_err("gpu-write failed at offset=%llx", offset); - goto out_vma_unpin; - } + engine = engines[order[i] % n]; + i = (i + 1) % (n * I915_NUM_ENGINES); - err = cpu_check(obj, dword, num + 1); - if (err) { - pr_err("cpu-check failed at offset=%llx", offset); - goto out_vma_unpin; - } + err = __igt_write_huge(ctx, engine, obj, size, offset_low, dword, num + 1); + if (err) + break; - i915_vma_unpin(vma); + err = __igt_write_huge(ctx, engine, obj, size, offset_high, dword, num + 1); + if (err) + break; - if (num > 0 && - igt_timeout(end_time, - "%s timed out on engine=%u at offset=%llx, max_page_size=%x\n", - __func__, id, offset, max_page_size)) - break; - } + if (igt_timeout(end_time, + "%s timed out on engine=%u, offset_low=%llx offset_high=%llx, max_page_size=%x\n", + __func__, engine->id, offset_low, offset_high, max_page_size)) + break; } -out_vma_unpin: - if (i915_vma_is_pinned(vma)) - i915_vma_unpin(vma); -out_vma_close: - i915_vma_close(vma); + kfree(order); return err; } @@ -1719,6 +1749,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) return PTR_ERR(file); mutex_lock(&dev_priv->drm.struct_mutex); + intel_runtime_pm_get(dev_priv); ctx = live_context(dev_priv, file); if (IS_ERR(ctx)) { @@ -1729,6 +1760,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) err = i915_subtests(tests, ctx); out_unlock: + intel_runtime_pm_put(dev_priv); 
mutex_unlock(&dev_priv->drm.struct_mutex); mock_file_free(dev_priv, file); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index c82780a9d455..56a803d11916 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -158,14 +158,6 @@ static int gpu_fill(struct drm_i915_gem_object *obj, goto err_batch; } - err = engine->emit_flush(rq, EMIT_INVALIDATE); - if (err) - goto err_request; - - err = i915_switch_context(rq); - if (err) - goto err_request; - flags = 0; if (INTEL_GEN(vm->i915) <= 5) flags |= I915_DISPATCH_SECURE; @@ -272,6 +264,23 @@ out_unmap: return err; } +static int file_add_object(struct drm_file *file, + struct drm_i915_gem_object *obj) +{ + int err; + + GEM_BUG_ON(obj->base.handle_count); + + /* tie the object to the drm_file for easy reaping */ + err = idr_alloc(&file->object_idr, &obj->base, 1, 0, GFP_KERNEL); + if (err < 0) + return err; + + i915_gem_object_get(obj); + obj->base.handle_count++; + return 0; +} + static struct drm_i915_gem_object * create_test_object(struct i915_gem_context *ctx, struct drm_file *file, @@ -281,7 +290,6 @@ create_test_object(struct i915_gem_context *ctx, struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &ctx->i915->ggtt.base; u64 size; - u32 handle; int err; size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE); @@ -291,8 +299,7 @@ create_test_object(struct i915_gem_context *ctx, if (IS_ERR(obj)) return obj; - /* tie the handle to the drm_file for easy reaping */ - err = drm_gem_handle_create(file, &obj->base, &handle); + err = file_add_object(file, obj); i915_gem_object_put(obj); if (err) return ERR_PTR(err); @@ -369,7 +376,9 @@ static int igt_ctx_exec(void *arg) } } + intel_runtime_pm_get(i915); err = gpu_fill(obj, ctx, engine, dw); + intel_runtime_pm_put(i915); if (err) { pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? 
%s], err=%d\n", ndwords, dw, max_dwords(obj), diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index f463105ff48d..e1ddad635d73 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -355,6 +355,7 @@ static int igt_evict_contexts(void *arg) return 0; mutex_lock(&i915->drm.struct_mutex); + intel_runtime_pm_get(i915); /* Reserve a block so that we know we have enough to fit a few rq */ memset(&hole, 0, sizeof(hole)); @@ -463,6 +464,7 @@ out_locked: } if (drm_mm_node_allocated(&hole)) drm_mm_remove_node(&hole); + intel_runtime_pm_put(i915); mutex_unlock(&i915->drm.struct_mutex); return err; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index 6bce99050e94..647bf2bbd799 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -459,14 +459,6 @@ empty_request(struct intel_engine_cs *engine, if (IS_ERR(request)) return request; - err = engine->emit_flush(request, EMIT_INVALIDATE); - if (err) - goto out_request; - - err = i915_switch_context(request); - if (err) - goto out_request; - err = engine->emit_bb_start(request, batch->node.start, batch->node.size, @@ -675,12 +667,6 @@ static int live_all_engines(void *arg) goto out_request; } - err = engine->emit_flush(request[id], EMIT_INVALIDATE); - GEM_BUG_ON(err); - - err = i915_switch_context(request[id]); - GEM_BUG_ON(err); - err = engine->emit_bb_start(request[id], batch->node.start, batch->node.size, @@ -797,12 +783,6 @@ static int live_sequential_engines(void *arg) } } - err = engine->emit_flush(request[id], EMIT_INVALIDATE); - GEM_BUG_ON(err); - - err = i915_switch_context(request[id]); - GEM_BUG_ON(err); - err = engine->emit_bb_start(request[id], batch->node.start, batch->node.size, diff --git a/drivers/gpu/drm/i915/selftests/i915_random.c 
b/drivers/gpu/drm/i915/selftests/i915_random.c index b85872cc7fbe..2088ae57aa89 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.c +++ b/drivers/gpu/drm/i915/selftests/i915_random.c @@ -57,7 +57,7 @@ unsigned int *i915_random_order(unsigned int count, struct rnd_state *state) { unsigned int *order, i; - order = kmalloc_array(count, sizeof(*order), GFP_KERNEL); + order = kmalloc_array(count, sizeof(*order), GFP_KERNEL | __GFP_NOWARN); if (!order) return order; diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c index f10029e18820..7b23597858bb 100644 --- a/drivers/gpu/drm/i915/selftests/intel_guc.c +++ b/drivers/gpu/drm/i915/selftests/intel_guc.c @@ -27,7 +27,7 @@ /* max doorbell number + negative test for each client type */ #define ATTEMPTS (GUC_NUM_DOORBELLS + GUC_CLIENT_PRIORITY_NUM) -struct intel_guc_client *clients[ATTEMPTS]; +static struct intel_guc_client *clients[ATTEMPTS]; static bool available_dbs(struct intel_guc *guc, u32 priority) { @@ -131,6 +131,8 @@ static int igt_guc_init_doorbell_hw(void *args) pr_err("Failed to create clients\n"); goto unlock; } + GEM_BUG_ON(!guc->execbuf_client); + GEM_BUG_ON(!guc->preempt_client); err = validate_client(guc->execbuf_client, GUC_CLIENT_PRIORITY_KMD_NORMAL, false); diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 71ce06680d66..1bbb8c46e2d9 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -114,14 +114,6 @@ static int emit_recurse_batch(struct hang *h, if (err) goto unpin_vma; - err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); - if (err) - goto unpin_hws; - - err = i915_switch_context(rq); - if (err) - goto unpin_hws; - i915_vma_move_to_active(vma, rq, 0); if (!i915_gem_object_has_active_reference(vma->obj)) { i915_gem_object_get(vma->obj); @@ -173,7 +165,6 @@ static int emit_recurse_batch(struct hang *h, err = 
rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags); -unpin_hws: i915_vma_unpin(hws); unpin_vma: i915_vma_unpin(vma); diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index b57985929553..536ee4febd74 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -102,6 +102,46 @@ enum drm_i915_gem_engine_class { I915_ENGINE_CLASS_INVALID = -1 }; +/** + * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915 + * + */ + +enum drm_i915_pmu_engine_sample { + I915_SAMPLE_BUSY = 0, + I915_SAMPLE_WAIT = 1, + I915_SAMPLE_SEMA = 2 +}; + +#define I915_PMU_SAMPLE_BITS (4) +#define I915_PMU_SAMPLE_MASK (0xf) +#define I915_PMU_SAMPLE_INSTANCE_BITS (8) +#define I915_PMU_CLASS_SHIFT \ + (I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS) + +#define __I915_PMU_ENGINE(class, instance, sample) \ + ((class) << I915_PMU_CLASS_SHIFT | \ + (instance) << I915_PMU_SAMPLE_BITS | \ + (sample)) + +#define I915_PMU_ENGINE_BUSY(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY) + +#define I915_PMU_ENGINE_WAIT(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT) + +#define I915_PMU_ENGINE_SEMA(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA) + +#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) + +#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0) +#define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1) +#define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2) +#define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3) + +#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY + /* Each region is a minimum of 16k, and there are at most 255 of them. */ #define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use |