diff options
author | Arun Siluvery <arun.siluvery@linux.intel.com> | 2016-04-04 18:50:56 +0100 |
---|---|---|
committer | Tvrtko Ursulin <tvrtko.ursulin@intel.com> | 2016-04-05 13:29:24 +0100 |
commit | 6b332fa20f671265638d1d62496f9607c5f6e92f (patch) | |
tree | 1eacf76a2191fd93be61abcf069a2fe2e96b6813 /drivers | |
parent | 168cf367d7017a9d19522f8e59462c8b01c1212e (diff) | |
download | lwn-6b332fa20f671265638d1d62496f9607c5f6e92f.tar.gz lwn-6b332fa20f671265638d1d62496f9607c5f6e92f.zip |
drm/i915/guc: reset GuC and retry on firmware load failure
Due to timing issues in the HW, some of the status bits required for GuC
authentication occasionally don't get set; when that happens, the GuC
cannot be initialized and we will be left with a wedged GPU. The W/A
suggested is to perform a soft reset of the GuC and attempt to reload
the F/W again for few times before giving up.
As the failure is dependent on timing, tests performed by triggering
manual full gpu reset (i915_wedged) showed that we could sometimes hit
this after several thousand iterations, but sometimes tests ran even
longer without any issues. Reset and reload mechanism proved helpful
when we indeed hit f/w load failure, so it is better to include this
to improve driver stability.
This change implements the following WAs,
WaEnableuKernelHeaderValidFix:skl,bxt
WaEnableGuCBootHashCheckNotSet:skl,bxt
Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Reviewed-by: Alex Dai <yu.dai@intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/drm/i915/i915_drv.h | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_guc_reg.h | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_reg.h | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/intel_guc_loader.c | 49 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/intel_uncore.c | 19 |
5 files changed, 69 insertions, 2 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6443745d4182..466b8b68f467 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2747,6 +2747,7 @@ extern long i915_compat_ioctl(struct file *filp, unsigned int cmd, extern int intel_gpu_reset(struct drm_device *dev, u32 engine_mask); extern bool intel_has_gpu_reset(struct drm_device *dev); extern int i915_reset(struct drm_device *dev); +extern int intel_guc_reset(struct drm_i915_private *dev_priv); extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine); extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv); extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_guc_reg.h b/drivers/gpu/drm/i915/i915_guc_reg.h index e4ba5822289b..94ceee5f0544 100644 --- a/drivers/gpu/drm/i915/i915_guc_reg.h +++ b/drivers/gpu/drm/i915/i915_guc_reg.h @@ -27,6 +27,7 @@ /* Definitions of GuC H/W registers, bits, etc */ #define GUC_STATUS _MMIO(0xc000) +#define GS_MIA_IN_RESET (1 << 0) #define GS_BOOTROM_SHIFT 1 #define GS_BOOTROM_MASK (0x7F << GS_BOOTROM_SHIFT) #define GS_BOOTROM_RSA_FAILED (0x50 << GS_BOOTROM_SHIFT) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 683274f27405..30fea341ae66 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -165,6 +165,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN6_GRDOM_MEDIA (1 << 2) #define GEN6_GRDOM_BLT (1 << 3) #define GEN6_GRDOM_VECS (1 << 4) +#define GEN9_GRDOM_GUC (1 << 5) #define GEN8_GRDOM_MEDIA2 (1 << 7) #define RING_PP_DIR_BASE(ring) _MMIO((ring)->mmio_base+0x228) diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index b4976f985369..d84c5608f068 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -353,6 +353,24 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) return ret; } +static int i915_reset_guc(struct drm_i915_private *dev_priv) +{ + int ret; + u32 guc_status; + + ret = intel_guc_reset(dev_priv); + if (ret) { + DRM_ERROR("GuC reset failed, ret = %d\n", ret); + return ret; + } + + guc_status = I915_READ(GUC_STATUS); + WARN(!(guc_status & GS_MIA_IN_RESET), + "GuC status: 0x%x, MIA core expected to be in reset\n", guc_status); + + return ret; +} + /** * intel_guc_ucode_load() - load GuC uCode into the device * @dev: drm device @@ -417,9 +435,36 @@ int intel_guc_ucode_load(struct drm_device *dev) if (err) goto fail; + /* + * WaEnableuKernelHeaderValidFix:skl,bxt + * For BXT, this is only upto B0 but below WA is required for later + * steppings also so this is extended as well. + */ + /* WaEnableGuCBootHashCheckNotSet:skl,bxt */ err = guc_ucode_xfer(dev_priv); - if (err) - goto fail; + if (err) { + int retries = 3; + + DRM_ERROR("GuC fw load failed, err=%d, attempting reset and retry\n", err); + + while (retries--) { + err = i915_reset_guc(dev_priv); + if (err) + break; + + err = guc_ucode_xfer(dev_priv); + if (!err) { + DRM_DEBUG_DRIVER("GuC fw reload succeeded after reset\n"); + break; + } + DRM_DEBUG_DRIVER("GuC fw reload retries left: %d\n", retries); + } + + if (err) { + DRM_ERROR("GuC fw reload attempt failed, ret=%d\n", err); + goto fail; + } + } guc_fw->guc_fw_load_status = GUC_FIRMWARE_SUCCESS; diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index ac1c545436af..fbc1d215ca67 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1673,6 +1673,25 @@ bool intel_has_gpu_reset(struct drm_device *dev) return intel_get_gpu_reset(dev) != NULL; } +int intel_guc_reset(struct drm_i915_private *dev_priv) +{ + int ret; + unsigned long irqflags; + + if (!i915.enable_guc_submission) + return -EINVAL; + + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); + + ret = gen6_hw_domain_reset(dev_priv, GEN9_GRDOM_GUC); + + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + + return ret; +} + bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv) { return check_for_unclaimed_mmio(dev_priv); |