summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
diff options
context:
space:
mode:
authorLucas De Marchi <lucas.demarchi@intel.com>2024-02-20 09:26:12 -0800
committerLucas De Marchi <lucas.demarchi@intel.com>2024-02-20 09:57:17 -0800
commitfbb944086f2fa36c633be71cfcb38ce9f37eb90e (patch)
tree78428083a0de1e910a2eb025102b462bebdb8bfc /drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
parent0f688c0eb63a643ef0568b29b12cefbb23181e1a (diff)
parent9ac4beb7578a88baa4f7e6a59eeb5be79d7b011a (diff)
downloadlwn-fbb944086f2fa36c633be71cfcb38ce9f37eb90e.tar.gz
lwn-fbb944086f2fa36c633be71cfcb38ce9f37eb90e.zip
Merge drm/drm-next into drm-xe-next
Bring changes from drm-misc-next that got merged in drm-next back to drm-xe so they can be used for additional features. Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c80
1 files changed, 50 insertions, 30 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index b158d27d0a71..d534e192e260 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -96,6 +96,9 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
#define AMDGPU_RESUME_MS 2000
#define AMDGPU_MAX_RETRY_LIMIT 2
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
+#define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
+#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
+#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)
static const struct drm_driver amdgpu_kms_driver;
@@ -781,12 +784,22 @@ u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
void __iomem *pcie_index_hi_offset;
void __iomem *pcie_data_offset;
- pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
- pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
- if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
- pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
- else
+ if (unlikely(!adev->nbio.funcs)) {
+ pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
+ pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
+ } else {
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ }
+
+ if (reg_addr >> 32) {
+ if (unlikely(!adev->nbio.funcs))
+ pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
+ else
+ pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
+ } else {
pcie_index_hi = 0;
+ }
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
@@ -1218,8 +1231,6 @@ static int amdgpu_device_asic_init(struct amdgpu_device *adev)
amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
amdgpu_psp_wait_for_bootloader(adev);
ret = amdgpu_atomfirmware_asic_init(adev, true);
- /* TODO: check the return val and stop device initialization if boot fails */
- amdgpu_psp_query_boot_status(adev);
return ret;
} else {
return amdgpu_atom_asic_init(adev->mode_info.atom_context);
@@ -1442,6 +1453,10 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev))
return 0;
+ /* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
+ if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
+ DRM_WARN("System can't access extended configuration space,please check!!\n");
+
/* skip if the bios has already enabled large BAR */
if (adev->gmc.real_vram_size &&
(pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
@@ -4121,23 +4136,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
}
}
} else {
- switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
- case IP_VERSION(13, 0, 0):
- case IP_VERSION(13, 0, 7):
- case IP_VERSION(13, 0, 10):
- r = psp_gpu_reset(adev);
- break;
- default:
- tmp = amdgpu_reset_method;
- /* It should do a default reset when loading or reloading the driver,
- * regardless of the module parameter reset_method.
- */
- amdgpu_reset_method = AMD_RESET_METHOD_NONE;
- r = amdgpu_asic_reset(adev);
- amdgpu_reset_method = tmp;
- break;
- }
-
+ tmp = amdgpu_reset_method;
+ /* It should do a default reset when loading or reloading the driver,
+ * regardless of the module parameter reset_method.
+ */
+ amdgpu_reset_method = AMD_RESET_METHOD_NONE;
+ r = amdgpu_asic_reset(adev);
+ amdgpu_reset_method = tmp;
if (r) {
dev_err(adev->dev, "asic reset on init failed\n");
goto failed;
@@ -5031,7 +5036,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+ if (!amdgpu_ring_sched_ready(ring))
continue;
spin_lock(&ring->sched.job_list_lock);
@@ -5170,7 +5175,7 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+ if (!amdgpu_ring_sched_ready(ring))
continue;
/* Clear job fence from fence drv to avoid force_completion
@@ -5637,7 +5642,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = tmp_adev->rings[i];
- if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+ if (!amdgpu_ring_sched_ready(ring))
continue;
drm_sched_stop(&ring->sched, job ? &job->base : NULL);
@@ -5690,6 +5695,7 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
/* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
IP_VERSION(9, 4, 2) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
amdgpu_ras_resume(adev);
} else {
@@ -5706,7 +5712,7 @@ skip_hw_reset:
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = tmp_adev->rings[i];
- if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+ if (!amdgpu_ring_sched_ready(ring))
continue;
drm_sched_start(&ring->sched, true);
@@ -6061,7 +6067,7 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+ if (!amdgpu_ring_sched_ready(ring))
continue;
drm_sched_stop(&ring->sched, NULL);
@@ -6111,6 +6117,20 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
struct amdgpu_reset_context reset_context;
u32 memsize;
struct list_head device_list;
+ struct amdgpu_hive_info *hive;
+ int hive_ras_recovery = 0;
+ struct amdgpu_ras *ras;
+
+ /* PCI error slot reset should be skipped During RAS recovery */
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive) {
+ hive_ras_recovery = atomic_read(&hive->ras_recovery);
+ amdgpu_put_xgmi_hive(hive);
+ }
+ ras = amdgpu_ras_get_context(adev);
+ if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) &&
+ ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery))
+ return PCI_ERS_RESULT_RECOVERED;
DRM_INFO("PCI error: slot reset callback!!\n");
@@ -6189,7 +6209,7 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+ if (!amdgpu_ring_sched_ready(ring))
continue;
drm_sched_start(&ring->sched, true);