From 216c1282dde38ca87ebdf1ccacee5a0682901574 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 27 Nov 2023 15:26:41 +0100 Subject: drm/amdgpu: use GTT only as fallback for VRAM|GTT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Try to fill up VRAM as well by setting the busy flag on GTT allocations. This fixes the issue that when VRAM was evacuated for suspend it's never filled up again unless the application is restarted. Signed-off-by: Christian König Reviewed-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20240229134003.3688-2-christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 010b0cb7693c..8bc79924d171 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -173,6 +173,12 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) abo->flags & AMDGPU_GEM_CREATE_PREEMPTIBLE ? AMDGPU_PL_PREEMPT : TTM_PL_TT; places[c].flags = 0; + /* + * When GTT is just an alternative to VRAM make sure that we + * only use it as fallback and still try to fill up VRAM first. + */ + if (domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) + places[c].flags |= TTM_PL_FLAG_FALLBACK; c++; } -- cgit v1.2.3 From dc406d92a097c669e6468ac4f694b4c927c47ab6 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 7 Mar 2024 21:31:50 +0530 Subject: drm/amdgpu: add recent pagefault info in vm_manager MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently page fault information is stored per vm and which could be freed or stale during reset. Add it pagefault information in the vm_manager which is a global space for vm's and remains valid across. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 ++ 2 files changed, 10 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 4299ce386322..81fb3465e197 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2924,6 +2924,14 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev, if (vm && status) { vm->fault_info.addr = addr; vm->fault_info.status = status; + /* + * Update the fault information globally for later usage + * when vm could be stale or freed. + */ + adev->vm_manager.fault_info.addr = addr; + adev->vm_manager.fault_info.vmhub = vmhub; + adev->vm_manager.fault_info.status = status; + if (AMDGPU_IS_GFXHUB(vmhub)) { vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_GFX; vm->fault_info.vmhub |= diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 047ec1930d12..8efa8422f4f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -422,6 +422,8 @@ struct amdgpu_vm_manager { * look up VM of a page fault */ struct xarray pasids; + /* Global registration of recent page fault information */ + struct amdgpu_vm_fault_info fault_info; }; struct amdgpu_bo_va_mapping; -- cgit v1.2.3 From fb0f5f541475184f87965fa892570c053fd9eda3 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Mon, 19 Feb 2024 15:51:29 +0800 Subject: drm/amdgpu: add utcl2 poison query for gfxhub Implement it for gfxhub 1.0 and 1.2. v2: input logical xcc id for poison query interface. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h | 2 ++ drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 17 +++++++++++++++++ drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c | 15 +++++++++++++++ 3 files changed, 34 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h index c7b44aeb671b..103a837ccc71 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h @@ -38,6 +38,8 @@ struct amdgpu_gfxhub_funcs { void (*mode2_save_regs)(struct amdgpu_device *adev); void (*mode2_restore_regs)(struct amdgpu_device *adev); void (*halt)(struct amdgpu_device *adev); + bool (*query_utcl2_poison_status)(struct amdgpu_device *adev, + int xcc_id); }; struct amdgpu_gfxhub { diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 22175da0e16a..d200310d1731 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -443,6 +443,22 @@ static void gfxhub_v1_0_init(struct amdgpu_device *adev) mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; } +static bool gfxhub_v1_0_query_utcl2_poison_status(struct amdgpu_device *adev, + int xcc_id) +{ + u32 status = 0; + struct amdgpu_vmhub *hub; + + if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) + return false; + + hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; + status = RREG32(hub->vm_l2_pro_fault_status); + /* reset page fault status */ + WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); + + return REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); +} const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs = { .get_mc_fb_offset = gfxhub_v1_0_get_mc_fb_offset, @@ -452,4 +468,5 @@ const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs = { .set_fault_enable_default = gfxhub_v1_0_set_fault_enable_default, .init = gfxhub_v1_0_init, .get_xgmi_info = gfxhub_v1_1_get_xgmi_info, + .query_utcl2_poison_status = gfxhub_v1_0_query_utcl2_poison_status, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index 49aecdcee006..77df8c9cbad2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -620,6 +620,20 @@ static int gfxhub_v1_2_get_xgmi_info(struct amdgpu_device *adev) return 0; } +static bool gfxhub_v1_2_query_utcl2_poison_status(struct amdgpu_device *adev, + int xcc_id) +{ + u32 fed, status; + + status = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regVM_L2_PROTECTION_FAULT_STATUS); + fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); + /* reset page fault status */ + WREG32_P(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), + regVM_L2_PROTECTION_FAULT_STATUS), 1, ~1); + + return fed; +} + const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = { .get_mc_fb_offset = gfxhub_v1_2_get_mc_fb_offset, .setup_vm_pt_regs = gfxhub_v1_2_setup_vm_pt_regs, @@ -628,6 +642,7 @@ const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = { .set_fault_enable_default = gfxhub_v1_2_set_fault_enable_default, .init = gfxhub_v1_2_init, .get_xgmi_info = gfxhub_v1_2_get_xgmi_info, + .query_utcl2_poison_status = gfxhub_v1_2_query_utcl2_poison_status, }; static int gfxhub_v1_2_xcp_resume(void *handle, uint32_t inst_mask) -- cgit v1.2.3 From 583681d4a417a67afbcc3664d31fb58d0e59aeae Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 6 Mar 2024 22:59:34 +0530 Subject: drm/amdgpu: add vm fault information to devcoredump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add page fault information to the devcoredump. Output of devcoredump: **** AMDGPU Device Coredump **** version: 1 kernel: 6.7.0-amd-staging-drm-next module: amdgpu time: 29.725011811 process_name: soft_recovery_p PID: 1720 Ring timed out details IP Type: 0 Ring Name: gfx_0.0.0 [gfxhub] Page fault observed Faulty page starting at address: 0x0000000000000000 Protection fault status register: 0x301031 VRAM is lost due to GPU reset! Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index 147100c27c2d..6d059f853adc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -203,6 +203,18 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, coredump->ring->name); } + if (coredump->adev) { + struct amdgpu_vm_fault_info *fault_info = + &coredump->adev->vm_manager.fault_info; + + drm_printf(&p, "\n[%s] Page fault observed\n", + fault_info->vmhub ? "mmhub" : "gfxhub"); + drm_printf(&p, "Faulty page starting at address: 0x%016llx\n", + fault_info->addr); + drm_printf(&p, "Protection fault status register: 0x%x\n\n", + fault_info->status); + } + if (coredump->reset_vram_lost) drm_printf(&p, "VRAM is lost due to GPU reset!\n"); if (coredump->adev->reset_info.num_regs) { -- cgit v1.2.3 From 3eb899c40a6190f6eee0bea7c340a4cb32112548 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 11 Mar 2024 16:58:54 +0530 Subject: drm/amdgpu: add ring buffer information in devcoredump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add relevant ringbuffer information such as rptr, wptr,rb mask, ring name, ring size and also the rings content for each ring on a gpu reset. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index 6d059f853adc..a0dbccad2f53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -215,6 +215,27 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, fault_info->status); } + drm_printf(&p, "Ring buffer information\n"); + for (int i = 0; i < coredump->adev->num_rings; i++) { + int j = 0; + struct amdgpu_ring *ring = coredump->adev->rings[i]; + + drm_printf(&p, "ring name: %s\n", ring->name); + drm_printf(&p, "Rptr: 0x%llx Wptr: 0x%llx RB mask: %x\n", + amdgpu_ring_get_rptr(ring), + amdgpu_ring_get_wptr(ring), + ring->buf_mask); + drm_printf(&p, "Ring size in dwords: %d\n", + ring->ring_size / 4); + drm_printf(&p, "Ring contents\n"); + drm_printf(&p, "Offset \t Value\n"); + + while (j < ring->ring_size) { + drm_printf(&p, "0x%x \t 0x%x\n", j, ring->ring[j/4]); + j += 4; + } + } + if (coredump->reset_vram_lost) drm_printf(&p, "VRAM is lost due to GPU reset!\n"); if (coredump->adev->reset_info.num_regs) { -- cgit v1.2.3 From 71a8d61ebc38f0f1b96011518fefd0a1e07efa74 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Mon, 19 Feb 2024 15:55:24 +0800 Subject: drm/amdgpu: retire gfx ras query_utcl2_poison_status Replace it with related interface in gfxhub functions. v2: replace node id with xcc id. get node id for query_utcl2_poison_status Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 7 ++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 - drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c | 12 ------------ drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c | 14 ++++++++++++-- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 14 ++++++++++++-- 6 files changed, 30 insertions(+), 21 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 35dd6effa9a3..d5fde8adf19b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -769,10 +769,11 @@ int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, return 0; } -bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev) +bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev, + int xcc_id) { - if (adev->gfx.ras && adev->gfx.ras->query_utcl2_poison_status) - return adev->gfx.ras->query_utcl2_poison_status(adev); + if (adev->gfxhub.funcs->query_utcl2_poison_status) + return adev->gfxhub.funcs->query_utcl2_poison_status(adev, xcc_id); else return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 0ef223c2affb..caee36e52a09 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -341,7 +341,8 @@ bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev); bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem); void amdgpu_amdkfd_block_mmu_notifications(void *p); int amdgpu_amdkfd_criu_resume(void *p); -bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev); +bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev, + int xcc_id); int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag, int8_t xcp_id); void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 8fcf889ddce9..04a86dff71e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -259,7 +259,6 @@ struct amdgpu_cu_info { struct amdgpu_gfx_ras { struct amdgpu_ras_block_object ras_block; void (*enable_watchdog_timer)(struct amdgpu_device *adev); - bool (*query_utcl2_poison_status)(struct amdgpu_device *adev); int (*rlc_gc_fed_irq)(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index 065b2bd5f5a6..3f4fd2f08163 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -1909,18 +1909,7 @@ static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev) mutex_unlock(&adev->grbm_idx_mutex); } -static bool gfx_v9_4_2_query_uctl2_poison_status(struct amdgpu_device *adev) -{ - u32 status = 0; - struct amdgpu_vmhub *hub; - - hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; - status = RREG32(hub->vm_l2_pro_fault_status); - /* reset page fault status */ - WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); - return REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); -} struct amdgpu_ras_block_hw_ops gfx_v9_4_2_ras_ops = { .query_ras_error_count = &gfx_v9_4_2_query_ras_error_count, @@ -1934,5 +1923,4 @@ struct amdgpu_gfx_ras gfx_v9_4_2_ras = { .hw_ops = &gfx_v9_4_2_ras_ops, }, .enable_watchdog_timer = &gfx_v9_4_2_enable_watchdog_timer, - .query_utcl2_poison_status = gfx_v9_4_2_query_uctl2_poison_status, }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c index 9a06c6fb6605..a8e76287dde0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c @@ -367,10 +367,20 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, client_id == SOC15_IH_CLIENTID_UTCL2) { struct kfd_vm_fault_info info = {0}; uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry); + uint32_t node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry); + uint32_t vmid_type = SOC15_VMID_TYPE_FROM_IH_ENTRY(ih_ring_entry); + int xcc_id = 0; struct kfd_hsa_memory_exception_data exception_data; - if (client_id == SOC15_IH_CLIENTID_UTCL2 && - amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev)) { + if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) { + xcc_id = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev, + node_id); + if (xcc_id < 0) + xcc_id = 0; + } + + if (client_id == SOC15_IH_CLIENTID_UTCL2 && !vmid_type && + amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev, xcc_id)) { event_interrupt_poison_consumption(dev, pasid, client_id); return; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 91dd5e045b51..ff7392336795 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -413,10 +413,20 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, client_id == SOC15_IH_CLIENTID_UTCL2) { struct kfd_vm_fault_info info = {0}; uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry); + uint32_t node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry); + uint32_t vmid_type = SOC15_VMID_TYPE_FROM_IH_ENTRY(ih_ring_entry); + int xcc_id = 0; struct kfd_hsa_memory_exception_data exception_data; - if (client_id == SOC15_IH_CLIENTID_UTCL2 && - amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev)) { + if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) { + xcc_id = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev, + node_id); + if (xcc_id < 0) + xcc_id = 0; + } + + if (client_id == SOC15_IH_CLIENTID_UTCL2 && !vmid_type && + amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev, xcc_id)) { event_interrupt_poison_consumption_v9(dev, pasid, client_id); return; } -- cgit v1.2.3 From 0c501d3c11bba2a8a5f98bbad557465ccbfb59a3 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Mon, 19 Feb 2024 16:03:17 +0800 Subject: drm/amdgpu: skip GFX FED error in page fault handling Let kfd interrupt handler process it. v2: return 0 instead of 1 for fed error. drop the usage of strcmp in interrupt handler. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 47b63a4ce68b..05d52b9274a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -548,7 +548,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, { bool retry_fault = !!(entry->src_data[1] & 0x80); bool write_fault = !!(entry->src_data[1] & 0x20); - uint32_t status = 0, cid = 0, rw = 0; + uint32_t status = 0, cid = 0, rw = 0, fed = 0; struct amdgpu_task_info *task_info; struct amdgpu_vmhub *hub; const char *mmhub_cid; @@ -664,6 +664,14 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, status = RREG32(hub->vm_l2_pro_fault_status); cid = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, CID); rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW); + fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); + + /* for gfx fed error, kfd will handle it, return directly */ + if (fed && amdgpu_ras_is_poison_mode_supported(adev) && + (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2)) && + (vmhub < AMDGPU_MMHUB0_START)) + return 0; + WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, vmhub); -- cgit v1.2.3 From 97d5aa60306d286326026a40e1e3b992b5b31538 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Tue, 12 Mar 2024 16:23:46 +0100 Subject: drm/amdgpu: cleanup unused variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch removes an unused input variable in the MES doorbell function. Cc: Christian König Cc: Alex Deucher Reviewed-by: Christian König Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index a98e03e0a51f..eb75c524b4a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -40,7 +40,6 @@ int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev) } static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev, - struct amdgpu_mes_process *process, int ip_type, uint64_t *doorbell_index) { unsigned int offset, found; @@ -65,7 +64,6 @@ static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev, } static void amdgpu_mes_kernel_doorbell_free(struct amdgpu_device *adev, - struct amdgpu_mes_process *process, uint32_t doorbell_index) { unsigned int old, rel_index; @@ -653,7 +651,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, *queue_id = queue->queue_id = r; /* allocate a doorbell index for the queue */ - r = amdgpu_mes_kernel_doorbell_get(adev, gang->process, + r = amdgpu_mes_kernel_doorbell_get(adev, qprops->queue_type, &qprops->doorbell_off); if (r) @@ -711,8 +709,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, return 0; clean_up_doorbell: - amdgpu_mes_kernel_doorbell_free(adev, gang->process, - qprops->doorbell_off); + amdgpu_mes_kernel_doorbell_free(adev, qprops->doorbell_off); clean_up_queue_id: spin_lock_irqsave(&adev->mes.queue_id_lock, flags); idr_remove(&adev->mes.queue_id_idr, queue->queue_id); @@ -766,8 +763,7 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id) queue_id); list_del(&queue->list); - amdgpu_mes_kernel_doorbell_free(adev, gang->process, - queue->doorbell_off); + amdgpu_mes_kernel_doorbell_free(adev, queue->doorbell_off); amdgpu_mes_unlock(&adev->mes); amdgpu_mes_queue_free_mqd(queue); -- cgit v1.2.3 From dfe9c3cde22957f65476210aec48e3586983d927 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 13 Mar 2024 15:07:10 +0530 Subject: drm/amdgpu: Do a basic health check before reset Check if the device is present in the bus before trying to recover. It could be that device itself is lost from the bus in some hang situations. Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5dc24c971b41..efb3b7e74b80 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5532,6 +5532,23 @@ static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev) } +static int amdgpu_device_health_check(struct list_head *device_list_handle) +{ + struct amdgpu_device *tmp_adev; + int ret = 0; + u32 status; + + list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + pci_read_config_dword(tmp_adev->pdev, PCI_COMMAND, &status); + if (PCI_POSSIBLE_ERROR(status)) { + dev_err(tmp_adev->dev, "device lost from bus!"); + ret = -ENODEV; + } + } + + return ret; +} + /** * amdgpu_device_gpu_recover - reset the asic and recover scheduler * @@ -5603,6 +5620,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, device_list_handle = &device_list; } + if (!amdgpu_sriov_vf(adev)) { + r = amdgpu_device_health_check(device_list_handle); + if (r) + goto end_reset; + } + /* We need to lock reset domain only once both for XGMI and single device */ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, reset_list); @@ -5768,6 +5791,7 @@ skip_sched_resume: reset_list); amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); +end_reset: if (hive) { mutex_unlock(&hive->hive_lock); amdgpu_put_xgmi_hive(hive); -- cgit v1.2.3 From 6bb89d1340425ff030b7bdb0eec53ca6d12d1313 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Thu, 14 Mar 2024 17:27:46 -0400 Subject: drm/amdgpu: Skip virt_exchange_init on SDMA poison consumption Host will initiate an FLR in SDMA poison consumption scenario. Guest should wait for FLR message to re-init data exchange. Signed-off-by: Victor Skvortsov Reviewed-by: Zhigang Luo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index 77f5b55decf9..a1bad772d932 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -444,7 +444,8 @@ static void xgpu_nv_ras_poison_handler(struct amdgpu_device *adev, amdgpu_virt_fini_data_exchange(adev); xgpu_nv_send_access_requests_with_param(adev, IDH_RAS_POISON, block, 0, 0); - amdgpu_virt_init_data_exchange(adev); + if (block != AMDGPU_RAS_BLOCK__SDMA) + amdgpu_virt_init_data_exchange(adev); } } -- cgit v1.2.3 From d72e2bdac4ad5422f8d880d323f6ab6a7469cb2b Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Fri, 8 Mar 2024 15:56:20 +0530 Subject: drm/amdgpu: add the hw_ip version of all IP's MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add all the IP's version information on a SOC to the devcoredump. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 60 +++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index a0dbccad2f53..1434e9a5506b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -29,6 +29,42 @@ #include "sienna_cichlid.h" #include "smu_v13_0_10.h" +const char *hw_ip_names[MAX_HWIP] = { + [GC_HWIP] = "GC", + [HDP_HWIP] = "HDP", + [SDMA0_HWIP] = "SDMA0", + [SDMA1_HWIP] = "SDMA1", + [SDMA2_HWIP] = "SDMA2", + [SDMA3_HWIP] = "SDMA3", + [SDMA4_HWIP] = "SDMA4", + [SDMA5_HWIP] = "SDMA5", + [SDMA6_HWIP] = "SDMA6", + [SDMA7_HWIP] = "SDMA7", + [LSDMA_HWIP] = "LSDMA", + [MMHUB_HWIP] = "MMHUB", + [ATHUB_HWIP] = "ATHUB", + [NBIO_HWIP] = "NBIO", + [MP0_HWIP] = "MP0", + [MP1_HWIP] = "MP1", + [UVD_HWIP] = "UVD/JPEG/VCN", + [VCN1_HWIP] = "VCN1", + [VCE_HWIP] = "VCE", + [VPE_HWIP] = "VPE", + [DF_HWIP] = "DF", + [DCE_HWIP] = "DCE", + [OSSSYS_HWIP] = "OSSSYS", + [SMUIO_HWIP] = "SMUIO", + [PWR_HWIP] = "PWR", + [NBIF_HWIP] = "NBIF", + [THM_HWIP] = "THM", + [CLK_HWIP] = "CLK", + [UMC_HWIP] = "UMC", + [RSMU_HWIP] = "RSMU", + [XGMI_HWIP] = "XGMI", + [DCI_HWIP] = "DCI", + [PCIE_HWIP] = "PCIE", +}; + int amdgpu_reset_init(struct amdgpu_device *adev) { int ret = 0; @@ -196,6 +232,30 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, coredump->reset_task_info.process_name, coredump->reset_task_info.pid); + /* GPU IP's information of the SOC */ + if (coredump->adev) { + drm_printf(&p, "\nIP Information\n"); + drm_printf(&p, "SOC Family: %d\n", coredump->adev->family); + drm_printf(&p, "SOC Revision id: %d\n", coredump->adev->rev_id); + drm_printf(&p, "SOC External Revision id: %d\n", + coredump->adev->external_rev_id); + + for (int i = 1; i < MAX_HWIP; i++) { + for (int j = 0; j < HWIP_MAX_INSTANCE; j++) { + int ver = coredump->adev->ip_versions[i][j]; + + if (ver) + drm_printf(&p, "HWIP: %s[%d][%d]: v%d.%d.%d.%d.%d\n", + hw_ip_names[i], i, j, + IP_VERSION_MAJ(ver), + IP_VERSION_MIN(ver), + IP_VERSION_REV(ver), + IP_VERSION_VARIANT(ver), + IP_VERSION_SUBREV(ver)); + } + } + } + if (coredump->ring) { drm_printf(&p, "\nRing timed out details\n"); drm_printf(&p, "IP Type: %d Ring Name: %s\n", -- cgit v1.2.3 From ab66c832847fcdffc97d4591ba5547e3990d9d33 Mon Sep 17 00:00:00 2001 From: Zhigang Luo Date: Thu, 29 Feb 2024 16:04:35 -0500 Subject: drm/amdgpu: trigger flr_work if reading pf2vf data failed if reading pf2vf data failed 30 times continuously, it means something is wrong. Need to trigger flr_work to recover the issue. also use dev_err to print the error message to get which device has issue and add warning message if waiting IDH_FLR_NOTIFICATION_CMPL timeout. Signed-off-by: Zhigang Luo Acked-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 15 ++++++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 29 ++++++++++++++++++++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 3 +++ drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 2 ++ drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 2 ++ 5 files changed, 41 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index efb3b7e74b80..3204b8f6edeb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -143,6 +143,8 @@ const char *amdgpu_asic_name[] = { "LAST", }; +static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev); + /** * DOC: pcie_replay_count * @@ -4968,6 +4970,8 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, retry: amdgpu_amdkfd_pre_reset(adev); + amdgpu_device_stop_pending_resets(adev); + if (from_hypervisor) r = amdgpu_virt_request_full_gpu(adev, true); else @@ -5708,11 +5712,12 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ tmp_adev->asic_reset_res = r; } - /* - * Drop all pending non scheduler resets. Scheduler resets - * were already dropped during drm_sched_stop - */ - amdgpu_device_stop_pending_resets(tmp_adev); + if (!amdgpu_sriov_vf(tmp_adev)) + /* + * Drop all pending non scheduler resets. Scheduler resets + * were already dropped during drm_sched_stop + */ + amdgpu_device_stop_pending_resets(tmp_adev); } /* Actual ASIC resets if needed.*/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 7a4eae36778a..aed60aaf1a55 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -32,6 +32,7 @@ #include "amdgpu.h" #include "amdgpu_ras.h" +#include "amdgpu_reset.h" #include "vi.h" #include "soc15.h" #include "nv.h" @@ -424,7 +425,7 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev) return -EINVAL; if (pf2vf_info->size > 1024) { - DRM_ERROR("invalid pf2vf message size\n"); + dev_err(adev->dev, "invalid pf2vf message size: 0x%x\n", pf2vf_info->size); return -EINVAL; } @@ -435,7 +436,9 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev) adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size, adev->virt.fw_reserve.checksum_key, checksum); if (checksum != checkval) { - DRM_ERROR("invalid pf2vf message\n"); + dev_err(adev->dev, + "invalid pf2vf message: header checksum=0x%x calculated checksum=0x%x\n", + checksum, checkval); return -EINVAL; } @@ -449,7 +452,9 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev) adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size, 0, checksum); if (checksum != checkval) { - DRM_ERROR("invalid pf2vf message\n"); + dev_err(adev->dev, + "invalid pf2vf message: header checksum=0x%x calculated checksum=0x%x\n", + checksum, checkval); return -EINVAL; } @@ -485,7 +490,7 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev) ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->uuid; break; default: - DRM_ERROR("invalid pf2vf version\n"); + dev_err(adev->dev, "invalid pf2vf version: 0x%x\n", pf2vf_info->version); return -EINVAL; } @@ -584,8 +589,21 @@ static void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work) int ret; ret = amdgpu_virt_read_pf2vf_data(adev); - if (ret) + if (ret) { + adev->virt.vf2pf_update_retry_cnt++; + if ((adev->virt.vf2pf_update_retry_cnt >= AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT) && + amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev)) { + if (amdgpu_reset_domain_schedule(adev->reset_domain, + &adev->virt.flr_work)) + return; + else + dev_err(adev->dev, "Failed to queue work! at %s", __func__); + } + goto out; + } + + adev->virt.vf2pf_update_retry_cnt = 0; amdgpu_virt_write_vf2pf_data(adev); out: @@ -606,6 +624,7 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) adev->virt.fw_reserve.p_pf2vf = NULL; adev->virt.fw_reserve.p_vf2pf = NULL; adev->virt.vf2pf_update_interval_ms = 0; + adev->virt.vf2pf_update_retry_cnt = 0; if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) { DRM_WARN("Currently fw_vram and drv_vram should not have values at the same time!"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 3f59b7b5523f..a858bc98cad4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -52,6 +52,8 @@ /* tonga/fiji use this offset */ #define mmBIF_IOV_FUNC_IDENTIFIER 0x1503 +#define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 30 + enum amdgpu_sriov_vf_mode { SRIOV_VF_MODE_BARE_METAL = 0, SRIOV_VF_MODE_ONE_VF, @@ -257,6 +259,7 @@ struct amdgpu_virt { /* vf2pf message */ struct delayed_work vf2pf_work; uint32_t vf2pf_update_interval_ms; + int vf2pf_update_retry_cnt; /* multimedia bandwidth config */ bool is_mm_bw_enabled; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index a2bd2c3b1ef9..0c7275bca8f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -276,6 +276,8 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) timeout -= 10; } while (timeout > 1); + dev_warn(adev->dev, "waiting IDH_FLR_NOTIFICATION_CMPL timeout\n"); + flr_done: atomic_set(&adev->reset_domain->in_gpu_reset, 0); up_write(&adev->reset_domain->sem); diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index a1bad772d932..89992c1c9a62 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -309,6 +309,8 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) timeout -= 10; } while (timeout > 1); + dev_warn(adev->dev, "waiting IDH_FLR_NOTIFICATION_CMPL timeout\n"); + flr_done: atomic_set(&adev->reset_domain->in_gpu_reset, 0); up_write(&adev->reset_domain->sem); -- cgit v1.2.3 From 9dc57c2adf2c307a672f15b4be17c6c14e37cfb9 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Wed, 13 Mar 2024 12:50:43 +0800 Subject: drm/amdgpu: add ras event id support add amdgpu ras event id support to better distinguish different error information sources in dmesg logs. the following log will be identify by event id: {event_id} interrupt to inform RAS event {event_id} ACA logs {event_id} errors statistic since from current injection/error query {event_id} errors statistic since from gpu load Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | 32 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 207 ++++++++++++++++++++----------- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 30 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 1 + drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 10 +- 6 files changed, 195 insertions(+), 88 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index 24ad4b97177b..0734490347db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -210,22 +210,26 @@ int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) return -EOPNOTSUPP; } -static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, struct mca_bank_entry *entry) +static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, struct mca_bank_entry *entry, + struct ras_query_context *qctx) { - dev_info(adev->dev, HW_ERR "Accelerator Check Architecture events logged\n"); - dev_info(adev->dev, HW_ERR "aca entry[%02d].STATUS=0x%016llx\n", - idx, entry->regs[MCA_REG_IDX_STATUS]); - dev_info(adev->dev, HW_ERR "aca entry[%02d].ADDR=0x%016llx\n", - idx, entry->regs[MCA_REG_IDX_ADDR]); - dev_info(adev->dev, HW_ERR "aca entry[%02d].MISC0=0x%016llx\n", - idx, entry->regs[MCA_REG_IDX_MISC0]); - dev_info(adev->dev, HW_ERR "aca entry[%02d].IPID=0x%016llx\n", - idx, entry->regs[MCA_REG_IDX_IPID]); - dev_info(adev->dev, HW_ERR "aca entry[%02d].SYND=0x%016llx\n", - idx, entry->regs[MCA_REG_IDX_SYND]); + u64 event_id = qctx->event_id; + + RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n"); + RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].STATUS=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_STATUS]); + RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].ADDR=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_ADDR]); + RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].MISC0=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_MISC0]); + RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].IPID=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_IPID]); + RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].SYND=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_SYND]); } -int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct ras_err_data *err_data) +int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, + struct ras_err_data *err_data, struct ras_query_context *qctx) { struct amdgpu_smuio_mcm_config_info mcm_info; struct ras_err_addr err_addr = {0}; @@ -244,7 +248,7 @@ int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_blo list_for_each_entry(node, &mca_set.list, node) { entry = &node->entry; - amdgpu_mca_smu_mca_bank_dump(adev, i++, entry); + amdgpu_mca_smu_mca_bank_dump(adev, i++, entry, qctx); count = 0; ret = amdgpu_mca_smu_parse_mca_error_count(adev, blk, type, entry, &count); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h index b964110ed1e0..e5bf07ce3451 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h @@ -169,6 +169,7 @@ void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root void amdgpu_mca_bank_set_init(struct mca_bank_set *mca_set); int amdgpu_mca_bank_set_add_entry(struct mca_bank_set *mca_set, struct mca_bank_entry *entry); void amdgpu_mca_bank_set_release(struct mca_bank_set *mca_set); -int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct ras_err_data *err_data); +int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, + struct ras_err_data *err_data, struct ras_query_context *qctx); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 8ebab6f22e5a..26662c76b293 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1045,6 +1045,7 @@ static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_d static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev, struct ras_manager *ras_mgr, struct ras_err_data *err_data, + struct ras_query_context *qctx, const char *blk_name, bool is_ue, bool is_de) @@ -1052,27 +1053,28 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev, struct amdgpu_smuio_mcm_config_info *mcm_info; struct ras_err_node *err_node; struct ras_err_info *err_info; + u64 event_id = qctx->event_id; if (is_ue) { for_each_ras_error(err_node, err_data) { err_info = &err_node->err_info; mcm_info = &err_info->mcm_info; if (err_info->ue_count) { - dev_info(adev->dev, "socket: %d, die: %d, " - "%lld new uncorrectable hardware errors detected in %s block\n", - mcm_info->socket_id, - mcm_info->die_id, - err_info->ue_count, - blk_name); + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, " + "%lld new uncorrectable hardware errors detected in %s block\n", + mcm_info->socket_id, + mcm_info->die_id, + err_info->ue_count, + blk_name); } } for_each_ras_error(err_node, &ras_mgr->err_data) { err_info = &err_node->err_info; mcm_info = &err_info->mcm_info; - dev_info(adev->dev, "socket: %d, die: %d, " - "%lld uncorrectable hardware errors detected in total in %s block\n", - mcm_info->socket_id, mcm_info->die_id, err_info->ue_count, blk_name); + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, " + "%lld uncorrectable hardware errors detected in total in %s block\n", + mcm_info->socket_id, mcm_info->die_id, err_info->ue_count, blk_name); } } else { @@ -1081,44 +1083,44 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev, err_info = &err_node->err_info; mcm_info = &err_info->mcm_info; if (err_info->de_count) { - dev_info(adev->dev, "socket: %d, die: %d, " - "%lld new deferred hardware errors detected in %s block\n", - mcm_info->socket_id, - mcm_info->die_id, - err_info->de_count, - blk_name); + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, " + "%lld new deferred hardware errors detected in %s block\n", + mcm_info->socket_id, + mcm_info->die_id, + err_info->de_count, + blk_name); } } for_each_ras_error(err_node, &ras_mgr->err_data) { err_info = &err_node->err_info; mcm_info = &err_info->mcm_info; - dev_info(adev->dev, "socket: %d, die: %d, " - "%lld deferred hardware errors detected in total in %s block\n", - mcm_info->socket_id, mcm_info->die_id, - err_info->de_count, blk_name); + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, " + "%lld deferred hardware errors detected in total in %s block\n", + mcm_info->socket_id, mcm_info->die_id, + err_info->de_count, blk_name); } } else { for_each_ras_error(err_node, err_data) { err_info = &err_node->err_info; mcm_info = &err_info->mcm_info; if (err_info->ce_count) { - dev_info(adev->dev, "socket: %d, die: %d, " - "%lld new correctable hardware errors detected in %s block\n", - mcm_info->socket_id, - mcm_info->die_id, - err_info->ce_count, - blk_name); + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, " + "%lld new correctable hardware errors detected in %s block\n", + mcm_info->socket_id, + mcm_info->die_id, + err_info->ce_count, + blk_name); } } for_each_ras_error(err_node, &ras_mgr->err_data) { err_info = &err_node->err_info; mcm_info = &err_info->mcm_info; - dev_info(adev->dev, "socket: %d, die: %d, " - "%lld correctable hardware errors detected in total in %s block\n", - mcm_info->socket_id, mcm_info->die_id, - err_info->ce_count, blk_name); + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, " + "%lld correctable hardware errors detected in total in %s block\n", + mcm_info->socket_id, mcm_info->die_id, + err_info->ce_count, blk_name); } } } @@ -1131,77 +1133,79 @@ static inline bool err_data_has_source_info(struct ras_err_data *data) static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev, struct ras_query_if *query_if, - struct ras_err_data *err_data) + struct ras_err_data *err_data, + struct ras_query_context *qctx) { struct ras_manager *ras_mgr = amdgpu_ras_find_obj(adev, &query_if->head); const char *blk_name = get_ras_block_str(&query_if->head); + u64 event_id = qctx->event_id; if (err_data->ce_count) { if (err_data_has_source_info(err_data)) { - amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, + amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx, blk_name, false, false); } else if (!adev->aid_mask && adev->smuio.funcs && adev->smuio.funcs->get_socket_id && adev->smuio.funcs->get_die_id) { - dev_info(adev->dev, "socket: %d, die: %d " - "%ld correctable hardware errors " - "detected in %s block\n", - adev->smuio.funcs->get_socket_id(adev), - adev->smuio.funcs->get_die_id(adev), - ras_mgr->err_data.ce_count, - blk_name); + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d " + "%ld correctable hardware errors " + "detected in %s block\n", + adev->smuio.funcs->get_socket_id(adev), + adev->smuio.funcs->get_die_id(adev), + ras_mgr->err_data.ce_count, + blk_name); } else { - dev_info(adev->dev, "%ld correctable hardware errors " - "detected in %s block\n", - ras_mgr->err_data.ce_count, - blk_name); + RAS_EVENT_LOG(adev, event_id, "%ld correctable hardware errors " + "detected in %s block\n", + ras_mgr->err_data.ce_count, + blk_name); } } if (err_data->ue_count) { if (err_data_has_source_info(err_data)) { - amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, + amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx, blk_name, true, false); } else if (!adev->aid_mask && adev->smuio.funcs && adev->smuio.funcs->get_socket_id && adev->smuio.funcs->get_die_id) { - dev_info(adev->dev, "socket: %d, die: %d " - "%ld uncorrectable hardware errors " - "detected in %s block\n", - adev->smuio.funcs->get_socket_id(adev), - adev->smuio.funcs->get_die_id(adev), - ras_mgr->err_data.ue_count, - blk_name); + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d " + "%ld uncorrectable hardware errors " + "detected in %s block\n", + adev->smuio.funcs->get_socket_id(adev), + adev->smuio.funcs->get_die_id(adev), + ras_mgr->err_data.ue_count, + blk_name); } else { - dev_info(adev->dev, "%ld uncorrectable hardware errors " - "detected in %s block\n", - ras_mgr->err_data.ue_count, - blk_name); + RAS_EVENT_LOG(adev, event_id, "%ld uncorrectable hardware errors " + "detected in %s block\n", + ras_mgr->err_data.ue_count, + blk_name); } } if (err_data->de_count) { if (err_data_has_source_info(err_data)) { - amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, + amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx, blk_name, false, true); } else if (!adev->aid_mask && adev->smuio.funcs && adev->smuio.funcs->get_socket_id && adev->smuio.funcs->get_die_id) { - dev_info(adev->dev, "socket: %d, die: %d " - "%ld deferred hardware errors " - "detected in %s block\n", - adev->smuio.funcs->get_socket_id(adev), - adev->smuio.funcs->get_die_id(adev), - ras_mgr->err_data.de_count, - blk_name); + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d " + "%ld deferred hardware errors " + "detected in %s block\n", + adev->smuio.funcs->get_socket_id(adev), + adev->smuio.funcs->get_die_id(adev), + ras_mgr->err_data.de_count, + blk_name); } else { - dev_info(adev->dev, "%ld deferred hardware errors " - "detected in %s block\n", - ras_mgr->err_data.de_count, - blk_name); + RAS_EVENT_LOG(adev, event_id, "%ld deferred hardware errors " + "detected in %s block\n", + ras_mgr->err_data.de_count, + blk_name); } } } @@ -1294,6 +1298,7 @@ ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *a static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev, struct ras_query_if *info, struct ras_err_data *err_data, + struct ras_query_context *qctx, unsigned int error_query_mode) { enum amdgpu_ras_block blk = info ? info->head.block : AMDGPU_RAS_BLOCK_COUNT; @@ -1338,8 +1343,8 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev, return ret; } else { /* FIXME: add code to check return value later */ - amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_UE, err_data); - amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_CE, err_data); + amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_UE, err_data, qctx); + amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_CE, err_data, qctx); } } @@ -1351,6 +1356,7 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_i { struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); struct ras_err_data err_data; + struct ras_query_context qctx; unsigned int error_query_mode; int ret; @@ -1364,8 +1370,12 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_i if (!amdgpu_ras_get_error_query_mode(adev, &error_query_mode)) return -EINVAL; + memset(&qctx, 0, sizeof(qctx)); + qctx.event_id = amdgpu_ras_acquire_event_id(adev, amdgpu_ras_intr_triggered() ? + RAS_EVENT_TYPE_ISR : RAS_EVENT_TYPE_INVALID); ret = amdgpu_ras_query_error_status_helper(adev, info, &err_data, + &qctx, error_query_mode); if (ret) goto out_fini_err_data; @@ -1376,7 +1386,7 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_i info->ce_count = obj->err_data.ce_count; info->de_count = obj->err_data.de_count; - amdgpu_ras_error_generate_report(adev, info, &err_data); + amdgpu_ras_error_generate_report(adev, info, &err_data, &qctx); out_fini_err_data: amdgpu_ras_error_data_fini(&err_data); @@ -3036,6 +3046,35 @@ static int amdgpu_get_ras_schema(struct amdgpu_device *adev) AMDGPU_RAS_ERROR__PARITY; } +static void ras_event_mgr_init(struct ras_event_manager *mgr) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mgr->seqnos); i++) + atomic64_set(&mgr->seqnos[i], 0); +} + +static void amdgpu_ras_event_mgr_init(struct amdgpu_device *adev) +{ + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + struct amdgpu_hive_info *hive; + + if (!ras) + return; + + hive = amdgpu_get_xgmi_hive(adev); + ras->event_mgr = hive ? &hive->event_mgr : &ras->__event_mgr; + + /* init event manager with node 0 on xgmi system */ + if (!amdgpu_in_reset(adev)) { + if (!hive || adev->gmc.xgmi.node_id == 0) + ras_event_mgr_init(ras->event_mgr); + } + + if (hive) + amdgpu_put_xgmi_hive(hive); +} + int amdgpu_ras_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -3356,6 +3395,8 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return 0; + amdgpu_ras_event_mgr_init(adev); + if (amdgpu_aca_is_enabled(adev)) { if (amdgpu_in_reset(adev)) r = amdgpu_aca_reset(adev); @@ -3472,13 +3513,37 @@ void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status) atomic_set(&ras->fed, !!status); } +bool amdgpu_ras_event_id_is_valid(struct amdgpu_device *adev, u64 id) +{ + return !(id & BIT_ULL(63)); +} + +u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type) +{ + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + u64 id; + + switch (type) { + case RAS_EVENT_TYPE_ISR: + id = (u64)atomic64_read(&ras->event_mgr->seqnos[type]); + break; + case RAS_EVENT_TYPE_INVALID: + default: + id = BIT_ULL(63) | 0ULL; + break; + } + + return id; +} + void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) { if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) { struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + u64 event_id = (u64)atomic64_inc_return(&ras->event_mgr->seqnos[RAS_EVENT_TYPE_ISR]); - dev_info(adev->dev, "uncorrectable hardware error" - "(ERREVENT_ATHUB_INTERRUPT) detected!\n"); + RAS_EVENT_LOG(adev, event_id, "uncorrectable hardware error" + "(ERREVENT_ATHUB_INTERRUPT) detected!\n"); ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET; amdgpu_ras_reset_gpu(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index e0f8ce9d8440..8d26989c75c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -64,6 +64,14 @@ struct amdgpu_iv_entry; /* The high three bits indicates socketid */ #define AMDGPU_RAS_GET_FEATURES(val) ((val) & ~AMDGPU_RAS_FEATURES_SOCKETID_MASK) +#define RAS_EVENT_LOG(_adev, _id, _fmt, ...) \ +do { \ + if (amdgpu_ras_event_id_is_valid((_adev), (_id))) \ + dev_info((_adev)->dev, "{%llu}" _fmt, (_id), ##__VA_ARGS__); \ + else \ + dev_info((_adev)->dev, _fmt, ##__VA_ARGS__); \ +} while (0) + enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__UMC = 0, AMDGPU_RAS_BLOCK__SDMA, @@ -419,6 +427,21 @@ struct umc_ecc_info { int record_ce_addr_supported; }; +enum ras_event_type { + RAS_EVENT_TYPE_INVALID = -1, + RAS_EVENT_TYPE_ISR = 0, + RAS_EVENT_TYPE_COUNT, +}; + +struct ras_event_manager { + atomic64_t seqnos[RAS_EVENT_TYPE_COUNT]; +}; + +struct ras_query_context { + enum ras_event_type type; + u64 event_id; +}; + struct amdgpu_ras { /* ras infrastructure */ /* for ras itself. */ @@ -479,6 +502,11 @@ struct amdgpu_ras { atomic_t page_retirement_req_cnt; /* Fatal error detected flag */ atomic_t fed; + + /* RAS event manager */ + struct ras_event_manager __event_mgr; + struct ras_event_manager *event_mgr; + }; struct ras_fs_data { @@ -879,4 +907,6 @@ void amdgpu_ras_del_mca_err_addr(struct ras_err_info *err_info, void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status); bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev); +bool amdgpu_ras_event_id_is_valid(struct amdgpu_device *adev, u64 id); +u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 1592c63b3099..a3bfc16de6d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -44,6 +44,7 @@ struct amdgpu_hive_info { struct amdgpu_reset_domain *reset_domain; atomic_t ras_recovery; + struct ras_event_manager event_mgr; }; struct amdgpu_pcs_ras_field { diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 77af4e25ff46..4a02e1f041da 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -404,10 +404,16 @@ static int umc_v12_0_err_cnt_init_per_channel(struct amdgpu_device *adev, static void umc_v12_0_ecc_info_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { + struct ras_query_context qctx; + + memset(&qctx, 0, sizeof(qctx)); + qctx.event_id = amdgpu_ras_acquire_event_id(adev, amdgpu_ras_intr_triggered() ? + RAS_EVENT_TYPE_ISR : RAS_EVENT_TYPE_INVALID); + amdgpu_mca_smu_log_ras_error(adev, - AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_CE, ras_error_status); + AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_CE, ras_error_status, &qctx); amdgpu_mca_smu_log_ras_error(adev, - AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_UE, ras_error_status); + AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_UE, ras_error_status, &qctx); } static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *adev, -- cgit v1.2.3 From 765bea0d73b1de24d3c32d35a75b0c05c498a3fb Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Tue, 27 Feb 2024 10:30:10 +0800 Subject: drm/amdgpu: Apply retry to IP discovery v2 and v4 To ensure GPU driver touch the local framebuffer until it is initialized by integrated firmware. Signed-off-by: Hawking Zhang Reviewed-by: Likun Gao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index a07e4b87d4ca..8d9d6b2e68e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -245,12 +245,16 @@ static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, return -ENOENT; } +#define IP_DISCOVERY_V2 2 +#define IP_DISCOVERY_V4 4 + static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, uint8_t *binary) { uint64_t vram_size; u32 msg; int i, ret = 0; + int ip_discovery_ver = 0; /* It can take up to a second for IFWI init to complete on some dGPUs, * but generally it should be in the 60-100ms range. Normally this starts @@ -259,7 +263,11 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, * wait for this to complete. Once the C2PMSG is updated, we can * continue. */ - if (dev_is_removable(&adev->pdev->dev)) { + + ip_discovery_ver = RREG32(mmIP_DISCOVERY_VERSION); + if ((dev_is_removable(&adev->pdev->dev)) || + (ip_discovery_ver == IP_DISCOVERY_V2) || + (ip_discovery_ver == IP_DISCOVERY_V4)) { for (i = 0; i < 1000; i++) { msg = RREG32(mmMP0_SMN_C2PMSG_33); if (msg & 0x80000000) -- cgit v1.2.3 From 3cfaadbe0fcbb5b6a4a46e4c4ad70f5f5091edf3 Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Wed, 13 Mar 2024 15:27:57 +0800 Subject: drm/amdgpu: add support for atom fw version v3_5 Support for atom_firmware_info_v3_5. Signed-off-by: Likun Gao Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 6857c586ded7..a6d64bdbbb14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -34,6 +34,7 @@ union firmware_info { struct atom_firmware_info_v3_2 v32; struct atom_firmware_info_v3_3 v33; struct atom_firmware_info_v3_4 v34; + struct atom_firmware_info_v3_5 v35; }; /* @@ -872,6 +873,10 @@ int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev) fw_reserved_fb_size = (firmware_info->v34.fw_reserved_size_in_kb << 10); break; + case 5: + fw_reserved_fb_size = + (firmware_info->v35.fw_reserved_size_in_kb << 10); + break; default: fw_reserved_fb_size = 0; break; -- cgit v1.2.3 From 6fe4dab331a706fe4f08736621f88625c6060df0 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 18 Mar 2024 11:26:17 +0530 Subject: drm/amdgpu: remove the adev check for NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit adev is a global data structure and isn't expected to be NULL and hence removing the redundant adev check from the devcoredump code. Cc: Dan Carpenter Signed-off-by: Sunil Khatri Suggested-by: Dan Carpenter Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 57 ++++++++++++++----------------- 1 file changed, 25 insertions(+), 32 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index 1434e9a5506b..3398f2a368d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -211,7 +211,8 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, struct drm_printer p; struct amdgpu_coredump_info *coredump = data; struct drm_print_iterator iter; - int i; + struct amdgpu_vm_fault_info *fault_info; + int i, ver; iter.data = buffer; iter.offset = 0; @@ -233,26 +234,22 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, coredump->reset_task_info.pid); /* GPU IP's information of the SOC */ - if (coredump->adev) { - drm_printf(&p, "\nIP Information\n"); - drm_printf(&p, "SOC Family: %d\n", coredump->adev->family); - drm_printf(&p, "SOC Revision id: %d\n", coredump->adev->rev_id); - drm_printf(&p, "SOC External Revision id: %d\n", - coredump->adev->external_rev_id); - - for (int i = 1; i < MAX_HWIP; i++) { - for (int j = 0; j < HWIP_MAX_INSTANCE; j++) { - int ver = coredump->adev->ip_versions[i][j]; - - if (ver) - drm_printf(&p, "HWIP: %s[%d][%d]: v%d.%d.%d.%d.%d\n", - hw_ip_names[i], i, j, - IP_VERSION_MAJ(ver), - IP_VERSION_MIN(ver), - IP_VERSION_REV(ver), - IP_VERSION_VARIANT(ver), - IP_VERSION_SUBREV(ver)); - } + drm_printf(&p, "\nIP Information\n"); + drm_printf(&p, "SOC Family: %d\n", coredump->adev->family); + drm_printf(&p, "SOC Revision id: %d\n", coredump->adev->rev_id); + drm_printf(&p, "SOC External Revision id: %d\n", coredump->adev->external_rev_id); + + for (int i = 1; i < MAX_HWIP; i++) { + for (int j = 0; j < HWIP_MAX_INSTANCE; j++) { + ver = coredump->adev->ip_versions[i][j]; + if (ver) + drm_printf(&p, "HWIP: %s[%d][%d]: v%d.%d.%d.%d.%d\n", + hw_ip_names[i], i, j, + IP_VERSION_MAJ(ver), + IP_VERSION_MIN(ver), + IP_VERSION_REV(ver), + IP_VERSION_VARIANT(ver), + IP_VERSION_SUBREV(ver)); } } @@ -263,18 +260,14 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, coredump->ring->name); } - if (coredump->adev) { - struct amdgpu_vm_fault_info *fault_info = - &coredump->adev->vm_manager.fault_info; - - drm_printf(&p, "\n[%s] Page fault observed\n", - fault_info->vmhub ? "mmhub" : "gfxhub"); - drm_printf(&p, "Faulty page starting at address: 0x%016llx\n", - fault_info->addr); - drm_printf(&p, "Protection fault status register: 0x%x\n\n", - fault_info->status); - } + /* Add page fault information */ + fault_info = &coredump->adev->vm_manager.fault_info; + drm_printf(&p, "\n[%s] Page fault observed\n", + fault_info->vmhub ? "mmhub" : "gfxhub"); + drm_printf(&p, "Faulty page starting at address: 0x%016llx\n", fault_info->addr); + drm_printf(&p, "Protection fault status register: 0x%x\n\n", fault_info->status); + /* Add ring buffer information */ drm_printf(&p, "Ring buffer information\n"); for (int i = 0; i < coredump->adev->num_rings; i++) { int j = 0; -- cgit v1.2.3 From abc3b5d21d34d21914b8e3caa75690f72baa2f36 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Wed, 31 Jan 2024 17:48:34 +0800 Subject: drm/amdgpu: add new aca_smu_type support Add new types to distinguish between ACA error type and smu mca type. e.g.: the ACA_ERROR_TYPE_DEFERRED is not matched any smu mca valid bank channel, so add new type 'aca_smu_type' to distinguish aca error type and smu mca type. Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 69 ++++++++++++++-------- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h | 15 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 18 ++++-- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 37 ++++++------ drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 33 ++++++----- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 29 +++++---- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 10 ++-- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 26 ++++---- 8 files changed, 141 insertions(+), 96 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 493982f94649..2ad2d874f42f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -28,7 +28,7 @@ #define ACA_BANK_HWID(type, hwid, mcatype) [ACA_HWIP_TYPE_##type] = {hwid, mcatype} -typedef int bank_handler_t(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type, void *data); +typedef int bank_handler_t(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data); struct aca_banks { int nr_banks; @@ -86,7 +86,7 @@ static void aca_banks_release(struct aca_banks *banks) } } -static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev, enum aca_error_type type, u32 *count) +static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev, enum aca_smu_type type, u32 *count) { struct amdgpu_aca *aca = &adev->aca; const struct aca_smu_funcs *smu_funcs = aca->smu_funcs; @@ -127,7 +127,7 @@ static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, st idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]); } -static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_error_type type, +static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_type type, int start, int count, struct aca_banks *banks) { @@ -143,13 +143,12 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_erro return -EOPNOTSUPP; switch (type) { - case ACA_ERROR_TYPE_UE: + case ACA_SMU_TYPE_UE: max_count = smu_funcs->max_ue_bank_count; break; - case ACA_ERROR_TYPE_CE: + case ACA_SMU_TYPE_CE: max_count = smu_funcs->max_ce_bank_count; break; - case ACA_ERROR_TYPE_DEFERRED: default: return -EINVAL; } @@ -164,6 +163,8 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_erro if (ret) return ret; + bank.type = type; + aca_smu_bank_dump(adev, i, count, &bank); ret = aca_banks_add_bank(banks, &bank); @@ -195,7 +196,7 @@ static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum aca_hwip_type t return hwip->hwid == hwid && hwip->mcatype == mcatype; } -static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type) +static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type) { const struct aca_bank_ops *bank_ops = handle->bank_ops; @@ -297,7 +298,7 @@ static int aca_log_errors(struct aca_handle *handle, enum aca_error_type type, } static int aca_generate_bank_report(struct aca_handle *handle, struct aca_bank *bank, - enum aca_error_type type, struct aca_bank_report *report) + enum aca_smu_type type, struct aca_bank_report *report) { const struct aca_bank_ops *bank_ops = handle->bank_ops; @@ -313,12 +314,24 @@ static int aca_generate_bank_report(struct aca_handle *handle, struct aca_bank * } static int handler_aca_log_bank_error(struct aca_handle *handle, struct aca_bank *bank, - enum aca_error_type type, void *data) + enum aca_smu_type smu_type, void *data) { struct aca_bank_report report; + enum aca_error_type type; int ret; - ret = aca_generate_bank_report(handle, bank, type, &report); + switch (smu_type) { + case ACA_SMU_TYPE_UE: + type = ACA_ERROR_TYPE_UE; + break; + case ACA_SMU_TYPE_CE: + type = ACA_ERROR_TYPE_CE; + break; + default: + return -EINVAL; + } + + ret = aca_generate_bank_report(handle, bank, smu_type, &report); if (ret) return ret; @@ -333,7 +346,7 @@ static int handler_aca_log_bank_error(struct aca_handle *handle, struct aca_bank } static int aca_dispatch_bank(struct aca_handle_manager *mgr, struct aca_bank *bank, - enum aca_error_type type, bank_handler_t handler, void *data) + enum aca_smu_type type, bank_handler_t handler, void *data) { struct aca_handle *handle; int ret; @@ -354,7 +367,7 @@ static int aca_dispatch_bank(struct aca_handle_manager *mgr, struct aca_bank *ba } static int aca_dispatch_banks(struct aca_handle_manager *mgr, struct aca_banks *banks, - enum aca_error_type type, bank_handler_t handler, void *data) + enum aca_smu_type type, bank_handler_t handler, void *data) { struct aca_bank_node *node; struct aca_bank *bank; @@ -378,7 +391,7 @@ static int aca_dispatch_banks(struct aca_handle_manager *mgr, struct aca_banks * return 0; } -static int aca_banks_update(struct amdgpu_device *adev, enum aca_error_type type, +static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type, bank_handler_t handler, void *data) { struct amdgpu_aca *aca = &adev->aca; @@ -389,10 +402,6 @@ static int aca_banks_update(struct amdgpu_device *adev, enum aca_error_type type if (list_empty(&aca->mgr.list)) return 0; - /* NOTE: pmfw is only support UE and CE */ - if (type == ACA_ERROR_TYPE_DEFERRED) - type = ACA_ERROR_TYPE_CE; - ret = aca_smu_get_valid_aca_count(adev, type, &count); if (ret) return ret; @@ -479,10 +488,22 @@ out_unlock: static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, enum aca_error_type type, struct ras_err_data *err_data) { + enum aca_smu_type smu_type; int ret; + switch (type) { + case ACA_ERROR_TYPE_UE: + smu_type = ACA_SMU_TYPE_UE; + break; + case ACA_ERROR_TYPE_CE: + smu_type = ACA_SMU_TYPE_CE; + break; + default: + return -EINVAL; + } + /* udpate aca bank to aca source error_cache first */ - ret = aca_banks_update(adev, type, handler_aca_log_bank_error, NULL); + ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, NULL); if (ret) return ret; @@ -784,7 +805,7 @@ static int amdgpu_aca_smu_debug_mode_set(void *data, u64 val) return 0; } -static void aca_dump_entry(struct seq_file *m, struct aca_bank *bank, enum aca_error_type type, int idx) +static void aca_dump_entry(struct seq_file *m, struct aca_bank *bank, enum aca_smu_type type, int idx) { struct aca_bank_info info; int i, ret; @@ -793,7 +814,7 @@ static void aca_dump_entry(struct seq_file *m, struct aca_bank *bank, enum aca_e if (ret) return; - seq_printf(m, "aca entry[%d].type: %s\n", idx, type == ACA_ERROR_TYPE_UE ? "UE" : "CE"); + seq_printf(m, "aca entry[%d].type: %s\n", idx, type == ACA_SMU_TYPE_UE ? "UE" : "CE"); seq_printf(m, "aca entry[%d].info: socketid:%d aid:%d hwid:0x%03x mcatype:0x%04x\n", idx, info.socket_id, info.die_id, info.hwid, info.mcatype); @@ -807,7 +828,7 @@ struct aca_dump_context { }; static int handler_aca_bank_dump(struct aca_handle *handle, struct aca_bank *bank, - enum aca_error_type type, void *data) + enum aca_smu_type type, void *data) { struct aca_dump_context *ctx = (struct aca_dump_context *)data; @@ -816,7 +837,7 @@ static int handler_aca_bank_dump(struct aca_handle *handle, struct aca_bank *ban return handler_aca_log_bank_error(handle, bank, type, NULL); } -static int aca_dump_show(struct seq_file *m, enum aca_error_type type) +static int aca_dump_show(struct seq_file *m, enum aca_smu_type type) { struct amdgpu_device *adev = (struct amdgpu_device *)m->private; struct aca_dump_context context = { @@ -829,7 +850,7 @@ static int aca_dump_show(struct seq_file *m, enum aca_error_type type) static int aca_dump_ce_show(struct seq_file *m, void *unused) { - return aca_dump_show(m, ACA_ERROR_TYPE_CE); + return aca_dump_show(m, ACA_SMU_TYPE_CE); } static int aca_dump_ce_open(struct inode *inode, struct file *file) @@ -847,7 +868,7 @@ static const struct file_operations aca_ce_dump_debug_fops = { static int aca_dump_ue_show(struct seq_file *m, void *unused) { - return aca_dump_show(m, ACA_ERROR_TYPE_UE); + return aca_dump_show(m, ACA_SMU_TYPE_UE); } static int aca_dump_ue_open(struct inode *inode, struct file *file) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h index 2da50e095883..a48f4abc345c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h @@ -99,7 +99,14 @@ enum aca_error_type { ACA_ERROR_TYPE_COUNT }; +enum aca_smu_type { + ACA_SMU_TYPE_UE = 0, + ACA_SMU_TYPE_CE, + ACA_SMU_TYPE_COUNT, +}; + struct aca_bank { + enum aca_smu_type type; u64 regs[ACA_MAX_REGS_COUNT]; }; @@ -157,9 +164,9 @@ struct aca_handle { }; struct aca_bank_ops { - int (*aca_bank_generate_report)(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type, + int (*aca_bank_generate_report)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, struct aca_bank_report *report, void *data); - bool (*aca_bank_is_valid)(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type, + bool (*aca_bank_is_valid)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data); }; @@ -167,8 +174,8 @@ struct aca_smu_funcs { int max_ue_bank_count; int max_ce_bank_count; int (*set_debug_mode)(struct amdgpu_device *adev, bool enable); - int (*get_valid_aca_count)(struct amdgpu_device *adev, enum aca_error_type type, u32 *count); - int (*get_valid_aca_bank)(struct amdgpu_device *adev, enum aca_error_type type, int idx, struct aca_bank *bank); + int (*get_valid_aca_count)(struct amdgpu_device *adev, enum aca_smu_type type, u32 *count); + int (*get_valid_aca_bank)(struct amdgpu_device *adev, enum aca_smu_type type, int idx, struct aca_bank *bank); }; struct amdgpu_aca { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 20d51f6c9bb8..43b1aaa04234 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -1035,12 +1035,12 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev) return 0; } -static int xgmi_v6_4_0_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type, +static int xgmi_v6_4_0_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, struct aca_bank_report *report, void *data) { struct amdgpu_device *adev = handle->adev; const char *error_str; - u64 status; + u64 status, count; int ret, ext_error_code; ret = aca_bank_info_decode(bank, &report->info); @@ -1055,9 +1055,17 @@ static int xgmi_v6_4_0_aca_bank_generate_report(struct aca_handle *handle, struc if (error_str) dev_info(adev->dev, "%s detected\n", error_str); - if ((type == ACA_ERROR_TYPE_UE && ext_error_code == 0) || - (type == ACA_ERROR_TYPE_CE && ext_error_code == 6)) - report->count[type] = ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]); + count = ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]); + switch (type) { + case ACA_SMU_TYPE_UE: + report->count[ACA_ERROR_TYPE_UE] = ext_error_code == 0 ? count : 0ULL; + break; + case ACA_SMU_TYPE_CE: + report->count[ACA_ERROR_TYPE_CE] = ext_error_code == 6 ? count : 0ULL; + break; + default: + return -EINVAL; + } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index b53c8fd4e8cf..bb5be8c4cf11 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -681,37 +681,40 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { }; static int gfx_v9_4_3_aca_bank_generate_report(struct aca_handle *handle, - struct aca_bank *bank, enum aca_error_type type, + struct aca_bank *bank, enum aca_smu_type type, struct aca_bank_report *report, void *data) { - u64 status, misc0; + u64 misc0; u32 instlo; int ret; - status = bank->regs[ACA_REG_IDX_STATUS]; - if ((type == ACA_ERROR_TYPE_UE && - ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_FAULT) || - (type == ACA_ERROR_TYPE_CE && - ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_CE)) { + ret = aca_bank_info_decode(bank, &report->info); + if (ret) + return ret; - ret = aca_bank_info_decode(bank, &report->info); - if (ret) - return ret; + /* NOTE: overwrite info.die_id with xcd id for gfx */ + instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); + instlo &= GENMASK(31, 1); + report->info.die_id = instlo == mmSMNAID_XCD0_MCA_SMU ? 0 : 1; - /* NOTE: overwrite info.die_id with xcd id for gfx */ - instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); - instlo &= GENMASK(31, 1); - report->info.die_id = instlo == mmSMNAID_XCD0_MCA_SMU ? 0 : 1; + misc0 = bank->regs[ACA_REG_IDX_MISC0]; - misc0 = bank->regs[ACA_REG_IDX_MISC0]; - report->count[type] = ACA_REG__MISC0__ERRCNT(misc0); + switch (type) { + case ACA_SMU_TYPE_UE: + report->count[ACA_ERROR_TYPE_UE] = 1ULL; + break; + case ACA_SMU_TYPE_CE: + report->count[ACA_ERROR_TYPE_CE] = ACA_REG__MISC0__ERRCNT(misc0); + break; + default: + return -EINVAL; } return 0; } static bool gfx_v9_4_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, - enum aca_error_type type, void *data) + enum aca_smu_type type, void *data) { u32 instlo; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index c0fc44cdd658..bf83e2c08f04 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -707,24 +707,27 @@ static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = { }; static int mmhub_v1_8_aca_bank_generate_report(struct aca_handle *handle, - struct aca_bank *bank, enum aca_error_type type, + struct aca_bank *bank, enum aca_smu_type type, struct aca_bank_report *report, void *data) { - u64 status, misc0; + u64 misc0; int ret; - status = bank->regs[ACA_REG_IDX_STATUS]; - if ((type == ACA_ERROR_TYPE_UE && - ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_FAULT) || - (type == ACA_ERROR_TYPE_CE && - ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_CE)) { - - ret = aca_bank_info_decode(bank, &report->info); - if (ret) - return ret; - - misc0 = bank->regs[ACA_REG_IDX_MISC0]; - report->count[type] = ACA_REG__MISC0__ERRCNT(misc0); + ret = aca_bank_info_decode(bank, &report->info); + if (ret) + return ret; + + misc0 = bank->regs[ACA_REG_IDX_MISC0]; + + switch (type) { + case ACA_SMU_TYPE_UE: + report->count[ACA_ERROR_TYPE_UE] = 1ULL; + break; + case ACA_SMU_TYPE_CE: + report->count[ACA_ERROR_TYPE_CE] = ACA_REG__MISC0__ERRCNT(misc0); + break; + default: + return -EINVAL; } return 0; @@ -741,7 +744,7 @@ static int mmhub_v1_8_err_codes[] = { }; static bool mmhub_v1_8_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, - enum aca_error_type type, void *data) + enum aca_smu_type type, void *data) { u32 instlo; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 103dc9c7325f..9aa5c190950e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -2190,24 +2190,27 @@ static const struct amdgpu_ras_block_hw_ops sdma_v4_4_2_ras_hw_ops = { }; static int sdma_v4_4_2_aca_bank_generate_report(struct aca_handle *handle, - struct aca_bank *bank, enum aca_error_type type, + struct aca_bank *bank, enum aca_smu_type type, struct aca_bank_report *report, void *data) { - u64 status, misc0; + u64 misc0; int ret; - status = bank->regs[ACA_REG_IDX_STATUS]; - if ((type == ACA_ERROR_TYPE_UE && - ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_FAULT) || - (type == ACA_ERROR_TYPE_CE && - ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_CE)) { + ret = aca_bank_info_decode(bank, &report->info); + if (ret) + return ret; - ret = aca_bank_info_decode(bank, &report->info); - if (ret) - return ret; + misc0 = bank->regs[ACA_REG_IDX_MISC0]; - misc0 = bank->regs[ACA_REG_IDX_MISC0]; - report->count[type] = ACA_REG__MISC0__ERRCNT(misc0); + switch (type) { + case ACA_SMU_TYPE_UE: + report->count[ACA_ERROR_TYPE_UE] = 1ULL; + break; + case ACA_SMU_TYPE_CE: + report->count[ACA_ERROR_TYPE_CE] = ACA_REG__MISC0__ERRCNT(misc0); + break; + default: + return -EINVAL; } return 0; @@ -2217,7 +2220,7 @@ static int sdma_v4_4_2_aca_bank_generate_report(struct aca_handle *handle, static int sdma_v4_4_2_err_codes[] = { 33, 34, 35, 36 }; static bool sdma_v4_4_2_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, - enum aca_error_type type, void *data) + enum aca_smu_type type, void *data) { u32 instlo; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 4a02e1f041da..7b841431344f 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -504,7 +504,7 @@ const struct amdgpu_ras_block_hw_ops umc_v12_0_ras_hw_ops = { .query_ras_error_address = umc_v12_0_query_ras_error_address, }; -static int umc_v12_0_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type, +static int umc_v12_0_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, struct aca_bank_report *report, void *data) { struct amdgpu_device *adev = handle->adev; @@ -517,14 +517,14 @@ static int umc_v12_0_aca_bank_generate_report(struct aca_handle *handle, struct status = bank->regs[ACA_REG_IDX_STATUS]; switch (type) { - case ACA_ERROR_TYPE_UE: + case ACA_SMU_TYPE_UE: if (umc_v12_0_is_uncorrectable_error(adev, status)) { - report->count[type] = 1; + report->count[ACA_ERROR_TYPE_UE] = 1; } break; - case ACA_ERROR_TYPE_CE: + case ACA_SMU_TYPE_CE: if (umc_v12_0_is_correctable_error(adev, status)) { - report->count[type] = 1; + report->count[ACA_ERROR_TYPE_CE] = 1; } break; default: diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 4e178916e845..1258060816dd 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -2984,7 +2984,7 @@ static int aca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) return smu_v13_0_6_mca_set_debug_mode(smu, enable); } -static int smu_v13_0_6_get_valid_aca_count(struct smu_context *smu, enum aca_error_type type, u32 *count) +static int smu_v13_0_6_get_valid_aca_count(struct smu_context *smu, enum aca_smu_type type, u32 *count) { uint32_t msg; int ret; @@ -2993,10 +2993,10 @@ static int smu_v13_0_6_get_valid_aca_count(struct smu_context *smu, enum aca_err return -EINVAL; switch (type) { - case ACA_ERROR_TYPE_UE: + case ACA_SMU_TYPE_UE: msg = SMU_MSG_QueryValidMcaCount; break; - case ACA_ERROR_TYPE_CE: + case ACA_SMU_TYPE_CE: msg = SMU_MSG_QueryValidMcaCeCount; break; default: @@ -3013,14 +3013,14 @@ static int smu_v13_0_6_get_valid_aca_count(struct smu_context *smu, enum aca_err } static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev, - enum aca_error_type type, u32 *count) + enum aca_smu_type type, u32 *count) { struct smu_context *smu = adev->powerplay.pp_handle; int ret; switch (type) { - case ACA_ERROR_TYPE_UE: - case ACA_ERROR_TYPE_CE: + case ACA_SMU_TYPE_UE: + case ACA_SMU_TYPE_CE: ret = smu_v13_0_6_get_valid_aca_count(smu, type, count); break; default: @@ -3031,16 +3031,16 @@ static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev, return ret; } -static int __smu_v13_0_6_aca_bank_dump(struct smu_context *smu, enum aca_error_type type, +static int __smu_v13_0_6_aca_bank_dump(struct smu_context *smu, enum aca_smu_type type, int idx, int offset, u32 *val) { uint32_t msg, param; switch (type) { - case ACA_ERROR_TYPE_UE: + case ACA_SMU_TYPE_UE: msg = SMU_MSG_McaBankDumpDW; break; - case ACA_ERROR_TYPE_CE: + case ACA_SMU_TYPE_CE: msg = SMU_MSG_McaBankCeDumpDW; break; default: @@ -3052,7 +3052,7 @@ static int __smu_v13_0_6_aca_bank_dump(struct smu_context *smu, enum aca_error_t return smu_cmn_send_smc_msg_with_param(smu, msg, param, (uint32_t *)val); } -static int smu_v13_0_6_aca_bank_dump(struct smu_context *smu, enum aca_error_type type, +static int smu_v13_0_6_aca_bank_dump(struct smu_context *smu, enum aca_smu_type type, int idx, int offset, u32 *val, int count) { int ret, i; @@ -3069,7 +3069,7 @@ static int smu_v13_0_6_aca_bank_dump(struct smu_context *smu, enum aca_error_typ return 0; } -static int aca_bank_read_reg(struct amdgpu_device *adev, enum aca_error_type type, +static int aca_bank_read_reg(struct amdgpu_device *adev, enum aca_smu_type type, int idx, int reg_idx, u64 *val) { struct smu_context *smu = adev->powerplay.pp_handle; @@ -3086,13 +3086,13 @@ static int aca_bank_read_reg(struct amdgpu_device *adev, enum aca_error_type typ *val = (u64)data[1] << 32 | data[0]; dev_dbg(adev->dev, "mca read bank reg: type:%s, index: %d, reg_idx: %d, val: 0x%016llx\n", - type == ACA_ERROR_TYPE_UE ? "UE" : "CE", idx, reg_idx, *val); + type == ACA_SMU_TYPE_UE ? "UE" : "CE", idx, reg_idx, *val); return 0; } static int aca_smu_get_valid_aca_bank(struct amdgpu_device *adev, - enum aca_error_type type, int idx, struct aca_bank *bank) + enum aca_smu_type type, int idx, struct aca_bank *bank) { int i, ret, count; -- cgit v1.2.3 From 949899cbacf54f1611a7c343093069462bbb6625 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 6 Feb 2024 13:24:16 +0800 Subject: drm/amdgpu: add new api to save error count into aca cache add new api to save error count into aca cache. Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 33 +++++++-------------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h | 4 +++- 2 files changed, 10 insertions(+), 27 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 2ad2d874f42f..7cbff74c0f2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -274,25 +274,25 @@ static struct aca_bank_error *get_bank_error(struct aca_error *aerr, struct aca_ return new_bank_error(aerr, info); } -static int aca_log_errors(struct aca_handle *handle, enum aca_error_type type, - struct aca_bank_report *report) +int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info, + enum aca_error_type type, u64 count) { struct aca_error_cache *error_cache = &handle->error_cache; struct aca_bank_error *bank_error; struct aca_error *aerr; - if (!handle || !report) + if (!handle || !info || type >= ACA_ERROR_TYPE_COUNT) return -EINVAL; - if (!report->count[type]) + if (!count) return 0; aerr = &error_cache->errors[type]; - bank_error = get_bank_error(aerr, &report->info); + bank_error = get_bank_error(aerr, info); if (!bank_error) return -ENOMEM; - bank_error->count[type] += report->count[type]; + bank_error->count += count; return 0; } @@ -317,31 +317,12 @@ static int handler_aca_log_bank_error(struct aca_handle *handle, struct aca_bank enum aca_smu_type smu_type, void *data) { struct aca_bank_report report; - enum aca_error_type type; int ret; - switch (smu_type) { - case ACA_SMU_TYPE_UE: - type = ACA_ERROR_TYPE_UE; - break; - case ACA_SMU_TYPE_CE: - type = ACA_ERROR_TYPE_CE; - break; - default: - return -EINVAL; - } - ret = aca_generate_bank_report(handle, bank, smu_type, &report); if (ret) return ret; - if (!report.count[type]) - return 0; - - ret = aca_log_errors(handle, type, &report); - if (ret) - return ret; - return 0; } @@ -440,7 +421,7 @@ static int aca_log_aca_error_data(struct aca_bank_error *bank_error, enum aca_er if (type >= ACA_ERROR_TYPE_COUNT) return -EINVAL; - count = bank_error->count[type]; + count = bank_error->count; if (!count) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h index a48f4abc345c..470efefcde10 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h @@ -130,7 +130,7 @@ struct aca_bank_report { struct aca_bank_error { struct list_head node; struct aca_bank_info info; - u64 count[ACA_ERROR_TYPE_COUNT]; + u64 count; }; struct aca_error { @@ -206,4 +206,6 @@ int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *han enum aca_error_type type, void *data); int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en); void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root); +int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info, + enum aca_error_type type, u64 count); #endif -- cgit v1.2.3 From e6136150cd2666a03c71208b0a0521c6996107dc Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 22 Feb 2024 10:10:43 +0800 Subject: drm/amdgpu: refine aca error cache for gfx v9.4.3 refine aca error cache for gfx 9.4.3 Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index bb5be8c4cf11..cc7cccf0b2b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -684,33 +684,36 @@ static int gfx_v9_4_3_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, struct aca_bank_report *report, void *data) { + struct aca_bank_info info; u64 misc0; u32 instlo; int ret; - ret = aca_bank_info_decode(bank, &report->info); + ret = aca_bank_info_decode(bank, &info); if (ret) return ret; /* NOTE: overwrite info.die_id with xcd id for gfx */ instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); instlo &= GENMASK(31, 1); - report->info.die_id = instlo == mmSMNAID_XCD0_MCA_SMU ? 0 : 1; + info.die_id = instlo == mmSMNAID_XCD0_MCA_SMU ? 0 : 1; misc0 = bank->regs[ACA_REG_IDX_MISC0]; switch (type) { case ACA_SMU_TYPE_UE: - report->count[ACA_ERROR_TYPE_UE] = 1ULL; + ret = aca_error_cache_log_bank_error(handle, &info, + ACA_ERROR_TYPE_UE, 1ULL); break; case ACA_SMU_TYPE_CE: - report->count[ACA_ERROR_TYPE_CE] = ACA_REG__MISC0__ERRCNT(misc0); + ret = aca_error_cache_log_bank_error(handle, &info, + ACA_ERROR_TYPE_CE, ACA_REG__MISC0__ERRCNT(misc0)); break; default: return -EINVAL; } - return 0; + return ret; } static bool gfx_v9_4_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, -- cgit v1.2.3 From d8a3f0a0348d02adf975fb0be71938dfb1c2e273 Mon Sep 17 00:00:00 2001 From: Christian Koenig Date: Mon, 18 Mar 2024 11:43:39 +0100 Subject: drm/amdgpu: implement TLB flush fence The problem is that when (for example) 4k pages are replaced with a single 2M page we need to wait for change to be flushed out by invalidating the TLB before the PT can be freed. Solve this by moving the TLB flush into a DMA-fence object which can be used to delay the freeing of the PT BOs until it is signaled. V2: (Shashank) - rebase - set dma_fence_error only in case of error - add tlb_flush fence only when PT/PD BO is locked (Felix) - use vm->pasid when f is NULL (Mukul) V4: - add a wait for (f->dependency) in tlb_fence_work (Christian) - move the misplaced fence_create call to the end (Philip) V5: - free the f->dependency properly V6: (Shashank) - light code movement, moved all the clean-up in previous patch - introduce params.needs_flush and its usage in this patch - rebase without TLB HW sequence patch V7: - Keep the vm->last_update_fence and tlb_cb code until we can fix the HW sequencing (Christian) - Move all the tlb_fence related code in a separate function so that its easier to read and review V9: Addressed review comments from Christian - start PT update only when we have callback memory allocated V10: - handle device unlock in OOM case (Christian, Mukul) - added Christian's R-B Cc: Christian Koenig Cc: Felix Kuehling Cc: Rajneesh Bhardwaj Cc: Alex Deucher Acked-by: Felix Kuehling Acked-by: Rajneesh Bhardwaj Tested-by: Rajneesh Bhardwaj Reviewed-by: Shashank Sharma Reviewed-by: Christian Koenig Signed-off-by: Christian Koenig Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 62 ++++++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 4 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c | 112 +++++++++++++++++++++++ 7 files changed, 175 insertions(+), 20 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 4536c8ad0e11..f24f11ac3e92 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -70,7 +70,8 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \ amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \ atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ - amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o amdgpu_pll.o \ + amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_vm_tlb_fence.o \ + amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \ amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 81fb3465e197..317d2d468a6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -885,6 +885,44 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, kfree(tlb_cb); } +/** + * amdgpu_vm_tlb_flush - prepare TLB flush + * + * @params: parameters for update + * @fence: input fence to sync TLB flush with + * @tlb_cb: the callback structure + * + * Increments the tlb sequence to make sure that future CS execute a VM flush. + */ +static void +amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params, + struct dma_fence **fence, + struct amdgpu_vm_tlb_seq_struct *tlb_cb) +{ + struct amdgpu_vm *vm = params->vm; + + if (!fence || !*fence) + return; + + tlb_cb->vm = vm; + if (!dma_fence_add_callback(*fence, &tlb_cb->cb, + amdgpu_vm_tlb_seq_cb)) { + dma_fence_put(vm->last_tlb_flush); + vm->last_tlb_flush = dma_fence_get(*fence); + } else { + amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb); + } + + /* Prepare a TLB flush fence to be attached to PTs */ + if (!params->unlocked && vm->is_compute_context) { + amdgpu_vm_tlb_fence_create(params->adev, vm, fence); + + /* Makes sure no PD/PT is freed before the flush */ + dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence, + DMA_RESV_USAGE_BOOKKEEP); + } +} + /** * amdgpu_vm_update_range - update a range in the vm page table * @@ -916,8 +954,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct ttm_resource *res, dma_addr_t *pages_addr, struct dma_fence **fence) { - struct amdgpu_vm_update_params params; struct amdgpu_vm_tlb_seq_struct *tlb_cb; + struct amdgpu_vm_update_params params; struct amdgpu_res_cursor cursor; enum amdgpu_sync_mode sync_mode; int r, idx; @@ -927,8 +965,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL); if (!tlb_cb) { - r = -ENOMEM; - goto error_unlock; + drm_dev_exit(idx); + return -ENOMEM; } /* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache, @@ -948,6 +986,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, params.immediate = immediate; params.pages_addr = pages_addr; params.unlocked = unlocked; + params.needs_flush = flush_tlb; params.allow_override = allow_override; /* Implicitly sync to command submissions in the same VM before @@ -1031,24 +1070,16 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, } r = vm->update_funcs->commit(¶ms, fence); + if (r) + goto error_free; - if (flush_tlb || params.table_freed) { - tlb_cb->vm = vm; - if (fence && *fence && - !dma_fence_add_callback(*fence, &tlb_cb->cb, - amdgpu_vm_tlb_seq_cb)) { - dma_fence_put(vm->last_tlb_flush); - vm->last_tlb_flush = dma_fence_get(*fence); - } else { - amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb); - } + if (params.needs_flush) { + amdgpu_vm_tlb_flush(¶ms, fence, tlb_cb); tlb_cb = NULL; } error_free: kfree(tlb_cb); - -error_unlock: amdgpu_vm_eviction_unlock(vm); drm_dev_exit(idx); return r; @@ -2391,6 +2422,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, mutex_init(&vm->eviction_lock); vm->evicting = false; + vm->tlb_fence_context = dma_fence_context_alloc(1); r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level, false, &root, xcp_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 8efa8422f4f7..b0a4fe683352 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -257,9 +257,9 @@ struct amdgpu_vm_update_params { unsigned int num_dw_left; /** - * @table_freed: return true if page table is freed when updating + * @needs_flush: true whenever we need to invalidate the TLB */ - bool table_freed; + bool needs_flush; /** * @allow_override: true for memory that is not uncached: allows MTYPE @@ -342,6 +342,7 @@ struct amdgpu_vm { atomic64_t tlb_seq; struct dma_fence *last_tlb_flush; atomic64_t kfd_last_flushed_seq; + uint64_t tlb_fence_context; /* How many times we had to re-generate the page tables */ uint64_t generation; @@ -611,5 +612,8 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev, uint64_t addr, uint32_t status, unsigned int vmhub); +void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct dma_fence **fence); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c index 6e31621452de..3895bd7d176a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c @@ -108,7 +108,9 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p, static int amdgpu_vm_cpu_commit(struct amdgpu_vm_update_params *p, struct dma_fence **fence) { - /* Flush HDP */ + if (p->needs_flush) + atomic64_inc(&p->vm->tlb_seq); + mb(); amdgpu_device_flush_hdp(p->adev, NULL); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index 124389a6bf48..601df0ce8290 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -972,7 +972,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, while (cursor.pfn < frag_start) { /* Make sure previous mapping is freed */ if (cursor.entry->bo) { - params->table_freed = true; + params->needs_flush = true; amdgpu_vm_pt_free_dfs(adev, params->vm, &cursor, params->unlocked); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index 349416e176a1..66e8a016126b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -126,6 +126,10 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, WARN_ON(ib->length_dw == 0); amdgpu_ring_pad_ib(ring, ib); + + if (p->needs_flush) + atomic64_inc(&p->vm->tlb_seq); + WARN_ON(ib->length_dw > p->num_dw_left); f = amdgpu_job_submit(p->job); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c new file mode 100644 index 000000000000..51cddfa3f1e8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +#include "amdgpu.h" +#include "amdgpu_vm.h" +#include "amdgpu_gmc.h" + +struct amdgpu_tlb_fence { + struct dma_fence base; + struct amdgpu_device *adev; + struct dma_fence *dependency; + struct work_struct work; + spinlock_t lock; + uint16_t pasid; + +}; + +static const char *amdgpu_tlb_fence_get_driver_name(struct dma_fence *fence) +{ + return "amdgpu tlb fence"; +} + +static const char *amdgpu_tlb_fence_get_timeline_name(struct dma_fence *f) +{ + return "amdgpu tlb timeline"; +} + +static void amdgpu_tlb_fence_work(struct work_struct *work) +{ + struct amdgpu_tlb_fence *f = container_of(work, typeof(*f), work); + int r; + + if (f->dependency) { + dma_fence_wait(f->dependency, false); + dma_fence_put(f->dependency); + f->dependency = NULL; + } + + r = amdgpu_gmc_flush_gpu_tlb_pasid(f->adev, f->pasid, 2, true, 0); + if (r) { + dev_err(f->adev->dev, "TLB flush failed for PASID %d.\n", + f->pasid); + dma_fence_set_error(&f->base, r); + } + + dma_fence_signal(&f->base); + dma_fence_put(&f->base); +} + +static const struct dma_fence_ops amdgpu_tlb_fence_ops = { + .use_64bit_seqno = true, + .get_driver_name = amdgpu_tlb_fence_get_driver_name, + .get_timeline_name = amdgpu_tlb_fence_get_timeline_name +}; + +void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct dma_fence **fence) +{ + struct amdgpu_tlb_fence *f; + + f = kmalloc(sizeof(*f), GFP_KERNEL); + if (!f) { + /* + * We can't fail since the PDEs and PTEs are already updated, so + * just block for the dependency and execute the TLB flush + */ + if (*fence) + dma_fence_wait(*fence, false); + + amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, 2, true, 0); + *fence = dma_fence_get_stub(); + return; + } + + f->adev = adev; + f->dependency = *fence; + f->pasid = vm->pasid; + INIT_WORK(&f->work, amdgpu_tlb_fence_work); + spin_lock_init(&f->lock); + + dma_fence_init(&f->base, &amdgpu_tlb_fence_ops, &f->lock, + vm->tlb_fence_context, atomic64_read(&vm->tlb_seq)); + + /* TODO: We probably need a separate wq here */ + dma_fence_get(&f->base); + schedule_work(&f->work); + + *fence = &f->base; +} -- cgit v1.2.3 From 5275114a7043425b690275566e80d450ddc7525d Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 22 Feb 2024 10:11:07 +0800 Subject: drm/amdgpu: refine aca error cache for mmhub v1.8 refine aca error cache for mmhub v1.8 Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index bf83e2c08f04..c41f600a5e10 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -710,27 +710,29 @@ static int mmhub_v1_8_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, struct aca_bank_report *report, void *data) { + struct aca_bank_info info; u64 misc0; int ret; - ret = aca_bank_info_decode(bank, &report->info); + ret = aca_bank_info_decode(bank, &info); if (ret) return ret; misc0 = bank->regs[ACA_REG_IDX_MISC0]; - switch (type) { case ACA_SMU_TYPE_UE: - report->count[ACA_ERROR_TYPE_UE] = 1ULL; + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, + 1ULL); break; case ACA_SMU_TYPE_CE: - report->count[ACA_ERROR_TYPE_CE] = ACA_REG__MISC0__ERRCNT(misc0); + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, + ACA_REG__MISC0__ERRCNT(misc0)); break; default: return -EINVAL; } - return 0; + return ret; } /* reference to smu driver if header file */ -- cgit v1.2.3 From 176c3e89567fdc037c318f672bbe3c2271004560 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Mon, 11 Mar 2024 16:51:49 +0800 Subject: drm/amdgpu: add utcl2 RAS poison query for mmhub Add it for mmhub v1.8. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h | 2 ++ drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 15 +++++++++++++++ 2 files changed, 17 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h index 1ca9d4ed8063..95d676ee207f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h @@ -63,6 +63,8 @@ struct amdgpu_mmhub_funcs { uint64_t page_table_base); void (*update_power_gating)(struct amdgpu_device *adev, bool enable); + bool (*query_utcl2_poison_status)(struct amdgpu_device *adev, + int hub_inst); }; struct amdgpu_mmhub { diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index c41f600a5e10..50113cadaa0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -559,6 +559,20 @@ static void mmhub_v1_8_get_clockgating(struct amdgpu_device *adev, u64 *flags) } +static bool mmhub_v1_8_query_utcl2_poison_status(struct amdgpu_device *adev, + int hub_inst) +{ + u32 fed, status; + + status = RREG32_SOC15(MMHUB, hub_inst, regVM_L2_PROTECTION_FAULT_STATUS); + fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); + /* reset page fault status */ + WREG32_P(SOC15_REG_OFFSET(MMHUB, hub_inst, + regVM_L2_PROTECTION_FAULT_STATUS), 1, ~1); + + return fed; +} + const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = { .get_fb_location = mmhub_v1_8_get_fb_location, .init = mmhub_v1_8_init, @@ -568,6 +582,7 @@ const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = { .setup_vm_pt_regs = mmhub_v1_8_setup_vm_pt_regs, .set_clockgating = mmhub_v1_8_set_clockgating, .get_clockgating = mmhub_v1_8_get_clockgating, + .query_utcl2_poison_status = mmhub_v1_8_query_utcl2_poison_status, }; static const struct amdgpu_ras_err_status_reg_entry mmhub_v1_8_ce_reg_list[] = { -- cgit v1.2.3 From d8070c4241087d8c1ad3fc21632bac268dd7578a Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Mon, 11 Mar 2024 16:53:53 +0800 Subject: drm/amdgpu: support utcl2 RAS poison query for mmhub Support the query for both gfxhub and mmhub, also replace xcc_id with hub_inst. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 17 ++++++++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 5 ++--- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c | 17 +++++++++++------ drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 17 +++++++++++------ 5 files changed, 37 insertions(+), 21 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index d5fde8adf19b..8ee18c2c082a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -770,12 +770,19 @@ int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, } bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev, - int xcc_id) + int hub_inst, int hub_type) { - if (adev->gfxhub.funcs->query_utcl2_poison_status) - return adev->gfxhub.funcs->query_utcl2_poison_status(adev, xcc_id); - else - return false; + if (!hub_type) { + if (adev->gfxhub.funcs->query_utcl2_poison_status) + return adev->gfxhub.funcs->query_utcl2_poison_status(adev, hub_inst); + else + return false; + } else { + if (adev->mmhub.funcs->query_utcl2_poison_status) + return adev->mmhub.funcs->query_utcl2_poison_status(adev, hub_inst); + else + return false; + } } int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index caee36e52a09..6b67f0025966 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -342,7 +342,7 @@ bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem * void amdgpu_amdkfd_block_mmu_notifications(void *p); int amdgpu_amdkfd_criu_resume(void *p); bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev, - int xcc_id); + int hub_inst, int hub_type); int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag, int8_t xcp_id); void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 05d52b9274a9..470a146f2f43 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -666,10 +666,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW); fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); - /* for gfx fed error, kfd will handle it, return directly */ + /* for fed error, kfd will handle it, return directly */ if (fed && amdgpu_ras_is_poison_mode_supported(adev) && - (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2)) && - (vmhub < AMDGPU_MMHUB0_START)) + (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) return 0; WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c index a8e76287dde0..650da18b0d87 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c @@ -369,18 +369,23 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry); uint32_t node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry); uint32_t vmid_type = SOC15_VMID_TYPE_FROM_IH_ENTRY(ih_ring_entry); - int xcc_id = 0; + int hub_inst = 0; struct kfd_hsa_memory_exception_data exception_data; + /* gfxhub */ if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) { - xcc_id = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev, + hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev, node_id); - if (xcc_id < 0) - xcc_id = 0; + if (hub_inst < 0) + hub_inst = 0; } - if (client_id == SOC15_IH_CLIENTID_UTCL2 && !vmid_type && - amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev, xcc_id)) { + /* mmhub */ + if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC) + hub_inst = node_id / 4; + + if (amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev, + hub_inst, vmid_type)) { event_interrupt_poison_consumption(dev, pasid, client_id); return; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index ff7392336795..11641f4645e6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -415,18 +415,23 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry); uint32_t node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry); uint32_t vmid_type = SOC15_VMID_TYPE_FROM_IH_ENTRY(ih_ring_entry); - int xcc_id = 0; + int hub_inst = 0; struct kfd_hsa_memory_exception_data exception_data; + /* gfxhub */ if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) { - xcc_id = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev, + hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev, node_id); - if (xcc_id < 0) - xcc_id = 0; + if (hub_inst < 0) + hub_inst = 0; } - if (client_id == SOC15_IH_CLIENTID_UTCL2 && !vmid_type && - amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev, xcc_id)) { + /* mmhub */ + if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC) + hub_inst = node_id / 4; + + if (amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev, + hub_inst, vmid_type)) { event_interrupt_poison_consumption_v9(dev, pasid, client_id); return; } -- cgit v1.2.3 From 62d2aaa7d466ec286ca871e9fc628cedaffb41b1 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 22 Feb 2024 10:11:20 +0800 Subject: drm/amdgpu: refine aca error cache for xgmi v6.4.0 refine aca error cache for xgmi v6.4.0 Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 43b1aaa04234..3e7f0199926e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -1039,11 +1039,12 @@ static int xgmi_v6_4_0_aca_bank_generate_report(struct aca_handle *handle, struc struct aca_bank_report *report, void *data) { struct amdgpu_device *adev = handle->adev; + struct aca_bank_info info; const char *error_str; u64 status, count; int ret, ext_error_code; - ret = aca_bank_info_decode(bank, &report->info); + ret = aca_bank_info_decode(bank, &info); if (ret) return ret; @@ -1056,18 +1057,21 @@ static int xgmi_v6_4_0_aca_bank_generate_report(struct aca_handle *handle, struc dev_info(adev->dev, "%s detected\n", error_str); count = ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]); + switch (type) { case ACA_SMU_TYPE_UE: - report->count[ACA_ERROR_TYPE_UE] = ext_error_code == 0 ? count : 0ULL; + count = ext_error_code == 0 ? count : 0ULL; + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, count); break; case ACA_SMU_TYPE_CE: - report->count[ACA_ERROR_TYPE_CE] = ext_error_code == 6 ? count : 0ULL; + count = ext_error_code == 6 ? count : 0ULL; + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, count); break; default: return -EINVAL; } - return 0; + return ret; } static const struct aca_bank_ops xgmi_v6_4_0_aca_bank_ops = { -- cgit v1.2.3 From 87428b4054379363e9b3b57128b6c8c2828c0202 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 22 Feb 2024 10:11:31 +0800 Subject: drm/amdgpu: refine aca error cache for sdma v4.4.2 refine aca error cache for sdma v4.4.2 Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 9aa5c190950e..02fc11b98b20 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -2193,27 +2193,29 @@ static int sdma_v4_4_2_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, struct aca_bank_report *report, void *data) { + struct aca_bank_info info; u64 misc0; int ret; - ret = aca_bank_info_decode(bank, &report->info); + ret = aca_bank_info_decode(bank, &info); if (ret) return ret; misc0 = bank->regs[ACA_REG_IDX_MISC0]; - switch (type) { case ACA_SMU_TYPE_UE: - report->count[ACA_ERROR_TYPE_UE] = 1ULL; + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, + 1ULL); break; case ACA_SMU_TYPE_CE: - report->count[ACA_ERROR_TYPE_CE] = ACA_REG__MISC0__ERRCNT(misc0); + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, + ACA_REG__MISC0__ERRCNT(misc0)); break; default: return -EINVAL; } - return 0; + return ret; } /* CODE_SDMA0 - CODE_SDMA4, reference to smu driver if header file */ -- cgit v1.2.3 From 69bf42fbb2270441674b38f1c29fe5c59afd06b8 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 22 Feb 2024 10:11:46 +0800 Subject: drm/amdgpu: refine aca error cache for umc v12.0 refine aca error cache for umc v12.0 Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 7b841431344f..b512a4b38067 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -508,10 +508,11 @@ static int umc_v12_0_aca_bank_generate_report(struct aca_handle *handle, struct struct aca_bank_report *report, void *data) { struct amdgpu_device *adev = handle->adev; + struct aca_bank_info info; u64 status; int ret; - ret = aca_bank_info_decode(bank, &report->info); + ret = aca_bank_info_decode(bank, &info); if (ret) return ret; @@ -519,12 +520,18 @@ static int umc_v12_0_aca_bank_generate_report(struct aca_handle *handle, struct switch (type) { case ACA_SMU_TYPE_UE: if (umc_v12_0_is_uncorrectable_error(adev, status)) { - report->count[ACA_ERROR_TYPE_UE] = 1; + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, + 1ULL); + if (ret) + return ret; } break; case ACA_SMU_TYPE_CE: if (umc_v12_0_is_correctable_error(adev, status)) { - report->count[ACA_ERROR_TYPE_CE] = 1; + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, + 1ULL); + if (ret) + return ret; } break; default: -- cgit v1.2.3 From f26c4e3fc999fd90d65ef5bc517a0323b27c43fb Mon Sep 17 00:00:00 2001 From: Candice Li Date: Mon, 18 Mar 2024 11:23:39 +0800 Subject: drm/amdgpu: Update setting EEPROM table version Use helper function instead of umc callback to set EEPROM table version. Signed-off-by: Candice Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 22 +++++++++++++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 2 -- drivers/gpu/drm/amd/amdgpu/umc_v8_10.c | 6 ------ 3 files changed, 17 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index b12808c0c331..06a62a8a992e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -404,6 +404,22 @@ static int amdgpu_ras_eeprom_correct_header_tag( return res; } +static void amdgpu_ras_set_eeprom_table_version(struct amdgpu_ras_eeprom_control *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; + + switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) { + case IP_VERSION(8, 10, 0): + case IP_VERSION(12, 0, 0): + hdr->version = RAS_TABLE_VER_V2_1; + return; + default: + hdr->version = RAS_TABLE_VER_V1; + return; + } +} + /** * amdgpu_ras_eeprom_reset_table -- Reset the RAS EEPROM table * @control: pointer to control structure @@ -423,11 +439,7 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) mutex_lock(&control->ras_tbl_mutex); hdr->header = RAS_TABLE_HDR_VAL; - if (adev->umc.ras && - adev->umc.ras->set_eeprom_table_version) - adev->umc.ras->set_eeprom_table_version(hdr); - else - hdr->version = RAS_TABLE_VER_V1; + amdgpu_ras_set_eeprom_table_version(control); if (hdr->version == RAS_TABLE_VER_V2_1) { hdr->first_rec_offset = RAS_RECORD_START_V2_1; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 26d2ae498daf..5954e839d580 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -66,8 +66,6 @@ struct amdgpu_umc_ras { void *ras_error_status); bool (*check_ecc_err_status)(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, void *ras_error_status); - /* support different eeprom table version for different asic */ - void (*set_eeprom_table_version)(struct amdgpu_ras_eeprom_table_header *hdr); }; struct amdgpu_umc_funcs { diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c index c4c77257710c..a32f87992f20 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c @@ -442,11 +442,6 @@ static void umc_v8_10_ecc_info_query_ras_error_address(struct amdgpu_device *ade umc_v8_10_ecc_info_query_error_address, ras_error_status); } -static void umc_v8_10_set_eeprom_table_version(struct amdgpu_ras_eeprom_table_header *hdr) -{ - hdr->version = RAS_TABLE_VER_V2_1; -} - const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = { .query_ras_error_count = umc_v8_10_query_ras_error_count, .query_ras_error_address = umc_v8_10_query_ras_error_address, @@ -460,5 +455,4 @@ struct amdgpu_umc_ras umc_v8_10_ras = { .query_ras_poison_mode = umc_v8_10_query_ras_poison_mode, .ecc_info_query_ras_error_count = umc_v8_10_ecc_info_query_ras_error_count, .ecc_info_query_ras_error_address = umc_v8_10_ecc_info_query_ras_error_address, - .set_eeprom_table_version = umc_v8_10_set_eeprom_table_version, }; -- cgit v1.2.3 From e3d4de8d8b24933a9588019bf53891bca520b226 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 22 Feb 2024 14:03:14 +0800 Subject: drm/amdgpu: retire unused aca_bank_report data structure retire unused aca_bank_report data structure. Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 17 +++++++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h | 8 +------- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 7 +++---- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 7 +++---- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 6 +++--- 7 files changed, 24 insertions(+), 35 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 7cbff74c0f2f..a0b7f0d0c513 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -297,29 +297,26 @@ int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_in return 0; } -static int aca_generate_bank_report(struct aca_handle *handle, struct aca_bank *bank, - enum aca_smu_type type, struct aca_bank_report *report) +static int aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type) { const struct aca_bank_ops *bank_ops = handle->bank_ops; - if (!bank || !report) + if (!bank) return -EINVAL; - if (!bank_ops->aca_bank_generate_report) + if (!bank_ops->aca_bank_parser) return -EOPNOTSUPP; - memset(report, 0, sizeof(*report)); - return bank_ops->aca_bank_generate_report(handle, bank, type, - report, handle->data); + return bank_ops->aca_bank_parser(handle, bank, type, + handle->data); } static int handler_aca_log_bank_error(struct aca_handle *handle, struct aca_bank *bank, - enum aca_smu_type smu_type, void *data) + enum aca_smu_type type, void *data) { - struct aca_bank_report report; int ret; - ret = aca_generate_bank_report(handle, bank, smu_type, &report); + ret = aca_bank_parser(handle, bank, type); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h index 470efefcde10..6eb4c0341278 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h @@ -122,11 +122,6 @@ struct aca_bank_info { int mcatype; }; -struct aca_bank_report { - struct aca_bank_info info; - u64 count[ACA_ERROR_TYPE_COUNT]; -}; - struct aca_bank_error { struct list_head node; struct aca_bank_info info; @@ -164,8 +159,7 @@ struct aca_handle { }; struct aca_bank_ops { - int (*aca_bank_generate_report)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, - struct aca_bank_report *report, void *data); + int (*aca_bank_parser)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data); bool (*aca_bank_is_valid)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data); }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 3e7f0199926e..523827e8e7a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -1035,8 +1035,8 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev) return 0; } -static int xgmi_v6_4_0_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, - struct aca_bank_report *report, void *data) +static int xgmi_v6_4_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) { struct amdgpu_device *adev = handle->adev; struct aca_bank_info info; @@ -1075,7 +1075,7 @@ static int xgmi_v6_4_0_aca_bank_generate_report(struct aca_handle *handle, struc } static const struct aca_bank_ops xgmi_v6_4_0_aca_bank_ops = { - .aca_bank_generate_report = xgmi_v6_4_0_aca_bank_generate_report, + .aca_bank_parser = xgmi_v6_4_0_aca_bank_parser, }; static const struct aca_info xgmi_v6_4_0_aca_info = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index cc7cccf0b2b2..fc33354f1d3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -680,9 +680,9 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { .ih_node_to_logical_xcc = &gfx_v9_4_3_ih_to_xcc_inst, }; -static int gfx_v9_4_3_aca_bank_generate_report(struct aca_handle *handle, - struct aca_bank *bank, enum aca_smu_type type, - struct aca_bank_report *report, void *data) +static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle, + struct aca_bank *bank, enum aca_smu_type type, + void *data) { struct aca_bank_info info; u64 misc0; @@ -736,7 +736,7 @@ static bool gfx_v9_4_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_b } static const struct aca_bank_ops gfx_v9_4_3_aca_bank_ops = { - .aca_bank_generate_report = gfx_v9_4_3_aca_bank_generate_report, + .aca_bank_parser = gfx_v9_4_3_aca_bank_parser, .aca_bank_is_valid = gfx_v9_4_3_aca_bank_is_valid, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index 50113cadaa0d..7a1ff298417a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -721,9 +721,8 @@ static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = { .reset_ras_error_count = mmhub_v1_8_reset_ras_error_count, }; -static int mmhub_v1_8_aca_bank_generate_report(struct aca_handle *handle, - struct aca_bank *bank, enum aca_smu_type type, - struct aca_bank_report *report, void *data) +static int mmhub_v1_8_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) { struct aca_bank_info info; u64 misc0; @@ -780,7 +779,7 @@ static bool mmhub_v1_8_aca_bank_is_valid(struct aca_handle *handle, struct aca_b } static const struct aca_bank_ops mmhub_v1_8_aca_bank_ops = { - .aca_bank_generate_report = mmhub_v1_8_aca_bank_generate_report, + .aca_bank_parser = mmhub_v1_8_aca_bank_parser, .aca_bank_is_valid = mmhub_v1_8_aca_bank_is_valid, }; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 02fc11b98b20..71c2f50530cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -2189,9 +2189,8 @@ static const struct amdgpu_ras_block_hw_ops sdma_v4_4_2_ras_hw_ops = { .reset_ras_error_count = sdma_v4_4_2_reset_ras_error_count, }; -static int sdma_v4_4_2_aca_bank_generate_report(struct aca_handle *handle, - struct aca_bank *bank, enum aca_smu_type type, - struct aca_bank_report *report, void *data) +static int sdma_v4_4_2_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) { struct aca_bank_info info; u64 misc0; @@ -2241,7 +2240,7 @@ static bool sdma_v4_4_2_aca_bank_is_valid(struct aca_handle *handle, struct aca_ } static const struct aca_bank_ops sdma_v4_4_2_aca_bank_ops = { - .aca_bank_generate_report = sdma_v4_4_2_aca_bank_generate_report, + .aca_bank_parser = sdma_v4_4_2_aca_bank_parser, .aca_bank_is_valid = sdma_v4_4_2_aca_bank_is_valid, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index b512a4b38067..e2e18ccbbc62 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -504,8 +504,8 @@ const struct amdgpu_ras_block_hw_ops umc_v12_0_ras_hw_ops = { .query_ras_error_address = umc_v12_0_query_ras_error_address, }; -static int umc_v12_0_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, - struct aca_bank_report *report, void *data) +static int umc_v12_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) { struct amdgpu_device *adev = handle->adev; struct aca_bank_info info; @@ -542,7 +542,7 @@ static int umc_v12_0_aca_bank_generate_report(struct aca_handle *handle, struct } static const struct aca_bank_ops umc_v12_0_aca_bank_ops = { - .aca_bank_generate_report = umc_v12_0_aca_bank_generate_report, + .aca_bank_parser = umc_v12_0_aca_bank_parser, }; const struct aca_info umc_v12_0_aca_info = { -- cgit v1.2.3 From 2fc46e0b2fe8802c98a68d5c903e0109e1d4a49c Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Tue, 12 Mar 2024 11:30:09 +0800 Subject: drm/amdgpu: make reset method configurable for RAS poison Each RAS block has different requirement for gpu reset in poison consumption handling. Add support for mmhub RAS poison consumption handling. v2: remove the mmhub poison support for kfd int v10. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 16 +++++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 4 ++-- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c | 13 ++++++++----- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c | 9 +++++---- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 20 +++++++++++++++----- 8 files changed, 41 insertions(+), 29 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 8ee18c2c082a..3b4591f554f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -748,7 +748,7 @@ bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev) } void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, - enum amdgpu_ras_block block, bool reset) + enum amdgpu_ras_block block, uint32_t reset) { amdgpu_umc_poison_handler(adev, block, reset); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 6b67f0025966..c51954c9052e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -336,7 +336,7 @@ void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev); int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev, struct tile_config *config); void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, - enum amdgpu_ras_block block, bool reset); + enum amdgpu_ras_block block, uint32_t reset); bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev); bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem); void amdgpu_amdkfd_block_mmu_notifications(void *p); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 26662c76b293..3c6d532824f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2051,7 +2051,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * } } - amdgpu_umc_poison_handler(adev, obj->head.block, false); + amdgpu_umc_poison_handler(adev, obj->head.block, 0); if (block_obj->hw_ops && block_obj->hw_ops->handle_poison_consumption) poison_stat = block_obj->hw_ops->handle_poison_consumption(adev); @@ -2704,7 +2704,7 @@ static int amdgpu_ras_page_retirement_thread(void *param) atomic_dec(&con->page_retirement_req_cnt); amdgpu_umc_bad_page_polling_timeout(adev, - false, MAX_UMC_POISON_POLLING_TIME_ASYNC); + 0, MAX_UMC_POISON_POLLING_TIME_ASYNC); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 20436f81856a..f486510fc94c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -177,7 +177,7 @@ static void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, void *ras_error_status, struct amdgpu_iv_entry *entry, - bool reset) + uint32_t reset) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -186,9 +186,7 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, amdgpu_umc_handle_bad_pages(adev, ras_error_status); if (err_data->ue_count && reset) { - /* use mode-2 reset for poison consumption */ - if (!entry) - con->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET; + con->gpu_reset_flags |= reset; amdgpu_ras_reset_gpu(adev); } @@ -196,7 +194,7 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, } int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev, - bool reset, uint32_t timeout_ms) + uint32_t reset, uint32_t timeout_ms) { struct ras_err_data err_data; struct ras_common_if head = { @@ -238,8 +236,7 @@ int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev, if (reset) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - /* use mode-2 reset for poison consumption */ - con->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET; + con->gpu_reset_flags |= reset; amdgpu_ras_reset_gpu(adev); } @@ -247,7 +244,7 @@ int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev, } int amdgpu_umc_poison_handler(struct amdgpu_device *adev, - enum amdgpu_ras_block block, bool reset) + enum amdgpu_ras_block block, uint32_t reset) { int ret = AMDGPU_RAS_SUCCESS; @@ -311,7 +308,8 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, void *ras_error_status, struct amdgpu_iv_entry *entry) { - return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry, true); + return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry, + AMDGPU_RAS_GPU_RESET_MODE1_RESET); } int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 5954e839d580..563b0249247e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -101,7 +101,7 @@ struct amdgpu_umc { int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev); int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); int amdgpu_umc_poison_handler(struct amdgpu_device *adev, - enum amdgpu_ras_block block, bool reset); + enum amdgpu_ras_block block, uint32_t reset); int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); @@ -121,5 +121,5 @@ int amdgpu_umc_loop_channels(struct amdgpu_device *adev, umc_func func, void *data); int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev, - bool reset, uint32_t timeout_ms); + uint32_t reset, uint32_t timeout_ms); #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c index 650da18b0d87..740f89aafbc0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c @@ -134,6 +134,7 @@ static void event_interrupt_poison_consumption(struct kfd_node *dev, { enum amdgpu_ras_block block = 0; int old_poison, ret = -EINVAL; + uint32_t reset = 0; struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); if (!p) @@ -153,6 +154,8 @@ static void event_interrupt_poison_consumption(struct kfd_node *dev, case SOC15_IH_CLIENTID_UTCL2: ret = kfd_dqm_evict_pasid(dev->dqm, pasid); block = AMDGPU_RAS_BLOCK__GFX; + if (ret) + reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; break; case SOC15_IH_CLIENTID_SDMA0: case SOC15_IH_CLIENTID_SDMA1: @@ -160,6 +163,7 @@ static void event_interrupt_poison_consumption(struct kfd_node *dev, case SOC15_IH_CLIENTID_SDMA3: case SOC15_IH_CLIENTID_SDMA4: block = AMDGPU_RAS_BLOCK__SDMA; + reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; break; default: break; @@ -170,17 +174,16 @@ static void event_interrupt_poison_consumption(struct kfd_node *dev, /* resetting queue passes, do page retirement without gpu reset * resetting queue fails, fallback to gpu reset solution */ - if (!ret) { + if (!ret) dev_warn(dev->adev->dev, "RAS poison consumption, unmap queue flow succeeded: client id %d\n", client_id); - amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, false); - } else { + else dev_warn(dev->adev->dev, "RAS poison consumption, fall back to gpu reset flow: client id %d\n", client_id); - amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, true); - } + + amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, reset); } static bool event_interrupt_isr_v10(struct kfd_node *dev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c index 7e2859736a55..d3d6b5c180b3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c @@ -193,6 +193,7 @@ static void event_interrupt_poison_consumption_v11(struct kfd_node *dev, { enum amdgpu_ras_block block = 0; int ret = -EINVAL; + uint32_t reset = 0; struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); if (!p) @@ -212,10 +213,13 @@ static void event_interrupt_poison_consumption_v11(struct kfd_node *dev, if (dev->dqm->ops.reset_queues) ret = dev->dqm->ops.reset_queues(dev->dqm, pasid); block = AMDGPU_RAS_BLOCK__GFX; + if (ret) + reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; break; case SOC21_INTSRC_SDMA_ECC: default: block = AMDGPU_RAS_BLOCK__GFX; + reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; break; } @@ -223,10 +227,7 @@ static void event_interrupt_poison_consumption_v11(struct kfd_node *dev, /* resetting queue passes, do page retirement without gpu reset resetting queue fails, fallback to gpu reset solution */ - if (!ret) - amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, false); - else - amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, true); + amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, reset); } static bool event_interrupt_isr_v11(struct kfd_node *dev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 11641f4645e6..2a37ab7a7150 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -145,6 +145,7 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev, { enum amdgpu_ras_block block = 0; int old_poison, ret = -EINVAL; + uint32_t reset = 0; struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); if (!p) @@ -164,6 +165,15 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev, case SOC15_IH_CLIENTID_UTCL2: ret = kfd_dqm_evict_pasid(dev->dqm, pasid); block = AMDGPU_RAS_BLOCK__GFX; + if (ret) + reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; + break; + case SOC15_IH_CLIENTID_VMC: + case SOC15_IH_CLIENTID_VMC1: + ret = kfd_dqm_evict_pasid(dev->dqm, pasid); + block = AMDGPU_RAS_BLOCK__MMHUB; + if (ret) + reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; break; case SOC15_IH_CLIENTID_SDMA0: case SOC15_IH_CLIENTID_SDMA1: @@ -171,6 +181,7 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev, case SOC15_IH_CLIENTID_SDMA3: case SOC15_IH_CLIENTID_SDMA4: block = AMDGPU_RAS_BLOCK__SDMA; + reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; break; default: break; @@ -181,17 +192,16 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev, /* resetting queue passes, do page retirement without gpu reset * resetting queue fails, fallback to gpu reset solution */ - if (!ret) { + if (!ret) dev_warn(dev->adev->dev, "RAS poison consumption, unmap queue flow succeeded: client id %d\n", client_id); - amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, false); - } else { + else dev_warn(dev->adev->dev, "RAS poison consumption, fall back to gpu reset flow: client id %d\n", client_id); - amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, true); - } + + amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, reset); } static bool context_id_expected(struct kfd_dev *dev) -- cgit v1.2.3 From 865d3397630b806f2df156e2143cdfa416db1e01 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Wed, 21 Feb 2024 15:07:30 +0800 Subject: drm/amdgpu: add aca deferred error type support add aca deferred error type support Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index a0b7f0d0c513..611c1751577a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -434,6 +434,8 @@ static int aca_log_aca_error_data(struct aca_bank_error *bank_error, enum aca_er amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, count); break; case ACA_ERROR_TYPE_DEFERRED: + amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, NULL, count); + break; default: break; } @@ -474,6 +476,7 @@ static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *h smu_type = ACA_SMU_TYPE_UE; break; case ACA_ERROR_TYPE_CE: + case ACA_ERROR_TYPE_DEFERRED: smu_type = ACA_SMU_TYPE_CE; break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 3c6d532824f6..2bc02b809246 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1291,8 +1291,8 @@ ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *a if (amdgpu_ras_query_error_status(obj->adev, &info)) return -EINVAL; - return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count, - "ce", info.ce_count); + return sysfs_emit(buf, "%s: %lu\n%s: %lu\n%s: %lu\n", "ue", info.ue_count, + "ce", info.ce_count, "de", info.ue_count); } static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev, @@ -1341,6 +1341,10 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev, ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_CE, err_data); if (ret) return ret; + + ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_DEFERRED, err_data); + if (ret) + return ret; } else { /* FIXME: add code to check return value later */ amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_UE, err_data, qctx); -- cgit v1.2.3 From b93d759f540a3d3145b985bbbdbeb98b22862df0 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Wed, 21 Feb 2024 14:36:13 +0800 Subject: drm/amdgpu: add umc v12.0.0 deferred error support add umc v12.0.0 deferred error support. Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 37 ++++++++++++---------------------- 1 file changed, 13 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index e2e18ccbbc62..d29ea2fde025 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -509,36 +509,25 @@ static int umc_v12_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank { struct amdgpu_device *adev = handle->adev; struct aca_bank_info info; + enum aca_error_type err_type; u64 status; int ret; + status = bank->regs[ACA_REG_IDX_STATUS]; + if (umc_v12_0_is_deferred_error(adev, status)) + err_type = ACA_ERROR_TYPE_DEFERRED; + else if (umc_v12_0_is_uncorrectable_error(adev, status)) + err_type = ACA_ERROR_TYPE_UE; + else if (umc_v12_0_is_correctable_error(adev, status)) + err_type = ACA_ERROR_TYPE_CE; + else + return 0; + ret = aca_bank_info_decode(bank, &info); if (ret) return ret; - status = bank->regs[ACA_REG_IDX_STATUS]; - switch (type) { - case ACA_SMU_TYPE_UE: - if (umc_v12_0_is_uncorrectable_error(adev, status)) { - ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, - 1ULL); - if (ret) - return ret; - } - break; - case ACA_SMU_TYPE_CE: - if (umc_v12_0_is_correctable_error(adev, status)) { - ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, - 1ULL); - if (ret) - return ret; - } - break; - default: - return -EINVAL; - } - - return 0; + return aca_error_cache_log_bank_error(handle, &info, err_type, 1ULL); } static const struct aca_bank_ops umc_v12_0_aca_bank_ops = { @@ -547,7 +536,7 @@ static const struct aca_bank_ops umc_v12_0_aca_bank_ops = { const struct aca_info umc_v12_0_aca_info = { .hwip = ACA_HWIP_TYPE_UMC, - .mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK, + .mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK | ACA_ERROR_DEFERRED_MASK, .bank_ops = &umc_v12_0_aca_bank_ops, }; -- cgit v1.2.3 From 2d93151de8908b9f7db954297868fe02bc07041d Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Sun, 17 Mar 2024 18:34:03 +0800 Subject: drm/amdgpu: Add smuio v14_0_2 ip block support Add smuio v14_0_2 ip block support Signed-off-by: Hawking Zhang Reviewed-by: Likun Gao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 3 ++- drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c | 41 ++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h | 30 ++++++++++++++++++++++ 3 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c create mode 100644 drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index f24f11ac3e92..535e3936cfe0 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -248,7 +248,8 @@ amdgpu-y += \ smuio_v11_0_6.o \ smuio_v13_0.o \ smuio_v13_0_3.o \ - smuio_v13_0_6.o + smuio_v13_0_6.o \ + smuio_v14_0_2.o # add reset block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c new file mode 100644 index 000000000000..7b7cca220b26 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c @@ -0,0 +1,41 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "smuio_v14_0_2.h" +#include "smuio/smuio_14_0_2_offset.h" +#include "smuio/smuio_14_0_2_sh_mask.h" + +static u32 smuio_v14_0_2_get_rom_index_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(SMUIO, 0, regROM_INDEX); +} + +static u32 smuio_v14_0_2_get_rom_data_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(SMUIO, 0, regROM_DATA); +} + +const struct amdgpu_smuio_funcs smuio_v14_0_2_funcs = { + .get_rom_index_offset = smuio_v14_0_2_get_rom_index_offset, + .get_rom_data_offset = smuio_v14_0_2_get_rom_data_offset, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h new file mode 100644 index 000000000000..6e617f832d90 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h @@ -0,0 +1,30 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __SMUIO_V14_0_2_H__ +#define __SMUIO_V14_0_2_H__ + +#include "soc15_common.h" + +extern const struct amdgpu_smuio_funcs smuio_v14_0_2_funcs; + +#endif /* __SMUIO_V14_0_2_H__ */ -- cgit v1.2.3 From d80e44a34e25a33a15c510a74575377f66a24b1e Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Sun, 17 Mar 2024 19:07:11 +0800 Subject: drm/amdgpu: Add smuio callback to get gpu clk counter Add smuio callback to get gpu clk counter Signed-off-by: Hawking Zhang Reviewed-by: Likun Gao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h | 1 + drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h index ff4435181055..ec9d12f85f39 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h @@ -44,6 +44,7 @@ struct amdgpu_smuio_funcs { u32 (*get_socket_id)(struct amdgpu_device *adev); enum amdgpu_pkg_type (*get_pkg_type)(struct amdgpu_device *adev); bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev); + u64 (*get_gpu_clock_counter)(struct amdgpu_device *adev); }; struct amdgpu_smuio { diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c index 7b7cca220b26..2a51a70d4846 100644 --- a/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c @@ -24,6 +24,7 @@ #include "smuio_v14_0_2.h" #include "smuio/smuio_14_0_2_offset.h" #include "smuio/smuio_14_0_2_sh_mask.h" +#include static u32 smuio_v14_0_2_get_rom_index_offset(struct amdgpu_device *adev) { @@ -35,7 +36,27 @@ static u32 smuio_v14_0_2_get_rom_data_offset(struct amdgpu_device *adev) return SOC15_REG_OFFSET(SMUIO, 0, regROM_DATA); } +static u64 smuio_v14_0_2_get_gpu_clock_counter(struct amdgpu_device *adev) +{ + u64 clock; + u64 clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after; + + preempt_disable(); + clock_counter_hi_pre = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); + clock_counter_lo = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); + /* the clock counter may be udpated during polling the counters */ + clock_counter_hi_after = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); + if (clock_counter_hi_pre != clock_counter_hi_after) + clock_counter_lo = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); + preempt_enable(); + + clock = clock_counter_lo | (clock_counter_hi_after << 32ULL); + + return clock; +} + const struct amdgpu_smuio_funcs smuio_v14_0_2_funcs = { .get_rom_index_offset = smuio_v14_0_2_get_rom_index_offset, .get_rom_data_offset = smuio_v14_0_2_get_rom_data_offset, + .get_gpu_clock_counter = smuio_v14_0_2_get_gpu_clock_counter, }; -- cgit v1.2.3 From a61e2ce9d42513798df5ada7dd1397636903a005 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Sun, 17 Mar 2024 19:09:11 +0800 Subject: drm/amdgpu: Enable smuio v14_0_2 callbacks Enable smuio v14_0_2_callbacks Signed-off-by: Hawking Zhang Reviewed-by: Likun Gao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 8d9d6b2e68e8..230412fc4d62 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -97,6 +97,7 @@ #include "smuio_v13_0.h" #include "smuio_v13_0_3.h" #include "smuio_v13_0_6.h" +#include "smuio_v14_0_2.h" #include "vcn_v5_0_0.h" #include "jpeg_v5_0_0.h" @@ -2684,6 +2685,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(14, 0, 1): adev->smuio.funcs = &smuio_v13_0_6_funcs; break; + case IP_VERSION(14, 0, 2): + adev->smuio.funcs = &smuio_v14_0_2_funcs; + break; default: break; } -- cgit v1.2.3 From b6c4f90b3819148b066154ac7ae5388232aa1773 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Mon, 18 Mar 2024 11:54:13 +0100 Subject: drm/amdgpu: sync page table freeing with tlb flush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The idea behind this patch is to delay the freeing of PT entry objects until the TLB flush is done. This patch: - Adds a tlb_flush_waitlist in amdgpu_vm_update_params which will keep the objects that need to be freed after tlb_flush. - Adds PT entries in this list in amdgpu_vm_ptes_update after finding the PT entry. - Changes functionality of amdgpu_vm_pt_free_dfs from (df_search + free) to simply freeing of the BOs, also renames it to amdgpu_vm_pt_free_list to reflect this same. - Exports function amdgpu_vm_pt_free_list to be called directly. - Calls amdgpu_vm_pt_free_list directly from amdgpu_vm_update_range. V2: rebase V4: Addressed review comments from Christian - add only locked PTEs entries in TLB flush waitlist. - do not create a separate function for list flush. - do not create a new lock for TLB flush. - there is no need to wait on tlb_flush_fence exclusively. V5: Addressed review comments from Christian - change the amdgpu_vm_pt_free_dfs's functionality to simple freeing of the objects and rename it. - add all the PTE objects in params->tlb_flush_waitlist - let amdgpu_vm_pt_free_root handle the freeing of BOs independently - call amdgpu_vm_pt_free_list directly V6: Rebase V7: Rebase V8: Added a NULL check to fix this backtrace issue: [ 415.351447] BUG: kernel NULL pointer dereference, address: 0000000000000008 [ 415.359245] #PF: supervisor write access in kernel mode [ 415.365081] #PF: error_code(0x0002) - not-present page [ 415.370817] PGD 101259067 P4D 101259067 PUD 10125a067 PMD 0 [ 415.377140] Oops: 0002 [#1] PREEMPT SMP NOPTI [ 415.382004] CPU: 0 PID: 25481 Comm: test_with_MPI.e Tainted: G OE 5.18.2-mi300-build-140423-ubuntu-22.04+ #24 [ 415.394437] Hardware name: AMD Corporation Sh51p/Sh51p, BIOS RMO1001AS 02/21/2024 [ 415.402797] RIP: 0010:amdgpu_vm_ptes_update+0x6fd/0xa10 [amdgpu] [ 415.409648] Code: 4c 89 ff 4d 8d 66 30 e8 f1 ed ff ff 48 85 db 74 42 48 39 5d a0 74 40 48 8b 53 20 48 8b 4b 18 48 8d 43 18 48 8d 75 b0 4c 89 ff <48 > 89 51 08 48 89 0a 49 8b 56 30 48 89 42 08 48 89 53 18 4c 89 63 [ 415.430621] RSP: 0018:ffffc9000401f990 EFLAGS: 00010287 [ 415.436456] RAX: ffff888147bb82f0 RBX: ffff888147bb82d8 RCX: 0000000000000000 [ 415.444426] RDX: 0000000000000000 RSI: ffffc9000401fa30 RDI: ffff888161f80000 [ 415.452397] RBP: ffffc9000401fa80 R08: 0000000000000000 R09: ffffc9000401fa00 [ 415.460368] R10: 00000007f0cc0000 R11: 00000007f0c85000 R12: ffffc9000401fb20 [ 415.468340] R13: 00000007f0d00000 R14: ffffc9000401faf0 R15: ffff888161f80000 [ 415.476312] FS: 00007f132ff89840(0000) GS:ffff889f87c00000(0000) knlGS:0000000000000000 [ 415.485350] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 415.491767] CR2: 0000000000000008 CR3: 0000000161d46003 CR4: 0000000000770ef0 [ 415.499738] PKRU: 55555554 [ 415.502750] Call Trace: [ 415.505482] [ 415.507825] amdgpu_vm_update_range+0x32a/0x880 [amdgpu] [ 415.513869] amdgpu_vm_clear_freed+0x117/0x250 [amdgpu] [ 415.519814] amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu+0x18c/0x250 [amdgpu] [ 415.527729] kfd_ioctl_unmap_memory_from_gpu+0xed/0x340 [amdgpu] [ 415.534551] kfd_ioctl+0x3b6/0x510 [amdgpu] V9: Addressed review comments from Christian - No NULL check reqd for root PT freeing - Free PT list regardless of needs_flush - Move adding BOs in list in a separate function V10: Added Christian's RB V11: squash in list fix Cc: Christian König Cc: Alex Deucher Cc: Felix Kuehling Cc: Rajneesh Bhardwaj Acked-by: Felix Kuehling Acked-by: Rajneesh Bhardwaj Reviewed-by: Christian König Tested-by: Rajneesh Bhardwaj Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 7 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 66 ++++++++++++++++++++----------- 3 files changed, 53 insertions(+), 23 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 317d2d468a6b..8af3f0fd3073 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -988,6 +988,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, params.unlocked = unlocked; params.needs_flush = flush_tlb; params.allow_override = allow_override; + INIT_LIST_HEAD(¶ms.tlb_flush_waitlist); /* Implicitly sync to command submissions in the same VM before * unmapping. Sync to moving fences before mapping. @@ -1078,6 +1079,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, tlb_cb = NULL; } + amdgpu_vm_pt_free_list(adev, ¶ms); + error_free: kfree(tlb_cb); amdgpu_vm_eviction_unlock(vm); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index b0a4fe683352..54d7da396de0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -266,6 +266,11 @@ struct amdgpu_vm_update_params { * to be overridden for NUMA local memory. */ bool allow_override; + + /** + * @tlb_flush_waitlist: temporary storage for BOs until tlb_flush + */ + struct list_head tlb_flush_waitlist; }; struct amdgpu_vm_update_funcs { @@ -547,6 +552,8 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, uint64_t start, uint64_t end, uint64_t dst, uint64_t flags); void amdgpu_vm_pt_free_work(struct work_struct *work); +void amdgpu_vm_pt_free_list(struct amdgpu_device *adev, + struct amdgpu_vm_update_params *params); #if defined(CONFIG_DEBUG_FS) void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index 601df0ce8290..cbe0ae4c8738 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -622,40 +622,58 @@ void amdgpu_vm_pt_free_work(struct work_struct *work) } /** - * amdgpu_vm_pt_free_dfs - free PD/PT levels + * amdgpu_vm_pt_free_list - free PD/PT levels * * @adev: amdgpu device structure - * @vm: amdgpu vm structure - * @start: optional cursor where to start freeing PDs/PTs - * @unlocked: vm resv unlock status + * @params: see amdgpu_vm_update_params definition * - * Free the page directory or page table level and all sub levels. + * Free the page directory objects saved in the flush list */ -static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt_cursor *start, - bool unlocked) +void amdgpu_vm_pt_free_list(struct amdgpu_device *adev, + struct amdgpu_vm_update_params *params) { - struct amdgpu_vm_pt_cursor cursor; - struct amdgpu_vm_bo_base *entry; + struct amdgpu_vm_bo_base *entry, *next; + struct amdgpu_vm *vm = params->vm; + bool unlocked = params->unlocked; + + if (list_empty(¶ms->tlb_flush_waitlist)) + return; if (unlocked) { spin_lock(&vm->status_lock); - for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) - list_move(&entry->vm_status, &vm->pt_freed); - - if (start) - list_move(&start->entry->vm_status, &vm->pt_freed); + list_splice_init(¶ms->tlb_flush_waitlist, &vm->pt_freed); spin_unlock(&vm->status_lock); schedule_work(&vm->pt_free_work); return; } - for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) + list_for_each_entry_safe(entry, next, ¶ms->tlb_flush_waitlist, vm_status) amdgpu_vm_pt_free(entry); +} - if (start) - amdgpu_vm_pt_free(start->entry); +/** + * amdgpu_vm_pt_add_list - add PD/PT level to the flush list + * + * @params: parameters for the update + * @cursor: first PT entry to start DF search from, non NULL + * + * This list will be freed after TLB flush. + */ +static void amdgpu_vm_pt_add_list(struct amdgpu_vm_update_params *params, + struct amdgpu_vm_pt_cursor *cursor) +{ + struct amdgpu_vm_pt_cursor seek; + struct amdgpu_vm_bo_base *entry; + + spin_lock(¶ms->vm->status_lock); + for_each_amdgpu_vm_pt_dfs_safe(params->adev, params->vm, cursor, seek, entry) { + if (entry && entry->bo) + list_move(&entry->vm_status, ¶ms->tlb_flush_waitlist); + } + + /* enter start node now */ + list_move(&cursor->entry->vm_status, ¶ms->tlb_flush_waitlist); + spin_unlock(¶ms->vm->status_lock); } /** @@ -667,7 +685,11 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev, */ void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - amdgpu_vm_pt_free_dfs(adev, vm, NULL, false); + struct amdgpu_vm_pt_cursor cursor; + struct amdgpu_vm_bo_base *entry; + + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) + amdgpu_vm_pt_free(entry); } /** @@ -973,9 +995,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, /* Make sure previous mapping is freed */ if (cursor.entry->bo) { params->needs_flush = true; - amdgpu_vm_pt_free_dfs(adev, params->vm, - &cursor, - params->unlocked); + amdgpu_vm_pt_add_list(params, &cursor); } amdgpu_vm_pt_next(adev, &cursor); } -- cgit v1.2.3 From f7bcfb7a56b2b6165daa9930429ec5bcd7e00e62 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Mon, 26 Feb 2024 16:30:46 +0800 Subject: drm/amdgpu: retrieve umc odecc error count for aca umc v12.0 retrieve umc odecc error count for aca umc v12.0 Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index d29ea2fde025..5f08d3dcf174 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -510,7 +510,8 @@ static int umc_v12_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank struct amdgpu_device *adev = handle->adev; struct aca_bank_info info; enum aca_error_type err_type; - u64 status; + u64 status, count; + u32 ext_error_code; int ret; status = bank->regs[ACA_REG_IDX_STATUS]; @@ -527,7 +528,11 @@ static int umc_v12_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank if (ret) return ret; - return aca_error_cache_log_bank_error(handle, &info, err_type, 1ULL); + ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status); + count = ext_error_code == 0 ? + ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]) : 1ULL; + + return aca_error_cache_log_bank_error(handle, &info, err_type, count); } static const struct aca_bank_ops umc_v12_0_aca_bank_ops = { -- cgit v1.2.3 From bd15bf742f6d869998d331f01496e8ed54bcf237 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Sun, 3 Mar 2024 19:01:23 +0800 Subject: drm/amdgpu: avoid update aca bank multi times during ras isr Because the UE Valid MCA count will only be cleared after reset, in order to avoid repeated counting of the error count, the aca bank is only updated once during ras isr. Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 27 +++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h | 1 + 2 files changed, 28 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 611c1751577a..53ad76f590a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -369,6 +369,26 @@ static int aca_dispatch_banks(struct aca_handle_manager *mgr, struct aca_banks * return 0; } +static bool aca_bank_should_update(struct amdgpu_device *adev, enum aca_smu_type type) +{ + struct amdgpu_aca *aca = &adev->aca; + bool ret = true; + + /* + * Because the UE Valid MCA count will only be cleared after reset, + * in order to avoid repeated counting of the error count, + * the aca bank is only updated once during the gpu recovery stage. + */ + if (type == ACA_SMU_TYPE_UE) { + if (amdgpu_ras_intr_triggered()) + ret = atomic_cmpxchg(&aca->ue_update_flag, 0, 1) == 0; + else + atomic_set(&aca->ue_update_flag, 0); + } + + return ret; +} + static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type, bank_handler_t handler, void *data) { @@ -380,6 +400,9 @@ static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type, if (list_empty(&aca->mgr.list)) return 0; + if (!aca_bank_should_update(adev, type)) + return 0; + ret = aca_smu_get_valid_aca_count(adev, type, &count); if (ret) return ret; @@ -670,6 +693,8 @@ int amdgpu_aca_init(struct amdgpu_device *adev) struct amdgpu_aca *aca = &adev->aca; int ret; + atomic_set(&aca->ue_update_flag, 0); + ret = aca_manager_init(&aca->mgr); if (ret) return ret; @@ -682,6 +707,8 @@ void amdgpu_aca_fini(struct amdgpu_device *adev) struct amdgpu_aca *aca = &adev->aca; aca_manager_fini(&aca->mgr); + + atomic_set(&aca->ue_update_flag, 0); } int amdgpu_aca_reset(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h index 6eb4c0341278..674a5a9da862 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h @@ -175,6 +175,7 @@ struct aca_smu_funcs { struct amdgpu_aca { struct aca_handle_manager mgr; const struct aca_smu_funcs *smu_funcs; + atomic_t ue_update_flag; bool is_enabled; }; -- cgit v1.2.3 From 31fd330b97ba334e00ea6102dfb68565eeb7a23f Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Mon, 18 Mar 2024 09:32:36 +0800 Subject: drm/amdgpu: add ras event id support for ACA add ras event id support for ACA. Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 29 ++++++++++++++++------------- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 11 ++++++----- 3 files changed, 23 insertions(+), 19 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 53ad76f590a1..c3242dbf8355 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -116,20 +116,22 @@ static struct aca_regs_dump { {"CONTROL_MASK", ACA_REG_IDX_CTL_MASK}, }; -static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, struct aca_bank *bank) +static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, struct aca_bank *bank, + struct ras_query_context *qctx) { + u64 event_id = qctx ? qctx->event_id : 0ULL; int i; - dev_info(adev->dev, HW_ERR "Accelerator Check Architecture events logged\n"); + RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n"); /* plus 1 for output format, e.g: ACA[08/08]: xxxx */ for (i = 0; i < ARRAY_SIZE(aca_regs); i++) - dev_info(adev->dev, HW_ERR "ACA[%02d/%02d].%s=0x%016llx\n", - idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]); + RAS_EVENT_LOG(adev, event_id, HW_ERR "ACA[%02d/%02d].%s=0x%016llx\n", + idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]); } static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_type type, int start, int count, - struct aca_banks *banks) + struct aca_banks *banks, struct ras_query_context *qctx) { struct amdgpu_aca *aca = &adev->aca; const struct aca_smu_funcs *smu_funcs = aca->smu_funcs; @@ -165,7 +167,7 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_ bank.type = type; - aca_smu_bank_dump(adev, i, count, &bank); + aca_smu_bank_dump(adev, i, count, &bank, qctx); ret = aca_banks_add_bank(banks, &bank); if (ret) @@ -390,7 +392,7 @@ static bool aca_bank_should_update(struct amdgpu_device *adev, enum aca_smu_type } static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type, - bank_handler_t handler, void *data) + bank_handler_t handler, struct ras_query_context *qctx, void *data) { struct amdgpu_aca *aca = &adev->aca; struct aca_banks banks; @@ -412,7 +414,7 @@ static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type, aca_banks_init(&banks); - ret = aca_smu_get_valid_aca_banks(adev, type, 0, count, &banks); + ret = aca_smu_get_valid_aca_banks(adev, type, 0, count, &banks, qctx); if (ret) goto err_release_banks; @@ -489,7 +491,7 @@ out_unlock: } static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, enum aca_error_type type, - struct ras_err_data *err_data) + struct ras_err_data *err_data, struct ras_query_context *qctx) { enum aca_smu_type smu_type; int ret; @@ -507,7 +509,7 @@ static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *h } /* udpate aca bank to aca source error_cache first */ - ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, NULL); + ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, qctx, NULL); if (ret) return ret; @@ -523,7 +525,7 @@ static bool aca_handle_is_valid(struct aca_handle *handle) } int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, - enum aca_error_type type, void *data) + enum aca_error_type type, void *data, void *qctx) { struct ras_err_data *err_data = (struct ras_err_data *)data; @@ -536,7 +538,8 @@ int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *han if (!(BIT(type) & handle->mask)) return 0; - return __aca_get_error_data(adev, handle, type, err_data); + return __aca_get_error_data(adev, handle, type, err_data, + (struct ras_query_context *)qctx); } static void aca_error_init(struct aca_error *aerr, enum aca_error_type type) @@ -853,7 +856,7 @@ static int aca_dump_show(struct seq_file *m, enum aca_smu_type type) .idx = 0, }; - return aca_banks_update(adev, type, handler_aca_bank_dump, (void *)&context); + return aca_banks_update(adev, type, handler_aca_bank_dump, NULL, (void *)&context); } static int aca_dump_ce_show(struct seq_file *m, void *unused) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h index 674a5a9da862..247968d6a925 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h @@ -198,7 +198,7 @@ int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle, const char *name, const struct aca_info *aca_info, void *data); void amdgpu_aca_remove_handle(struct aca_handle *handle); int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, - enum aca_error_type type, void *data); + enum aca_error_type type, void *data, void *qctx); int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en); void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root); int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 2bc02b809246..b8c7d0bf8fb1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1269,7 +1269,8 @@ int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk) } static int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum aca_error_type type, struct ras_err_data *err_data) + enum aca_error_type type, struct ras_err_data *err_data, + struct ras_query_context *qctx) { struct ras_manager *obj; @@ -1277,7 +1278,7 @@ static int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum amdgpu if (!obj) return -EINVAL; - return amdgpu_aca_get_error_data(adev, &obj->aca_handle, type, err_data); + return amdgpu_aca_get_error_data(adev, &obj->aca_handle, type, err_data, qctx); } ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr, @@ -1334,15 +1335,15 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev, } } else { if (amdgpu_aca_is_enabled(adev)) { - ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_UE, err_data); + ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_UE, err_data, qctx); if (ret) return ret; - ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_CE, err_data); + ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_CE, err_data, qctx); if (ret) return ret; - ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_DEFERRED, err_data); + ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_DEFERRED, err_data, qctx); if (ret) return ret; } else { -- cgit v1.2.3 From 9ddafd1d140441d288d3fc376d2809995ce504b7 Mon Sep 17 00:00:00 2001 From: Peyton Lee Date: Wed, 13 Mar 2024 16:53:49 +0800 Subject: drm/amdgpu/vpe: power on vpe when hw_init To fix mode2 reset failure. Should power on VPE when hw_init. Signed-off-by: Peyton Lee Reviewed-by: Lang Yu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index 7a65a2b128ec..6695481f870f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -396,6 +396,12 @@ static int vpe_hw_init(void *handle) struct amdgpu_vpe *vpe = &adev->vpe; int ret; + /* Power on VPE */ + ret = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, + AMD_PG_STATE_UNGATE); + if (ret) + return ret; + ret = vpe_load_microcode(vpe); if (ret) return ret; -- cgit v1.2.3 From 9022f01b9709331c485703e49b3a8b2633513a92 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 21 Mar 2024 00:53:35 +0530 Subject: drm/amdgpu: refactor code to split devcoredump code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refractor devcoredump code into new files since its functionality is expanded further and better to slit and devcoredump to have its own file. v2: Fix the build failure caught by arm compiler of implicit function declaration with #ifdef v3: squash in fix for implicit declaration error Cc: Ivan Lipski Acked-by: Christian König Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c | 216 +++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h | 47 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 191 -------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h | 16 -- 6 files changed, 265 insertions(+), 208 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 535e3936cfe0..1f6b56ec99f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -81,7 +81,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \ amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \ amdgpu_fw_attestation.o amdgpu_securedisplay.o \ amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \ - amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o + amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c new file mode 100644 index 000000000000..44c5da8aa9ce --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include +#include +#include "amdgpu_dev_coredump.h" + +#ifndef CONFIG_DEV_COREDUMP +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, + struct amdgpu_reset_context *reset_context) +{ +} +#else + +const char *hw_ip_names[MAX_HWIP] = { + [GC_HWIP] = "GC", + [HDP_HWIP] = "HDP", + [SDMA0_HWIP] = "SDMA0", + [SDMA1_HWIP] = "SDMA1", + [SDMA2_HWIP] = "SDMA2", + [SDMA3_HWIP] = "SDMA3", + [SDMA4_HWIP] = "SDMA4", + [SDMA5_HWIP] = "SDMA5", + [SDMA6_HWIP] = "SDMA6", + [SDMA7_HWIP] = "SDMA7", + [LSDMA_HWIP] = "LSDMA", + [MMHUB_HWIP] = "MMHUB", + [ATHUB_HWIP] = "ATHUB", + [NBIO_HWIP] = "NBIO", + [MP0_HWIP] = "MP0", + [MP1_HWIP] = "MP1", + [UVD_HWIP] = "UVD/JPEG/VCN", + [VCN1_HWIP] = "VCN1", + [VCE_HWIP] = "VCE", + [VPE_HWIP] = "VPE", + [DF_HWIP] = "DF", + [DCE_HWIP] = "DCE", + [OSSSYS_HWIP] = "OSSSYS", + [SMUIO_HWIP] = "SMUIO", + [PWR_HWIP] = "PWR", + [NBIF_HWIP] = "NBIF", + [THM_HWIP] = "THM", + [CLK_HWIP] = "CLK", + [UMC_HWIP] = "UMC", + [RSMU_HWIP] = "RSMU", + [XGMI_HWIP] = "XGMI", + [DCI_HWIP] = "DCI", + [PCIE_HWIP] = "PCIE", +}; + +static ssize_t +amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, + void *data, size_t datalen) +{ + struct drm_printer p; + struct amdgpu_coredump_info *coredump = data; + struct drm_print_iterator iter; + struct amdgpu_vm_fault_info *fault_info; + int i, ver; + + iter.data = buffer; + iter.offset = 0; + iter.start = offset; + iter.remain = count; + + p = drm_coredump_printer(&iter); + + drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); + drm_printf(&p, "version: " AMDGPU_COREDUMP_VERSION "\n"); + drm_printf(&p, "kernel: " UTS_RELEASE "\n"); + drm_printf(&p, "module: " KBUILD_MODNAME "\n"); + drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, + coredump->reset_time.tv_nsec); + + if (coredump->reset_task_info.pid) + drm_printf(&p, "process_name: %s PID: %d\n", + coredump->reset_task_info.process_name, + coredump->reset_task_info.pid); + + /* GPU IP's information of the SOC */ + drm_printf(&p, "\nIP Information\n"); + drm_printf(&p, "SOC Family: %d\n", coredump->adev->family); + drm_printf(&p, "SOC Revision id: %d\n", coredump->adev->rev_id); + drm_printf(&p, "SOC External Revision id: %d\n", coredump->adev->external_rev_id); + + for (int i = 1; i < MAX_HWIP; i++) { + for (int j = 0; j < HWIP_MAX_INSTANCE; j++) { + ver = coredump->adev->ip_versions[i][j]; + if (ver) + drm_printf(&p, "HWIP: %s[%d][%d]: v%d.%d.%d.%d.%d\n", + hw_ip_names[i], i, j, + IP_VERSION_MAJ(ver), + IP_VERSION_MIN(ver), + IP_VERSION_REV(ver), + IP_VERSION_VARIANT(ver), + IP_VERSION_SUBREV(ver)); + } + } + + if (coredump->ring) { + drm_printf(&p, "\nRing timed out details\n"); + drm_printf(&p, "IP Type: %d Ring Name: %s\n", + coredump->ring->funcs->type, + coredump->ring->name); + } + + /* Add page fault information */ + fault_info = &coredump->adev->vm_manager.fault_info; + drm_printf(&p, "\n[%s] Page fault observed\n", + fault_info->vmhub ? "mmhub" : "gfxhub"); + drm_printf(&p, "Faulty page starting at address: 0x%016llx\n", fault_info->addr); + drm_printf(&p, "Protection fault status register: 0x%x\n\n", fault_info->status); + + /* Add ring buffer information */ + drm_printf(&p, "Ring buffer information\n"); + for (int i = 0; i < coredump->adev->num_rings; i++) { + int j = 0; + struct amdgpu_ring *ring = coredump->adev->rings[i]; + + drm_printf(&p, "ring name: %s\n", ring->name); + drm_printf(&p, "Rptr: 0x%llx Wptr: 0x%llx RB mask: %x\n", + amdgpu_ring_get_rptr(ring), + amdgpu_ring_get_wptr(ring), + ring->buf_mask); + drm_printf(&p, "Ring size in dwords: %d\n", + ring->ring_size / 4); + drm_printf(&p, "Ring contents\n"); + drm_printf(&p, "Offset \t Value\n"); + + while (j < ring->ring_size) { + drm_printf(&p, "0x%x \t 0x%x\n", j, ring->ring[j / 4]); + j += 4; + } + } + + if (coredump->reset_vram_lost) + drm_printf(&p, "VRAM is lost due to GPU reset!\n"); + if (coredump->adev->reset_info.num_regs) { + drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); + + for (i = 0; i < coredump->adev->reset_info.num_regs; i++) + drm_printf(&p, "0x%08x: 0x%08x\n", + coredump->adev->reset_info.reset_dump_reg_list[i], + coredump->adev->reset_info.reset_dump_reg_value[i]); + } + + return count - iter.remain; +} + +static void amdgpu_devcoredump_free(void *data) +{ + kfree(data); +} + +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, + struct amdgpu_reset_context *reset_context) +{ + struct amdgpu_coredump_info *coredump; + struct drm_device *dev = adev_to_drm(adev); + struct amdgpu_job *job = reset_context->job; + struct drm_sched_job *s_job; + + coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT); + + if (!coredump) { + DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__); + return; + } + + coredump->reset_vram_lost = vram_lost; + + if (reset_context->job && reset_context->job->vm) { + struct amdgpu_task_info *ti; + struct amdgpu_vm *vm = reset_context->job->vm; + + ti = amdgpu_vm_get_task_info_vm(vm); + if (ti) { + coredump->reset_task_info = *ti; + amdgpu_vm_put_task_info(ti); + } + } + + if (job) { + s_job = &job->base; + coredump->ring = to_amdgpu_ring(s_job->sched); + } + + coredump->adev = adev; + + ktime_get_ts64(&coredump->reset_time); + + dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, + amdgpu_devcoredump_read, amdgpu_devcoredump_free); +} +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h new file mode 100644 index 000000000000..52459512cb2b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_DEV_COREDUMP_H__ +#define __AMDGPU_DEV_COREDUMP_H__ + +#include "amdgpu.h" +#include "amdgpu_reset.h" + +#ifdef CONFIG_DEV_COREDUMP + +#define AMDGPU_COREDUMP_VERSION "1" + +struct amdgpu_coredump_info { + struct amdgpu_device *adev; + struct amdgpu_task_info reset_task_info; + struct timespec64 reset_time; + bool reset_vram_lost; + struct amdgpu_ring *ring; +}; +#endif + +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, + struct amdgpu_reset_context *reset_context); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 3204b8f6edeb..f771b2042a43 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -74,6 +74,7 @@ #include "amdgpu_fru_eeprom.h" #include "amdgpu_reset.h" #include "amdgpu_virt.h" +#include "amdgpu_dev_coredump.h" #include #include diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index 3398f2a368d5..ea4873f6ccd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -21,50 +21,11 @@ * */ -#include -#include - #include "amdgpu_reset.h" #include "aldebaran.h" #include "sienna_cichlid.h" #include "smu_v13_0_10.h" -const char *hw_ip_names[MAX_HWIP] = { - [GC_HWIP] = "GC", - [HDP_HWIP] = "HDP", - [SDMA0_HWIP] = "SDMA0", - [SDMA1_HWIP] = "SDMA1", - [SDMA2_HWIP] = "SDMA2", - [SDMA3_HWIP] = "SDMA3", - [SDMA4_HWIP] = "SDMA4", - [SDMA5_HWIP] = "SDMA5", - [SDMA6_HWIP] = "SDMA6", - [SDMA7_HWIP] = "SDMA7", - [LSDMA_HWIP] = "LSDMA", - [MMHUB_HWIP] = "MMHUB", - [ATHUB_HWIP] = "ATHUB", - [NBIO_HWIP] = "NBIO", - [MP0_HWIP] = "MP0", - [MP1_HWIP] = "MP1", - [UVD_HWIP] = "UVD/JPEG/VCN", - [VCN1_HWIP] = "VCN1", - [VCE_HWIP] = "VCE", - [VPE_HWIP] = "VPE", - [DF_HWIP] = "DF", - [DCE_HWIP] = "DCE", - [OSSSYS_HWIP] = "OSSSYS", - [SMUIO_HWIP] = "SMUIO", - [PWR_HWIP] = "PWR", - [NBIF_HWIP] = "NBIF", - [THM_HWIP] = "THM", - [CLK_HWIP] = "CLK", - [UMC_HWIP] = "UMC", - [RSMU_HWIP] = "RSMU", - [XGMI_HWIP] = "XGMI", - [DCI_HWIP] = "DCI", - [PCIE_HWIP] = "PCIE", -}; - int amdgpu_reset_init(struct amdgpu_device *adev) { int ret = 0; @@ -197,155 +158,3 @@ void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain) atomic_set(&reset_domain->in_gpu_reset, 0); up_write(&reset_domain->sem); } - -#ifndef CONFIG_DEV_COREDUMP -void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, - struct amdgpu_reset_context *reset_context) -{ -} -#else -static ssize_t -amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, - void *data, size_t datalen) -{ - struct drm_printer p; - struct amdgpu_coredump_info *coredump = data; - struct drm_print_iterator iter; - struct amdgpu_vm_fault_info *fault_info; - int i, ver; - - iter.data = buffer; - iter.offset = 0; - iter.start = offset; - iter.remain = count; - - p = drm_coredump_printer(&iter); - - drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); - drm_printf(&p, "version: " AMDGPU_COREDUMP_VERSION "\n"); - drm_printf(&p, "kernel: " UTS_RELEASE "\n"); - drm_printf(&p, "module: " KBUILD_MODNAME "\n"); - drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, - coredump->reset_time.tv_nsec); - - if (coredump->reset_task_info.pid) - drm_printf(&p, "process_name: %s PID: %d\n", - coredump->reset_task_info.process_name, - coredump->reset_task_info.pid); - - /* GPU IP's information of the SOC */ - drm_printf(&p, "\nIP Information\n"); - drm_printf(&p, "SOC Family: %d\n", coredump->adev->family); - drm_printf(&p, "SOC Revision id: %d\n", coredump->adev->rev_id); - drm_printf(&p, "SOC External Revision id: %d\n", coredump->adev->external_rev_id); - - for (int i = 1; i < MAX_HWIP; i++) { - for (int j = 0; j < HWIP_MAX_INSTANCE; j++) { - ver = coredump->adev->ip_versions[i][j]; - if (ver) - drm_printf(&p, "HWIP: %s[%d][%d]: v%d.%d.%d.%d.%d\n", - hw_ip_names[i], i, j, - IP_VERSION_MAJ(ver), - IP_VERSION_MIN(ver), - IP_VERSION_REV(ver), - IP_VERSION_VARIANT(ver), - IP_VERSION_SUBREV(ver)); - } - } - - if (coredump->ring) { - drm_printf(&p, "\nRing timed out details\n"); - drm_printf(&p, "IP Type: %d Ring Name: %s\n", - coredump->ring->funcs->type, - coredump->ring->name); - } - - /* Add page fault information */ - fault_info = &coredump->adev->vm_manager.fault_info; - drm_printf(&p, "\n[%s] Page fault observed\n", - fault_info->vmhub ? "mmhub" : "gfxhub"); - drm_printf(&p, "Faulty page starting at address: 0x%016llx\n", fault_info->addr); - drm_printf(&p, "Protection fault status register: 0x%x\n\n", fault_info->status); - - /* Add ring buffer information */ - drm_printf(&p, "Ring buffer information\n"); - for (int i = 0; i < coredump->adev->num_rings; i++) { - int j = 0; - struct amdgpu_ring *ring = coredump->adev->rings[i]; - - drm_printf(&p, "ring name: %s\n", ring->name); - drm_printf(&p, "Rptr: 0x%llx Wptr: 0x%llx RB mask: %x\n", - amdgpu_ring_get_rptr(ring), - amdgpu_ring_get_wptr(ring), - ring->buf_mask); - drm_printf(&p, "Ring size in dwords: %d\n", - ring->ring_size / 4); - drm_printf(&p, "Ring contents\n"); - drm_printf(&p, "Offset \t Value\n"); - - while (j < ring->ring_size) { - drm_printf(&p, "0x%x \t 0x%x\n", j, ring->ring[j/4]); - j += 4; - } - } - - if (coredump->reset_vram_lost) - drm_printf(&p, "VRAM is lost due to GPU reset!\n"); - if (coredump->adev->reset_info.num_regs) { - drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); - - for (i = 0; i < coredump->adev->reset_info.num_regs; i++) - drm_printf(&p, "0x%08x: 0x%08x\n", - coredump->adev->reset_info.reset_dump_reg_list[i], - coredump->adev->reset_info.reset_dump_reg_value[i]); - } - - return count - iter.remain; -} - -static void amdgpu_devcoredump_free(void *data) -{ - kfree(data); -} - -void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, - struct amdgpu_reset_context *reset_context) -{ - struct amdgpu_coredump_info *coredump; - struct drm_device *dev = adev_to_drm(adev); - struct amdgpu_job *job = reset_context->job; - struct drm_sched_job *s_job; - - coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT); - - if (!coredump) { - DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__); - return; - } - - coredump->reset_vram_lost = vram_lost; - - if (reset_context->job && reset_context->job->vm) { - struct amdgpu_task_info *ti; - struct amdgpu_vm *vm = reset_context->job->vm; - - ti = amdgpu_vm_get_task_info_vm(vm); - if (ti) { - coredump->reset_task_info = *ti; - amdgpu_vm_put_task_info(ti); - } - } - - if (job) { - s_job = &job->base; - coredump->ring = to_amdgpu_ring(s_job->sched); - } - - coredump->adev = adev; - - ktime_get_ts64(&coredump->reset_time); - - dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, - amdgpu_devcoredump_read, amdgpu_devcoredump_free); -} -#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index 60522963aaca..66125d43cf21 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -88,19 +88,6 @@ struct amdgpu_reset_domain { atomic_t reset_res; }; -#ifdef CONFIG_DEV_COREDUMP - -#define AMDGPU_COREDUMP_VERSION "1" - -struct amdgpu_coredump_info { - struct amdgpu_device *adev; - struct amdgpu_task_info reset_task_info; - struct timespec64 reset_time; - bool reset_vram_lost; - struct amdgpu_ring *ring; -}; -#endif - int amdgpu_reset_init(struct amdgpu_device *adev); int amdgpu_reset_fini(struct amdgpu_device *adev); @@ -141,9 +128,6 @@ void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain); void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain); -void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, - struct amdgpu_reset_context *reset_context); - #define for_each_handler(i, handler, reset_ctl) \ for (i = 0; (i < AMDGPU_RESET_MAX_HANDLERS) && \ (handler = (*reset_ctl->reset_handlers)[i]); \ -- cgit v1.2.3 From f88a7dd06ab435f8c07dc9b84a003f321c08cd72 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Thu, 21 Mar 2024 15:21:00 +0100 Subject: drm/amdgpu: Add a NULL check for freeing root PT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a NULL check to fix this crash reported during the freeing of root PT entry: BUG: unable to handle page fault for address: ffffc9002d637aa0 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page RIP: 0010:amdgpu_vm_pt_free+0x66/0xe0 [amdgpu] PKRU: 55555554 Call Trace: amdgpu_vm_pt_free_root+0x60/0xa0 [amdgpu] amdgpu_vm_fini+0x2cb/0x5d0 [amdgpu] ? amdgpu_ctx_mgr_entity_fini+0x53/0x1c0 [amdgpu] amdgpu_driver_postclose_kms+0x191/0x2d0 [amdgpu] drm_file_free.part.0+0x1e5/0x260 [drm] Cc: Christian König Cc: Alex Deucher Cc: Felix Kuehling Cc: Rajneesh Bhardwaj Acked-by: Christian König Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index cbe0ae4c8738..7fdd306a48a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -688,8 +688,10 @@ void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm) struct amdgpu_vm_pt_cursor cursor; struct amdgpu_vm_bo_base *entry; - for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) - amdgpu_vm_pt_free(entry); + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) { + if (entry) + amdgpu_vm_pt_free(entry); + } } /** -- cgit v1.2.3 From 8b3495eafb4ddd4e1963834ce9a56c729128d62e Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Wed, 20 Mar 2024 13:57:29 +0800 Subject: drm/amdgpu: add socket id parameter for psp query address cmd And set the socket id. Signed-off-by: Tao Zhou Reviewed-by: Stanley.Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/ta_ras_if.h | 1 + drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 14 +++++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h index 056d4df8fa1f..3ac56a9645eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -146,6 +146,7 @@ struct ta_ras_mca_addr { uint32_t ch_inst; uint32_t umc_inst; uint32_t node_inst; + uint32_t socket_id; }; struct ta_ras_phy_addr { diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 5f08d3dcf174..6a52a9f0dc30 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -268,7 +268,7 @@ static void umc_v12_0_mca_addr_to_pa(struct amdgpu_device *adev, static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, struct ras_err_data *err_data, uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst, - uint32_t node_inst) + uint32_t node_inst, uint32_t socket_id) { uint32_t col, row, row_xor, bank, channel_index; uint64_t soc_pa, retired_page, column; @@ -280,6 +280,7 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, addr_in.ma.ch_inst = ch_inst; addr_in.ma.umc_inst = umc_inst; addr_in.ma.node_inst = node_inst; + addr_in.ma.socket_id = socket_id; if (psp_ras_query_address(&adev->psp, &addr_in, &addr_out)) /* fallback to old path if fail to get pa from psp */ @@ -331,6 +332,7 @@ static int umc_v12_0_query_error_address(struct amdgpu_device *adev, struct ras_err_data *err_data = (struct ras_err_data *)data; uint64_t umc_reg_offset = get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst); + uint32_t socket_id = 0; mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); @@ -357,8 +359,13 @@ static int umc_v12_0_query_error_address(struct amdgpu_device *adev, err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + if (!adev->aid_mask && + adev->smuio.funcs && + adev->smuio.funcs->get_socket_id) + socket_id = adev->smuio.funcs->get_socket_id(adev); + umc_v12_0_convert_error_address(adev, err_data, err_addr, - ch_inst, umc_inst, node_inst); + ch_inst, umc_inst, node_inst, socket_id); } /* clear umc status */ @@ -456,7 +463,8 @@ static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *ade err_data, err_addr, MCA_IPID_LO_2_UMC_CH(InstanceIdLo), MCA_IPID_LO_2_UMC_INST(InstanceIdLo), - err_info->mcm_info.die_id); + err_info->mcm_info.die_id, + err_info->mcm_info.socket_id); } /* Delete error address node from list and free memory */ -- cgit v1.2.3 From 7c2bc34ab926447f1b329a57669adfce76eafc3c Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 20 Mar 2024 14:13:54 +0530 Subject: drm/amdgpu: Fix format character cut-off issues in amdgpu_vcn_early_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reducing the size of ucode_prefix to 25 in the amdgpu_vcn_early_init function. This would ensure that the total number of characters being written into fw_name does not exceed its size of 40. Fixes the below with gcc W=1: drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c: In function ‘amdgpu_vcn_early_init’: drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c:102:66: warning: ‘snprintf’ output may be truncated before the last format character [-Wformat-truncation=] 102 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix); | ^ drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c:102:17: note: ‘snprintf’ output between 12 and 41 bytes into a destination of size 40 102 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c:102:66: warning: ‘snprintf’ output may be truncated before the last format character [-Wformat-truncation=] 102 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix); | ^ drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c:102:17: note: ‘snprintf’ output between 12 and 41 bytes into a destination of size 40 102 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c:105:73: warning: ‘.bin’ directive output may be truncated writing 4 bytes into a region of size between 2 and 31 [-Wformat-truncation=] 105 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_%d.bin", ucode_prefix, i); | ^~~~ drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c:105:25: note: ‘snprintf’ output between 14 and 43 bytes into a destination of size 40 105 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_%d.bin", ucode_prefix, i); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Cc: Alex Deucher Cc: Christian König Suggested-by: Lijo Lazar Signed-off-by: Srinivasan Shanmugam Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 9c514a606a2f..bb85772b1374 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -93,7 +93,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work); int amdgpu_vcn_early_init(struct amdgpu_device *adev) { - char ucode_prefix[30]; + char ucode_prefix[25]; char fw_name[40]; int r, i; -- cgit v1.2.3 From 20fd14460f45a01b9ec63aa7b12e6c3c66e54fa7 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 21 Mar 2024 11:05:55 +0530 Subject: drm/amdgpu: Fix 'fw_name' buffer size to prevent truncations in amdgpu_mes_init_microcode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The snprintf function is used to write a formatted string into fw_name. The format of the string is "amdgpu/%s_mes%s.bin", where %s is replaced by the string in ucode_prefix and the second %s is replaced by either "_2" or "1" depending on the condition pipe == AMDGPU_MES_SCHED_PIPE. The length of the string "amdgpu/%s_mes%s.bin" is 16 characters plus the length of ucode_prefix and the length of the string "_2" or "1". The size of ucode_prefix is 30, so the maximum length of ucode_prefix is 29 characters (since one character is needed for the null terminator). Therefore, the maximum possible length of the string written into fw_name is 16 + 29 + 2 = 47 characters. The size of fw_name is 40, so if the length of the string written into fw_name is more than 39 characters (since one character is needed for the null terminator), it will be truncated by the snprintf function, and thus warnings will be seen. By increasing the size of fw_name to 50, we ensure that fw_name is large enough to hold the maximum possible length of the string, so the snprintf function will not truncate the output. Fixes the below with gcc W=1: drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c: In function ‘amdgpu_mes_init_microcode’: drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:1482:66: warning: ‘%s’ directive output may be truncated writing up to 1 bytes into a region of size between 0 and 29 [-Wformat-truncation=] 1482 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin", | ^~ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:1482:17: note: ‘snprintf’ output between 16 and 46 bytes into a destination of size 40 1482 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1483 | ucode_prefix, | ~~~~~~~~~~~~~ 1484 | pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1"); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:1477:66: warning: ‘%s’ directive output may be truncated writing 1 byte into a region of size between 0 and 29 [-Wformat-truncation=] 1477 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin", | ^~ 1478 | ucode_prefix, 1479 | pipe == AMDGPU_MES_SCHED_PIPE ? "_2" : "1"); | ~~~ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:1477:17: note: ‘snprintf’ output between 17 and 46 bytes into a destination of size 40 1477 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1478 | ucode_prefix, | ~~~~~~~~~~~~~ 1479 | pipe == AMDGPU_MES_SCHED_PIPE ? "_2" : "1"); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:1477:66: warning: ‘%s’ directive output may be truncated writing 2 bytes into a region of size between 0 and 29 [-Wformat-truncation=] 1477 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin", | ^~ 1478 | ucode_prefix, 1479 | pipe == AMDGPU_MES_SCHED_PIPE ? "_2" : "1"); | ~~~~ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:1477:17: note: ‘snprintf’ output between 18 and 47 bytes into a destination of size 40 1477 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1478 | ucode_prefix, | ~~~~~~~~~~~~~ 1479 | pipe == AMDGPU_MES_SCHED_PIPE ? "_2" : "1"); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:1489:62: warning: ‘_mes.bin’ directive output may be truncated writing 8 bytes into a region of size between 4 and 33 [-Wformat-truncation=] 1489 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin", | ^~~~~~~~ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:1489:17: note: ‘snprintf’ output between 16 and 45 bytes into a destination of size 40 1489 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1490 | ucode_prefix); | ~~~~~~~~~~~~~ Cc: Alex Deucher Cc: Christian König Suggested-by: Lijo Lazar Signed-off-by: Srinivasan Shanmugam Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index eb75c524b4a6..e4fcc950e542 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1467,7 +1467,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) const struct mes_firmware_header_v1_0 *mes_hdr; struct amdgpu_firmware_info *info; char ucode_prefix[30]; - char fw_name[40]; + char fw_name[50]; bool need_retry = false; int r; -- cgit v1.2.3 From 927a8a800ebbea623c82ab9cc72b2827c4035aa9 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Fri, 22 Mar 2024 12:18:37 +0530 Subject: drm/amdgpu: Fix truncation in gfx_v10_0_init_microcode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The total size of the fw_name buffer is 8 (for "amdgpu/") + 30 (for ucode_prefix) + 5 (for "_pfp") + 5 (for "_wks") + 5 (for ".bin") = 53 characters. Fixes the below with gcc W=1: drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c: In function ‘gfx_v10_0_early_init’: drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c:3982:58: warning: ‘%s’ directive output may be truncated writing up to 4 bytes into a region of size between 0 and 29 [-Wformat-truncation=] 3982 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", ucode_prefix, wks); | ^~ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c:3982:9: note: ‘snprintf’ output between 16 and 49 bytes into a destination of size 40 3982 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", ucode_prefix, wks); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c:3988:57: warning: ‘%s’ directive output may be truncated writing up to 4 bytes into a region of size between 1 and 30 [-Wformat-truncation=] 3988 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", ucode_prefix, wks); | ^~ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c:3988:9: note: ‘snprintf’ output between 15 and 48 bytes into a destination of size 40 3988 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", ucode_prefix, wks); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c:3994:57: warning: ‘%s’ directive output may be truncated writing up to 4 bytes into a region of size between 1 and 30 [-Wformat-truncation=] 3994 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", ucode_prefix, wks); | ^~ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c:3994:9: note: ‘snprintf’ output between 15 and 48 bytes into a destination of size 40 3994 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", ucode_prefix, wks); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c:4001:62: warning: ‘_rlc.bin’ directive output may be truncated writing 8 bytes into a region of size between 4 and 33 [-Wformat-truncation=] 4001 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); | ^~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c:4001:17: note: ‘snprintf’ output between 16 and 45 bytes into a destination of size 40 4001 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c:4017:58: warning: ‘%s’ directive output may be truncated writing up to 4 bytes into a region of size between 0 and 29 [-Wformat-truncation=] 4017 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", ucode_prefix, wks); | ^~ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c:4017:9: note: ‘snprintf’ output between 16 and 49 bytes into a destination of size 40 4017 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", ucode_prefix, wks); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c:4024:54: warning: ‘_mec2’ directive output may be truncated writing 5 bytes into a region of size between 4 and 33 [-Wformat-truncation=] 4024 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2%s.bin", ucode_prefix, wks); | ^~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c:4024:9: note: ‘snprintf’ output between 17 and 50 bytes into a destination of size 40 4024 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2%s.bin", ucode_prefix, wks); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Cc: Alex Deucher Cc: Christian König Suggested-by: Lijo Lazar Signed-off-by: Srinivasan Shanmugam Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index f90905ef32c7..d524f1a353ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3964,7 +3964,7 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) { - char fw_name[40]; + char fw_name[53]; char ucode_prefix[30]; const char *wks = ""; int err; -- cgit v1.2.3 From 539ff12ee5e423d6a17d746ed5cca0e1b8c38feb Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 21 Mar 2024 16:46:01 +0530 Subject: drm/amdgpu: Fix truncation issues in gfx_v9_0.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The size of fw_name is increased to ensure that it can accommodate the maximum possible size of the string being written into it. Fixes the below with gcc W=1: drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c: In function ‘gfx_v9_0_early_init’: drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c:1255:52: warning: ‘%s’ directive output may be truncated writing up to 29 bytes into a region of size 23 [-Wformat-truncation=] 1255 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); | ^~ ...... 1393 | r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix); | ~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c:1255:9: note: ‘snprintf’ output between 16 and 45 bytes into a destination of size 30 1255 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c:1261:52: warning: ‘%s’ directive output may be truncated writing up to 29 bytes into a region of size 23 [-Wformat-truncation=] 1261 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); | ^~ ...... 1393 | r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix); | ~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c:1261:9: note: ‘snprintf’ output between 15 and 44 bytes into a destination of size 30 1261 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c:1267:52: warning: ‘%s’ directive output may be truncated writing up to 29 bytes into a region of size 23 [-Wformat-truncation=] 1267 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); | ^~ ...... 1393 | r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix); | ~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c:1267:9: note: ‘snprintf’ output between 15 and 44 bytes into a destination of size 30 1267 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c:1303:60: warning: ‘%s’ directive output may be truncated writing up to 29 bytes into a region of size 23 [-Wformat-truncation=] 1303 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name); | ^~ ...... 1398 | r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix); | ~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c:1303:17: note: ‘snprintf’ output between 20 and 49 bytes into a destination of size 30 1303 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c:1309:60: warning: ‘%s’ directive output may be truncated writing up to 29 bytes into a region of size 23 [-Wformat-truncation=] 1309 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name); | ^~ ...... 1398 | r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix); | ~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c:1309:17: note: ‘snprintf’ output between 23 and 52 bytes into a destination of size 30 1309 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c:1311:60: warning: ‘%s’ directive output may be truncated writing up to 29 bytes into a region of size 23 [-Wformat-truncation=] 1311 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); | ^~ ...... 1398 | r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix); | ~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c:1311:17: note: ‘snprintf’ output between 16 and 45 bytes into a destination of size 30 1311 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c:1344:60: warning: ‘%s’ directive output may be truncated writing up to 29 bytes into a region of size 23 [-Wformat-truncation=] 1344 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec.bin", chip_name); | ^~ ...... 1402 | r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix); | ~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c:1344:17: note: ‘snprintf’ output between 20 and 49 bytes into a destination of size 30 1344 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec.bin", chip_name); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c:1346:60: warning: ‘%s’ directive output may be truncated writing up to 29 bytes into a region of size 23 [-Wformat-truncation=] 1346 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); | ^~ ...... 1402 | r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix); | ~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c:1346:17: note: ‘snprintf’ output between 16 and 45 bytes into a destination of size 30 1346 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c:1356:68: warning: ‘%s’ directive output may be truncated writing up to 29 bytes into a region of size 23 [-Wformat-truncation=] 1356 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec2.bin", chip_name); | ^~ ...... 1402 | r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix); | ~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c:1356:25: note: ‘snprintf’ output between 21 and 50 bytes into a destination of size 30 1356 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec2.bin", chip_name); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c:1358:68: warning: ‘%s’ directive output may be truncated writing up to 29 bytes into a region of size 23 [-Wformat-truncation=] 1358 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); | ^~ ...... 1402 | r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix); | ~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c:1358:25: note: ‘snprintf’ output between 17 and 46 bytes into a destination of size 30 1358 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Cc: Alex Deucher Cc: Christian König Suggested-by: Lijo Lazar Signed-off-by: Srinivasan Shanmugam Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 6f97a6d0e6d0..71b555993b7a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1249,7 +1249,7 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, char *chip_name) { - char fw_name[30]; + char fw_name[50]; int err; snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); @@ -1282,7 +1282,7 @@ out: static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, char *chip_name) { - char fw_name[30]; + char fw_name[53]; int err; const struct rlc_firmware_header_v2_0 *rlc_hdr; uint16_t version_major; @@ -1337,7 +1337,7 @@ static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev) static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, char *chip_name) { - char fw_name[30]; + char fw_name[50]; int err; if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) -- cgit v1.2.3 From 8e4617c25d53e1b40472bb802898a773832cc5d4 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Thu, 21 Mar 2024 11:14:12 +0800 Subject: drm/amdgpu: simplify convert_error_address interface for UMC v12 Replace separate parameters with struct ta_ras_query_address_input. Signed-off-by: Tao Zhou Reviewed-by: Stanley.Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 57 ++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 27 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 6a52a9f0dc30..f46a176f9b55 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -266,26 +266,19 @@ static void umc_v12_0_mca_addr_to_pa(struct amdgpu_device *adev, } static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, - struct ras_err_data *err_data, uint64_t err_addr, - uint32_t ch_inst, uint32_t umc_inst, - uint32_t node_inst, uint32_t socket_id) + struct ras_err_data *err_data, + struct ta_ras_query_address_input *addr_in) { uint32_t col, row, row_xor, bank, channel_index; - uint64_t soc_pa, retired_page, column; - struct ta_ras_query_address_input addr_in; + uint64_t soc_pa, retired_page, column, err_addr; struct ta_ras_query_address_output addr_out; - addr_in.addr_type = TA_RAS_MCA_TO_PA; - addr_in.ma.err_addr = err_addr; - addr_in.ma.ch_inst = ch_inst; - addr_in.ma.umc_inst = umc_inst; - addr_in.ma.node_inst = node_inst; - addr_in.ma.socket_id = socket_id; - - if (psp_ras_query_address(&adev->psp, &addr_in, &addr_out)) + err_addr = addr_in->ma.err_addr; + addr_in->addr_type = TA_RAS_MCA_TO_PA; + if (psp_ras_query_address(&adev->psp, addr_in, &addr_out)) /* fallback to old path if fail to get pa from psp */ - umc_v12_0_mca_addr_to_pa(adev, err_addr, ch_inst, umc_inst, - node_inst, &addr_out); + umc_v12_0_mca_addr_to_pa(adev, err_addr, addr_in->ma.ch_inst, + addr_in->ma.umc_inst, addr_in->ma.node_inst, &addr_out); soc_pa = addr_out.pa.pa; bank = addr_out.pa.bank; @@ -310,7 +303,7 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", retired_page, row, col, bank, channel_index); amdgpu_umc_fill_error_record(err_data, err_addr, - retired_page, channel_index, umc_inst); + retired_page, channel_index, addr_in->ma.umc_inst); /* shift R13 bit */ retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); @@ -318,7 +311,7 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", retired_page, row_xor, col, bank, channel_index); amdgpu_umc_fill_error_record(err_data, err_addr, - retired_page, channel_index, umc_inst); + retired_page, channel_index, addr_in->ma.umc_inst); } } @@ -326,13 +319,13 @@ static int umc_v12_0_query_error_address(struct amdgpu_device *adev, uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst, void *data) { + struct ras_err_data *err_data = (struct ras_err_data *)data; + struct ta_ras_query_address_input addr_in; uint64_t mc_umc_status_addr; uint64_t mc_umc_status, err_addr; uint64_t mc_umc_addrt0; - struct ras_err_data *err_data = (struct ras_err_data *)data; uint64_t umc_reg_offset = get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst); - uint32_t socket_id = 0; mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); @@ -362,10 +355,16 @@ static int umc_v12_0_query_error_address(struct amdgpu_device *adev, if (!adev->aid_mask && adev->smuio.funcs && adev->smuio.funcs->get_socket_id) - socket_id = adev->smuio.funcs->get_socket_id(adev); + addr_in.ma.socket_id = adev->smuio.funcs->get_socket_id(adev); + else + addr_in.ma.socket_id = 0; + + addr_in.ma.err_addr = err_addr; + addr_in.ma.ch_inst = ch_inst; + addr_in.ma.umc_inst = umc_inst; + addr_in.ma.node_inst = node_inst; - umc_v12_0_convert_error_address(adev, err_data, err_addr, - ch_inst, umc_inst, node_inst, socket_id); + umc_v12_0_convert_error_address(adev, err_data, &addr_in); } /* clear umc status */ @@ -431,12 +430,16 @@ static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *ade struct ras_err_info *err_info; struct ras_err_addr *mca_err_addr, *tmp; struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + struct ta_ras_query_address_input addr_in; for_each_ras_error(err_node, err_data) { err_info = &err_node->err_info; if (list_empty(&err_info->err_addr_list)) continue; + addr_in.ma.node_inst = err_info->mcm_info.die_id; + addr_in.ma.socket_id = err_info->mcm_info.socket_id; + list_for_each_entry_safe(mca_err_addr, tmp, &err_info->err_addr_list, node) { mc_umc_status = mca_err_addr->err_status; if (mc_umc_status && @@ -452,6 +455,10 @@ static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *ade MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); InstanceIdLo = REG_GET_FIELD(mca_ipid, MCMP1_IPIDT0, InstanceIdLo); + addr_in.ma.err_addr = err_addr; + addr_in.ma.ch_inst = MCA_IPID_LO_2_UMC_CH(InstanceIdLo); + addr_in.ma.umc_inst = MCA_IPID_LO_2_UMC_INST(InstanceIdLo); + dev_info(adev->dev, "UMC:IPID:0x%llx, aid:%d, inst:%d, ch:%d, err_addr:0x%llx\n", mca_ipid, err_info->mcm_info.die_id, @@ -460,11 +467,7 @@ static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *ade err_addr); umc_v12_0_convert_error_address(adev, - err_data, err_addr, - MCA_IPID_LO_2_UMC_CH(InstanceIdLo), - MCA_IPID_LO_2_UMC_INST(InstanceIdLo), - err_info->mcm_info.die_id, - err_info->mcm_info.socket_id); + err_data, &addr_in); } /* Delete error address node from list and free memory */ -- cgit v1.2.3 From eb4f6eca2632584b8260c175e8aac41ed36314be Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Fri, 22 Mar 2024 12:00:31 +0530 Subject: drm/amdgpu: Fix truncations in gfx_v11_0_init_microcode() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reducing the size of ucode_prefix to 25 in the gfx_v11_0_init_microcode function. This would ensure that the total number of characters being written into fw_name does not exceed its size of 40. Fixes the below with gcc W=1: drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c: In function ‘gfx_v11_0_early_init’: drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c:523:54: warning: ‘_pfp.bin’ directive output may be truncated writing 8 bytes into a region of size between 4 and 33 [-Wformat-truncation=] 523 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", ucode_prefix); | ^~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c:523:9: note: ‘snprintf’ output between 16 and 45 bytes into a destination of size 40 523 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", ucode_prefix); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c:540:54: warning: ‘_me.bin’ directive output may be truncated writing 7 bytes into a region of size between 4 and 33 [-Wformat-truncation=] 540 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix); | ^~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c:540:9: note: ‘snprintf’ output between 15 and 44 bytes into a destination of size 40 540 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c:557:70: warning: ‘_rlc.bin’ directive output may be truncated writing 8 bytes into a region of size between 4 and 33 [-Wformat-truncation=] 557 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); | ^~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c:557:25: note: ‘snprintf’ output between 16 and 45 bytes into a destination of size 40 557 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c:569:54: warning: ‘_mec.bin’ directive output may be truncated writing 8 bytes into a region of size between 4 and 33 [-Wformat-truncation=] 569 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix); | ^~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c:569:9: note: ‘snprintf’ output between 16 and 45 bytes into a destination of size 40 569 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CC [M] drivers/gpu/drm/amd/amdgpu/../pm/powerplay/hwmgr/smu7_clockpowergating.o Cc: Alex Deucher Cc: Christian König Suggested-by: Lijo Lazar Signed-off-by: Srinivasan Shanmugam Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 1770e496c1b7..7a906318e451 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -510,7 +510,7 @@ static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev) static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) { char fw_name[40]; - char ucode_prefix[30]; + char ucode_prefix[25]; int err; const struct rlc_firmware_header_v2_0 *rlc_hdr; uint16_t version_major; -- cgit v1.2.3 From 0355b24bdec3b69ba31375c83d94fa80ca2c7ae1 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 20 Mar 2024 13:32:21 -0500 Subject: drm/amd: Flush GFXOFF requests in prepare stage If the system hasn't entered GFXOFF when suspend starts it can cause hangs accessing GC and RLC during the suspend stage. Cc: # 6.1.y: 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() callback") Cc: # 6.1.y: cb11ca3233aa ("drm/amd: Add concept of running prepare_suspend() sequence for IP blocks") Cc: # 6.1.y: 2ceec37b0e3d ("drm/amd: Add missing kernel doc for prepare_suspend()") Cc: # 6.1.y: 3a9626c816db ("drm/amd: Stop evicting resources on APUs in suspend") Cc: # 6.6.y: 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() callback") Cc: # 6.6.y: cb11ca3233aa ("drm/amd: Add concept of running prepare_suspend() sequence for IP blocks") Cc: # 6.6.y: 2ceec37b0e3d ("drm/amd: Add missing kernel doc for prepare_suspend()") Cc: # 6.6.y: 3a9626c816db ("drm/amd: Stop evicting resources on APUs in suspend") Cc: # 6.1+ Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3132 Fixes: ab4750332dbe ("drm/amdgpu/sdma5.2: add begin/end_use ring callbacks") Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f771b2042a43..12dc71a6b5db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4542,6 +4542,8 @@ int amdgpu_device_prepare(struct drm_device *dev) if (r) goto unprepare; + flush_delayed_work(&adev->gfx.gfx_off_delay_work); + for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; -- cgit v1.2.3 From f3e698978cfb3c9ece9e60acf0860dafece345ff Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Thu, 7 Mar 2024 11:13:19 +0800 Subject: drm/amdgpu/umsch: update UMSCH 4.0 FW interface Align with FW changes. Signed-off-by: Lang Yu Reviewed-by: Veerabadhran Gopalakrishnan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h | 20 ++++++++++---------- drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h | 13 +++++++++++-- 2 files changed, 21 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h index 8258a43a6236..5014b5af95fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h @@ -33,13 +33,6 @@ enum UMSCH_SWIP_ENGINE_TYPE { UMSCH_SWIP_ENGINE_TYPE_MAX }; -enum UMSCH_SWIP_AFFINITY_TYPE { - UMSCH_SWIP_AFFINITY_TYPE_ANY = 0, - UMSCH_SWIP_AFFINITY_TYPE_VCN0 = 1, - UMSCH_SWIP_AFFINITY_TYPE_VCN1 = 2, - UMSCH_SWIP_AFFINITY_TYPE_MAX -}; - enum UMSCH_CONTEXT_PRIORITY_LEVEL { CONTEXT_PRIORITY_LEVEL_IDLE = 0, CONTEXT_PRIORITY_LEVEL_NORMAL = 1, @@ -51,13 +44,15 @@ enum UMSCH_CONTEXT_PRIORITY_LEVEL { struct umsch_mm_set_resource_input { uint32_t vmid_mask_mm_vcn; uint32_t vmid_mask_mm_vpe; + uint32_t collaboration_mask_vpe; uint32_t logging_vmid; uint32_t engine_mask; union { struct { uint32_t disable_reset : 1; uint32_t disable_umsch_mm_log : 1; - uint32_t reserved : 30; + uint32_t use_rs64mem_for_proc_ctx_csa : 1; + uint32_t reserved : 29; }; uint32_t uint32_all; }; @@ -78,15 +73,18 @@ struct umsch_mm_add_queue_input { uint32_t doorbell_offset_1; enum UMSCH_SWIP_ENGINE_TYPE engine_type; uint32_t affinity; - enum UMSCH_SWIP_AFFINITY_TYPE affinity_type; uint64_t mqd_addr; uint64_t h_context; uint64_t h_queue; uint32_t vm_context_cntl; + uint32_t process_csa_array_index; + uint32_t context_csa_array_index; + struct { uint32_t is_context_suspended : 1; - uint32_t reserved : 31; + uint32_t collaboration_mode : 1; + uint32_t reserved : 30; }; }; @@ -94,6 +92,7 @@ struct umsch_mm_remove_queue_input { uint32_t doorbell_offset_0; uint32_t doorbell_offset_1; uint64_t context_csa_addr; + uint32_t context_csa_array_index; }; struct MQD_INFO { @@ -103,6 +102,7 @@ struct MQD_INFO { uint32_t wptr_val; uint32_t rptr_val; uint32_t unmapped; + uint32_t vmid; }; struct amdgpu_umsch_mm; diff --git a/drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h b/drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h index beadb9e42850..ca83e9e5c3ff 100644 --- a/drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h +++ b/drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h @@ -234,7 +234,8 @@ union UMSCHAPI__SET_HW_RESOURCES { uint32_t enable_level_process_quantum_check : 1; uint32_t is_vcn0_enabled : 1; uint32_t is_vcn1_enabled : 1; - uint32_t reserved : 27; + uint32_t use_rs64mem_for_proc_ctx_csa : 1; + uint32_t reserved : 26; }; uint32_t uint32_all; }; @@ -297,9 +298,12 @@ union UMSCHAPI__ADD_QUEUE { struct { uint32_t is_context_suspended : 1; - uint32_t reserved : 31; + uint32_t collaboration_mode : 1; + uint32_t reserved : 30; }; struct UMSCH_API_STATUS api_status; + uint32_t process_csa_array_index; + uint32_t context_csa_array_index; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; @@ -314,6 +318,7 @@ union UMSCHAPI__REMOVE_QUEUE { uint64_t context_csa_addr; struct UMSCH_API_STATUS api_status; + uint32_t context_csa_array_index; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; @@ -337,6 +342,7 @@ union UMSCHAPI__SUSPEND { uint32_t suspend_fence_value; struct UMSCH_API_STATUS api_status; + uint32_t context_csa_array_index; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; @@ -356,6 +362,7 @@ union UMSCHAPI__RESUME { enum UMSCH_ENGINE_TYPE engine_type; struct UMSCH_API_STATUS api_status; + uint32_t context_csa_array_index; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; @@ -404,6 +411,7 @@ union UMSCHAPI__UPDATE_AFFINITY { union UMSCH_AFFINITY affinity; uint64_t context_csa_addr; struct UMSCH_API_STATUS api_status; + uint32_t context_csa_array_index; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; @@ -417,6 +425,7 @@ union UMSCHAPI__CHANGE_CONTEXT_PRIORITY_LEVEL { uint64_t context_quantum; uint64_t context_csa_addr; struct UMSCH_API_STATUS api_status; + uint32_t context_csa_array_index; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; -- cgit v1.2.3 From b9a8aee136b70d032f078de98ab6f8cc3eaebf3e Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Thu, 7 Mar 2024 13:57:06 +0800 Subject: drm/amdgpu: enable UMSCH 4.0.6 Share same codes with 4.0.5 and enable collaborate mode for VPE. Signed-off-by: Lang Yu Reviewed-by: Veerabadhran Gopalakrishnan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c | 12 ++++++++++-- drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c | 7 +++++-- 3 files changed, 16 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 230412fc4d62..362da1413e1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -2246,6 +2246,7 @@ static int amdgpu_discovery_set_umsch_mm_ip_blocks(struct amdgpu_device *adev) { switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(4, 0, 5): + case IP_VERSION(4, 0, 6): if (amdgpu_umsch_mm & 0x1) { amdgpu_device_ip_block_add(adev, &umsch_mm_v4_0_ip_block); adev->enable_umsch_mm = true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index ab820cf52668..0df97c3e3a70 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -189,10 +189,13 @@ static void setup_vpe_queue(struct amdgpu_device *adev, mqd->rptr_val = 0; mqd->unmapped = 1; + if (adev->vpe.collaborate_mode) + memcpy(++mqd, test->mqd_data_cpu_addr, sizeof(struct MQD_INFO)); + qinfo->mqd_addr = test->mqd_data_gpu_addr; qinfo->csa_addr = test->ctx_data_gpu_addr + offsetof(struct umsch_mm_test_ctx_data, vpe_ctx_csa); - qinfo->doorbell_offset_0 = (adev->doorbell_index.vpe_ring + 1) << 1; + qinfo->doorbell_offset_0 = 0; qinfo->doorbell_offset_1 = 0; } @@ -287,7 +290,10 @@ static int submit_vpe_queue(struct amdgpu_device *adev, struct umsch_mm_test *te ring[5] = 0; mqd->wptr_val = (6 << 2); - // WDOORBELL32(adev->umsch_mm.agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL], mqd->wptr_val); + if (adev->vpe.collaborate_mode) + (++mqd)->wptr_val = (6 << 2); + + WDOORBELL32(adev->umsch_mm.agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL], mqd->wptr_val); for (i = 0; i < adev->usec_timeout; i++) { if (*fence == test_pattern) @@ -571,6 +577,7 @@ int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch) switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(4, 0, 5): + case IP_VERSION(4, 0, 6): fw_name = "amdgpu/umsch_mm_4_0_0.bin"; break; default: @@ -750,6 +757,7 @@ static int umsch_mm_early_init(void *handle) switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(4, 0, 5): + case IP_VERSION(4, 0, 6): umsch_mm_v4_0_set_funcs(&adev->umsch_mm); break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c index 8e7b763cfdb7..84368cf1e175 100644 --- a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c @@ -60,7 +60,7 @@ static int umsch_mm_v4_0_load_microcode(struct amdgpu_umsch_mm *umsch) umsch->cmd_buf_curr_ptr = umsch->cmd_buf_ptr; - if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5)) { + if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= IP_VERSION(4, 0, 5)) { WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG, 1 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT); SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS, @@ -248,7 +248,7 @@ static int umsch_mm_v4_0_ring_stop(struct amdgpu_umsch_mm *umsch) data = REG_SET_FIELD(data, VCN_UMSCH_RB_DB_CTRL, EN, 0); WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_DB_CTRL, data); - if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5)) { + if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= IP_VERSION(4, 0, 5)) { WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG, 2 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT); SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS, @@ -271,6 +271,8 @@ static int umsch_mm_v4_0_set_hw_resources(struct amdgpu_umsch_mm *umsch) set_hw_resources.vmid_mask_mm_vcn = umsch->vmid_mask_mm_vcn; set_hw_resources.vmid_mask_mm_vpe = umsch->vmid_mask_mm_vpe; + set_hw_resources.collaboration_mask_vpe = + adev->vpe.collaborate_mode ? 0x3 : 0x0; set_hw_resources.engine_mask = umsch->engine_mask; set_hw_resources.vcn0_hqd_mask[0] = umsch->vcn0_hqd_mask; @@ -346,6 +348,7 @@ static int umsch_mm_v4_0_add_queue(struct amdgpu_umsch_mm *umsch, add_queue.h_queue = input_ptr->h_queue; add_queue.vm_context_cntl = input_ptr->vm_context_cntl; add_queue.is_context_suspended = input_ptr->is_context_suspended; + add_queue.collaboration_mode = adev->vpe.collaborate_mode ? 1 : 0; add_queue.api_status.api_completion_fence_addr = umsch->ring.fence_drv.gpu_addr; add_queue.api_status.api_completion_fence_value = ++umsch->ring.fence_drv.sync_seq; -- cgit v1.2.3 From c25d09bcb79fa6a6f45beb558ccc77e68f757e19 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 7 Mar 2024 17:07:37 -0500 Subject: drm/amdgpu: fix deadlock while reading mqd from debugfs An errant disk backup on my desktop got into debugfs and triggered the following deadlock scenario in the amdgpu debugfs files. The machine also hard-resets immediately after those lines are printed (although I wasn't able to reproduce that part when reading by hand): [ 1318.016074][ T1082] ====================================================== [ 1318.016607][ T1082] WARNING: possible circular locking dependency detected [ 1318.017107][ T1082] 6.8.0-rc7-00015-ge0c8221b72c0 #17 Not tainted [ 1318.017598][ T1082] ------------------------------------------------------ [ 1318.018096][ T1082] tar/1082 is trying to acquire lock: [ 1318.018585][ T1082] ffff98c44175d6a0 (&mm->mmap_lock){++++}-{3:3}, at: __might_fault+0x40/0x80 [ 1318.019084][ T1082] [ 1318.019084][ T1082] but task is already holding lock: [ 1318.020052][ T1082] ffff98c4c13f55f8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: amdgpu_debugfs_mqd_read+0x6a/0x250 [amdgpu] [ 1318.020607][ T1082] [ 1318.020607][ T1082] which lock already depends on the new lock. [ 1318.020607][ T1082] [ 1318.022081][ T1082] [ 1318.022081][ T1082] the existing dependency chain (in reverse order) is: [ 1318.023083][ T1082] [ 1318.023083][ T1082] -> #2 (reservation_ww_class_mutex){+.+.}-{3:3}: [ 1318.024114][ T1082] __ww_mutex_lock.constprop.0+0xe0/0x12f0 [ 1318.024639][ T1082] ww_mutex_lock+0x32/0x90 [ 1318.025161][ T1082] dma_resv_lockdep+0x18a/0x330 [ 1318.025683][ T1082] do_one_initcall+0x6a/0x350 [ 1318.026210][ T1082] kernel_init_freeable+0x1a3/0x310 [ 1318.026728][ T1082] kernel_init+0x15/0x1a0 [ 1318.027242][ T1082] ret_from_fork+0x2c/0x40 [ 1318.027759][ T1082] ret_from_fork_asm+0x11/0x20 [ 1318.028281][ T1082] [ 1318.028281][ T1082] -> #1 (reservation_ww_class_acquire){+.+.}-{0:0}: [ 1318.029297][ T1082] dma_resv_lockdep+0x16c/0x330 [ 1318.029790][ T1082] do_one_initcall+0x6a/0x350 [ 1318.030263][ T1082] kernel_init_freeable+0x1a3/0x310 [ 1318.030722][ T1082] kernel_init+0x15/0x1a0 [ 1318.031168][ T1082] ret_from_fork+0x2c/0x40 [ 1318.031598][ T1082] ret_from_fork_asm+0x11/0x20 [ 1318.032011][ T1082] [ 1318.032011][ T1082] -> #0 (&mm->mmap_lock){++++}-{3:3}: [ 1318.032778][ T1082] __lock_acquire+0x14bf/0x2680 [ 1318.033141][ T1082] lock_acquire+0xcd/0x2c0 [ 1318.033487][ T1082] __might_fault+0x58/0x80 [ 1318.033814][ T1082] amdgpu_debugfs_mqd_read+0x103/0x250 [amdgpu] [ 1318.034181][ T1082] full_proxy_read+0x55/0x80 [ 1318.034487][ T1082] vfs_read+0xa7/0x360 [ 1318.034788][ T1082] ksys_read+0x70/0xf0 [ 1318.035085][ T1082] do_syscall_64+0x94/0x180 [ 1318.035375][ T1082] entry_SYSCALL_64_after_hwframe+0x46/0x4e [ 1318.035664][ T1082] [ 1318.035664][ T1082] other info that might help us debug this: [ 1318.035664][ T1082] [ 1318.036487][ T1082] Chain exists of: [ 1318.036487][ T1082] &mm->mmap_lock --> reservation_ww_class_acquire --> reservation_ww_class_mutex [ 1318.036487][ T1082] [ 1318.037310][ T1082] Possible unsafe locking scenario: [ 1318.037310][ T1082] [ 1318.037838][ T1082] CPU0 CPU1 [ 1318.038101][ T1082] ---- ---- [ 1318.038350][ T1082] lock(reservation_ww_class_mutex); [ 1318.038590][ T1082] lock(reservation_ww_class_acquire); [ 1318.038839][ T1082] lock(reservation_ww_class_mutex); [ 1318.039083][ T1082] rlock(&mm->mmap_lock); [ 1318.039328][ T1082] [ 1318.039328][ T1082] *** DEADLOCK *** [ 1318.039328][ T1082] [ 1318.040029][ T1082] 1 lock held by tar/1082: [ 1318.040259][ T1082] #0: ffff98c4c13f55f8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: amdgpu_debugfs_mqd_read+0x6a/0x250 [amdgpu] [ 1318.040560][ T1082] [ 1318.040560][ T1082] stack backtrace: [ 1318.041053][ T1082] CPU: 22 PID: 1082 Comm: tar Not tainted 6.8.0-rc7-00015-ge0c8221b72c0 #17 3316c85d50e282c5643b075d1f01a4f6365e39c2 [ 1318.041329][ T1082] Hardware name: Gigabyte Technology Co., Ltd. B650 AORUS PRO AX/B650 AORUS PRO AX, BIOS F20 12/14/2023 [ 1318.041614][ T1082] Call Trace: [ 1318.041895][ T1082] [ 1318.042175][ T1082] dump_stack_lvl+0x4a/0x80 [ 1318.042460][ T1082] check_noncircular+0x145/0x160 [ 1318.042743][ T1082] __lock_acquire+0x14bf/0x2680 [ 1318.043022][ T1082] lock_acquire+0xcd/0x2c0 [ 1318.043301][ T1082] ? __might_fault+0x40/0x80 [ 1318.043580][ T1082] ? __might_fault+0x40/0x80 [ 1318.043856][ T1082] __might_fault+0x58/0x80 [ 1318.044131][ T1082] ? __might_fault+0x40/0x80 [ 1318.044408][ T1082] amdgpu_debugfs_mqd_read+0x103/0x250 [amdgpu 8fe2afaa910cbd7654c8cab23563a94d6caebaab] [ 1318.044749][ T1082] full_proxy_read+0x55/0x80 [ 1318.045042][ T1082] vfs_read+0xa7/0x360 [ 1318.045333][ T1082] ksys_read+0x70/0xf0 [ 1318.045623][ T1082] do_syscall_64+0x94/0x180 [ 1318.045913][ T1082] ? do_syscall_64+0xa0/0x180 [ 1318.046201][ T1082] ? lockdep_hardirqs_on+0x7d/0x100 [ 1318.046487][ T1082] ? do_syscall_64+0xa0/0x180 [ 1318.046773][ T1082] ? do_syscall_64+0xa0/0x180 [ 1318.047057][ T1082] ? do_syscall_64+0xa0/0x180 [ 1318.047337][ T1082] ? do_syscall_64+0xa0/0x180 [ 1318.047611][ T1082] entry_SYSCALL_64_after_hwframe+0x46/0x4e [ 1318.047887][ T1082] RIP: 0033:0x7f480b70a39d [ 1318.048162][ T1082] Code: 91 ba 0d 00 f7 d8 64 89 02 b8 ff ff ff ff eb b2 e8 18 a3 01 00 0f 1f 84 00 00 00 00 00 80 3d a9 3c 0e 00 00 74 17 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 5b c3 66 2e 0f 1f 84 00 00 00 00 00 53 48 83 [ 1318.048769][ T1082] RSP: 002b:00007ffde77f5c68 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [ 1318.049083][ T1082] RAX: ffffffffffffffda RBX: 0000000000000800 RCX: 00007f480b70a39d [ 1318.049392][ T1082] RDX: 0000000000000800 RSI: 000055c9f2120c00 RDI: 0000000000000008 [ 1318.049703][ T1082] RBP: 0000000000000800 R08: 000055c9f2120a94 R09: 0000000000000007 [ 1318.050011][ T1082] R10: 0000000000000000 R11: 0000000000000246 R12: 000055c9f2120c00 [ 1318.050324][ T1082] R13: 0000000000000008 R14: 0000000000000008 R15: 0000000000000800 [ 1318.050638][ T1082] amdgpu_debugfs_mqd_read() holds a reservation when it calls put_user(), which may fault and acquire the mmap_sem. This violates the established locking order. Bounce the mqd data through a kernel buffer to get put_user() out of the illegal section. Fixes: 445d85e3c1df ("drm/amdgpu: add debugfs interface for reading MQDs") Cc: stable@vger.kernel.org # v6.5+ Reviewed-by: Shashank Sharma Signed-off-by: Johannes Weiner Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 46 ++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 17 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 5505d646f43a..06f0a6534a94 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -524,46 +524,58 @@ static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf, { struct amdgpu_ring *ring = file_inode(f)->i_private; volatile u32 *mqd; - int r; + u32 *kbuf; + int r, i; uint32_t value, result; if (*pos & 3 || size & 3) return -EINVAL; - result = 0; + kbuf = kmalloc(ring->mqd_size, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)) - return r; + goto err_free; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd); - if (r) { - amdgpu_bo_unreserve(ring->mqd_obj); - return r; - } + if (r) + goto err_unreserve; + /* + * Copy to local buffer to avoid put_user(), which might fault + * and acquire mmap_sem, under reservation_ww_class_mutex. + */ + for (i = 0; i < ring->mqd_size/sizeof(u32); i++) + kbuf[i] = mqd[i]; + + amdgpu_bo_kunmap(ring->mqd_obj); + amdgpu_bo_unreserve(ring->mqd_obj); + + result = 0; while (size) { if (*pos >= ring->mqd_size) - goto done; + break; - value = mqd[*pos/4]; + value = kbuf[*pos/4]; r = put_user(value, (uint32_t *)buf); if (r) - goto done; + goto err_free; buf += 4; result += 4; size -= 4; *pos += 4; } -done: - amdgpu_bo_kunmap(ring->mqd_obj); - mqd = NULL; - amdgpu_bo_unreserve(ring->mqd_obj); - if (r) - return r; - + kfree(kbuf); return result; + +err_unreserve: + amdgpu_bo_unreserve(ring->mqd_obj); +err_free: + kfree(kbuf); + return r; } static const struct file_operations amdgpu_debugfs_mqd_fops = { -- cgit v1.2.3 From d7f1487643552f18d1855474eba54a8ff3655935 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sat, 23 Mar 2024 20:46:53 -0400 Subject: drm/amdgpu: always force full reset for SOC21 There are cases where soft reset seems to succeed, but does not, so always use mode1/2 for now. Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc21.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 581a3bd11481..8526282f4da1 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -457,10 +457,8 @@ static bool soc21_need_full_reset(struct amdgpu_device *adev) { switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): - return amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC); case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): - return false; default: return true; } -- cgit v1.2.3 From e075e496f516bf92bc0cbaf94d64e8d4a6b58321 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 27 Mar 2024 11:57:03 +0100 Subject: drm: Switch DRM_DISPLAY_HELPER to depends on Most of our helpers have relied on being selected so far through Kconfig, but that creates issues when we have multiple layers of helpers with some depending on others. Indeed, select doesn't select a dependency's dependencies, and thus isn't super intuitive. Depends on however doesn't have that limitation, so we can just switch all the drivers that were selecting DRM_DISPLAY_HELPER to depend on it. Reviewed-by: Jani Nikula Link: https://lore.kernel.org/r/20240327-kms-kconfig-helpers-v3-8-eafee11b84b3@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/Kconfig | 6 ++++-- drivers/gpu/drm/amd/amdgpu/Kconfig | 6 ++++-- drivers/gpu/drm/bridge/Kconfig | 10 +++++----- drivers/gpu/drm/bridge/analogix/Kconfig | 6 +++--- drivers/gpu/drm/bridge/cadence/Kconfig | 4 ++-- drivers/gpu/drm/bridge/synopsys/Kconfig | 2 +- drivers/gpu/drm/display/Kconfig | 1 + drivers/gpu/drm/exynos/Kconfig | 2 +- drivers/gpu/drm/i915/Kconfig | 2 +- drivers/gpu/drm/mediatek/Kconfig | 2 +- drivers/gpu/drm/msm/Kconfig | 4 ++-- drivers/gpu/drm/nouveau/Kconfig | 6 ++++-- drivers/gpu/drm/panel/Kconfig | 20 ++++++++++---------- drivers/gpu/drm/radeon/Kconfig | 6 ++++-- drivers/gpu/drm/rockchip/Kconfig | 4 ++-- drivers/gpu/drm/tegra/Kconfig | 2 +- drivers/gpu/drm/vc4/Kconfig | 8 ++++---- drivers/gpu/drm/xe/Kconfig | 7 +++++-- drivers/gpu/drm/xlnx/Kconfig | 6 ++++-- 19 files changed, 59 insertions(+), 45 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 16029435b750..1eb939463c35 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -73,10 +73,12 @@ config DRM_KUNIT_TEST_HELPERS config DRM_KUNIT_TEST tristate "KUnit tests for DRM" if !KUNIT_ALL_TESTS - depends on DRM && KUNIT && MMU + depends on DRM + depends on DRM_DISPLAY_HELPER + depends on KUNIT + depends on MMU select DRM_BUDDY select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER select DRM_EXEC select DRM_EXPORT_FOR_TESTS if m select DRM_GEM_SHMEM_HELPER diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 22d88f8ef527..cf931b94a188 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -2,13 +2,15 @@ config DRM_AMDGPU tristate "AMD GPU" - depends on DRM && PCI && MMU + depends on DRM + depends on DRM_DISPLAY_HELPER + depends on MMU + depends on PCI depends on !UML select FW_LOADER select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDMI_HELPER select DRM_DISPLAY_HDCP_HELPER - select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select DRM_SCHED select DRM_TTM diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index 703c3e30885b..aa870e68e165 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -92,10 +92,10 @@ config DRM_FSL_LDB config DRM_ITE_IT6505 tristate "ITE IT6505 DisplayPort bridge" + depends on DRM_DISPLAY_HELPER depends on OF select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDCP_HELPER - select DRM_DISPLAY_HELPER select DRM_DISPLAY_DP_AUX_BUS select DRM_KMS_HELPER select DRM_DP_HELPER @@ -226,9 +226,9 @@ config DRM_PARADE_PS8622 config DRM_PARADE_PS8640 tristate "Parade PS8640 MIPI DSI to eDP Converter" + depends on DRM_DISPLAY_HELPER depends on OF select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER select DRM_DISPLAY_DP_AUX_BUS select DRM_KMS_HELPER select DRM_MIPI_DSI @@ -313,9 +313,9 @@ config DRM_TOSHIBA_TC358764 config DRM_TOSHIBA_TC358767 tristate "Toshiba TC358767 eDP bridge" + depends on DRM_DISPLAY_HELPER depends on OF select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select REGMAP_I2C select DRM_MIPI_DSI @@ -336,9 +336,9 @@ config DRM_TOSHIBA_TC358768 config DRM_TOSHIBA_TC358775 tristate "Toshiba TC358775 DSI/LVDS bridge" + depends on DRM_DISPLAY_HELPER depends on OF select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select REGMAP_I2C select DRM_PANEL @@ -381,9 +381,9 @@ config DRM_TI_SN65DSI83 config DRM_TI_SN65DSI86 tristate "TI SN65DSI86 DSI to eDP bridge" + depends on DRM_DISPLAY_HELPER depends on OF select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select REGMAP_I2C select DRM_PANEL diff --git a/drivers/gpu/drm/bridge/analogix/Kconfig b/drivers/gpu/drm/bridge/analogix/Kconfig index 4846b2e9be7c..16d18dde483a 100644 --- a/drivers/gpu/drm/bridge/analogix/Kconfig +++ b/drivers/gpu/drm/bridge/analogix/Kconfig @@ -1,10 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_ANALOGIX_ANX6345 tristate "Analogix ANX6345 bridge" + depends on DRM_DISPLAY_HELPER depends on OF select DRM_ANALOGIX_DP select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select REGMAP_I2C help @@ -15,9 +15,9 @@ config DRM_ANALOGIX_ANX6345 config DRM_ANALOGIX_ANX78XX tristate "Analogix ANX78XX bridge" + depends on DRM_DISPLAY_HELPER select DRM_ANALOGIX_DP select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select REGMAP_I2C help @@ -33,10 +33,10 @@ config DRM_ANALOGIX_DP config DRM_ANALOGIX_ANX7625 tristate "Analogix Anx7625 MIPI to DP interface support" depends on DRM + depends on DRM_DISPLAY_HELPER depends on OF select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDCP_HELPER - select DRM_DISPLAY_HELPER select DRM_DISPLAY_DP_AUX_BUS select DRM_MIPI_DSI help diff --git a/drivers/gpu/drm/bridge/cadence/Kconfig b/drivers/gpu/drm/bridge/cadence/Kconfig index cced81633ddc..20143afded40 100644 --- a/drivers/gpu/drm/bridge/cadence/Kconfig +++ b/drivers/gpu/drm/bridge/cadence/Kconfig @@ -23,12 +23,12 @@ endif config DRM_CDNS_MHDP8546 tristate "Cadence DPI/DP bridge" + depends on DRM_DISPLAY_HELPER + depends on OF select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDCP_HELPER - select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select DRM_PANEL_BRIDGE - depends on OF help Support Cadence DPI to DP bridge. This is an internal bridge and is meant to be directly embedded in a SoC. diff --git a/drivers/gpu/drm/bridge/synopsys/Kconfig b/drivers/gpu/drm/bridge/synopsys/Kconfig index 15fc182d05ef..f366ece47146 100644 --- a/drivers/gpu/drm/bridge/synopsys/Kconfig +++ b/drivers/gpu/drm/bridge/synopsys/Kconfig @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_DW_HDMI tristate + depends on DRM_DISPLAY_HELPER select DRM_DISPLAY_HDMI_HELPER - select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select REGMAP_MMIO select CEC_CORE if CEC_NOTIFIER diff --git a/drivers/gpu/drm/display/Kconfig b/drivers/gpu/drm/display/Kconfig index c77e7f85bd67..cffa2acdbc6c 100644 --- a/drivers/gpu/drm/display/Kconfig +++ b/drivers/gpu/drm/display/Kconfig @@ -3,6 +3,7 @@ config DRM_DISPLAY_HELPER tristate "DRM Display Helpers" depends on DRM + default y help DRM helpers for display adapters. diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig index 733b109a5095..4b0183bf221c 100644 --- a/drivers/gpu/drm/exynos/Kconfig +++ b/drivers/gpu/drm/exynos/Kconfig @@ -4,7 +4,6 @@ config DRM_EXYNOS depends on OF && DRM && COMMON_CLK depends on ARCH_S3C64XX || ARCH_S5PV210 || ARCH_EXYNOS || COMPILE_TEST depends on MMU - select DRM_DISPLAY_HELPER if DRM_EXYNOS_DP select DRM_KMS_HELPER select VIDEOMODE_HELPERS select FB_DMAMEM_HELPERS if DRM_FBDEV_EMULATION @@ -68,6 +67,7 @@ config DRM_EXYNOS_DSI config DRM_EXYNOS_DP bool "Exynos specific extensions for Analogix DP driver" depends on DRM_EXYNOS_FIMD || DRM_EXYNOS7_DECON + depends on DRM_DISPLAY_HELPER select DRM_ANALOGIX_DP select DRM_DISPLAY_DP_HELPER default DRM_EXYNOS diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 5932024f8f95..43183a68a095 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -2,6 +2,7 @@ config DRM_I915 tristate "Intel 8xx/9xx/G3x/G4x/HD Graphics" depends on DRM + depends on DRM_DISPLAY_HELPER depends on X86 && PCI depends on !PREEMPT_RT select INTEL_GTT if X86 @@ -13,7 +14,6 @@ config DRM_I915 select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDCP_HELPER select DRM_DISPLAY_HDMI_HELPER - select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select DRM_PANEL select DRM_MIPI_DSI diff --git a/drivers/gpu/drm/mediatek/Kconfig b/drivers/gpu/drm/mediatek/Kconfig index 96cbe020f493..50bb28327f65 100644 --- a/drivers/gpu/drm/mediatek/Kconfig +++ b/drivers/gpu/drm/mediatek/Kconfig @@ -22,9 +22,9 @@ config DRM_MEDIATEK config DRM_MEDIATEK_DP tristate "DRM DPTX Support for MediaTek SoCs" + depends on DRM_DISPLAY_HELPER depends on DRM_MEDIATEK select PHY_MTK_DP - select DRM_DISPLAY_HELPER select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_DP_AUX_BUS help diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index 27d72ed8b389..2055266506e5 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -2,9 +2,10 @@ config DRM_MSM tristate "MSM DRM" - depends on DRM depends on ARCH_QCOM || SOC_IMX5 || COMPILE_TEST depends on COMMON_CLK + depends on DRM + depends on DRM_DISPLAY_HELPER depends on IOMMU_SUPPORT depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n depends on QCOM_OCMEM || QCOM_OCMEM=n @@ -16,7 +17,6 @@ config DRM_MSM select REGULATOR select DRM_DISPLAY_DP_AUX_BUS select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER select DRM_EXEC select DRM_KMS_HELPER select DRM_PANEL diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index ceef470c9fbf..5ac852b816db 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -1,12 +1,14 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_NOUVEAU tristate "Nouveau (NVIDIA) cards" - depends on DRM && PCI && MMU + depends on DRM + depends on DRM_DISPLAY_HELPER + depends on PCI + depends on MMU select IOMMU_API select FW_LOADER select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDMI_HELPER - select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select DRM_TTM select DRM_TTM_HELPER diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index 9eefe09c2ecb..c83285811bb3 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -533,11 +533,11 @@ config DRM_PANEL_RAYDIUM_RM68200 config DRM_PANEL_RAYDIUM_RM692E5 tristate "Raydium RM692E5-based DSI panel" - depends on OF - depends on DRM_MIPI_DSI depends on BACKLIGHT_CLASS_DEVICE + depends on DRM_DISPLAY_HELPER + depends on DRM_MIPI_DSI + depends on OF select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER help Say Y here if you want to enable support for Raydium RM692E5-based display panels, such as the one found in the Fairphone 5 smartphone. @@ -559,11 +559,11 @@ config DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 config DRM_PANEL_SAMSUNG_ATNA33XC20 tristate "Samsung ATNA33XC20 eDP panel" - depends on OF depends on BACKLIGHT_CLASS_DEVICE + depends on DRM_DISPLAY_HELPER + depends on OF depends on PM select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER select DRM_DISPLAY_DP_AUX_BUS help DRM panel driver for the Samsung ATNA33XC20 panel. This panel can't @@ -799,12 +799,12 @@ config DRM_PANEL_STARTEK_KD070FHFID015 config DRM_PANEL_EDP tristate "support for simple Embedded DisplayPort panels" - depends on OF depends on BACKLIGHT_CLASS_DEVICE + depends on DRM_DISPLAY_HELPER + depends on OF depends on PM select VIDEOMODE_HELPERS select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER select DRM_DISPLAY_DP_AUX_BUS select DRM_KMS_HELPER help @@ -879,11 +879,11 @@ config DRM_PANEL_TRULY_NT35597_WQXGA config DRM_PANEL_VISIONOX_R66451 tristate "Visionox R66451" - depends on OF - depends on DRM_MIPI_DSI depends on BACKLIGHT_CLASS_DEVICE + depends on DRM_DISPLAY_HELPER + depends on DRM_MIPI_DSI + depends on OF select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER help Say Y here if you want to enable support for Visionox R66451 1080x2340 AMOLED DSI panel. diff --git a/drivers/gpu/drm/radeon/Kconfig b/drivers/gpu/drm/radeon/Kconfig index f98356be0af2..07d330450f05 100644 --- a/drivers/gpu/drm/radeon/Kconfig +++ b/drivers/gpu/drm/radeon/Kconfig @@ -2,11 +2,13 @@ config DRM_RADEON tristate "ATI Radeon" - depends on DRM && PCI && MMU depends on AGP || !AGP + depends on DRM + depends on DRM_DISPLAY_HELPER + depends on PCI + depends on MMU select FW_LOADER select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select DRM_SUBALLOC_HELPER select DRM_TTM diff --git a/drivers/gpu/drm/rockchip/Kconfig b/drivers/gpu/drm/rockchip/Kconfig index 0d5260e10f27..b72c0bbf346d 100644 --- a/drivers/gpu/drm/rockchip/Kconfig +++ b/drivers/gpu/drm/rockchip/Kconfig @@ -35,8 +35,8 @@ config ROCKCHIP_VOP2 config ROCKCHIP_ANALOGIX_DP bool "Rockchip specific extensions for Analogix DP driver" + depends on DRM_DISPLAY_HELPER depends on ROCKCHIP_VOP - select DRM_DISPLAY_HELPER select DRM_DISPLAY_DP_HELPER help This selects support for Rockchip SoC specific extensions @@ -45,8 +45,8 @@ config ROCKCHIP_ANALOGIX_DP config ROCKCHIP_CDN_DP bool "Rockchip cdn DP" + depends on DRM_DISPLAY_HELPER depends on EXTCON=y || (EXTCON=m && DRM_ROCKCHIP=m) - select DRM_DISPLAY_HELPER select DRM_DISPLAY_DP_HELPER help This selects support for Rockchip SoC specific extensions diff --git a/drivers/gpu/drm/tegra/Kconfig b/drivers/gpu/drm/tegra/Kconfig index 782f51d3044a..44381ee6ea9e 100644 --- a/drivers/gpu/drm/tegra/Kconfig +++ b/drivers/gpu/drm/tegra/Kconfig @@ -4,10 +4,10 @@ config DRM_TEGRA depends on ARCH_TEGRA || COMPILE_TEST depends on COMMON_CLK depends on DRM + depends on DRM_DISPLAY_HELPER depends on OF select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDMI_HELPER - select DRM_DISPLAY_HELPER select DRM_DISPLAY_DP_AUX_BUS select DRM_KMS_HELPER select DRM_MIPI_DSI diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig index 91dcf8d174d6..98772a6b5bf0 100644 --- a/drivers/gpu/drm/vc4/Kconfig +++ b/drivers/gpu/drm/vc4/Kconfig @@ -2,15 +2,15 @@ config DRM_VC4 tristate "Broadcom VC4 Graphics" depends on ARCH_BCM || ARCH_BCM2835 || COMPILE_TEST + depends on COMMON_CLK + depends on DRM + depends on DRM_DISPLAY_HELPER + depends on PM # Make sure not 'y' when RASPBERRYPI_FIRMWARE is 'm'. This can only # happen when COMPILE_TEST=y, hence the added !RASPBERRYPI_FIRMWARE. depends on RASPBERRYPI_FIRMWARE || (COMPILE_TEST && !RASPBERRYPI_FIRMWARE) - depends on DRM depends on SND && SND_SOC - depends on COMMON_CLK - depends on PM select DRM_DISPLAY_HDMI_HELPER - select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select DRM_GEM_DMA_HELPER select DRM_PANEL_BRIDGE diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 1a556d087e63..be29e5cd5215 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -1,7 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_XE tristate "Intel Xe Graphics" - depends on DRM && PCI && MMU && (m || (y && KUNIT=y)) + depends on (m || (y && KUNIT=y)) + depends on DRM + depends on DRM_DISPLAY_HELPER + depends on MMU + depends on PCI select INTERVAL_TREE # we need shmfs for the swappable backing store, and in particular # the shmem_readpage() which depends upon tmpfs @@ -16,7 +20,6 @@ config DRM_XE select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDCP_HELPER select DRM_DISPLAY_HDMI_HELPER - select DRM_DISPLAY_HELPER select DRM_MIPI_DSI select RELAY select IRQ_WORK diff --git a/drivers/gpu/drm/xlnx/Kconfig b/drivers/gpu/drm/xlnx/Kconfig index 68ee897de9d7..7a14a8c2e7be 100644 --- a/drivers/gpu/drm/xlnx/Kconfig +++ b/drivers/gpu/drm/xlnx/Kconfig @@ -1,13 +1,15 @@ config DRM_ZYNQMP_DPSUB tristate "ZynqMP DisplayPort Controller Driver" depends on ARCH_ZYNQMP || COMPILE_TEST - depends on COMMON_CLK && DRM && OF + depends on COMMON_CLK depends on DMADEVICES + depends on DRM + depends on DRM_DISPLAY_HELPER + depends on OF depends on PHY_XILINX_ZYNQMP depends on XILINX_ZYNQMP_DPDMA select DMA_ENGINE select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HELPER select DRM_GEM_DMA_HELPER select DRM_KMS_HELPER select GENERIC_PHY -- cgit v1.2.3 From 0323287de87d7e6e9c22c57d7440aa353a2298d0 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 27 Mar 2024 11:57:05 +0100 Subject: drm: Switch DRM_DISPLAY_DP_HELPER to depends on Most of our helpers have relied on being selected so far through Kconfig, but that creates issues when we have multiple layers of helpers with some depending on others. Indeed, select doesn't select a dependency's dependencies, and thus isn't super intuitive. Depends on however doesn't have that limitation, so we can just switch all the drivers that were selecting DRM_DISPLAY_DP_HELPER to depend on it. Reviewed-by: Jani Nikula Link: https://lore.kernel.org/r/20240327-kms-kconfig-helpers-v3-10-eafee11b84b3@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/Kconfig | 2 +- drivers/gpu/drm/amd/amdgpu/Kconfig | 2 +- drivers/gpu/drm/bridge/Kconfig | 10 +++++----- drivers/gpu/drm/bridge/analogix/Kconfig | 6 +++--- drivers/gpu/drm/bridge/cadence/Kconfig | 2 +- drivers/gpu/drm/display/Kconfig | 1 + drivers/gpu/drm/exynos/Kconfig | 2 +- drivers/gpu/drm/i915/Kconfig | 2 +- drivers/gpu/drm/mediatek/Kconfig | 2 +- drivers/gpu/drm/msm/Kconfig | 2 +- drivers/gpu/drm/nouveau/Kconfig | 2 +- drivers/gpu/drm/panel/Kconfig | 8 ++++---- drivers/gpu/drm/radeon/Kconfig | 2 +- drivers/gpu/drm/rockchip/Kconfig | 4 ++-- drivers/gpu/drm/tegra/Kconfig | 2 +- drivers/gpu/drm/xe/Kconfig | 2 +- drivers/gpu/drm/xlnx/Kconfig | 2 +- 17 files changed, 27 insertions(+), 26 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 1eb939463c35..50b809346bf9 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -74,11 +74,11 @@ config DRM_KUNIT_TEST_HELPERS config DRM_KUNIT_TEST tristate "KUnit tests for DRM" if !KUNIT_ALL_TESTS depends on DRM + depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on KUNIT depends on MMU select DRM_BUDDY - select DRM_DISPLAY_DP_HELPER select DRM_EXEC select DRM_EXPORT_FOR_TESTS if m select DRM_GEM_SHMEM_HELPER diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index cf931b94a188..ba09121e7deb 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -3,12 +3,12 @@ config DRM_AMDGPU tristate "AMD GPU" depends on DRM + depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on MMU depends on PCI depends on !UML select FW_LOADER - select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDMI_HELPER select DRM_DISPLAY_HDCP_HELPER select DRM_KMS_HELPER diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index 5d0193a87314..54d45dd31b7f 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -93,9 +93,9 @@ config DRM_FSL_LDB config DRM_ITE_IT6505 tristate "ITE IT6505 DisplayPort bridge" depends on DRM_DISPLAY_DP_AUX_BUS + depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on OF - select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDCP_HELPER select DRM_KMS_HELPER select DRM_DP_HELPER @@ -227,9 +227,9 @@ config DRM_PARADE_PS8622 config DRM_PARADE_PS8640 tristate "Parade PS8640 MIPI DSI to eDP Converter" depends on DRM_DISPLAY_DP_AUX_BUS + depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on OF - select DRM_DISPLAY_DP_HELPER select DRM_KMS_HELPER select DRM_MIPI_DSI select DRM_PANEL @@ -313,9 +313,9 @@ config DRM_TOSHIBA_TC358764 config DRM_TOSHIBA_TC358767 tristate "Toshiba TC358767 eDP bridge" + depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on OF - select DRM_DISPLAY_DP_HELPER select DRM_KMS_HELPER select REGMAP_I2C select DRM_MIPI_DSI @@ -336,9 +336,9 @@ config DRM_TOSHIBA_TC358768 config DRM_TOSHIBA_TC358775 tristate "Toshiba TC358775 DSI/LVDS bridge" + depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on OF - select DRM_DISPLAY_DP_HELPER select DRM_KMS_HELPER select REGMAP_I2C select DRM_PANEL @@ -382,9 +382,9 @@ config DRM_TI_SN65DSI83 config DRM_TI_SN65DSI86 tristate "TI SN65DSI86 DSI to eDP bridge" depends on DRM_DISPLAY_DP_AUX_BUS + depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on OF - select DRM_DISPLAY_DP_HELPER select DRM_KMS_HELPER select REGMAP_I2C select DRM_PANEL diff --git a/drivers/gpu/drm/bridge/analogix/Kconfig b/drivers/gpu/drm/bridge/analogix/Kconfig index ec98c9453573..9659df6718de 100644 --- a/drivers/gpu/drm/bridge/analogix/Kconfig +++ b/drivers/gpu/drm/bridge/analogix/Kconfig @@ -1,10 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_ANALOGIX_ANX6345 tristate "Analogix ANX6345 bridge" + depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on OF select DRM_ANALOGIX_DP - select DRM_DISPLAY_DP_HELPER select DRM_KMS_HELPER select REGMAP_I2C help @@ -15,9 +15,9 @@ config DRM_ANALOGIX_ANX6345 config DRM_ANALOGIX_ANX78XX tristate "Analogix ANX78XX bridge" + depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER select DRM_ANALOGIX_DP - select DRM_DISPLAY_DP_HELPER select DRM_KMS_HELPER select REGMAP_I2C help @@ -34,9 +34,9 @@ config DRM_ANALOGIX_ANX7625 tristate "Analogix Anx7625 MIPI to DP interface support" depends on DRM depends on DRM_DISPLAY_DP_AUX_BUS + depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on OF - select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDCP_HELPER select DRM_MIPI_DSI help diff --git a/drivers/gpu/drm/bridge/cadence/Kconfig b/drivers/gpu/drm/bridge/cadence/Kconfig index 20143afded40..3480fd4d0a5f 100644 --- a/drivers/gpu/drm/bridge/cadence/Kconfig +++ b/drivers/gpu/drm/bridge/cadence/Kconfig @@ -23,9 +23,9 @@ endif config DRM_CDNS_MHDP8546 tristate "Cadence DPI/DP bridge" + depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on OF - select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDCP_HELPER select DRM_KMS_HELPER select DRM_PANEL_BRIDGE diff --git a/drivers/gpu/drm/display/Kconfig b/drivers/gpu/drm/display/Kconfig index 0cd439691422..9801f47a3704 100644 --- a/drivers/gpu/drm/display/Kconfig +++ b/drivers/gpu/drm/display/Kconfig @@ -39,6 +39,7 @@ config DRM_DISPLAY_DP_AUX_CHARDEV config DRM_DISPLAY_DP_HELPER bool "DRM DisplayPort Helpers" depends on DRM_DISPLAY_HELPER + default y help DRM display helpers for DisplayPort. diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig index 4b0183bf221c..6a26a0b8eff2 100644 --- a/drivers/gpu/drm/exynos/Kconfig +++ b/drivers/gpu/drm/exynos/Kconfig @@ -67,9 +67,9 @@ config DRM_EXYNOS_DSI config DRM_EXYNOS_DP bool "Exynos specific extensions for Analogix DP driver" depends on DRM_EXYNOS_FIMD || DRM_EXYNOS7_DECON + depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER select DRM_ANALOGIX_DP - select DRM_DISPLAY_DP_HELPER default DRM_EXYNOS select DRM_PANEL help diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 43183a68a095..dbde4e29d93a 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -2,6 +2,7 @@ config DRM_I915 tristate "Intel 8xx/9xx/G3x/G4x/HD Graphics" depends on DRM + depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on X86 && PCI depends on !PREEMPT_RT @@ -11,7 +12,6 @@ config DRM_I915 # the shmem_readpage() which depends upon tmpfs select SHMEM select TMPFS - select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDCP_HELPER select DRM_DISPLAY_HDMI_HELPER select DRM_KMS_HELPER diff --git a/drivers/gpu/drm/mediatek/Kconfig b/drivers/gpu/drm/mediatek/Kconfig index 2add54486ac4..6caab8d4d4e0 100644 --- a/drivers/gpu/drm/mediatek/Kconfig +++ b/drivers/gpu/drm/mediatek/Kconfig @@ -23,10 +23,10 @@ config DRM_MEDIATEK config DRM_MEDIATEK_DP tristate "DRM DPTX Support for MediaTek SoCs" depends on DRM_DISPLAY_DP_AUX_BUS + depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on DRM_MEDIATEK select PHY_MTK_DP - select DRM_DISPLAY_DP_HELPER help DRM/KMS Display Port driver for MediaTek SoCs. diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index 28a898722ace..f7708590583e 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -6,6 +6,7 @@ config DRM_MSM depends on COMMON_CLK depends on DRM depends on DRM_DISPLAY_DP_AUX_BUS + depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on IOMMU_SUPPORT depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n @@ -16,7 +17,6 @@ config DRM_MSM select IOMMU_IO_PGTABLE select QCOM_MDT_LOADER if ARCH_QCOM select REGULATOR - select DRM_DISPLAY_DP_HELPER select DRM_EXEC select DRM_KMS_HELPER select DRM_PANEL diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index 5ac852b816db..7cc305b2826d 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -2,12 +2,12 @@ config DRM_NOUVEAU tristate "Nouveau (NVIDIA) cards" depends on DRM + depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on PCI depends on MMU select IOMMU_API select FW_LOADER - select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDMI_HELPER select DRM_KMS_HELPER select DRM_TTM diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index 01235397c493..154f5bf82980 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -534,10 +534,10 @@ config DRM_PANEL_RAYDIUM_RM68200 config DRM_PANEL_RAYDIUM_RM692E5 tristate "Raydium RM692E5-based DSI panel" depends on BACKLIGHT_CLASS_DEVICE + depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on DRM_MIPI_DSI depends on OF - select DRM_DISPLAY_DP_HELPER help Say Y here if you want to enable support for Raydium RM692E5-based display panels, such as the one found in the Fairphone 5 smartphone. @@ -561,10 +561,10 @@ config DRM_PANEL_SAMSUNG_ATNA33XC20 tristate "Samsung ATNA33XC20 eDP panel" depends on BACKLIGHT_CLASS_DEVICE depends on DRM_DISPLAY_DP_AUX_BUS + depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on OF depends on PM - select DRM_DISPLAY_DP_HELPER help DRM panel driver for the Samsung ATNA33XC20 panel. This panel can't be handled by the DRM_PANEL_SIMPLE driver because its power @@ -801,11 +801,11 @@ config DRM_PANEL_EDP tristate "support for simple Embedded DisplayPort panels" depends on BACKLIGHT_CLASS_DEVICE depends on DRM_DISPLAY_DP_AUX_BUS + depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on OF depends on PM select VIDEOMODE_HELPERS - select DRM_DISPLAY_DP_HELPER select DRM_KMS_HELPER help DRM panel driver for dumb eDP panels that need at most a regulator and @@ -880,10 +880,10 @@ config DRM_PANEL_TRULY_NT35597_WQXGA config DRM_PANEL_VISIONOX_R66451 tristate "Visionox R66451" depends on BACKLIGHT_CLASS_DEVICE + depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on DRM_MIPI_DSI depends on OF - select DRM_DISPLAY_DP_HELPER help Say Y here if you want to enable support for Visionox R66451 1080x2340 AMOLED DSI panel. diff --git a/drivers/gpu/drm/radeon/Kconfig b/drivers/gpu/drm/radeon/Kconfig index 07d330450f05..18c867219a70 100644 --- a/drivers/gpu/drm/radeon/Kconfig +++ b/drivers/gpu/drm/radeon/Kconfig @@ -4,11 +4,11 @@ config DRM_RADEON tristate "ATI Radeon" depends on AGP || !AGP depends on DRM + depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on PCI depends on MMU select FW_LOADER - select DRM_DISPLAY_DP_HELPER select DRM_KMS_HELPER select DRM_SUBALLOC_HELPER select DRM_TTM diff --git a/drivers/gpu/drm/rockchip/Kconfig b/drivers/gpu/drm/rockchip/Kconfig index b72c0bbf346d..4b49a14758fe 100644 --- a/drivers/gpu/drm/rockchip/Kconfig +++ b/drivers/gpu/drm/rockchip/Kconfig @@ -35,9 +35,9 @@ config ROCKCHIP_VOP2 config ROCKCHIP_ANALOGIX_DP bool "Rockchip specific extensions for Analogix DP driver" + depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on ROCKCHIP_VOP - select DRM_DISPLAY_DP_HELPER help This selects support for Rockchip SoC specific extensions for the Analogix Core DP driver. If you want to enable DP @@ -45,9 +45,9 @@ config ROCKCHIP_ANALOGIX_DP config ROCKCHIP_CDN_DP bool "Rockchip cdn DP" + depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on EXTCON=y || (EXTCON=m && DRM_ROCKCHIP=m) - select DRM_DISPLAY_DP_HELPER help This selects support for Rockchip SoC specific extensions for the cdn DP driver. If you want to enable Dp on diff --git a/drivers/gpu/drm/tegra/Kconfig b/drivers/gpu/drm/tegra/Kconfig index e0385d175ec6..bb6e35261f11 100644 --- a/drivers/gpu/drm/tegra/Kconfig +++ b/drivers/gpu/drm/tegra/Kconfig @@ -5,9 +5,9 @@ config DRM_TEGRA depends on COMMON_CLK depends on DRM depends on DRM_DISPLAY_DP_AUX_BUS + depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on OF - select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDMI_HELPER select DRM_KMS_HELPER select DRM_MIPI_DSI diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index be29e5cd5215..02da2faf5ae3 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -3,6 +3,7 @@ config DRM_XE tristate "Intel Xe Graphics" depends on (m || (y && KUNIT=y)) depends on DRM + depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on MMU depends on PCI @@ -17,7 +18,6 @@ config DRM_XE select DRM_KUNIT_TEST_HELPERS if DRM_XE_KUNIT_TEST != n select DRM_PANEL select DRM_SUBALLOC_HELPER - select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDCP_HELPER select DRM_DISPLAY_HDMI_HELPER select DRM_MIPI_DSI diff --git a/drivers/gpu/drm/xlnx/Kconfig b/drivers/gpu/drm/xlnx/Kconfig index 7a14a8c2e7be..41d753b14ccd 100644 --- a/drivers/gpu/drm/xlnx/Kconfig +++ b/drivers/gpu/drm/xlnx/Kconfig @@ -4,12 +4,12 @@ config DRM_ZYNQMP_DPSUB depends on COMMON_CLK depends on DMADEVICES depends on DRM + depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on OF depends on PHY_XILINX_ZYNQMP depends on XILINX_ZYNQMP_DPDMA select DMA_ENGINE - select DRM_DISPLAY_DP_HELPER select DRM_GEM_DMA_HELPER select DRM_KMS_HELPER select GENERIC_PHY -- cgit v1.2.3 From 3166e7e6d935caaef07605a5c90773fbf9ffeaf4 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 27 Mar 2024 11:57:06 +0100 Subject: drm: Switch DRM_DISPLAY_HDCP_HELPER to depends on Most of our helpers have relied on being selected so far through Kconfig, but that creates issues when we have multiple layers of helpers with some depending on others. Indeed, select doesn't select a dependency's dependencies, and thus isn't super intuitive. Depends on however doesn't have that limitation, so we can just switch all the drivers that were selecting DRM_DISPLAY_HDCP_HELPER to depend on it. Reviewed-by: Jani Nikula Link: https://lore.kernel.org/r/20240327-kms-kconfig-helpers-v3-11-eafee11b84b3@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/amd/amdgpu/Kconfig | 2 +- drivers/gpu/drm/bridge/Kconfig | 2 +- drivers/gpu/drm/bridge/analogix/Kconfig | 2 +- drivers/gpu/drm/bridge/cadence/Kconfig | 2 +- drivers/gpu/drm/display/Kconfig | 1 + drivers/gpu/drm/i915/Kconfig | 2 +- drivers/gpu/drm/xe/Kconfig | 2 +- 7 files changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index ba09121e7deb..1662dc49f18e 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -4,13 +4,13 @@ config DRM_AMDGPU tristate "AMD GPU" depends on DRM depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HDCP_HELPER depends on DRM_DISPLAY_HELPER depends on MMU depends on PCI depends on !UML select FW_LOADER select DRM_DISPLAY_HDMI_HELPER - select DRM_DISPLAY_HDCP_HELPER select DRM_KMS_HELPER select DRM_SCHED select DRM_TTM diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index 54d45dd31b7f..6e6ec300ab16 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -94,9 +94,9 @@ config DRM_ITE_IT6505 tristate "ITE IT6505 DisplayPort bridge" depends on DRM_DISPLAY_DP_AUX_BUS depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HDCP_HELPER depends on DRM_DISPLAY_HELPER depends on OF - select DRM_DISPLAY_HDCP_HELPER select DRM_KMS_HELPER select DRM_DP_HELPER select EXTCON diff --git a/drivers/gpu/drm/bridge/analogix/Kconfig b/drivers/gpu/drm/bridge/analogix/Kconfig index 9659df6718de..12bfea53bf24 100644 --- a/drivers/gpu/drm/bridge/analogix/Kconfig +++ b/drivers/gpu/drm/bridge/analogix/Kconfig @@ -35,9 +35,9 @@ config DRM_ANALOGIX_ANX7625 depends on DRM depends on DRM_DISPLAY_DP_AUX_BUS depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HDCP_HELPER depends on DRM_DISPLAY_HELPER depends on OF - select DRM_DISPLAY_HDCP_HELPER select DRM_MIPI_DSI help ANX7625 is an ultra-low power 4K mobile HD transmitter diff --git a/drivers/gpu/drm/bridge/cadence/Kconfig b/drivers/gpu/drm/bridge/cadence/Kconfig index 3480fd4d0a5f..7817f6f56607 100644 --- a/drivers/gpu/drm/bridge/cadence/Kconfig +++ b/drivers/gpu/drm/bridge/cadence/Kconfig @@ -24,9 +24,9 @@ endif config DRM_CDNS_MHDP8546 tristate "Cadence DPI/DP bridge" depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HDCP_HELPER depends on DRM_DISPLAY_HELPER depends on OF - select DRM_DISPLAY_HDCP_HELPER select DRM_KMS_HELPER select DRM_PANEL_BRIDGE help diff --git a/drivers/gpu/drm/display/Kconfig b/drivers/gpu/drm/display/Kconfig index 9801f47a3704..d65f1a37c08c 100644 --- a/drivers/gpu/drm/display/Kconfig +++ b/drivers/gpu/drm/display/Kconfig @@ -67,6 +67,7 @@ config DRM_DISPLAY_DP_TUNNEL_STATE_DEBUG config DRM_DISPLAY_HDCP_HELPER bool "DRM HDCD Helpers" depends on DRM_DISPLAY_HELPER + default y help DRM display helpers for HDCP. diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index dbde4e29d93a..87ef8c4d72a5 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -3,6 +3,7 @@ config DRM_I915 tristate "Intel 8xx/9xx/G3x/G4x/HD Graphics" depends on DRM depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HDCP_HELPER depends on DRM_DISPLAY_HELPER depends on X86 && PCI depends on !PREEMPT_RT @@ -12,7 +13,6 @@ config DRM_I915 # the shmem_readpage() which depends upon tmpfs select SHMEM select TMPFS - select DRM_DISPLAY_HDCP_HELPER select DRM_DISPLAY_HDMI_HELPER select DRM_KMS_HELPER select DRM_PANEL diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 02da2faf5ae3..1fa8ef75823c 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -4,6 +4,7 @@ config DRM_XE depends on (m || (y && KUNIT=y)) depends on DRM depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HDCP_HELPER depends on DRM_DISPLAY_HELPER depends on MMU depends on PCI @@ -18,7 +19,6 @@ config DRM_XE select DRM_KUNIT_TEST_HELPERS if DRM_XE_KUNIT_TEST != n select DRM_PANEL select DRM_SUBALLOC_HELPER - select DRM_DISPLAY_HDCP_HELPER select DRM_DISPLAY_HDMI_HELPER select DRM_MIPI_DSI select RELAY -- cgit v1.2.3 From f6d2dc03fa8546b284dd8c1af027d9fac5725921 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 27 Mar 2024 11:57:07 +0100 Subject: drm: Switch DRM_DISPLAY_HDMI_HELPER to depends on Most of our helpers have relied on being selected so far through Kconfig, but that creates issues when we have multiple layers of helpers with some depending on others. Indeed, select doesn't select a dependency's dependencies, and thus isn't super intuitive. Depends on however doesn't have that limitation, so we can just switch all the drivers that were selecting DRM_DISPLAY_HDMI_HELPER to depend on it. Reviewed-by: Jani Nikula Link: https://lore.kernel.org/r/20240327-kms-kconfig-helpers-v3-12-eafee11b84b3@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/amd/amdgpu/Kconfig | 2 +- drivers/gpu/drm/bridge/synopsys/Kconfig | 2 +- drivers/gpu/drm/display/Kconfig | 1 + drivers/gpu/drm/i915/Kconfig | 2 +- drivers/gpu/drm/nouveau/Kconfig | 2 +- drivers/gpu/drm/tegra/Kconfig | 2 +- drivers/gpu/drm/vc4/Kconfig | 2 +- drivers/gpu/drm/xe/Kconfig | 2 +- 8 files changed, 8 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 1662dc49f18e..b0365cc1374e 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -5,12 +5,12 @@ config DRM_AMDGPU depends on DRM depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HDCP_HELPER + depends on DRM_DISPLAY_HDMI_HELPER depends on DRM_DISPLAY_HELPER depends on MMU depends on PCI depends on !UML select FW_LOADER - select DRM_DISPLAY_HDMI_HELPER select DRM_KMS_HELPER select DRM_SCHED select DRM_TTM diff --git a/drivers/gpu/drm/bridge/synopsys/Kconfig b/drivers/gpu/drm/bridge/synopsys/Kconfig index f366ece47146..387f5bd86089 100644 --- a/drivers/gpu/drm/bridge/synopsys/Kconfig +++ b/drivers/gpu/drm/bridge/synopsys/Kconfig @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_DW_HDMI tristate + depends on DRM_DISPLAY_HDMI_HELPER depends on DRM_DISPLAY_HELPER - select DRM_DISPLAY_HDMI_HELPER select DRM_KMS_HELPER select REGMAP_MMIO select CEC_CORE if CEC_NOTIFIER diff --git a/drivers/gpu/drm/display/Kconfig b/drivers/gpu/drm/display/Kconfig index d65f1a37c08c..01f2a231aa5f 100644 --- a/drivers/gpu/drm/display/Kconfig +++ b/drivers/gpu/drm/display/Kconfig @@ -74,5 +74,6 @@ config DRM_DISPLAY_HDCP_HELPER config DRM_DISPLAY_HDMI_HELPER bool "DRM HDMI Helpers" depends on DRM_DISPLAY_HELPER + default y help DRM display helpers for HDMI. diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 87ef8c4d72a5..4f0d18a16b0f 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -4,6 +4,7 @@ config DRM_I915 depends on DRM depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HDCP_HELPER + depends on DRM_DISPLAY_HDMI_HELPER depends on DRM_DISPLAY_HELPER depends on X86 && PCI depends on !PREEMPT_RT @@ -13,7 +14,6 @@ config DRM_I915 # the shmem_readpage() which depends upon tmpfs select SHMEM select TMPFS - select DRM_DISPLAY_HDMI_HELPER select DRM_KMS_HELPER select DRM_PANEL select DRM_MIPI_DSI diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index 7cc305b2826d..4c10b400658c 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -3,12 +3,12 @@ config DRM_NOUVEAU tristate "Nouveau (NVIDIA) cards" depends on DRM depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HDMI_HELPER depends on DRM_DISPLAY_HELPER depends on PCI depends on MMU select IOMMU_API select FW_LOADER - select DRM_DISPLAY_HDMI_HELPER select DRM_KMS_HELPER select DRM_TTM select DRM_TTM_HELPER diff --git a/drivers/gpu/drm/tegra/Kconfig b/drivers/gpu/drm/tegra/Kconfig index bb6e35261f11..6974caa99ece 100644 --- a/drivers/gpu/drm/tegra/Kconfig +++ b/drivers/gpu/drm/tegra/Kconfig @@ -6,9 +6,9 @@ config DRM_TEGRA depends on DRM depends on DRM_DISPLAY_DP_AUX_BUS depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HDMI_HELPER depends on DRM_DISPLAY_HELPER depends on OF - select DRM_DISPLAY_HDMI_HELPER select DRM_KMS_HELPER select DRM_MIPI_DSI select DRM_PANEL diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig index 98772a6b5bf0..4801f8b64d3d 100644 --- a/drivers/gpu/drm/vc4/Kconfig +++ b/drivers/gpu/drm/vc4/Kconfig @@ -4,13 +4,13 @@ config DRM_VC4 depends on ARCH_BCM || ARCH_BCM2835 || COMPILE_TEST depends on COMMON_CLK depends on DRM + depends on DRM_DISPLAY_HDMI_HELPER depends on DRM_DISPLAY_HELPER depends on PM # Make sure not 'y' when RASPBERRYPI_FIRMWARE is 'm'. This can only # happen when COMPILE_TEST=y, hence the added !RASPBERRYPI_FIRMWARE. depends on RASPBERRYPI_FIRMWARE || (COMPILE_TEST && !RASPBERRYPI_FIRMWARE) depends on SND && SND_SOC - select DRM_DISPLAY_HDMI_HELPER select DRM_KMS_HELPER select DRM_GEM_DMA_HELPER select DRM_PANEL_BRIDGE diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 1fa8ef75823c..bfa0e9d4bd64 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -5,6 +5,7 @@ config DRM_XE depends on DRM depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HDCP_HELPER + depends on DRM_DISPLAY_HDMI_HELPER depends on DRM_DISPLAY_HELPER depends on MMU depends on PCI @@ -19,7 +20,6 @@ config DRM_XE select DRM_KUNIT_TEST_HELPERS if DRM_XE_KUNIT_TEST != n select DRM_PANEL select DRM_SUBALLOC_HELPER - select DRM_DISPLAY_HDMI_HELPER select DRM_MIPI_DSI select RELAY select IRQ_WORK -- cgit v1.2.3 From cd409dbc6986bc5f9bfacbbc968886531d47d5ce Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 21 Mar 2024 17:46:36 +0530 Subject: drm/amdgpu: Refine IB schedule error logging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Downgrade to debug information when IBs are skipped. Also, use dev_* to identify the device. Signed-off-by: Lijo Lazar Reviewed-by: Christian König Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 4b3000c21ef2..e4742b65032d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -304,12 +304,15 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) dma_fence_set_error(finished, -ECANCELED); if (finished->error < 0) { - DRM_INFO("Skip scheduling IBs!\n"); + dev_dbg(adev->dev, "Skip scheduling IBs in ring(%s)", + ring->name); } else { r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job, &fence); if (r) - DRM_ERROR("Error scheduling IBs (%d)\n", r); + dev_err(adev->dev, + "Error scheduling IBs (%d) in ring(%s)", r, + ring->name); } job->job_run_counter++; -- cgit v1.2.3 From 108ab31be9d5c6efdcf522577d6f3da2a9b34056 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Mon, 25 Mar 2024 13:24:31 +0800 Subject: drm/amdgpu/umsch: reinitialize write pointer in hw init Otherwise the old one will be used during GPU reset. That's not expected. Signed-off-by: Lang Yu Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c index 84368cf1e175..bd57896ab85d 100644 --- a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c @@ -225,6 +225,8 @@ static int umsch_mm_v4_0_ring_start(struct amdgpu_umsch_mm *umsch) WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_SIZE, ring->ring_size); + ring->wptr = 0; + data = RREG32_SOC15(VCN, 0, regVCN_RB_ENABLE); data &= ~(VCN_RB_ENABLE__AUDIO_RB_EN_MASK); WREG32_SOC15(VCN, 0, regVCN_RB_ENABLE, data); -- cgit v1.2.3 From 6a0e1bafd70fe5c5d8dcf7d2ddff9377701343c3 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 26 Mar 2024 22:59:17 +0530 Subject: drm/amdgpu: add IP's FW information to devcoredump Add FW information of all the IP's in the devcoredump. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c | 129 +++++++++++++++++++++++ 1 file changed, 129 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c index 44c5da8aa9ce..1129e5e5fb42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c @@ -25,6 +25,7 @@ #include #include #include "amdgpu_dev_coredump.h" +#include "atom.h" #ifndef CONFIG_DEV_COREDUMP void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, @@ -69,6 +70,130 @@ const char *hw_ip_names[MAX_HWIP] = { [PCIE_HWIP] = "PCIE", }; +static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev, + struct drm_printer *p) +{ + uint32_t version; + uint32_t feature; + uint8_t smu_program, smu_major, smu_minor, smu_debug; + struct atom_context *ctx = adev->mode_info.atom_context; + + drm_printf(p, "VCE feature version: %u, fw version: 0x%08x\n", + adev->vce.fb_version, adev->vce.fw_version); + drm_printf(p, "UVD feature version: %u, fw version: 0x%08x\n", 0, + adev->uvd.fw_version); + drm_printf(p, "GMC feature version: %u, fw version: 0x%08x\n", 0, + adev->gmc.fw_version); + drm_printf(p, "ME feature version: %u, fw version: 0x%08x\n", + adev->gfx.me_feature_version, adev->gfx.me_fw_version); + drm_printf(p, "PFP feature version: %u, fw version: 0x%08x\n", + adev->gfx.pfp_feature_version, adev->gfx.pfp_fw_version); + drm_printf(p, "CE feature version: %u, fw version: 0x%08x\n", + adev->gfx.ce_feature_version, adev->gfx.ce_fw_version); + drm_printf(p, "RLC feature version: %u, fw version: 0x%08x\n", + adev->gfx.rlc_feature_version, adev->gfx.rlc_fw_version); + + drm_printf(p, "RLC SRLC feature version: %u, fw version: 0x%08x\n", + adev->gfx.rlc_srlc_feature_version, + adev->gfx.rlc_srlc_fw_version); + drm_printf(p, "RLC SRLG feature version: %u, fw version: 0x%08x\n", + adev->gfx.rlc_srlg_feature_version, + adev->gfx.rlc_srlg_fw_version); + drm_printf(p, "RLC SRLS feature version: %u, fw version: 0x%08x\n", + adev->gfx.rlc_srls_feature_version, + adev->gfx.rlc_srls_fw_version); + drm_printf(p, "RLCP feature version: %u, fw version: 0x%08x\n", + adev->gfx.rlcp_ucode_feature_version, + adev->gfx.rlcp_ucode_version); + drm_printf(p, "RLCV feature version: %u, fw version: 0x%08x\n", + adev->gfx.rlcv_ucode_feature_version, + adev->gfx.rlcv_ucode_version); + drm_printf(p, "MEC feature version: %u, fw version: 0x%08x\n", + adev->gfx.mec_feature_version, adev->gfx.mec_fw_version); + + if (adev->gfx.mec2_fw) + drm_printf(p, "MEC2 feature version: %u, fw version: 0x%08x\n", + adev->gfx.mec2_feature_version, + adev->gfx.mec2_fw_version); + + drm_printf(p, "IMU feature version: %u, fw version: 0x%08x\n", 0, + adev->gfx.imu_fw_version); + drm_printf(p, "PSP SOS feature version: %u, fw version: 0x%08x\n", + adev->psp.sos.feature_version, adev->psp.sos.fw_version); + drm_printf(p, "PSP ASD feature version: %u, fw version: 0x%08x\n", + adev->psp.asd_context.bin_desc.feature_version, + adev->psp.asd_context.bin_desc.fw_version); + + drm_printf(p, "TA XGMI feature version: 0x%08x, fw version: 0x%08x\n", + adev->psp.xgmi_context.context.bin_desc.feature_version, + adev->psp.xgmi_context.context.bin_desc.fw_version); + drm_printf(p, "TA RAS feature version: 0x%08x, fw version: 0x%08x\n", + adev->psp.ras_context.context.bin_desc.feature_version, + adev->psp.ras_context.context.bin_desc.fw_version); + drm_printf(p, "TA HDCP feature version: 0x%08x, fw version: 0x%08x\n", + adev->psp.hdcp_context.context.bin_desc.feature_version, + adev->psp.hdcp_context.context.bin_desc.fw_version); + drm_printf(p, "TA DTM feature version: 0x%08x, fw version: 0x%08x\n", + adev->psp.dtm_context.context.bin_desc.feature_version, + adev->psp.dtm_context.context.bin_desc.fw_version); + drm_printf(p, "TA RAP feature version: 0x%08x, fw version: 0x%08x\n", + adev->psp.rap_context.context.bin_desc.feature_version, + adev->psp.rap_context.context.bin_desc.fw_version); + drm_printf(p, + "TA SECURE DISPLAY feature version: 0x%08x, fw version: 0x%08x\n", + adev->psp.securedisplay_context.context.bin_desc.feature_version, + adev->psp.securedisplay_context.context.bin_desc.fw_version); + + /* SMC firmware */ + version = adev->pm.fw_version; + + smu_program = (version >> 24) & 0xff; + smu_major = (version >> 16) & 0xff; + smu_minor = (version >> 8) & 0xff; + smu_debug = (version >> 0) & 0xff; + drm_printf(p, + "SMC feature version: %u, program: %d, fw version: 0x%08x (%d.%d.%d)\n", + 0, smu_program, version, smu_major, smu_minor, smu_debug); + + /* SDMA firmware */ + for (int i = 0; i < adev->sdma.num_instances; i++) { + drm_printf(p, + "SDMA%d feature version: %u, firmware version: 0x%08x\n", + i, adev->sdma.instance[i].feature_version, + adev->sdma.instance[i].fw_version); + } + + drm_printf(p, "VCN feature version: %u, fw version: 0x%08x\n", 0, + adev->vcn.fw_version); + drm_printf(p, "DMCU feature version: %u, fw version: 0x%08x\n", 0, + adev->dm.dmcu_fw_version); + drm_printf(p, "DMCUB feature version: %u, fw version: 0x%08x\n", 0, + adev->dm.dmcub_fw_version); + drm_printf(p, "PSP TOC feature version: %u, fw version: 0x%08x\n", + adev->psp.toc.feature_version, adev->psp.toc.fw_version); + + version = adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK; + feature = (adev->mes.kiq_version & AMDGPU_MES_FEAT_VERSION_MASK) >> + AMDGPU_MES_FEAT_VERSION_SHIFT; + drm_printf(p, "MES_KIQ feature version: %u, fw version: 0x%08x\n", + feature, version); + + version = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK; + feature = (adev->mes.sched_version & AMDGPU_MES_FEAT_VERSION_MASK) >> + AMDGPU_MES_FEAT_VERSION_SHIFT; + drm_printf(p, "MES feature version: %u, fw version: 0x%08x\n", feature, + version); + + drm_printf(p, "VPE feature version: %u, fw version: 0x%08x\n", + adev->vpe.feature_version, adev->vpe.fw_version); + + drm_printf(p, "\nVBIOS Information\n"); + drm_printf(p, "name: %s\n", ctx->name); + drm_printf(p, "pn %s\n", ctx->vbios_pn); + drm_printf(p, "version: %s\n", ctx->vbios_ver_str); + drm_printf(p, "date: %s\n", ctx->date); +} + static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, void *data, size_t datalen) @@ -118,6 +243,10 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, } } + /* IP firmware information */ + drm_printf(&p, "\nIP Firmwares\n"); + amdgpu_devcoredump_fw_info(coredump->adev, &p); + if (coredump->ring) { drm_printf(&p, "\nRing timed out details\n"); drm_printf(&p, "IP Type: %d Ring Name: %s\n", -- cgit v1.2.3 From df3c7dc5c58b1f85033d2cd9a121b27844700ca2 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 14 Feb 2024 17:55:54 +0530 Subject: drm/amdgpu: Reset dGPU if suspend got aborted For SOC21 ASICs, there is an issue in re-enabling PM features if a suspend got aborted. In such cases, reset the device during resume phase. This is a workaround till a proper solution is finalized. Signed-off-by: Lijo Lazar Reviewed-by: Alex Deucher Reviewed-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/soc21.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 8526282f4da1..abe319b0f063 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -867,10 +867,35 @@ static int soc21_common_suspend(void *handle) return soc21_common_hw_fini(adev); } +static bool soc21_need_reset_on_resume(struct amdgpu_device *adev) +{ + u32 sol_reg1, sol_reg2; + + /* Will reset for the following suspend abort cases. + * 1) Only reset dGPU side. + * 2) S3 suspend got aborted and TOS is active. + */ + if (!(adev->flags & AMD_IS_APU) && adev->in_s3 && + !adev->suspend_complete) { + sol_reg1 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81); + msleep(100); + sol_reg2 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81); + + return (sol_reg1 != sol_reg2); + } + + return false; +} + static int soc21_common_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (soc21_need_reset_on_resume(adev)) { + dev_info(adev->dev, "S3 suspend aborted, resetting..."); + soc21_asic_reset(adev); + } + return soc21_common_hw_init(adev); } -- cgit v1.2.3 From 81d96e8b5a85e3ebb85342525aab02e89bec72dc Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 28 Mar 2024 13:46:14 +0800 Subject: drm/amdgpu: refine function signature of amdgpu_aca_get_error_data() refine function signature of amdgpu_aca_get_error_data(); Signed-off-by: Yang Wang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 8 +++----- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h | 6 +++++- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index c3242dbf8355..c02634a124dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -525,10 +525,9 @@ static bool aca_handle_is_valid(struct aca_handle *handle) } int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, - enum aca_error_type type, void *data, void *qctx) + enum aca_error_type type, struct ras_err_data *err_data, + struct ras_query_context *qctx) { - struct ras_err_data *err_data = (struct ras_err_data *)data; - if (!handle || !err_data) return -EINVAL; @@ -538,8 +537,7 @@ int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *han if (!(BIT(type) & handle->mask)) return 0; - return __aca_get_error_data(adev, handle, type, err_data, - (struct ras_query_context *)qctx); + return __aca_get_error_data(adev, handle, type, err_data, qctx); } static void aca_error_init(struct aca_error *aerr, enum aca_error_type type) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h index 247968d6a925..3765843ea648 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h @@ -26,6 +26,9 @@ #include +struct ras_err_data; +struct ras_query_context; + #define ACA_MAX_REGS_COUNT (16) #define ACA_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> l) @@ -198,7 +201,8 @@ int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle, const char *name, const struct aca_info *aca_info, void *data); void amdgpu_aca_remove_handle(struct aca_handle *handle); int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, - enum aca_error_type type, void *data, void *qctx); + enum aca_error_type type, struct ras_err_data *err_data, + struct ras_query_context *qctx); int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en); void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root); int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info, -- cgit v1.2.3 From e58acb7613aa5e4be59c05226373a40dc7c25dc8 Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Fri, 22 Mar 2024 12:25:16 -0400 Subject: drm/amdgpu : Add mes_log_enable to control mes log feature The MES log might slow down the performance for extra step of log the data, disable it by default and introduce a parameter can enable it when necessary Signed-off-by: shaoyunl Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 ++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 5 ++++- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 7 +++++-- 4 files changed, 20 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 9c62552bec34..b3b84647207e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -210,6 +210,7 @@ extern int amdgpu_async_gfx_ring; extern int amdgpu_mcbp; extern int amdgpu_discovery; extern int amdgpu_mes; +extern int amdgpu_mes_log_enable; extern int amdgpu_mes_kiq; extern int amdgpu_noretry; extern int amdgpu_force_asic_type; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 80b9642f2bc4..e4277298cf1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -195,6 +195,7 @@ int amdgpu_async_gfx_ring = 1; int amdgpu_mcbp = -1; int amdgpu_discovery = -1; int amdgpu_mes; +int amdgpu_mes_log_enable = 0; int amdgpu_mes_kiq; int amdgpu_noretry = -1; int amdgpu_force_asic_type = -1; @@ -667,6 +668,15 @@ MODULE_PARM_DESC(mes, "Enable Micro Engine Scheduler (0 = disabled (default), 1 = enabled)"); module_param_named(mes, amdgpu_mes, int, 0444); +/** + * DOC: mes_log_enable (int) + * Enable Micro Engine Scheduler log. This is used to enable/disable MES internal log. + * (0 = disabled (default), 1 = enabled) + */ +MODULE_PARM_DESC(mes_log_enable, + "Enable Micro Engine Scheduler log (0 = disabled (default), 1 = enabled)"); +module_param_named(mes_log_enable, amdgpu_mes_log_enable, int, 0444); + /** * DOC: mes_kiq (int) * Enable Micro Engine Scheduler KIQ. This is a new engine pipe for kiq. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index e4fcc950e542..281efcd3ba8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -100,6 +100,9 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev) { int r; + if (!amdgpu_mes_log_enable) + return 0; + r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->mes.event_log_gpu_obj, @@ -1561,7 +1564,7 @@ void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev) #if defined(CONFIG_DEBUG_FS) struct drm_minor *minor = adev_to_drm(adev)->primary; struct dentry *root = minor->debugfs_root; - if (adev->enable_mes) + if (adev->enable_mes && amdgpu_mes_log_enable) debugfs_create_file("amdgpu_mes_event_log", 0444, root, adev, &amdgpu_debugfs_mes_event_log_fops); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 072c478665ad..63f281a9984d 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -411,8 +411,11 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.enable_reg_active_poll = 1; mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; mes_set_hw_res_pkt.oversubscription_timer = 50; - mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; - mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr; + if (amdgpu_mes_log_enable) { + mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; + mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = + mes->event_log_gpu_addr; + } return mes_v11_0_submit_pkt_and_poll_completion(mes, &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), -- cgit v1.2.3 From 8966c3167402576d8ae4ab3914052e5e412e659b Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Fri, 22 Mar 2024 12:44:55 -0400 Subject: drm/amdgpu : Increase the mes log buffer size as per new MES FW version From MES version 0x54, the log entry increased and require the log buffer size to be increased. The 16k is maximum size agreed Signed-off-by: shaoyunl Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 5 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 281efcd3ba8e..7d215f280aab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -103,7 +103,7 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev) if (!amdgpu_mes_log_enable) return 0; - r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, + r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_LOG_BUFFER_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->mes.event_log_gpu_obj, &adev->mes.event_log_gpu_addr, @@ -1548,12 +1548,11 @@ static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr); seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4, - mem, PAGE_SIZE, false); + mem, AMDGPU_MES_LOG_BUFFER_SIZE, false); return 0; } - DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 7d4f93fea937..4c8fc3117ef8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -52,6 +52,7 @@ enum amdgpu_mes_priority_level { #define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */ #define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */ +#define AMDGPU_MES_LOG_BUFFER_SIZE 0x4000 /* Maximu log buffer size for MES */ struct amdgpu_mes_funcs; -- cgit v1.2.3 From f5a3507c4abf662cf108e3963565d2855aac88ce Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Tue, 12 Dec 2023 17:17:05 +0800 Subject: drm/amdgpu: add smu 14.0.1 discovery support This patch to add smu 14.0.1 support Reviewed-by: Alex Deucher Signed-off-by: Yifan Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 362da1413e1e..07c5fca06178 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1905,6 +1905,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block); break; case IP_VERSION(14, 0, 0): + case IP_VERSION(14, 0, 1): amdgpu_device_ip_block_add(adev, &smu_v14_0_ip_block); break; default: -- cgit v1.2.3 From 029faefb7302f1079173410697b0e14d2e56e19a Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Thu, 28 Mar 2024 18:22:10 +0800 Subject: drm/amdgpu: implement IRQ_STATE_ENABLE for SDMA v4.4.2 SDMA_CNTL is not set in some cases, driver configures it by itself. v2: simplify code Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 71c2f50530cb..f8e2cd514493 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -1602,19 +1602,9 @@ static int sdma_v4_4_2_set_ecc_irq_state(struct amdgpu_device *adev, u32 sdma_cntl; sdma_cntl = RREG32_SDMA(type, regSDMA_CNTL); - switch (state) { - case AMDGPU_IRQ_STATE_DISABLE: - sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, - DRAM_ECC_INT_ENABLE, 0); - WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl); - break; - /* sdma ecc interrupt is enabled by default - * driver doesn't need to do anything to - * enable the interrupt */ - case AMDGPU_IRQ_STATE_ENABLE: - default: - break; - } + sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, DRAM_ECC_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl); return 0; } -- cgit v1.2.3 From 0453e5f2202ed77e470176f97c5d1436476cf63e Mon Sep 17 00:00:00 2001 From: ZhenGuo Yin Date: Fri, 29 Mar 2024 15:41:58 +0800 Subject: drm/amdgpu: select HDP ref/mask according to gfx ring pipe Use correct ref/mask for differnent gfx ring pipe. This should fix the gfx hang issue after enabling gfx pipe1. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2117 Reviewed-by: Alex Deucher Signed-off-by: ZhenGuo Yin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index d524f1a353ed..a01320e72653 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -8317,7 +8317,7 @@ static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) } reg_mem_engine = 0; } else { - ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; + ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe; reg_mem_engine = 1; /* pfp */ } -- cgit v1.2.3 From b7a1a0ef12b81957584fef7b61e2d5ec049c7209 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Mon, 6 Jun 2022 13:59:13 +0530 Subject: drm/amd/amdgpu: add pipe1 hardware support Enable pipe1 support starting from SIENNA CICHLID asic Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2117 Reviewed-by: Alex Deucher Signed-off-by: Arunpravin Paneer Selvam Signed-off-by: ZhenGuo Yin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index a01320e72653..d5b924222903 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4518,7 +4518,7 @@ static int gfx_v10_0_sw_init(void *handle) case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 7): adev->gfx.me.num_me = 1; - adev->gfx.me.num_pipe_per_me = 1; + adev->gfx.me.num_pipe_per_me = 2; adev->gfx.me.num_queue_per_pipe = 1; adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; -- cgit v1.2.3 From 3e2dacca540643ee35e3deb1d60873e7138a6af3 Mon Sep 17 00:00:00 2001 From: Danijel Slivka Date: Wed, 27 Mar 2024 23:56:23 +0100 Subject: drm/amdgpu: use vm_update_mode=0 as default in sriov for gfx10.3 onwards Apply this rule to all newer asics in sriov case. For asic with VF MMIO access protection avoid using CPU for VM table updates. CPU pagetable updates have issues with HDP flush as VF MMIO access protection blocks write to BIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL register during sriov runtime. Moved the check to amdgpu_device_init() to ensure it is done after amdgpu_device_ip_early_init() where the IP versions are discovered. Signed-off-by: Danijel Slivka Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 +++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 6 ------ 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 12dc71a6b5db..66a5979e075d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4072,6 +4072,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* Enable TMZ based on IP_VERSION */ amdgpu_gmc_tmz_set(adev); + if (amdgpu_sriov_vf(adev) && + amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0)) + /* VF MMIO access (except mailbox range) from CPU + * will be blocked during sriov runtime + */ + adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT; + amdgpu_gmc_noretry_set(adev); /* Need to get xgmi info early to decide the reset behavior*/ if (adev->gmc.xgmi.supported) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index aed60aaf1a55..6f01de220c44 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -724,12 +724,6 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; } - if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID) - /* VF MMIO access (except mailbox range) from CPU - * will be blocked during sriov runtime - */ - adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT; - /* we have the ability to check now */ if (amdgpu_sriov_vf(adev)) { switch (adev->asic_type) { -- cgit v1.2.3 From 7c1d9e10e6643121f1ffe9c0903467cc8682eba8 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Thu, 28 Mar 2024 11:00:50 +0800 Subject: drm/amd/pm: fix the high voltage issue after unload fix the high voltage issue after unload on smu 13.0.10 Signed-off-by: Kenneth Feng Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 26 ++++++++++++--------- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 27 ++++++++++++++++++++-- drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 1 + .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 8 ++++++- 4 files changed, 48 insertions(+), 14 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 66a5979e075d..ed34c4893108 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4145,18 +4145,22 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->ip_blocks[i].status.hw = true; } } + } else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) && + !amdgpu_device_has_display_hardware(adev)) { + r = psp_gpu_reset(adev); } else { - tmp = amdgpu_reset_method; - /* It should do a default reset when loading or reloading the driver, - * regardless of the module parameter reset_method. - */ - amdgpu_reset_method = AMD_RESET_METHOD_NONE; - r = amdgpu_asic_reset(adev); - amdgpu_reset_method = tmp; - if (r) { - dev_err(adev->dev, "asic reset on init failed\n"); - goto failed; - } + tmp = amdgpu_reset_method; + /* It should do a default reset when loading or reloading the driver, + * regardless of the module parameter reset_method. + */ + amdgpu_reset_method = AMD_RESET_METHOD_NONE; + r = amdgpu_asic_reset(adev); + amdgpu_reset_method = tmp; + } + + if (r) { + dev_err(adev->dev, "asic reset on init failed\n"); + goto failed; } } diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 246b211b1e85..65333141b1c1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -735,7 +735,7 @@ static int smu_early_init(void *handle) smu->adev = adev; smu->pm_enabled = !!amdgpu_dpm; smu->is_apu = false; - smu->smu_baco.state = SMU_BACO_STATE_EXIT; + smu->smu_baco.state = SMU_BACO_STATE_NONE; smu->smu_baco.platform_support = false; smu->user_dpm_profile.fan_mode = -1; @@ -1966,10 +1966,25 @@ static int smu_smc_hw_cleanup(struct smu_context *smu) return 0; } +static int smu_reset_mp1_state(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + int ret = 0; + + if ((!adev->in_runpm) && (!adev->in_suspend) && + (!amdgpu_in_reset(adev)) && amdgpu_ip_version(adev, MP1_HWIP, 0) == + IP_VERSION(13, 0, 10) && + !amdgpu_device_has_display_hardware(adev)) + ret = smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD); + + return ret; +} + static int smu_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct smu_context *smu = adev->powerplay.pp_handle; + int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; @@ -1987,7 +2002,15 @@ static int smu_hw_fini(void *handle) adev->pm.dpm_enabled = false; - return smu_smc_hw_cleanup(smu); + ret = smu_smc_hw_cleanup(smu); + if (ret) + return ret; + + ret = smu_reset_mp1_state(smu); + if (ret) + return ret; + + return 0; } static void smu_late_fini(void *handle) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index a870bdd49a4e..1fa81575788c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -424,6 +424,7 @@ enum smu_reset_mode { enum smu_baco_state { SMU_BACO_STATE_ENTER = 0, SMU_BACO_STATE_EXIT, + SMU_BACO_STATE_NONE, }; struct smu_baco_context { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 9c03296f92cd..67117ced7c6a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2751,7 +2751,13 @@ static int smu_v13_0_0_set_mp1_state(struct smu_context *smu, switch (mp1_state) { case PP_MP1_STATE_UNLOAD: - ret = smu_cmn_set_mp1_state(smu, mp1_state); + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_PrepareMp1ForUnload, + 0x55, NULL); + + if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT) + ret = smu_v13_0_disable_pmfw_state(smu); + break; default: /* Ignore others */ -- cgit v1.2.3 From b2207dc6989f5c913cca932eb8720b6ad0725d67 Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Wed, 27 Mar 2024 17:26:08 +0800 Subject: drm/amdgpu/pm: Add support for MACO flag checking Add support for MACO flag checking. MACO mode only works if BACO is supported. Signed-off-by: Ma Jun Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 +++++--- drivers/gpu/drm/amd/amdgpu/cik.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/si.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/soc15.c | 10 +++++----- drivers/gpu/drm/amd/amdgpu/vi.c | 8 ++++---- drivers/gpu/drm/amd/include/kgd_pp_interface.h | 2 +- drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 8 ++++---- drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 2 +- drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c | 2 +- drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c | 6 +++--- drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h | 2 +- drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c | 8 ++++---- drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h | 2 +- drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c | 8 ++++---- drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h | 2 +- drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h | 2 +- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 3 ++- drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 2 +- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h | 2 +- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 2 +- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h | 2 +- drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 14 +++++++++----- drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 4 ++-- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 14 +++++++++----- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 4 ++-- drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c | 14 +++++++++----- 27 files changed, 79 insertions(+), 64 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index b3b84647207e..65c17c59c152 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -606,7 +606,7 @@ struct amdgpu_asic_funcs { /* PCIe replay counter */ uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev); /* device supports BACO */ - bool (*supports_baco)(struct amdgpu_device *adev); + int (*supports_baco)(struct amdgpu_device *adev); /* pre asic_init quirks */ void (*pre_asic_init)(struct amdgpu_device *adev); /* enter/exit umd stable pstate */ @@ -1408,7 +1408,7 @@ bool amdgpu_device_supports_atpx(struct drm_device *dev); bool amdgpu_device_supports_px(struct drm_device *dev); bool amdgpu_device_supports_boco(struct drm_device *dev); bool amdgpu_device_supports_smart_shift(struct drm_device *dev); -bool amdgpu_device_supports_baco(struct drm_device *dev); +int amdgpu_device_supports_baco(struct drm_device *dev); bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, struct amdgpu_device *peer_adev); int amdgpu_device_baco_enter(struct drm_device *dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ed34c4893108..f830024cffca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -338,10 +338,12 @@ bool amdgpu_device_supports_boco(struct drm_device *dev) * * @dev: drm_device pointer * - * Returns true if the device supporte BACO, - * otherwise return false. + * Return: + * 1 if the device supporte BACO; + * 3 if the device support MACO (only works if BACO is supported) + * otherwise return 0. */ -bool amdgpu_device_supports_baco(struct drm_device *dev) +int amdgpu_device_supports_baco(struct drm_device *dev) { struct amdgpu_device *adev = drm_to_adev(dev); diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index a3a643254d7a..fdbc26346b54 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1375,14 +1375,14 @@ static int cik_asic_pci_config_reset(struct amdgpu_device *adev) return r; } -static bool cik_asic_supports_baco(struct amdgpu_device *adev) +static int cik_asic_supports_baco(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_BONAIRE: case CHIP_HAWAII: return amdgpu_dpm_is_baco_supported(adev); default: - return false; + return 0; } } diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 23e4ef4fff7c..67e179c7e347 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1409,9 +1409,9 @@ static int si_gpu_pci_config_reset(struct amdgpu_device *adev) return r; } -static bool si_asic_supports_baco(struct amdgpu_device *adev) +static int si_asic_supports_baco(struct amdgpu_device *adev) { - return false; + return 0; } static enum amd_reset_method diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index dec81ccf6240..c8abbf5da736 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -502,7 +502,7 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev) static enum amd_reset_method soc15_asic_reset_method(struct amdgpu_device *adev) { - bool baco_reset = false; + int baco_reset = 0; bool connected_to_cpu = false; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); @@ -540,7 +540,7 @@ soc15_asic_reset_method(struct amdgpu_device *adev) */ if (ras && adev->ras_enabled && adev->pm.fw_version <= 0x283400) - baco_reset = false; + baco_reset = 0; } else { baco_reset = amdgpu_dpm_is_baco_supported(adev); } @@ -620,7 +620,7 @@ static int soc15_asic_reset(struct amdgpu_device *adev) } } -static bool soc15_supports_baco(struct amdgpu_device *adev) +static int soc15_supports_baco(struct amdgpu_device *adev) { switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(9, 0, 0): @@ -628,13 +628,13 @@ static bool soc15_supports_baco(struct amdgpu_device *adev) if (adev->asic_type == CHIP_VEGA20) { if (adev->psp.sos.fw_version >= 0x80067) return amdgpu_dpm_is_baco_supported(adev); - return false; + return 0; } else { return amdgpu_dpm_is_baco_supported(adev); } break; default: - return false; + return 0; } } diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 1a98812981f4..2415355b037c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -897,7 +897,7 @@ static int vi_asic_pci_config_reset(struct amdgpu_device *adev) return r; } -static bool vi_asic_supports_baco(struct amdgpu_device *adev) +static int vi_asic_supports_baco(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_FIJI: @@ -908,14 +908,14 @@ static bool vi_asic_supports_baco(struct amdgpu_device *adev) case CHIP_TOPAZ: return amdgpu_dpm_is_baco_supported(adev); default: - return false; + return 0; } } static enum amd_reset_method vi_asic_reset_method(struct amdgpu_device *adev) { - bool baco_reset; + int baco_reset; if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY || amdgpu_reset_method == AMD_RESET_METHOD_BACO) @@ -935,7 +935,7 @@ vi_asic_reset_method(struct amdgpu_device *adev) baco_reset = amdgpu_dpm_is_baco_supported(adev); break; default: - baco_reset = false; + baco_reset = 0; break; } diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index afb930b70615..805c9d37a2b4 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -421,7 +421,7 @@ struct amd_pm_funcs { int (*set_hard_min_dcefclk_by_freq)(void *handle, uint32_t clock); int (*set_hard_min_fclk_by_freq)(void *handle, uint32_t clock); int (*set_min_deep_sleep_dcefclk)(void *handle, uint32_t clock); - bool (*get_asic_baco_capability)(void *handle); + int (*get_asic_baco_capability)(void *handle); int (*get_asic_baco_state)(void *handle, int *state); int (*set_asic_baco_state)(void *handle, int state); int (*get_ppfeature_status)(void *handle, char *buf); diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index f84bfed50681..eee919577b44 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -199,14 +199,14 @@ int amdgpu_dpm_notify_rlc_state(struct amdgpu_device *adev, bool en) return ret; } -bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev) +int amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev) { const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; void *pp_handle = adev->powerplay.pp_handle; - bool ret; + int ret; if (!pp_funcs || !pp_funcs->get_asic_baco_capability) - return false; + return 0; /* Don't use baco for reset in S3. * This is a workaround for some platforms * where entering BACO during suspend @@ -217,7 +217,7 @@ bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev) * devices. Needs more investigation. */ if (adev->in_s3) - return false; + return 0; mutex_lock(&adev->pm.mutex); diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index e6cad9f7aaeb..501f8c726e8d 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -411,7 +411,7 @@ int amdgpu_dpm_baco_reset(struct amdgpu_device *adev); int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev); int amdgpu_dpm_enable_gfx_features(struct amdgpu_device *adev); -bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev); +int amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev); bool amdgpu_dpm_is_mode1_reset_supported(struct amdgpu_device *adev); int amdgpu_dpm_mode1_reset(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index 581dadfd13f9..133d1ee6e67c 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -1371,7 +1371,7 @@ static int pp_set_active_display_count(void *handle, uint32_t count) return phm_set_active_display_count(hwmgr, count); } -static bool pp_get_asic_baco_capability(void *handle) +static int pp_get_asic_baco_capability(void *handle) { struct pp_hwmgr *hwmgr = handle; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c index 8490614e2d7e..ad60918aaae1 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c @@ -33,7 +33,7 @@ #include "smu/smu_7_1_2_d.h" #include "smu/smu_7_1_2_sh_mask.h" -bool smu7_get_bamaco_support(struct pp_hwmgr *hwmgr) +int smu7_get_bamaco_support(struct pp_hwmgr *hwmgr) { struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); uint32_t reg; @@ -44,9 +44,9 @@ bool smu7_get_bamaco_support(struct pp_hwmgr *hwmgr) reg = RREG32(mmCC_BIF_BX_FUSESTRAP0); if (reg & CC_BIF_BX_FUSESTRAP0__STRAP_BIF_PX_CAPABLE_MASK) - return true; + return BACO_SUPPORT; - return false; + return 0; } int smu7_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h index 93df1b9dfbd9..750082ea74d8 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h @@ -25,7 +25,7 @@ #include "hwmgr.h" #include "common_baco.h" -extern bool smu7_get_bamaco_support(struct pp_hwmgr *hwmgr); +extern int smu7_get_bamaco_support(struct pp_hwmgr *hwmgr); extern int smu7_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state); extern int smu7_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state); diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c index 938812d6b978..c1ce1d7cae48 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c @@ -28,13 +28,13 @@ #include "vega10_inc.h" #include "smu9_baco.h" -bool smu9_get_bamaco_support(struct pp_hwmgr *hwmgr) +int smu9_get_bamaco_support(struct pp_hwmgr *hwmgr) { struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); uint32_t reg, data; if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_BACO)) - return false; + return 0; WREG32(0x12074, 0xFFF0003B); data = RREG32(0x12075); @@ -43,10 +43,10 @@ bool smu9_get_bamaco_support(struct pp_hwmgr *hwmgr) reg = RREG32_SOC15(NBIF, 0, mmRCC_BIF_STRAP0); if (reg & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) - return true; + return BACO_SUPPORT; } - return false; + return 0; } int smu9_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h index 67a5cec8e6b5..2c100482084c 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h @@ -25,7 +25,7 @@ #include "hwmgr.h" #include "common_baco.h" -extern bool smu9_get_bamaco_support(struct pp_hwmgr *hwmgr); +extern int smu9_get_bamaco_support(struct pp_hwmgr *hwmgr); extern int smu9_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state); #endif diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c index 0d0523e046d4..424e4ec9e389 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c @@ -36,22 +36,22 @@ static const struct soc15_baco_cmd_entry clean_baco_tbl[] = { {CMD_WRITE, SOC15_REG_ENTRY(NBIF, 0, mmBIOS_SCRATCH_7), 0, 0, 0, 0}, }; -bool vega20_get_bamaco_support(struct pp_hwmgr *hwmgr) +int vega20_get_bamaco_support(struct pp_hwmgr *hwmgr) { struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); uint32_t reg; if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_BACO)) - return false; + return 0; if (((RREG32(0x17569) & 0x20000000) >> 29) == 0x1) { reg = RREG32_SOC15(NBIF, 0, mmRCC_BIF_STRAP0); if (reg & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) - return true; + return BACO_SUPPORT; } - return false; + return 0; } int vega20_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h index a2c0f4e7813c..0f2dd8c008ba 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h @@ -25,7 +25,7 @@ #include "hwmgr.h" #include "common_baco.h" -extern bool vega20_get_bamaco_support(struct pp_hwmgr *hwmgr); +extern int vega20_get_bamaco_support(struct pp_hwmgr *hwmgr); extern int vega20_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state); extern int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state); extern int vega20_baco_apply_vdci_flush_workaround(struct pp_hwmgr *hwmgr); diff --git a/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h index 5b3da41f159f..69928a4a074b 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h @@ -351,7 +351,7 @@ struct pp_hwmgr_func { int (*set_hard_min_fclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock); int (*set_hard_min_gfxclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock); int (*set_soft_max_gfxclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock); - bool (*get_bamaco_support)(struct pp_hwmgr *hwmgr); + int (*get_bamaco_support)(struct pp_hwmgr *hwmgr); int (*get_asic_baco_state)(struct pp_hwmgr *hwmgr, enum BACO_STATE *state); int (*set_asic_baco_state)(struct pp_hwmgr *hwmgr, enum BACO_STATE state); int (*get_ppfeature_status)(struct pp_hwmgr *hwmgr, char *buf); diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index ed4285665ca3..dc2a864b0f51 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -737,6 +737,7 @@ static int smu_early_init(void *handle) smu->is_apu = false; smu->smu_baco.state = SMU_BACO_STATE_NONE; smu->smu_baco.platform_support = false; + smu->smu_baco.maco_support = false; smu->user_dpm_profile.fan_mode = -1; mutex_init(&smu->message_lock); @@ -3223,7 +3224,7 @@ static int smu_set_xgmi_pstate(void *handle, return ret; } -static bool smu_get_baco_capability(void *handle) +static int smu_get_baco_capability(void *handle) { struct smu_context *smu = handle; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 3ef76f9543f3..5dd092703b8d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -1178,7 +1178,7 @@ struct pptable_funcs { * BACO: Bus Active, Chip Off * MACO: Memory Active, Chip Off */ - bool (*get_bamaco_support)(struct smu_context *smu); + int (*get_bamaco_support)(struct smu_context *smu); /** * @baco_get_state: Get the current BACO state. diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h index 0e8fdf3bccc4..c2ab336bb530 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h @@ -237,7 +237,7 @@ int smu_v11_0_set_azalia_d3_pme(struct smu_context *smu); int smu_v11_0_get_max_sustainable_clocks_by_dc(struct smu_context *smu, struct pp_smu_nv_clock_table *max_clocks); -bool smu_v11_0_get_bamaco_support(struct smu_context *smu); +int smu_v11_0_get_bamaco_support(struct smu_context *smu); enum smu_baco_state smu_v11_0_baco_get_state(struct smu_context *smu); diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index bad7e73e9fe0..d9700a3f28d2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -210,7 +210,7 @@ int smu_v13_0_set_azalia_d3_pme(struct smu_context *smu); int smu_v13_0_get_max_sustainable_clocks_by_dc(struct smu_context *smu, struct pp_smu_nv_clock_table *max_clocks); -bool smu_v13_0_get_bamaco_support(struct smu_context *smu); +int smu_v13_0_get_bamaco_support(struct smu_context *smu); int smu_v13_0_baco_enter(struct smu_context *smu); int smu_v13_0_baco_exit(struct smu_context *smu); diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h index 31bbbc1ddda2..6cdfee5052d9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h @@ -160,7 +160,7 @@ int smu_v14_0_register_irq_handler(struct smu_context *smu); int smu_v14_0_baco_set_armd3_sequence(struct smu_context *smu, enum smu_baco_seq baco_seq); -bool smu_v14_0_get_bamaco_support(struct smu_context *smu); +int smu_v14_0_get_bamaco_support(struct smu_context *smu); enum smu_baco_state smu_v14_0_baco_get_state(struct smu_context *smu); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index f6913820dbb7..8407ca4fdc17 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1557,23 +1557,27 @@ int smu_v11_0_baco_set_armd3_sequence(struct smu_context *smu, return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ArmD3, baco_seq, NULL); } -bool smu_v11_0_get_bamaco_support(struct smu_context *smu) +int smu_v11_0_get_bamaco_support(struct smu_context *smu) { struct smu_baco_context *smu_baco = &smu->smu_baco; + int bamaco_support = 0; if (amdgpu_sriov_vf(smu->adev) || !smu_baco->platform_support) - return false; + return 0; + + if (smu_baco->maco_support) + bamaco_support |= MACO_SUPPORT; /* return true if ASIC is in BACO state already */ if (smu_v11_0_baco_get_state(smu) == SMU_BACO_STATE_ENTER) - return true; + return bamaco_support |= BACO_SUPPORT; /* Arcturus does not support this bit mask */ if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) && !smu_cmn_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT)) - return false; + return 0; - return true; + return (bamaco_support |= BACO_SUPPORT); } enum smu_baco_state smu_v11_0_baco_get_state(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 639edcb90deb..acf41db3ef72 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1581,11 +1581,11 @@ out: adev->unique_id = ((uint64_t)upper32 << 32) | lower32; } -static bool aldebaran_get_bamaco_support(struct smu_context *smu) +static int aldebaran_get_bamaco_support(struct smu_context *smu) { /* aldebaran is not support baco */ - return false; + return 0; } static int aldebaran_set_df_cstate(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 0193b81eab14..7277cc72d7f5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -2268,22 +2268,26 @@ static int smu_v13_0_baco_set_state(struct smu_context *smu, return ret; } -bool smu_v13_0_get_bamaco_support(struct smu_context *smu) +int smu_v13_0_get_bamaco_support(struct smu_context *smu) { struct smu_baco_context *smu_baco = &smu->smu_baco; + int bamaco_support = 0; if (amdgpu_sriov_vf(smu->adev) || !smu_baco->platform_support) - return false; + return 0; + + if (smu_baco->maco_support) + bamaco_support |= MACO_SUPPORT; /* return true if ASIC is in BACO state already */ if (smu_v13_0_baco_get_state(smu) == SMU_BACO_STATE_ENTER) - return true; + return bamaco_support |= BACO_SUPPORT; if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) && !smu_cmn_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT)) - return false; + return 0; - return true; + return (bamaco_support |= BACO_SUPPORT); } int smu_v13_0_baco_enter(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 7c3a85879fd5..624432b3b483 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -2077,11 +2077,11 @@ static void smu_v13_0_6_get_unique_id(struct smu_context *smu) adev->unique_id = pptable->PublicSerialNumber_AID; } -static bool smu_v13_0_6_get_bamaco_support(struct smu_context *smu) +static int smu_v13_0_6_get_bamaco_support(struct smu_context *smu) { /* smu_13_0_6 does not support baco */ - return false; + return 0; } static const char *const throttling_logging_label[] = { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index 5bf6bde99161..707b37f535e2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -1590,23 +1590,27 @@ int smu_v14_0_baco_set_armd3_sequence(struct smu_context *smu, return 0; } -bool smu_v14_0_get_bamaco_support(struct smu_context *smu) +int smu_v14_0_get_bamaco_support(struct smu_context *smu) { struct smu_baco_context *smu_baco = &smu->smu_baco; + int bamaco_support = 0; if (amdgpu_sriov_vf(smu->adev) || !smu_baco->platform_support) - return false; + return 0; + + if (smu_baco->maco_support) + bamaco_support |= MACO_SUPPORT; /* return true if ASIC is in BACO state already */ if (smu_v14_0_baco_get_state(smu) == SMU_BACO_STATE_ENTER) - return true; + return (bamaco_support |= BACO_SUPPORT); if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) && !smu_cmn_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT)) - return false; + return 0; - return true; + return (bamaco_support |= BACO_SUPPORT); } enum smu_baco_state smu_v14_0_baco_get_state(struct smu_context *smu) -- cgit v1.2.3 From 327eec542746b254d9585410bba7f8fdcf61404e Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Sun, 31 Mar 2024 12:59:17 +0800 Subject: drm/amdgpu: Bypass asd if display hw is not available ASD is not needed by headless GPU. Signed-off-by: Hawking Zhang Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 94b310fdb719..83bf86352267 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1053,6 +1053,11 @@ static int psp_asd_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev) || !psp->asd_context.bin_desc.size_bytes) return 0; + /* bypass asd if display hardware is not available */ + if (!amdgpu_device_has_display_hardware(psp->adev) && + amdgpu_ip_version(psp->adev, MP0_HWIP, 0) >= IP_VERSION(13, 0, 10)) + return 0; + psp->asd_context.mem_context.shared_mc_addr = 0; psp->asd_context.mem_context.shared_mem_size = PSP_ASD_SHARED_MEM_SIZE; psp->asd_context.ta_load_type = GFX_CMD_ID_LOAD_ASD; -- cgit v1.2.3 From fcc0735b00872d9ff0d209551e66c08a79cd594a Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Mon, 25 Mar 2024 11:56:41 +0800 Subject: drm/amdgpu: Add support for BAMACO mode checking Optimize the code to add support for BAMACO mode checking Signed-off-by: Ma Jun Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 6 ++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 23 +++++++++++++++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 6 ++++-- 3 files changed, 27 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index e4277298cf1a..6ea893ad9a36 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2744,7 +2744,8 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF; } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) { /* nothing to do */ - } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) { + } else if ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) || + (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) { amdgpu_device_baco_enter(drm_dev); } @@ -2784,7 +2785,8 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) * PCI core handles it for _PR3. */ pci_set_master(pdev); - } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) { + } else if ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) || + (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) { amdgpu_device_baco_exit(drm_dev); } ret = amdgpu_device_resume(drm_dev, false); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index a2df3025a754..55eb4e4eb8f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -133,6 +133,7 @@ void amdgpu_register_gpu_instance(struct amdgpu_device *adev) int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) { struct drm_device *dev; + int bamaco_support = 0; int r, acpi_status; dev = adev_to_drm(adev); @@ -158,8 +159,12 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) (amdgpu_runtime_pm != 0)) { /* enable boco as runtime mode */ adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO; dev_info(adev->dev, "Using BOCO for runtime pm\n"); - } else if (amdgpu_device_supports_baco(dev) && - (amdgpu_runtime_pm != 0)) { + } else if (amdgpu_runtime_pm != 0) { + bamaco_support = amdgpu_device_supports_baco(dev); + + if (!bamaco_support) + goto no_runtime_pm; + switch (adev->asic_type) { case CHIP_VEGA20: case CHIP_ARCTURUS: @@ -178,10 +183,20 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) break; } - if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) - dev_info(adev->dev, "Using BACO for runtime pm\n"); + if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) { + if (bamaco_support & MACO_SUPPORT) { + adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO; + dev_info(adev->dev, "Using BAMACO for runtime pm\n"); + } else { + dev_info(adev->dev, "Using BACO for runtime pm\n"); + } + } } +no_runtime_pm: + if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) + dev_info(adev->dev, "NO pm mode for runtime pm\n"); + /* Call ACPI methods: require modeset init * but failure is not fatal */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 83bf86352267..edae581059af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2622,7 +2622,8 @@ static int psp_load_p2s_table(struct psp_context *psp) struct amdgpu_firmware_info *ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_P2S_TABLE]; - if (adev->in_runpm && (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO)) + if (adev->in_runpm && ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) || + (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO))) return 0; if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) { @@ -2652,7 +2653,8 @@ static int psp_load_smu_fw(struct psp_context *psp) * Skip SMU FW reloading in case of using BACO for runpm only, * as SMU is always alive. */ - if (adev->in_runpm && (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO)) + if (adev->in_runpm && ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) || + (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO))) return 0; if (!ucode->fw || amdgpu_sriov_vf(psp->adev)) -- cgit v1.2.3 From 91bc86011661c184e01bf3f693432ba4d0347217 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 6 Mar 2024 17:05:07 +0530 Subject: drm/amdgpu: Fix VCN allocation in CPX partition VCN need not be shared in CPX mode always for all GFX 9.4.3 SOC SKUs. In certain configs, VCN instance can be exclusively allocated to a partition even under CPX mode. Signed-off-by: Lijo Lazar Reviewed-by: James Zhu Reviewed-by: Asad Kamal Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index d6f808acfb17..fbb43ae7624f 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -62,6 +62,11 @@ void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT << 1; } +static bool aqua_vanjaram_xcp_vcn_shared(struct amdgpu_device *adev) +{ + return (adev->xcp_mgr->num_xcps > adev->vcn.num_vcn_inst); +} + static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, uint32_t inst_idx, struct amdgpu_ring *ring) { @@ -87,7 +92,7 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, case AMDGPU_RING_TYPE_VCN_ENC: case AMDGPU_RING_TYPE_VCN_JPEG: ip_blk = AMDGPU_XCP_VCN; - if (adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE) + if (aqua_vanjaram_xcp_vcn_shared(adev)) inst_mask = 1 << (inst_idx * 2); break; default: @@ -140,10 +145,12 @@ static int aqua_vanjaram_xcp_sched_list_update( aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id); - /* VCN is shared by two partitions under CPX MODE */ + /* VCN may be shared by two partitions under CPX MODE in certain + * configs. + */ if ((ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC || - ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) && - adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE) + ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) && + aqua_vanjaram_xcp_vcn_shared(adev)) aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id + 1); } -- cgit v1.2.3 From 9ecef5b2d0a09eb54f8431208758bb2aa2a6b779 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Mon, 1 Apr 2024 15:46:16 +0800 Subject: drm/amdgpu: update check condition for XGMI ACA UE Check more possible ext error codes. Signed-off-by: Tao Zhou Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 523827e8e7a8..dd2ec48cf5c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -1060,7 +1060,9 @@ static int xgmi_v6_4_0_aca_bank_parser(struct aca_handle *handle, struct aca_ban switch (type) { case ACA_SMU_TYPE_UE: - count = ext_error_code == 0 ? count : 0ULL; + if (ext_error_code != 0 && ext_error_code != 9) + count = 0ULL; + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, count); break; case ACA_SMU_TYPE_CE: -- cgit v1.2.3 From f6ac0842364a5721c02e9dd1c956eb51c7431ff3 Mon Sep 17 00:00:00 2001 From: chongli2 Date: Tue, 26 Mar 2024 13:24:21 +0800 Subject: drm/amd/amdgpu: support MES command SET_HW_RESOURCE1 in sriov support MES command SET_HW_RESOURCE1 in sriov Signed-off-by: chongli2 Reviewed-by: Jingwen Chen Acked-by: Jingwen Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 6 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 5 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 4 +++ drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 9 ++++-- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 43 +++++++++++++++++++++++++++ drivers/gpu/drm/amd/include/mes_v11_api_def.h | 21 +++++++++++++ 6 files changed, 85 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 4c8fc3117ef8..6b3e1844eac5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -141,6 +141,12 @@ struct amdgpu_mes { /* ip specific functions */ const struct amdgpu_mes_funcs *funcs; + + /* mes resource_1 bo*/ + struct amdgpu_bo *resource_1; + uint64_t resource_1_gpu_addr; + void *resource_1_addr; + }; struct amdgpu_mes_process { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 6f01de220c44..461753904dea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -576,6 +576,11 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev) vf2pf_info->decode_usage = 0; vf2pf_info->dummy_page_addr = (uint64_t)adev->dummy_page_addr; + vf2pf_info->mes_info_addr = (uint64_t)adev->mes.resource_1_gpu_addr; + + if (adev->mes.resource_1) { + vf2pf_info->mes_info_size = adev->mes.resource_1->tbo.base.size; + } vf2pf_info->checksum = amd_sriov_msg_checksum( vf2pf_info, vf2pf_info->header.size, 0, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index a858bc98cad4..a9f2f0c4f799 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -132,6 +132,8 @@ enum AMDGIM_FEATURE_FLAG { AMDGIM_FEATURE_AV1_SUPPORT = (1 << 6), /* VCN RB decouple */ AMDGIM_FEATURE_VCN_RB_DECOUPLE = (1 << 7), + /* MES info */ + AMDGIM_FEATURE_MES_INFO_ENABLE = (1 << 8), }; enum AMDGIM_REG_ACCESS_FLAG { @@ -335,6 +337,8 @@ static inline bool is_virtual_machine(void) ((adev)->virt.gim_feature & AMDGIM_FEATURE_AV1_SUPPORT) #define amdgpu_sriov_is_vcn_rb_decouple(adev) \ ((adev)->virt.gim_feature & AMDGIM_FEATURE_VCN_RB_DECOUPLE) +#define amdgpu_sriov_is_mes_info_enable(adev) \ + ((adev)->virt.gim_feature & AMDGIM_FEATURE_MES_INFO_ENABLE) bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); void amdgpu_virt_init_setting(struct amdgpu_device *adev); int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index 51a14f6d93bd..0de78d6a83fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -94,7 +94,8 @@ union amd_sriov_msg_feature_flags { uint32_t reg_indirect_acc : 1; uint32_t av1_support : 1; uint32_t vcn_rb_decouple : 1; - uint32_t reserved : 24; + uint32_t mes_info_enable : 1; + uint32_t reserved : 23; } flags; uint32_t all; }; @@ -221,7 +222,7 @@ struct amd_sriov_msg_vf2pf_info_header { uint32_t reserved[2]; }; -#define AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE (70) +#define AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE (73) struct amd_sriov_msg_vf2pf_info { /* header contains size and version */ struct amd_sriov_msg_vf2pf_info_header header; @@ -265,7 +266,9 @@ struct amd_sriov_msg_vf2pf_info { uint32_t version; } ucode_info[AMD_SRIOV_MSG_RESERVE_UCODE]; uint64_t dummy_page_addr; - + /* FB allocated for guest MES to record UQ info */ + uint64_t mes_info_addr; + uint32_t mes_info_size; /* reserved */ uint32_t reserved[256 - AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE]; }; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 63f281a9984d..e5230078a4cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -422,6 +422,36 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); } +static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes) +{ + int size = 128 * PAGE_SIZE; + int ret = 0; + struct amdgpu_device *adev = mes->adev; + union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_pkt; + memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt)); + + mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1; + mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + mes_set_hw_res_pkt.enable_mes_info_ctx = 1; + + ret = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &mes->resource_1, + &mes->resource_1_gpu_addr, + &mes->resource_1_addr); + if (ret) { + dev_err(adev->dev, "(%d) failed to create mes resource_1 bo\n", ret); + return ret; + } + + mes_set_hw_res_pkt.mes_info_ctx_mc_addr = mes->resource_1_gpu_addr; + mes_set_hw_res_pkt.mes_info_ctx_size = mes->resource_1->tbo.base.size; + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), + offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); +} + static const struct amdgpu_mes_funcs mes_v11_0_funcs = { .add_hw_queue = mes_v11_0_add_hw_queue, .remove_hw_queue = mes_v11_0_remove_hw_queue, @@ -1203,6 +1233,14 @@ static int mes_v11_0_hw_init(void *handle) if (r) goto failure; + if (amdgpu_sriov_is_mes_info_enable(adev)) { + r = mes_v11_0_set_hw_resources_1(&adev->mes); + if (r) { + DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r); + goto failure; + } + } + r = mes_v11_0_query_sched_status(&adev->mes); if (r) { DRM_ERROR("MES is busy\n"); @@ -1226,6 +1264,11 @@ failure: static int mes_v11_0_hw_fini(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_sriov_is_mes_info_enable(adev)) { + amdgpu_bo_free_kernel(&adev->mes.resource_1, &adev->mes.resource_1_gpu_addr, + &adev->mes.resource_1_addr); + } return 0; } diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h index ec5b9ab67c5e..410c8d664336 100644 --- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h @@ -61,6 +61,7 @@ enum MES_SCH_API_OPCODE { MES_SCH_API_MISC = 14, MES_SCH_API_UPDATE_ROOT_PAGE_TABLE = 15, MES_SCH_API_AMD_LOG = 16, + MES_SCH_API_SET_HW_RSRC_1 = 19, MES_SCH_API_MAX = 0xFF }; @@ -238,6 +239,26 @@ union MESAPI_SET_HW_RESOURCES { uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; }; +union MESAPI_SET_HW_RESOURCES_1 { + struct { + union MES_API_HEADER header; + struct MES_API_STATUS api_status; + uint64_t timestamp; + union { + struct { + uint32_t enable_mes_info_ctx : 1; + uint32_t reserved : 31; + }; + uint32_t uint32_all; + }; + uint64_t mes_info_ctx_mc_addr; + uint32_t mes_info_ctx_size; + uint32_t mes_kiq_unmap_timeout; // unit is 100ms + }; + + uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; +}; + union MESAPI__ADD_QUEUE { struct { union MES_API_HEADER header; -- cgit v1.2.3 From 4b0cb230bdb71c23981acfa5e7b367c7dde02a41 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Tue, 2 Apr 2024 14:58:33 +0800 Subject: drm/amdgpu: retire UMC v12 mca_addr_to_pa RAS TA will handle it, the function is useless. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 1 - drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 105 ++------------------------------- drivers/gpu/drm/amd/amdgpu/umc_v12_0.h | 62 +------------------ 3 files changed, 7 insertions(+), 161 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 470a146f2f43..c4ec1358f3aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1457,7 +1457,6 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) adev->umc.channel_offs = UMC_V12_0_PER_CHANNEL_OFFSET; adev->umc.active_mask = adev->aid_mask; adev->umc.retire_unit = UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL; - adev->umc.channel_idx_tbl = &umc_v12_0_channel_idx_tbl[0][0][0]; if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) adev->umc.ras = &umc_v12_0_ras; break; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index f46a176f9b55..a0122b22eda4 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -28,28 +28,6 @@ #include "umc/umc_12_0_0_sh_mask.h" #include "mp/mp_13_0_6_sh_mask.h" -const uint32_t - umc_v12_0_channel_idx_tbl[] - [UMC_V12_0_UMC_INSTANCE_NUM] - [UMC_V12_0_CHANNEL_INSTANCE_NUM] = { - {{3, 7, 11, 15, 2, 6, 10, 14}, {1, 5, 9, 13, 0, 4, 8, 12}, - {19, 23, 27, 31, 18, 22, 26, 30}, {17, 21, 25, 29, 16, 20, 24, 28}}, - {{47, 43, 39, 35, 46, 42, 38, 34}, {45, 41, 37, 33, 44, 40, 36, 32}, - {63, 59, 55, 51, 62, 58, 54, 50}, {61, 57, 53, 49, 60, 56, 52, 48}}, - {{79, 75, 71, 67, 78, 74, 70, 66}, {77, 73, 69, 65, 76, 72, 68, 64}, - {95, 91, 87, 83, 94, 90, 86, 82}, {93, 89, 85, 81, 92, 88, 84, 80}}, - {{99, 103, 107, 111, 98, 102, 106, 110}, {97, 101, 105, 109, 96, 100, 104, 108}, - {115, 119, 123, 127, 114, 118, 122, 126}, {113, 117, 121, 125, 112, 116, 120, 124}} - }; - -/* mapping of MCA error address to normalized address */ -static const uint32_t umc_v12_0_ma2na_mapping[] = { - 0, 5, 6, 8, 9, 14, 12, 13, - 10, 11, 15, 16, 17, 18, 19, 20, - 21, 22, 23, 24, 25, 26, 27, 28, - 24, 7, 29, 30, -}; - static inline uint64_t get_umc_v12_0_reg_offset(struct amdgpu_device *adev, uint32_t node_inst, uint32_t umc_inst, @@ -192,79 +170,6 @@ static void umc_v12_0_query_ras_error_count(struct amdgpu_device *adev, umc_v12_0_reset_error_count(adev); } -static bool umc_v12_0_bit_wise_xor(uint32_t val) -{ - bool result = 0; - int i; - - for (i = 0; i < 32; i++) - result = result ^ ((val >> i) & 0x1); - - return result; -} - -static void umc_v12_0_mca_addr_to_pa(struct amdgpu_device *adev, - uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst, - uint32_t node_inst, - struct ta_ras_query_address_output *addr_out) -{ - uint32_t channel_index, i; - uint64_t na, soc_pa; - uint32_t bank_hash0, bank_hash1, bank_hash2, bank_hash3, col, row; - uint32_t bank0, bank1, bank2, bank3, bank; - - bank_hash0 = (err_addr >> UMC_V12_0_MCA_B0_BIT) & 0x1ULL; - bank_hash1 = (err_addr >> UMC_V12_0_MCA_B1_BIT) & 0x1ULL; - bank_hash2 = (err_addr >> UMC_V12_0_MCA_B2_BIT) & 0x1ULL; - bank_hash3 = (err_addr >> UMC_V12_0_MCA_B3_BIT) & 0x1ULL; - col = (err_addr >> 1) & 0x1fULL; - row = (err_addr >> 10) & 0x3fffULL; - - /* apply bank hash algorithm */ - bank0 = - bank_hash0 ^ (UMC_V12_0_XOR_EN0 & - (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR0) ^ - (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR0)))); - bank1 = - bank_hash1 ^ (UMC_V12_0_XOR_EN1 & - (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR1) ^ - (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR1)))); - bank2 = - bank_hash2 ^ (UMC_V12_0_XOR_EN2 & - (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR2) ^ - (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR2)))); - bank3 = - bank_hash3 ^ (UMC_V12_0_XOR_EN3 & - (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR3) ^ - (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR3)))); - - bank = bank0 | (bank1 << 1) | (bank2 << 2) | (bank3 << 3); - err_addr &= ~0x3c0ULL; - err_addr |= (bank << UMC_V12_0_MCA_B0_BIT); - - na = 0x0; - /* convert mca error address to normalized address */ - for (i = 1; i < ARRAY_SIZE(umc_v12_0_ma2na_mapping); i++) - na |= ((err_addr >> i) & 0x1ULL) << umc_v12_0_ma2na_mapping[i]; - - channel_index = - adev->umc.channel_idx_tbl[node_inst * adev->umc.umc_inst_num * - adev->umc.channel_inst_num + - umc_inst * adev->umc.channel_inst_num + - ch_inst]; - /* translate umc channel address to soc pa, 3 parts are included */ - soc_pa = ADDR_OF_32KB_BLOCK(na) | - ADDR_OF_256B_BLOCK(channel_index) | - OFFSET_IN_256B_BLOCK(na); - - /* the umc channel bits are not original values, they are hashed */ - UMC_V12_0_SET_CHANNEL_HASH(channel_index, soc_pa); - - addr_out->pa.pa = soc_pa; - addr_out->pa.bank = bank; - addr_out->pa.channel_idx = channel_index; -} - static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, struct ras_err_data *err_data, struct ta_ras_query_address_input *addr_in) @@ -275,10 +180,12 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, err_addr = addr_in->ma.err_addr; addr_in->addr_type = TA_RAS_MCA_TO_PA; - if (psp_ras_query_address(&adev->psp, addr_in, &addr_out)) - /* fallback to old path if fail to get pa from psp */ - umc_v12_0_mca_addr_to_pa(adev, err_addr, addr_in->ma.ch_inst, - addr_in->ma.umc_inst, addr_in->ma.node_inst, &addr_out); + if (psp_ras_query_address(&adev->psp, addr_in, &addr_out)) { + dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx", + err_addr); + + return; + } soc_pa = addr_out.pa.pa; bank = addr_out.pa.bank; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h index 5973bfb14fce..1d5f44dcffdd 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h @@ -55,67 +55,12 @@ #define UMC_V12_0_NA_MAP_PA_NUM 8 /* R13 bit shift should be considered, double the number */ #define UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL (UMC_V12_0_NA_MAP_PA_NUM * 2) -/* bank bits in MCA error address */ -#define UMC_V12_0_MCA_B0_BIT 6 -#define UMC_V12_0_MCA_B1_BIT 7 -#define UMC_V12_0_MCA_B2_BIT 8 -#define UMC_V12_0_MCA_B3_BIT 9 + /* column bits in SOC physical address */ #define UMC_V12_0_PA_C2_BIT 15 #define UMC_V12_0_PA_C4_BIT 21 /* row bits in SOC physical address */ #define UMC_V12_0_PA_R13_BIT 35 -/* channel index bits in SOC physical address */ -#define UMC_V12_0_PA_CH4_BIT 12 -#define UMC_V12_0_PA_CH5_BIT 13 -#define UMC_V12_0_PA_CH6_BIT 14 - -/* bank hash settings */ -#define UMC_V12_0_XOR_EN0 1 -#define UMC_V12_0_XOR_EN1 1 -#define UMC_V12_0_XOR_EN2 1 -#define UMC_V12_0_XOR_EN3 1 -#define UMC_V12_0_COL_XOR0 0x0 -#define UMC_V12_0_COL_XOR1 0x0 -#define UMC_V12_0_COL_XOR2 0x800 -#define UMC_V12_0_COL_XOR3 0x1000 -#define UMC_V12_0_ROW_XOR0 0x11111 -#define UMC_V12_0_ROW_XOR1 0x22222 -#define UMC_V12_0_ROW_XOR2 0x4444 -#define UMC_V12_0_ROW_XOR3 0x8888 - -/* channel hash settings */ -#define UMC_V12_0_HASH_4K 0 -#define UMC_V12_0_HASH_64K 1 -#define UMC_V12_0_HASH_2M 1 -#define UMC_V12_0_HASH_1G 1 -#define UMC_V12_0_HASH_1T 1 - -/* XOR some bits of PA into CH4~CH6 bits (bits 12~14 of PA), - * hash bit is only effective when related setting is enabled - */ -#define UMC_V12_0_CHANNEL_HASH_CH4(channel_idx, pa) ((((channel_idx) >> 5) & 0x1) ^ \ - (((pa) >> 20) & 0x1ULL & UMC_V12_0_HASH_64K) ^ \ - (((pa) >> 27) & 0x1ULL & UMC_V12_0_HASH_2M) ^ \ - (((pa) >> 34) & 0x1ULL & UMC_V12_0_HASH_1G) ^ \ - (((pa) >> 41) & 0x1ULL & UMC_V12_0_HASH_1T)) -#define UMC_V12_0_CHANNEL_HASH_CH5(channel_idx, pa) ((((channel_idx) >> 6) & 0x1) ^ \ - (((pa) >> 21) & 0x1ULL & UMC_V12_0_HASH_64K) ^ \ - (((pa) >> 28) & 0x1ULL & UMC_V12_0_HASH_2M) ^ \ - (((pa) >> 35) & 0x1ULL & UMC_V12_0_HASH_1G) ^ \ - (((pa) >> 42) & 0x1ULL & UMC_V12_0_HASH_1T)) -#define UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) ((((channel_idx) >> 4) & 0x1) ^ \ - (((pa) >> 19) & 0x1ULL & UMC_V12_0_HASH_64K) ^ \ - (((pa) >> 26) & 0x1ULL & UMC_V12_0_HASH_2M) ^ \ - (((pa) >> 33) & 0x1ULL & UMC_V12_0_HASH_1G) ^ \ - (((pa) >> 40) & 0x1ULL & UMC_V12_0_HASH_1T) ^ \ - (((pa) >> 47) & 0x1ULL & UMC_V12_0_HASH_4K)) -#define UMC_V12_0_SET_CHANNEL_HASH(channel_idx, pa) do { \ - (pa) &= ~(0x7ULL << UMC_V12_0_PA_CH4_BIT); \ - (pa) |= (UMC_V12_0_CHANNEL_HASH_CH4(channel_idx, pa) << UMC_V12_0_PA_CH4_BIT); \ - (pa) |= (UMC_V12_0_CHANNEL_HASH_CH5(channel_idx, pa) << UMC_V12_0_PA_CH5_BIT); \ - (pa) |= (UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) << UMC_V12_0_PA_CH6_BIT); \ - } while (0) #define MCA_IPID_LO_2_UMC_CH(_ipid_lo) (((((_ipid_lo) >> 20) & 0x1) * 4) + \ (((_ipid_lo) >> 12) & 0xF)) @@ -127,11 +72,6 @@ bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_ typedef bool (*check_error_type_func)(struct amdgpu_device *adev, uint64_t mc_umc_status); -extern const uint32_t - umc_v12_0_channel_idx_tbl[] - [UMC_V12_0_UMC_INSTANCE_NUM] - [UMC_V12_0_CHANNEL_INSTANCE_NUM]; - extern struct amdgpu_umc_ras umc_v12_0_ras; #endif -- cgit v1.2.3 From 05e40141685fd6aaedbde334b404c8dfbbd83fd6 Mon Sep 17 00:00:00 2001 From: ZhenGuo Yin Date: Tue, 2 Apr 2024 11:41:05 +0800 Subject: drm/amdgpu: clear set_q_mode_offs when VM changed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] set_q_mode_offs don't get cleared after GPU reset, nexting SET_Q_MODE packet to init shadow memory will be skiped, hence there has a page fault. [How] VM flush is needed after GPU reset, clear set_q_mode_offs when emitting VM flush. Fixes: 8bc75586ea01 ("drm/amdgpu: workaround to avoid SET_Q_MODE packets v2") Reviewed-by: Christian König Signed-off-by: ZhenGuo Yin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 7a906318e451..c11c6299711e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -5465,6 +5465,7 @@ static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring, /* Make sure that we can't skip the SET_Q_MODE packets when the VM * changed in any way. */ + ring->set_q_mode_offs = 0; ring->set_q_mode_ptr = NULL; } -- cgit v1.2.3 From b41f742d6fa6bc9799ff670a9b081f4ac6b911a0 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Mon, 25 Mar 2024 12:33:02 +0530 Subject: drm/amdgpu: Set fatal errror detected flag earlier In case of fatal errors, set FED status when interrupt is received. Set the flag on other devices in the hive before RAS recovery work. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 41 ++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index b8c7d0bf8fb1..352ce16a0963 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2399,6 +2399,19 @@ out: return ret; } +static void amdgpu_ras_set_fed_all(struct amdgpu_device *adev, + struct amdgpu_hive_info *hive, bool status) +{ + struct amdgpu_device *tmp_adev; + + if (hive) { + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) + amdgpu_ras_set_fed(tmp_adev, status); + } else { + amdgpu_ras_set_fed(adev, status); + } +} + static void amdgpu_ras_do_recovery(struct work_struct *work) { struct amdgpu_ras *ras = @@ -2408,8 +2421,21 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) struct list_head device_list, *device_list_handle = NULL; struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); - if (hive) + if (hive) { atomic_set(&hive->ras_recovery, 1); + + /* If any device which is part of the hive received RAS fatal + * error interrupt, set fatal error status on all. This + * condition will need a recovery, and flag will be cleared + * as part of recovery. + */ + list_for_each_entry(remote_adev, &hive->device_list, + gmc.xgmi.head) + if (amdgpu_ras_get_fed_status(remote_adev)) { + amdgpu_ras_set_fed_all(adev, hive, true); + break; + } + } if (!ras->disable_ras_err_cnt_harvest) { /* Build list of devices to query RAS related errors */ @@ -2454,18 +2480,6 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE1_RESET; set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); - /* For any RAS error that needs a full reset to - * recover, set the fatal error status - */ - if (hive) { - list_for_each_entry(remote_adev, - &hive->device_list, - gmc.xgmi.head) - amdgpu_ras_set_fed(remote_adev, - true); - } else { - amdgpu_ras_set_fed(adev, true); - } psp_fatal_error_recovery_quirk(&adev->psp); } } @@ -3550,6 +3564,7 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) RAS_EVENT_LOG(adev, event_id, "uncorrectable hardware error" "(ERREVENT_ATHUB_INTERRUPT) detected!\n"); + amdgpu_ras_set_fed(adev, true); ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET; amdgpu_ras_reset_gpu(adev); } -- cgit v1.2.3 From d1999b4017d485a3168b4ba1316937c82454165a Mon Sep 17 00:00:00 2001 From: Zhigang Luo Date: Wed, 20 Mar 2024 10:40:27 -0400 Subject: amd/amdgpu: improve VF recover time 1. change AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT from 30 to 5. 2. set fatel error detected flag. Signed-off-by: Zhigang Luo Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f830024cffca..170da4551ed5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4994,6 +4994,7 @@ retry: r = amdgpu_virt_reset_gpu(adev); if (r) return r; + amdgpu_ras_set_fed(adev, false); amdgpu_irq_gpu_reset_resume_helper(adev); /* some sw clean up VF needs to do before recover */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 461753904dea..54ab51a4ada7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -598,6 +598,7 @@ static void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work) adev->virt.vf2pf_update_retry_cnt++; if ((adev->virt.vf2pf_update_retry_cnt >= AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT) && amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev)) { + amdgpu_ras_set_fed(adev, true); if (amdgpu_reset_domain_schedule(adev->reset_domain, &adev->virt.flr_work)) return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index a9f2f0c4f799..642f1fd287d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -52,7 +52,7 @@ /* tonga/fiji use this offset */ #define mmBIF_IOV_FUNC_IDENTIFIER 0x1503 -#define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 30 +#define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 5 enum amdgpu_sriov_vf_mode { SRIOV_VF_MODE_BARE_METAL = 0, -- cgit v1.2.3 From d6d6561f936bc8066537c3fa5fa352f2ea025d55 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Wed, 3 Apr 2024 17:28:44 +0800 Subject: drm/amdgpu: fix incorrect number of active RBs for gfx11 The RB bitmap should be global active RB bitmap & active RB bitmap based on active SA. Signed-off-by: Tim Huang Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index c11c6299711e..ae6a0d19e247 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1635,7 +1635,7 @@ static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa)); } - active_rb_bitmap |= global_active_rb_bitmap; + active_rb_bitmap &= global_active_rb_bitmap; adev->gfx.config.backend_enable_mask = active_rb_bitmap; adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); } -- cgit v1.2.3 From 526b184e888371dd3fdb131961461a2f30cd0ae4 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Sun, 7 Apr 2024 22:01:35 +0800 Subject: drm/amdgpu: differentiate external rev id for gfx 11.5.0 This patch to differentiate external rev id for gfx 11.5.0. Signed-off-by: Yifan Zhang Reviewed-by: Tim Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc21.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index abe319b0f063..43ca63fe85ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -720,7 +720,10 @@ static int soc21_common_early_init(void *handle) AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_JPEG | AMD_PG_SUPPORT_GFX_PG; - adev->external_rev_id = adev->rev_id + 0x1; + if (adev->rev_id == 0) + adev->external_rev_id = 0x1; + else + adev->external_rev_id = adev->rev_id + 0x10; break; case IP_VERSION(11, 5, 1): adev->cg_flags = -- cgit v1.2.3 From 6b0d78032f98d494367a9293d584be2ac1173fb4 Mon Sep 17 00:00:00 2001 From: Luqmaan Irshad Date: Tue, 2 Apr 2024 17:33:46 -0400 Subject: drm/amd/amdgpu: Update PF2VF Header Adding a new field for GPU Capacity to align the header with the host. Signed-off-by: Luqmaan Irshad Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index 0de78d6a83fe..fb2b394bb9c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -158,7 +158,7 @@ struct amd_sriov_msg_pf2vf_info_header { uint32_t reserved[2]; }; -#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (48) +#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (49) struct amd_sriov_msg_pf2vf_info { /* header contains size and version */ struct amd_sriov_msg_pf2vf_info_header header; @@ -209,6 +209,8 @@ struct amd_sriov_msg_pf2vf_info { struct amd_sriov_msg_uuid_info uuid_info; /* PCIE atomic ops support flag */ uint32_t pcie_atomic_ops_support_flags; + /* Portion of GPU memory occupied by VF. MAX value is 65535, but set to uint32_t to maintain alignment with reserved size */ + uint32_t gpu_capacity; /* reserved */ uint32_t reserved[256 - AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE]; }; -- cgit v1.2.3 From a0e002cdac429d7b2ae8b66baf51263193a935c8 Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Tue, 9 Apr 2024 17:31:01 +0800 Subject: drm/amdgpu/sdma6: set sdma hang watchdog Set SDMAx_WATCHDOG_CNTL.QUEUE_HANG_COUNT registers to improve SDMA reliability. Signed-off-by: Jack Xiao Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 361835a61f2e..67a4d8b1512b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -507,6 +507,13 @@ static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev) /* set minor_ptr_update to 0 after wptr programed */ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0); + /* Set up sdma hang watchdog */ + temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL)); + /* 100ms per unit */ + temp = REG_SET_FIELD(temp, SDMA0_WATCHDOG_CNTL, QUEUE_HANG_COUNT, + max(adev->usec_timeout/100000, 1)); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL), temp); + /* Set up RESP_MODE to non-copy addresses */ temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL)); temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3); -- cgit v1.2.3 From c8962679af3538deaf6d90e90bbdceb0f66b6e98 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 15 Mar 2024 13:07:53 +0100 Subject: drm/amdgpu: remove invalid resource->start check v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The majority of those where removed in the commit aed01a68047b ("drm/amdgpu: Remove TTM resource->start visible VRAM condition v2") But this one was missed because it's working on the resource and not the BO. Since we also no longer use a fake start address for visible BOs this will now trigger invalid mapping errors. v2: also remove the unused variable Signed-off-by: Christian König Fixes: aed01a68047b ("drm/amdgpu: Remove TTM resource->start visible VRAM condition v2") CC: stable@vger.kernel.org Acked-by: Pierre-Eric Pelloux-Prayer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index fc418e670fda..6417cb76ccd4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -557,7 +557,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - size_t bus_size = (size_t)mem->size; switch (mem->mem_type) { case TTM_PL_SYSTEM: @@ -568,9 +567,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, break; case TTM_PL_VRAM: mem->bus.offset = mem->start << PAGE_SHIFT; - /* check if it's visible */ - if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size) - return -EINVAL; if (adev->mman.aper_base_kaddr && mem->placement & TTM_PL_FLAG_CONTIGUOUS) -- cgit v1.2.3 From 8b9130bae048a7854c6e7d3e2710d0e96e861d31 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 28 Mar 2024 13:19:58 -0400 Subject: drm/amdgpu/gfx11: properly handle regGRBM_GFX_CNTL in soft reset Need to take the srbm_mutex and while we are here, use the helper function soc21_grbm_select(); Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index ae6a0d19e247..5dbfef49dd5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4506,14 +4506,11 @@ static int gfx_v11_0_soft_reset(void *handle) gfx_v11_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); for (i = 0; i < adev->gfx.mec.num_mec; ++i) { for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { - tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k); - WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); + soc21_grbm_select(adev, i, k, j, 0); WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); @@ -4523,16 +4520,14 @@ static int gfx_v11_0_soft_reset(void *handle) for (i = 0; i < adev->gfx.me.num_me; ++i) { for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { - tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k); - WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); + soc21_grbm_select(adev, i, k, j, 0); WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1); } } } + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); /* Try to acquire the gfx mutex before access to CP_VMID_RESET */ r = gfx_v11_0_request_gfx_index_mutex(adev, 1); -- cgit v1.2.3 From 3c858cf65e9a2c99a7c2a5864603d1bc828df1be Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Fri, 12 Apr 2024 15:52:42 +0530 Subject: drm/amdgpu: add missing vbios version from devcoredump Add vbios version in the devcoredump along with formatting the information with proper alignment. Reviewed-by: Alex Deucher Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c index 1129e5e5fb42..64fe564b8036 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c @@ -188,10 +188,11 @@ static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev, adev->vpe.feature_version, adev->vpe.fw_version); drm_printf(p, "\nVBIOS Information\n"); - drm_printf(p, "name: %s\n", ctx->name); - drm_printf(p, "pn %s\n", ctx->vbios_pn); - drm_printf(p, "version: %s\n", ctx->vbios_ver_str); - drm_printf(p, "date: %s\n", ctx->date); + drm_printf(p, "vbios name : %s\n", ctx->name); + drm_printf(p, "vbios pn : %s\n", ctx->vbios_pn); + drm_printf(p, "vbios version : %d\n", ctx->version); + drm_printf(p, "vbios ver_str : %s\n", ctx->vbios_ver_str); + drm_printf(p, "vbios date : %s\n", ctx->date); } static ssize_t -- cgit v1.2.3 From f7c161a4c250d44eb96a1dcbf5bb3a8e3eba525b Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Thu, 11 Apr 2024 14:57:52 -0400 Subject: drm/amdgpu: increase mes submission timeout MES internally has a timeout allowance of 2 seconds. Increase driver timeout to 3 seconds to be safe. Signed-off-by: Jonathan Kim Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index e5230078a4cd..81833395324a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -111,7 +111,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, struct amdgpu_device *adev = mes->adev; struct amdgpu_ring *ring = &mes->ring; unsigned long flags; - signed long timeout = adev->usec_timeout; + signed long timeout = 3000000; /* 3000 ms */ if (amdgpu_emu_mode) { timeout *= 100; -- cgit v1.2.3 From f23558627f2bb28df3b7f3d1d53b1e7f4bd1e250 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Fri, 12 Apr 2024 09:53:15 +0800 Subject: drm/amdgpu: add new aca smu callback func parse_error_code() add new aca smu callback parse_error_code{} to avoid specific asic check in amdgpu_aca.c file Signed-off-by: Yang Wang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 23 ++++++++-------------- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h | 1 + .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 13 ++++++++++++ 3 files changed, 22 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index c02634a124dd..c50202215f6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -753,23 +753,13 @@ int aca_bank_info_decode(struct aca_bank *bank, struct aca_bank_info *info) static int aca_bank_get_error_code(struct amdgpu_device *adev, struct aca_bank *bank) { - int error_code; - - switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { - case IP_VERSION(13, 0, 6): - if (!(adev->flags & AMD_IS_APU) && adev->pm.fw_version >= 0x00555600) { - error_code = ACA_REG__SYND__ERRORINFORMATION(bank->regs[ACA_REG_IDX_SYND]); - return error_code & 0xff; - } - break; - default: - break; - } + struct amdgpu_aca *aca = &adev->aca; + const struct aca_smu_funcs *smu_funcs = aca->smu_funcs; - /* NOTE: the true error code is encoded in status.errorcode[0:7] */ - error_code = ACA_REG__STATUS__ERRORCODE(bank->regs[ACA_REG_IDX_STATUS]); + if (!smu_funcs || !smu_funcs->parse_error_code) + return -EOPNOTSUPP; - return error_code & 0xff; + return smu_funcs->parse_error_code(adev, bank); } int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank, int *err_codes, int size) @@ -780,6 +770,9 @@ int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank return -EINVAL; error_code = aca_bank_get_error_code(adev, bank); + if (error_code < 0) + return error_code; + for (i = 0; i < size; i++) { if (err_codes[i] == error_code) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h index 3765843ea648..5ef6b745f222 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h @@ -173,6 +173,7 @@ struct aca_smu_funcs { int (*set_debug_mode)(struct amdgpu_device *adev, bool enable); int (*get_valid_aca_count)(struct amdgpu_device *adev, enum aca_smu_type type, u32 *count); int (*get_valid_aca_bank)(struct amdgpu_device *adev, enum aca_smu_type type, int idx, struct aca_bank *bank); + int (*parse_error_code)(struct amdgpu_device *adev, struct aca_bank *bank); }; struct amdgpu_aca { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 16179e170874..bc1f3fe4d82a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -3118,12 +3118,25 @@ static int aca_smu_get_valid_aca_bank(struct amdgpu_device *adev, return 0; } +static int aca_smu_parse_error_code(struct amdgpu_device *adev, struct aca_bank *bank) +{ + int error_code; + + if (!(adev->flags & AMD_IS_APU) && adev->pm.fw_version >= 0x00555600) + error_code = ACA_REG__SYND__ERRORINFORMATION(bank->regs[ACA_REG_IDX_SYND]); + else + error_code = ACA_REG__STATUS__ERRORCODE(bank->regs[ACA_REG_IDX_STATUS]); + + return error_code & 0xff; +} + static const struct aca_smu_funcs smu_v13_0_6_aca_smu_funcs = { .max_ue_bank_count = 12, .max_ce_bank_count = 12, .set_debug_mode = aca_smu_set_debug_mode, .get_valid_aca_count = aca_smu_get_valid_aca_count, .get_valid_aca_bank = aca_smu_get_valid_aca_bank, + .parse_error_code = aca_smu_parse_error_code, }; static int smu_v13_0_6_select_xgmi_plpd_policy(struct smu_context *smu, -- cgit v1.2.3 From 98856136c485e586ab063f0b3780dfc0c78df780 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Thu, 11 Apr 2024 11:11:38 +0800 Subject: drm/amdgpu: validate the parameters of bo mapping operations more clearly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Verify the parameters of amdgpu_vm_bo_(map/replace_map/clearing_mappings) in one common place. Fixes: dc54d3d1744d ("drm/amdgpu: implement AMDGPU_VA_OP_CLEAR v2") Cc: stable@vger.kernel.org Reported-by: Vlad Stolyarov Suggested-by: Christian König Signed-off-by: xinhui pan Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 72 ++++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 26 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8af3f0fd3073..4e2391c83d7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1647,6 +1647,37 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, trace_amdgpu_vm_bo_map(bo_va, mapping); } +/* Validate operation parameters to prevent potential abuse */ +static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev, + struct amdgpu_bo *bo, + uint64_t saddr, + uint64_t offset, + uint64_t size) +{ + uint64_t tmp, lpfn; + + if (saddr & AMDGPU_GPU_PAGE_MASK + || offset & AMDGPU_GPU_PAGE_MASK + || size & AMDGPU_GPU_PAGE_MASK) + return -EINVAL; + + if (check_add_overflow(saddr, size, &tmp) + || check_add_overflow(offset, size, &tmp) + || size == 0 /* which also leads to end < begin */) + return -EINVAL; + + /* make sure object fit at this offset */ + if (bo && offset + size > amdgpu_bo_size(bo)) + return -EINVAL; + + /* Ensure last pfn not exceed max_pfn */ + lpfn = (saddr + size - 1) >> AMDGPU_GPU_PAGE_SHIFT; + if (lpfn >= adev->vm_manager.max_pfn) + return -EINVAL; + + return 0; +} + /** * amdgpu_vm_bo_map - map bo inside a vm * @@ -1673,21 +1704,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, struct amdgpu_bo *bo = bo_va->base.bo; struct amdgpu_vm *vm = bo_va->base.vm; uint64_t eaddr; + int r; - /* validate the parameters */ - if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK) - return -EINVAL; - if (saddr + size <= saddr || offset + size <= offset) - return -EINVAL; - - /* make sure object fit at this offset */ - eaddr = saddr + size - 1; - if ((bo && offset + size > amdgpu_bo_size(bo)) || - (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) - return -EINVAL; + r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size); + if (r) + return r; saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr); if (tmp) { @@ -1740,17 +1764,9 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, uint64_t eaddr; int r; - /* validate the parameters */ - if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK) - return -EINVAL; - if (saddr + size <= saddr || offset + size <= offset) - return -EINVAL; - - /* make sure object fit at this offset */ - eaddr = saddr + size - 1; - if ((bo && offset + size > amdgpu_bo_size(bo)) || - (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) - return -EINVAL; + r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size); + if (r) + return r; /* Allocate all the needed memory */ mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); @@ -1764,7 +1780,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, } saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; mapping->start = saddr; mapping->last = eaddr; @@ -1851,10 +1867,14 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *before, *after, *tmp, *next; LIST_HEAD(removed); uint64_t eaddr; + int r; + + r = amdgpu_vm_verify_parameters(adev, NULL, saddr, 0, size); + if (r) + return r; - eaddr = saddr + size - 1; saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; /* Allocate all the needed memory */ before = kzalloc(sizeof(*before), GFP_KERNEL); -- cgit v1.2.3 From 394ae0603a6764d36437a26c097ef549e0eee1ff Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 4 Apr 2024 16:25:40 +0200 Subject: drm/amdgpu: fix visible VRAM handling during faults MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we removed the hacky start code check we actually didn't took into account that *all* VRAM pages needs to be CPU accessible. Clean up the code and unify the handling into a single helper which checks if the whole resource is CPU accessible. The only place where a partial check would make sense is during eviction, but that is neglitible. Signed-off-by: Christian König Fixes: aed01a68047b ("drm/amdgpu: Remove TTM resource->start visible VRAM condition v2") Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher CC: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 22 +++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 22 ----------- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 61 +++++++++++++++++++----------- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 3 ++ 5 files changed, 53 insertions(+), 57 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 0a4b09709cfb..ec888fc6ead8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -819,7 +819,7 @@ retry: p->bytes_moved += ctx.bytes_moved; if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && - amdgpu_bo_in_cpu_visible_vram(bo)) + amdgpu_res_cpu_visible(adev, bo->tbo.resource)) p->bytes_moved_vis += ctx.bytes_moved; if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 010b0cb7693c..2099159a693f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -617,8 +617,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, return r; if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && - bo->tbo.resource->mem_type == TTM_PL_VRAM && - amdgpu_bo_in_cpu_visible_vram(bo)) + amdgpu_res_cpu_visible(adev, bo->tbo.resource)) amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, ctx.bytes_moved); else @@ -1272,23 +1271,25 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict) void amdgpu_bo_get_memory(struct amdgpu_bo *bo, struct amdgpu_mem_stats *stats) { + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct ttm_resource *res = bo->tbo.resource; uint64_t size = amdgpu_bo_size(bo); struct drm_gem_object *obj; unsigned int domain; bool shared; /* Abort if the BO doesn't currently have a backing store */ - if (!bo->tbo.resource) + if (!res) return; obj = &bo->tbo.base; shared = drm_gem_object_is_shared_for_memory_stats(obj); - domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type); + domain = amdgpu_mem_type_to_domain(res->mem_type); switch (domain) { case AMDGPU_GEM_DOMAIN_VRAM: stats->vram += size; - if (amdgpu_bo_in_cpu_visible_vram(bo)) + if (amdgpu_res_cpu_visible(adev, bo->tbo.resource)) stats->visible_vram += size; if (shared) stats->vram_shared += size; @@ -1389,10 +1390,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) /* Remember that this BO was accessed by the CPU */ abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - if (bo->resource->mem_type != TTM_PL_VRAM) - return 0; - - if (amdgpu_bo_in_cpu_visible_vram(abo)) + if (amdgpu_res_cpu_visible(adev, bo->resource)) return 0; /* Can't move a pinned BO to visible VRAM */ @@ -1415,7 +1413,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) /* this should never happen */ if (bo->resource->mem_type == TTM_PL_VRAM && - !amdgpu_bo_in_cpu_visible_vram(abo)) + !amdgpu_res_cpu_visible(adev, bo->resource)) return VM_FAULT_SIGBUS; ttm_bo_move_to_lru_tail_unlocked(bo); @@ -1579,6 +1577,7 @@ uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev, */ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) { + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct dma_buf_attachment *attachment; struct dma_buf *dma_buf; const char *placement; @@ -1587,10 +1586,11 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) if (dma_resv_trylock(bo->tbo.base.resv)) { unsigned int domain; + domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type); switch (domain) { case AMDGPU_GEM_DOMAIN_VRAM: - if (amdgpu_bo_in_cpu_visible_vram(bo)) + if (amdgpu_res_cpu_visible(adev, bo->tbo.resource)) placement = "VRAM VISIBLE"; else placement = "VRAM"; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index be679c42b0b8..fa03d9e4874c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -250,28 +250,6 @@ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo) return drm_vma_node_offset_addr(&bo->tbo.base.vma_node); } -/** - * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM - */ -static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo) -{ - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - struct amdgpu_res_cursor cursor; - - if (!bo->tbo.resource || bo->tbo.resource->mem_type != TTM_PL_VRAM) - return false; - - amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor); - while (cursor.remaining) { - if (cursor.start < adev->gmc.visible_vram_size) - return true; - - amdgpu_res_next(&cursor, cursor.size); - } - - return false; -} - /** * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 6417cb76ccd4..1d71729e3f6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -133,7 +133,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, } else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) && - amdgpu_bo_in_cpu_visible_vram(abo)) { + amdgpu_res_cpu_visible(adev, bo->resource)) { /* Try evicting to the CPU inaccessible part of VRAM * first, but only set GTT as busy placement, so this @@ -403,40 +403,55 @@ error: return r; } -/* - * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy +/** + * amdgpu_res_cpu_visible - Check that resource can be accessed by CPU + * @adev: amdgpu device + * @res: the resource to check * - * Called by amdgpu_bo_move() + * Returns: true if the full resource is CPU visible, false otherwise. */ -static bool amdgpu_mem_visible(struct amdgpu_device *adev, - struct ttm_resource *mem) +bool amdgpu_res_cpu_visible(struct amdgpu_device *adev, + struct ttm_resource *res) { - u64 mem_size = (u64)mem->size; struct amdgpu_res_cursor cursor; - u64 end; - if (mem->mem_type == TTM_PL_SYSTEM || - mem->mem_type == TTM_PL_TT) + if (!res) + return false; + + if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT || + res->mem_type == AMDGPU_PL_PREEMPT) return true; - if (mem->mem_type != TTM_PL_VRAM) + + if (res->mem_type != TTM_PL_VRAM) return false; - amdgpu_res_first(mem, 0, mem_size, &cursor); - end = cursor.start + cursor.size; + amdgpu_res_first(res, 0, res->size, &cursor); while (cursor.remaining) { + if ((cursor.start + cursor.size) >= adev->gmc.visible_vram_size) + return false; amdgpu_res_next(&cursor, cursor.size); + } - if (!cursor.remaining) - break; + return true; +} - /* ttm_resource_ioremap only supports contiguous memory */ - if (end != cursor.start) - return false; +/* + * amdgpu_res_copyable - Check that memory can be accessed by ttm_bo_move_memcpy + * + * Called by amdgpu_bo_move() + */ +static bool amdgpu_res_copyable(struct amdgpu_device *adev, + struct ttm_resource *mem) +{ + if (!amdgpu_res_cpu_visible(adev, mem)) + return false; - end = cursor.start + cursor.size; - } + /* ttm_resource_ioremap only supports contiguous memory */ + if (mem->mem_type == TTM_PL_VRAM && + !(mem->placement & TTM_PL_FLAG_CONTIGUOUS)) + return false; - return end <= adev->gmc.visible_vram_size; + return true; } /* @@ -529,8 +544,8 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, if (r) { /* Check that all memory is CPU accessible */ - if (!amdgpu_mem_visible(adev, old_mem) || - !amdgpu_mem_visible(adev, new_mem)) { + if (!amdgpu_res_copyable(adev, old_mem) || + !amdgpu_res_copyable(adev, new_mem)) { pr_err("Move buffer fallback to memcpy unavailable\n"); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 65ec82141a8e..32cf6b6f6efd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -139,6 +139,9 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr, int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr, uint64_t start); +bool amdgpu_res_cpu_visible(struct amdgpu_device *adev, + struct ttm_resource *res); + int amdgpu_ttm_init(struct amdgpu_device *adev); void amdgpu_ttm_fini(struct amdgpu_device *adev); void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, -- cgit v1.2.3 From 959056982a9b46758e0582bc6724b6ef51012e91 Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Tue, 2 Apr 2024 17:21:01 +0800 Subject: drm/amdgpu: Fix discovery initialization failure during pci rescan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Waiting for system ready to fix the discovery initialization failure issue. This failure usually occurs when dGPU is removed and then rescanned via command line. It's caused by following two errors: [1] vram size is 0 [2] wrong binary signature Signed-off-by: Ma Jun Acked-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 07c5fca06178..90735e966318 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -255,7 +255,6 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, uint64_t vram_size; u32 msg; int i, ret = 0; - int ip_discovery_ver = 0; /* It can take up to a second for IFWI init to complete on some dGPUs, * but generally it should be in the 60-100ms range. Normally this starts @@ -265,17 +264,13 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, * continue. */ - ip_discovery_ver = RREG32(mmIP_DISCOVERY_VERSION); - if ((dev_is_removable(&adev->pdev->dev)) || - (ip_discovery_ver == IP_DISCOVERY_V2) || - (ip_discovery_ver == IP_DISCOVERY_V4)) { - for (i = 0; i < 1000; i++) { - msg = RREG32(mmMP0_SMN_C2PMSG_33); - if (msg & 0x80000000) - break; - msleep(1); - } + for (i = 0; i < 1000; i++) { + msg = RREG32(mmMP0_SMN_C2PMSG_33); + if (msg & 0x80000000) + break; + usleep_range(1000, 1100); } + vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; if (vram_size) { -- cgit v1.2.3 From 12b8b4e68510a7e761cbbf16038ef4bb11812d13 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Mon, 15 Apr 2024 01:38:39 +0200 Subject: drm/amdgpu: Add missing space to DRM_WARN() message s/,please/, please/ Reviewed-by: Alex Deucher Signed-off-by: Thorsten Blum Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 170da4551ed5..48a2de92180a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1460,7 +1460,7 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) /* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */ if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR)) - DRM_WARN("System can't access extended configuration space,please check!!\n"); + DRM_WARN("System can't access extended configuration space, please check!!\n"); /* skip if the bios has already enabled large BAR */ if (adev->gmc.real_vram_size && -- cgit v1.2.3 From 6c6acc5f33ab727cfb6fe88cf3d96ca9c67cac5b Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Mon, 26 Feb 2024 10:03:57 +0800 Subject: drm/amdgpu: Load ipkeymgr drv for psp v14 while DBG_DRV is renamed to HAD_DRV for psp v14, part of its APIs/functionality is moved to a new component named Ipkeymgr_Drv. Signed-off-by: Hawking Zhang Reviewed-by: Likun Gao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 15 +++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 8 +++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 1 + drivers/gpu/drm/amd/amdgpu/psp_v14_0.c | 5 +++++ 4 files changed, 28 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index edae581059af..4bd4602d11b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2265,6 +2265,15 @@ static int psp_hw_start(struct psp_context *psp) } } + if ((is_psp_fw_valid(psp->ipkeymgr_drv)) && + (psp->funcs->bootloader_load_ipkeymgr_drv != NULL)) { + ret = psp_bootloader_load_ipkeymgr_drv(psp); + if (ret) { + dev_err(adev->dev, "PSP load ipkeymgr_drv failed!\n"); + return ret; + } + } + if ((is_psp_fw_valid(psp->sos)) && (psp->funcs->bootloader_load_sos != NULL)) { ret = psp_bootloader_load_sos(psp); @@ -3280,6 +3289,12 @@ static int parse_sos_bin_descriptor(struct psp_context *psp, psp->ras_drv.size_bytes = le32_to_cpu(desc->size_bytes); psp->ras_drv.start_addr = ucode_start_addr; break; + case PSP_FW_TYPE_PSP_IPKEYMGR_DRV: + psp->ipkeymgr_drv.fw_version = le32_to_cpu(desc->fw_version); + psp->ipkeymgr_drv.feature_version = le32_to_cpu(desc->fw_version); + psp->ipkeymgr_drv.size_bytes = le32_to_cpu(desc->size_bytes); + psp->ipkeymgr_drv.start_addr = ucode_start_addr; + break; default: dev_warn(psp->adev->dev, "Unsupported PSP FW type: %d\n", desc->fw_type); break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index ee16f134ae92..66b3f88fbecd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -74,7 +74,8 @@ enum psp_bootloader_cmd { PSP_BL__LOAD_SOCDRV = 0xB0000, PSP_BL__LOAD_DBGDRV = 0xC0000, PSP_BL__LOAD_INTFDRV = 0xD0000, - PSP_BL__LOAD_RASDRV = 0xE0000, + PSP_BL__LOAD_RASDRV = 0xE0000, + PSP_BL__LOAD_IPKEYMGRDRV = 0xF0000, PSP_BL__DRAM_LONG_TRAIN = 0x100000, PSP_BL__DRAM_SHORT_TRAIN = 0x200000, PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000, @@ -117,6 +118,7 @@ struct psp_funcs { int (*bootloader_load_intf_drv)(struct psp_context *psp); int (*bootloader_load_dbg_drv)(struct psp_context *psp); int (*bootloader_load_ras_drv)(struct psp_context *psp); + int (*bootloader_load_ipkeymgr_drv)(struct psp_context *psp); int (*bootloader_load_sos)(struct psp_context *psp); int (*ring_create)(struct psp_context *psp, enum psp_ring_type ring_type); @@ -336,6 +338,7 @@ struct psp_context { struct psp_bin_desc intf_drv; struct psp_bin_desc dbg_drv; struct psp_bin_desc ras_drv; + struct psp_bin_desc ipkeymgr_drv; /* tmr buffer */ struct amdgpu_bo *tmr_bo; @@ -424,6 +427,9 @@ struct amdgpu_psp_funcs { #define psp_bootloader_load_ras_drv(psp) \ ((psp)->funcs->bootloader_load_ras_drv ? \ (psp)->funcs->bootloader_load_ras_drv((psp)) : 0) +#define psp_bootloader_load_ipkeymgr_drv(psp) \ + ((psp)->funcs->bootloader_load_ipkeymgr_drv ? \ + (psp)->funcs->bootloader_load_ipkeymgr_drv((psp)) : 0) #define psp_bootloader_load_sos(psp) \ ((psp)->funcs->bootloader_load_sos ? (psp)->funcs->bootloader_load_sos((psp)) : 0) #define psp_smu_reload_quirk(psp) \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 619445760037..105d4de0613a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -125,6 +125,7 @@ enum psp_fw_type { PSP_FW_TYPE_PSP_INTF_DRV, PSP_FW_TYPE_PSP_DBG_DRV, PSP_FW_TYPE_PSP_RAS_DRV, + PSP_FW_TYPE_PSP_IPKEYMGR_DRV, PSP_FW_TYPE_MAX_INDEX, }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c index 78a95f8f370b..241d5ff2ef3c 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c @@ -177,6 +177,10 @@ static int psp_v14_0_bootloader_load_ras_drv(struct psp_context *psp) return psp_v14_0_bootloader_load_component(psp, &psp->ras_drv, PSP_BL__LOAD_RASDRV); } +static int psp_v14_0_bootloader_load_ipkeymgr_drv(struct psp_context *psp) +{ + return psp_v14_0_bootloader_load_component(psp, &psp->ipkeymgr_drv, PSP_BL__LOAD_IPKEYMGRDRV); +} static int psp_v14_0_bootloader_load_sos(struct psp_context *psp) { @@ -653,6 +657,7 @@ static const struct psp_funcs psp_v14_0_funcs = { .bootloader_load_intf_drv = psp_v14_0_bootloader_load_intf_drv, .bootloader_load_dbg_drv = psp_v14_0_bootloader_load_dbg_drv, .bootloader_load_ras_drv = psp_v14_0_bootloader_load_ras_drv, + .bootloader_load_ipkeymgr_drv = psp_v14_0_bootloader_load_ipkeymgr_drv, .bootloader_load_sos = psp_v14_0_bootloader_load_sos, .ring_create = psp_v14_0_ring_create, .ring_stop = psp_v14_0_ring_stop, -- cgit v1.2.3 From 1347853271ed3ec85dd42586fa31f746664504e7 Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Wed, 27 Mar 2024 15:18:40 +0800 Subject: drm/amdgpu: refactoring the runtime pm mode detection code refactor the code of runtime pm mode detection to support amdgpu_runtime_pm =2 and 1 two cases Signed-off-by: Ma Jun Reviewed-by: Yang Wang Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 75 ++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 48 +------------------ 3 files changed, 77 insertions(+), 47 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 65c17c59c152..e0d7f4ee7e16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1409,6 +1409,7 @@ bool amdgpu_device_supports_px(struct drm_device *dev); bool amdgpu_device_supports_boco(struct drm_device *dev); bool amdgpu_device_supports_smart_shift(struct drm_device *dev); int amdgpu_device_supports_baco(struct drm_device *dev); +void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev); bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, struct amdgpu_device *peer_adev); int amdgpu_device_baco_enter(struct drm_device *dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 48a2de92180a..1b2e177bc2d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -350,6 +350,81 @@ int amdgpu_device_supports_baco(struct drm_device *dev) return amdgpu_asic_supports_baco(adev); } +void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev) +{ + struct drm_device *dev; + int bamaco_support; + + dev = adev_to_drm(adev); + + adev->pm.rpm_mode = AMDGPU_RUNPM_NONE; + bamaco_support = amdgpu_device_supports_baco(dev); + + switch (amdgpu_runtime_pm) { + case 2: + if (bamaco_support & MACO_SUPPORT) { + adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO; + dev_info(adev->dev, "Forcing BAMACO for runtime pm\n"); + } else if (bamaco_support == BACO_SUPPORT) { + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; + dev_info(adev->dev, "Requested mode BAMACO not available,fallback to use BACO\n"); + } + break; + case 1: + if (bamaco_support & BACO_SUPPORT) { + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; + dev_info(adev->dev, "Forcing BACO for runtime pm\n"); + } + break; + case -1: + case -2: + if (amdgpu_device_supports_px(dev)) { /* enable PX as runtime mode */ + adev->pm.rpm_mode = AMDGPU_RUNPM_PX; + dev_info(adev->dev, "Using ATPX for runtime pm\n"); + } else if (amdgpu_device_supports_boco(dev)) { /* enable boco as runtime mode */ + adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO; + dev_info(adev->dev, "Using BOCO for runtime pm\n"); + } else { + if (!bamaco_support) + goto no_runtime_pm; + + switch (adev->asic_type) { + case CHIP_VEGA20: + case CHIP_ARCTURUS: + /* BACO are not supported on vega20 and arctrus */ + break; + case CHIP_VEGA10: + /* enable BACO as runpm mode if noretry=0 */ + if (!adev->gmc.noretry) + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; + break; + default: + /* enable BACO as runpm mode on CI+ */ + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; + break; + } + + if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) { + if (bamaco_support & MACO_SUPPORT) { + adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO; + dev_info(adev->dev, "Using BAMACO for runtime pm\n"); + } else { + dev_info(adev->dev, "Using BACO for runtime pm\n"); + } + } + } + break; + case 0: + dev_info(adev->dev, "runtime pm is manually disabled\n"); + break; + default: + break; + } + +no_runtime_pm: + if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) + dev_info(adev->dev, "Runtime PM not available\n"); +} /** * amdgpu_device_supports_smart_shift - Is the device dGPU with * smart shift support diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 55eb4e4eb8f2..a0ea6fe8d060 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -133,7 +133,6 @@ void amdgpu_register_gpu_instance(struct amdgpu_device *adev) int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) { struct drm_device *dev; - int bamaco_support = 0; int r, acpi_status; dev = adev_to_drm(adev); @@ -150,52 +149,7 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) goto out; } - adev->pm.rpm_mode = AMDGPU_RUNPM_NONE; - if (amdgpu_device_supports_px(dev) && - (amdgpu_runtime_pm != 0)) { /* enable PX as runtime mode */ - adev->pm.rpm_mode = AMDGPU_RUNPM_PX; - dev_info(adev->dev, "Using ATPX for runtime pm\n"); - } else if (amdgpu_device_supports_boco(dev) && - (amdgpu_runtime_pm != 0)) { /* enable boco as runtime mode */ - adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO; - dev_info(adev->dev, "Using BOCO for runtime pm\n"); - } else if (amdgpu_runtime_pm != 0) { - bamaco_support = amdgpu_device_supports_baco(dev); - - if (!bamaco_support) - goto no_runtime_pm; - - switch (adev->asic_type) { - case CHIP_VEGA20: - case CHIP_ARCTURUS: - /* enable BACO as runpm mode if runpm=1 */ - if (amdgpu_runtime_pm > 0) - adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; - break; - case CHIP_VEGA10: - /* enable BACO as runpm mode if noretry=0 */ - if (!adev->gmc.noretry) - adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; - break; - default: - /* enable BACO as runpm mode on CI+ */ - adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; - break; - } - - if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) { - if (bamaco_support & MACO_SUPPORT) { - adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO; - dev_info(adev->dev, "Using BAMACO for runtime pm\n"); - } else { - dev_info(adev->dev, "Using BACO for runtime pm\n"); - } - } - } - -no_runtime_pm: - if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) - dev_info(adev->dev, "NO pm mode for runtime pm\n"); + amdgpu_device_detect_runtime_pm_mode(adev); /* Call ACPI methods: require modeset init * but failure is not fatal -- cgit v1.2.3 From 577cbed31818361cefb642eeeb558b8755ccbe2c Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Mon, 26 Feb 2024 10:14:53 +0800 Subject: drm/amdgpu: rename DBG_DRV to HAD_DRV for psp v14 Add a psp bl command enum for HAD_DRV. Signed-off-by: Hawking Zhang Reviewed-by: Likun Gao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 1 + drivers/gpu/drm/amd/amdgpu/psp_v14_0.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 66b3f88fbecd..3635303e6548 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -73,6 +73,7 @@ enum psp_bootloader_cmd { PSP_BL__LOAD_KEY_DATABASE = 0x80000, PSP_BL__LOAD_SOCDRV = 0xB0000, PSP_BL__LOAD_DBGDRV = 0xC0000, + PSP_BL__LOAD_HADDRV = PSP_BL__LOAD_DBGDRV, PSP_BL__LOAD_INTFDRV = 0xD0000, PSP_BL__LOAD_RASDRV = 0xE0000, PSP_BL__LOAD_IPKEYMGRDRV = 0xF0000, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c index 241d5ff2ef3c..f08a32c18694 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c @@ -169,7 +169,8 @@ static int psp_v14_0_bootloader_load_intf_drv(struct psp_context *psp) static int psp_v14_0_bootloader_load_dbg_drv(struct psp_context *psp) { - return psp_v14_0_bootloader_load_component(psp, &psp->dbg_drv, PSP_BL__LOAD_DBGDRV); + /* dbg_drv was renamed to had_drv in psp v14 */ + return psp_v14_0_bootloader_load_component(psp, &psp->dbg_drv, PSP_BL__LOAD_HADDRV); } static int psp_v14_0_bootloader_load_ras_drv(struct psp_context *psp) -- cgit v1.2.3 From 0c1195ca0d02a3db2599738a944bb6a36f6fa234 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Wed, 31 May 2023 16:09:05 +0800 Subject: drm/amd/swsmu: support smu block discovery for smu v14 Support for smu ip block add for SMU v14. Signed-off-by: Kenneth Feng Reviewed-by: Likun Gao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 90735e966318..0e31bdb4b7cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1901,6 +1901,8 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev) break; case IP_VERSION(14, 0, 0): case IP_VERSION(14, 0, 1): + case IP_VERSION(14, 0, 2): + case IP_VERSION(14, 0, 3): amdgpu_device_ip_block_add(adev, &smu_v14_0_ip_block); break; default: -- cgit v1.2.3 From eefc85a2779d75909e769feb7dd056a0bfba4ca7 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Fri, 12 Apr 2024 14:19:36 +0530 Subject: drm:amdgpu: enable IH RB ring1 for IH v6.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need IH ring1 for handling the pagefault interrupts which are overflowing the default ring for specific usecases. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/ih_v6_0.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index ad4ad39f128f..26dc99232eb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -549,8 +549,15 @@ static int ih_v6_0_sw_init(void *handle) adev->irq.ih.use_doorbell = true; adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1; - adev->irq.ih1.ring_size = 0; - adev->irq.ih2.ring_size = 0; + if (!(adev->flags & AMD_IS_APU)) { + r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, IH_RING_SIZE, + use_bus_addr); + if (r) + return r; + + adev->irq.ih1.use_doorbell = true; + adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1; + } /* initialize ih control register offset */ ih_v6_0_init_register_offset(adev); -- cgit v1.2.3 From 5adcd78fa2bcc458f9786067bcf4a15f9a3f49c9 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Fri, 12 Apr 2024 14:22:16 +0530 Subject: drm:amdgpu: enable IH ring1 for IH v6.1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need IH ring1 for handling the pagefault interrupts which over flow in default ring for specific usecases. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/ih_v6_1.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c index b8da0fc29378..73dba180fabd 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c @@ -550,8 +550,15 @@ static int ih_v6_1_sw_init(void *handle) adev->irq.ih.use_doorbell = true; adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1; - adev->irq.ih1.ring_size = 0; - adev->irq.ih2.ring_size = 0; + if (!(adev->flags & AMD_IS_APU)) { + r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, IH_RING_SIZE, + use_bus_addr); + if (r) + return r; + + adev->irq.ih1.use_doorbell = true; + adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1; + } /* initialize ih control register offset */ ih_v6_1_init_register_offset(adev); -- cgit v1.2.3 From ca0afa2f4161dbf82e345144fd0042d00b11eb5b Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Fri, 12 Apr 2024 15:00:20 +0530 Subject: drm/amdgpu: enable redirection of irq's for IH V6.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable redirection of irq for pagefaults for specific clients to avoid overflow without dropping interrupts. So here we redirect the interrupts to another IH ring i.e ring1 where only these interrupts are processed. Reviewed-by: Christian König Signed-off-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/ih_v6_0.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index 26dc99232eb6..c757ef99e3c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -346,6 +346,21 @@ static int ih_v6_0_irq_init(struct amdgpu_device *adev) DELAY, 3); WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp); + /* Redirect the interrupts to IH RB1 for dGPU */ + if (adev->irq.ih1.ring_size) { + tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_INDEX, INDEX, 0); + WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX, tmp); + + tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, CLIENT_ID, 0xa); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, SOURCE_ID, 0x0); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, + SOURCE_ID_MATCH_ENABLE, 0x1); + + WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA, tmp); + } + pci_set_master(adev->pdev); /* enable interrupts */ -- cgit v1.2.3 From ea137071ada1591a05ce0366de350158bf8dd6c7 Mon Sep 17 00:00:00 2001 From: Ahmad Rehman Date: Tue, 16 Apr 2024 13:29:15 -0500 Subject: drm/amdgpu: Skip the coredump collection on reset during driver reload In passthrough environment, the driver triggers the mode-1 reset on reload. The reset causes the core dump collection which is delayed task and prevents driver from unloading until it is completed. Since we do not need to collect data on "reset on reload" case, we can skip core dump collection. v2: Use the same flag to avoid calling amdgpu_reset_reg_dumps as well. Signed-off-by: Ahmad Rehman Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 +++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h | 1 + 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1b2e177bc2d6..f3b7cb18fd46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5357,7 +5357,9 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, /* Try reset handler method first */ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, reset_list); - amdgpu_reset_reg_dumps(tmp_adev); + + if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) + amdgpu_reset_reg_dumps(tmp_adev); reset_context->reset_device_list = device_list_handle; r = amdgpu_reset_perform_reset(tmp_adev, reset_context); @@ -5430,7 +5432,8 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, vram_lost = amdgpu_device_check_vram_lost(tmp_adev); - amdgpu_coredump(tmp_adev, vram_lost, reset_context); + if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) + amdgpu_coredump(tmp_adev, vram_lost, reset_context); if (vram_lost) { DRM_INFO("VRAM is lost due to GPU reset!\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 6ea893ad9a36..c512f70b8272 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2481,6 +2481,7 @@ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work) /* Use a common context, just need to make sure full reset is done */ set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); + set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags); r = amdgpu_do_asic_reset(&device_list, &reset_context); if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index 66125d43cf21..b11d190ece53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -32,6 +32,7 @@ enum AMDGPU_RESET_FLAGS { AMDGPU_NEED_FULL_RESET = 0, AMDGPU_SKIP_HW_RESET = 1, + AMDGPU_SKIP_COREDUMP = 2, }; struct amdgpu_reset_context { -- cgit v1.2.3 From 93522c19488edc1b347083cd3622a1572d5a95e1 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Fri, 12 Apr 2024 15:05:00 +0530 Subject: drm/amdgpu: enable redirection of irq's for IH V6.1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable redirection of irq for pagefaults for specific clients to avoid overflow without dropping interrupts. So here we redirect the interrupts to another IH ring i.e ring1 where only these interrupts are processed. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/ih_v6_1.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c index 73dba180fabd..29ed78798070 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c @@ -346,6 +346,21 @@ static int ih_v6_1_irq_init(struct amdgpu_device *adev) DELAY, 3); WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp); + /* Redirect the interrupts to IH RB1 for dGPU */ + if (adev->irq.ih1.ring_size) { + tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_INDEX, INDEX, 0); + WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX, tmp); + + tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, CLIENT_ID, 0xa); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, SOURCE_ID, 0x0); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, + SOURCE_ID_MATCH_ENABLE, 0x1); + + WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA, tmp); + } + pci_set_master(adev->pdev); /* enable interrupts */ -- cgit v1.2.3 From 8954c3fbe764a42db29bc8d54ff795a105759fe9 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 29 Feb 2024 13:59:19 +0530 Subject: drm/amdgpu: Change AID detection logic On GFX 9.4.3 SOCs, only 2 SDMA instances need to be available to be considered as a valid AID. Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index fbb43ae7624f..414ea3f560a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -630,7 +630,7 @@ static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev) int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev) { - u32 mask, inst_mask = adev->sdma.sdma_mask; + u32 mask, avail_inst, inst_mask = adev->sdma.sdma_mask; int ret, i; /* generally 1 AID supports 4 instances */ @@ -642,7 +642,9 @@ int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev) for (mask = (1 << adev->sdma.num_inst_per_aid) - 1; inst_mask; inst_mask >>= adev->sdma.num_inst_per_aid, ++i) { - if ((inst_mask & mask) == mask) + avail_inst = inst_mask & mask; + if (avail_inst == mask || avail_inst == 0x3 || + avail_inst == 0xc) adev->aid_mask |= (1 << i); } -- cgit v1.2.3 From 6a009ca1bf94dfe194d6e9cc85a9319b483aac2d Mon Sep 17 00:00:00 2001 From: Zhigang Luo Date: Wed, 17 Apr 2024 10:59:58 -0400 Subject: drm/amdgpu: remove virt_init_data_exchange from poison consumption handler Host will initiate an FLR for all poison consumption. Guest should wait for FLR message to re-init data exchange. Signed-off-by: Zhigang Luo Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index 89992c1c9a62..aba00d961627 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -446,8 +446,6 @@ static void xgpu_nv_ras_poison_handler(struct amdgpu_device *adev, amdgpu_virt_fini_data_exchange(adev); xgpu_nv_send_access_requests_with_param(adev, IDH_RAS_POISON, block, 0, 0); - if (block != AMDGPU_RAS_BLOCK__SDMA) - amdgpu_virt_init_data_exchange(adev); } } -- cgit v1.2.3 From e53a1713de314204f66cc186a74115ea62407876 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Thu, 18 Apr 2024 11:32:34 -0400 Subject: drm/amdgpu: Fix leak when GPU memory allocation fails Free the sync object if the memory allocation fails for any reason. Signed-off-by: Mukul Joshi Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index df58a6a1a67e..7c23ba19af33 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1854,6 +1854,7 @@ err_node_allow: err_bo_create: amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id); err_reserve_limit: + amdgpu_sync_free(&(*mem)->sync); mutex_destroy(&(*mem)->lock); if (gobj) drm_gem_object_put(gobj); -- cgit v1.2.3 From 81bf14519a8ca17af4f057a125d87fabbae90af3 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Sun, 7 Apr 2024 12:36:00 +0800 Subject: drm/amdkfd: make sure VM is ready for updating operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When page table BOs were evicted but not validated before updating page tables, VM is still in evicting state, amdgpu_vm_update_range returns -EBUSY and restore_process_worker runs into a dead loop. v2: Split the BO validation and page table update into two separate loops in amdgpu_amdkfd_restore_process_bos. (Felix) 1.Validate BOs 2.Validate VM (and DMABuf attachments) 3.Update page tables for the BOs validated above Fixes: 50661eb1a2c8 ("drm/amdgpu: Auto-validate DMABuf imports in compute VMs") Signed-off-by: Lang Yu Acked-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 34 ++++++++++++++---------- 1 file changed, 20 insertions(+), 14 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 7c23ba19af33..2131de36e3da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2901,13 +2901,12 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * amdgpu_sync_create(&sync_obj); - /* Validate BOs and map them to GPUVM (update VM page tables). */ + /* Validate BOs managed by KFD */ list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list) { struct amdgpu_bo *bo = mem->bo; uint32_t domain = mem->domain; - struct kfd_mem_attachment *attachment; struct dma_resv_iter cursor; struct dma_fence *fence; @@ -2932,6 +2931,25 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * goto validate_map_fail; } } + } + + if (failed_size) + pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size); + + /* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO + * validations above would invalidate DMABuf imports again. + */ + ret = process_validate_vms(process_info, &exec.ticket); + if (ret) { + pr_debug("Validating VMs failed, ret: %d\n", ret); + goto validate_map_fail; + } + + /* Update mappings managed by KFD. */ + list_for_each_entry(mem, &process_info->kfd_bo_list, + validate_list) { + struct kfd_mem_attachment *attachment; + list_for_each_entry(attachment, &mem->attachments, list) { if (!attachment->is_mapped) continue; @@ -2948,18 +2966,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * } } - if (failed_size) - pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size); - - /* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO - * validations above would invalidate DMABuf imports again. - */ - ret = process_validate_vms(process_info, &exec.ticket); - if (ret) { - pr_debug("Validating VMs failed, ret: %d\n", ret); - goto validate_map_fail; - } - /* Update mappings not managed by KFD */ list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { -- cgit v1.2.3 From 96950929eb232038022abd961be46d492d7a6f0f Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Fri, 19 Apr 2024 12:05:36 +0530 Subject: drm/buddy: Implement tracking clear page feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add tracking clear page feature. - Driver should enable the DRM_BUDDY_CLEARED flag if it successfully clears the blocks in the free path. On the otherhand, DRM buddy marks each block as cleared. - Track the available cleared pages size - If driver requests cleared memory we prefer cleared memory but fallback to uncleared if we can't find the cleared blocks. when driver requests uncleared memory we try to use uncleared but fallback to cleared memory if necessary. - When a block gets freed we clear it and mark the freed block as cleared, when there are buddies which are cleared as well we can merge them. Otherwise, we prefer to keep the blocks as separated. - Add a function to support defragmentation. v1: - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as cleared. Else, reset the clear flag for each block in the list(Christian) - For merging the 2 cleared blocks compare as below, drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)(Christian) - Defragment the memory beginning from min_order till the required memory space is available. v2: (Matthew) - Add a wrapper drm_buddy_free_list_internal for the freeing of blocks operation within drm buddy. - Write a macro block_incompatible() to allocate the required blocks. - Update the xe driver for the drm_buddy_free_list change in arguments. - add a warning if the two blocks are incompatible on defragmentation - call full defragmentation in the fini() function - place a condition to test if min_order is equal to 0 - replace the list with safe_reverse() variant as we might remove the block from the list. v3: - fix Gitlab user reported lockup issue. - Keep DRM_BUDDY_HEADER_CLEAR define sorted(Matthew) - modify to pass the root order instead max_order in fini() function(Matthew) - change bool 1 to true(Matthew) - add check if min_block_size is power of 2(Matthew) - modify the min_block_size datatype to u64(Matthew) v4: - rename the function drm_buddy_defrag with __force_merge. - Include __force_merge directly in drm buddy file and remove the defrag use in amdgpu driver. - Remove list_empty() check(Matthew) - Remove unnecessary space, headers and placement of new variables(Matthew) - Add a unit test case(Matthew) v5: - remove force merge support to actual range allocation and not to bail out when contains && split(Matthew) - add range support to force merge function. v6: - modify the alloc_range() function clear page non merged blocks allocation(Matthew) - correct the list_insert function name(Matthew). Signed-off-by: Arunpravin Paneer Selvam Signed-off-by: Matthew Auld Suggested-by: Christian König Suggested-by: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240419063538.11957-1-Arunpravin.PaneerSelvam@amd.com Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 6 +- drivers/gpu/drm/drm_buddy.c | 427 ++++++++++++++++++++------ drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 6 +- drivers/gpu/drm/tests/drm_buddy_test.c | 28 +- drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 4 +- include/drm/drm_buddy.h | 16 +- 6 files changed, 363 insertions(+), 124 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 8db880244324..c0c851409241 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, return 0; error_free_blocks: - drm_buddy_free_list(mm, &vres->blocks); + drm_buddy_free_list(mm, &vres->blocks, 0); mutex_unlock(&mgr->lock); error_fini: ttm_resource_fini(man, &vres->base); @@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, amdgpu_vram_mgr_do_reserve(man); - drm_buddy_free_list(mm, &vres->blocks); + drm_buddy_free_list(mm, &vres->blocks, 0); mutex_unlock(&mgr->lock); atomic64_sub(vis_usage, &mgr->vis_usage); @@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev) kfree(rsv); list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) { - drm_buddy_free_list(&mgr->mm, &rsv->allocated); + drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0); kfree(rsv); } if (!adev->gmc.is_app_apu) diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index 5ebdd6f8f36e..284ebae71cc4 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm, __list_add(&block->link, node->link.prev, &node->link); } +static void clear_reset(struct drm_buddy_block *block) +{ + block->header &= ~DRM_BUDDY_HEADER_CLEAR; +} + +static void mark_cleared(struct drm_buddy_block *block) +{ + block->header |= DRM_BUDDY_HEADER_CLEAR; +} + static void mark_allocated(struct drm_buddy_block *block) { block->header &= ~DRM_BUDDY_HEADER_STATE; @@ -82,6 +92,133 @@ static void mark_split(struct drm_buddy_block *block) list_del(&block->link); } +static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2) +{ + return s1 <= e2 && e1 >= s2; +} + +static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2) +{ + return s1 <= s2 && e1 >= e2; +} + +static struct drm_buddy_block * +__get_buddy(struct drm_buddy_block *block) +{ + struct drm_buddy_block *parent; + + parent = block->parent; + if (!parent) + return NULL; + + if (parent->left == block) + return parent->right; + + return parent->left; +} + +static unsigned int __drm_buddy_free(struct drm_buddy *mm, + struct drm_buddy_block *block, + bool force_merge) +{ + struct drm_buddy_block *parent; + unsigned int order; + + while ((parent = block->parent)) { + struct drm_buddy_block *buddy; + + buddy = __get_buddy(block); + + if (!drm_buddy_block_is_free(buddy)) + break; + + if (!force_merge) { + /* + * Check the block and its buddy clear state and exit + * the loop if they both have the dissimilar state. + */ + if (drm_buddy_block_is_clear(block) != + drm_buddy_block_is_clear(buddy)) + break; + + if (drm_buddy_block_is_clear(block)) + mark_cleared(parent); + } + + list_del(&buddy->link); + if (force_merge && drm_buddy_block_is_clear(buddy)) + mm->clear_avail -= drm_buddy_block_size(mm, buddy); + + drm_block_free(mm, block); + drm_block_free(mm, buddy); + + block = parent; + } + + order = drm_buddy_block_order(block); + mark_free(mm, block); + + return order; +} + +static int __force_merge(struct drm_buddy *mm, + u64 start, + u64 end, + unsigned int min_order) +{ + unsigned int order; + int i; + + if (!min_order) + return -ENOMEM; + + if (min_order > mm->max_order) + return -EINVAL; + + for (i = min_order - 1; i >= 0; i--) { + struct drm_buddy_block *block, *prev; + + list_for_each_entry_safe_reverse(block, prev, &mm->free_list[i], link) { + struct drm_buddy_block *buddy; + u64 block_start, block_end; + + if (!block->parent) + continue; + + block_start = drm_buddy_block_offset(block); + block_end = block_start + drm_buddy_block_size(mm, block) - 1; + + if (!contains(start, end, block_start, block_end)) + continue; + + buddy = __get_buddy(block); + if (!drm_buddy_block_is_free(buddy)) + continue; + + WARN_ON(drm_buddy_block_is_clear(block) == + drm_buddy_block_is_clear(buddy)); + + /* + * If the prev block is same as buddy, don't access the + * block in the next iteration as we would free the + * buddy block as part of the free function. + */ + if (prev == buddy) + prev = list_prev_entry(prev, link); + + list_del(&block->link); + if (drm_buddy_block_is_clear(block)) + mm->clear_avail -= drm_buddy_block_size(mm, block); + + order = __drm_buddy_free(mm, block, true); + if (order >= min_order) + return 0; + } + } + + return -ENOMEM; +} + /** * drm_buddy_init - init memory manager * @@ -186,11 +323,21 @@ EXPORT_SYMBOL(drm_buddy_init); */ void drm_buddy_fini(struct drm_buddy *mm) { + u64 root_size, size; + unsigned int order; int i; + size = mm->size; + for (i = 0; i < mm->n_roots; ++i) { + order = ilog2(size) - ilog2(mm->chunk_size); + __force_merge(mm, 0, size, order); + WARN_ON(!drm_buddy_block_is_free(mm->roots[i])); drm_block_free(mm, mm->roots[i]); + + root_size = mm->chunk_size << order; + size -= root_size; } WARN_ON(mm->avail != mm->size); @@ -223,26 +370,17 @@ static int split_block(struct drm_buddy *mm, mark_free(mm, block->left); mark_free(mm, block->right); + if (drm_buddy_block_is_clear(block)) { + mark_cleared(block->left); + mark_cleared(block->right); + clear_reset(block); + } + mark_split(block); return 0; } -static struct drm_buddy_block * -__get_buddy(struct drm_buddy_block *block) -{ - struct drm_buddy_block *parent; - - parent = block->parent; - if (!parent) - return NULL; - - if (parent->left == block) - return parent->right; - - return parent->left; -} - /** * drm_get_buddy - get buddy address * @@ -260,30 +398,6 @@ drm_get_buddy(struct drm_buddy_block *block) } EXPORT_SYMBOL(drm_get_buddy); -static void __drm_buddy_free(struct drm_buddy *mm, - struct drm_buddy_block *block) -{ - struct drm_buddy_block *parent; - - while ((parent = block->parent)) { - struct drm_buddy_block *buddy; - - buddy = __get_buddy(block); - - if (!drm_buddy_block_is_free(buddy)) - break; - - list_del(&buddy->link); - - drm_block_free(mm, block); - drm_block_free(mm, buddy); - - block = parent; - } - - mark_free(mm, block); -} - /** * drm_buddy_free_block - free a block * @@ -295,42 +409,74 @@ void drm_buddy_free_block(struct drm_buddy *mm, { BUG_ON(!drm_buddy_block_is_allocated(block)); mm->avail += drm_buddy_block_size(mm, block); - __drm_buddy_free(mm, block); + if (drm_buddy_block_is_clear(block)) + mm->clear_avail += drm_buddy_block_size(mm, block); + + __drm_buddy_free(mm, block, false); } EXPORT_SYMBOL(drm_buddy_free_block); -/** - * drm_buddy_free_list - free blocks - * - * @mm: DRM buddy manager - * @objects: input list head to free blocks - */ -void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects) +static void __drm_buddy_free_list(struct drm_buddy *mm, + struct list_head *objects, + bool mark_clear, + bool mark_dirty) { struct drm_buddy_block *block, *on; + WARN_ON(mark_dirty && mark_clear); + list_for_each_entry_safe(block, on, objects, link) { + if (mark_clear) + mark_cleared(block); + else if (mark_dirty) + clear_reset(block); drm_buddy_free_block(mm, block); cond_resched(); } INIT_LIST_HEAD(objects); } -EXPORT_SYMBOL(drm_buddy_free_list); -static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2) +static void drm_buddy_free_list_internal(struct drm_buddy *mm, + struct list_head *objects) { - return s1 <= e2 && e1 >= s2; + /* + * Don't touch the clear/dirty bit, since allocation is still internal + * at this point. For example we might have just failed part of the + * allocation. + */ + __drm_buddy_free_list(mm, objects, false, false); } -static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2) +/** + * drm_buddy_free_list - free blocks + * + * @mm: DRM buddy manager + * @objects: input list head to free blocks + * @flags: optional flags like DRM_BUDDY_CLEARED + */ +void drm_buddy_free_list(struct drm_buddy *mm, + struct list_head *objects, + unsigned int flags) { - return s1 <= s2 && e1 >= e2; + bool mark_clear = flags & DRM_BUDDY_CLEARED; + + __drm_buddy_free_list(mm, objects, mark_clear, !mark_clear); +} +EXPORT_SYMBOL(drm_buddy_free_list); + +static bool block_incompatible(struct drm_buddy_block *block, unsigned int flags) +{ + bool needs_clear = flags & DRM_BUDDY_CLEAR_ALLOCATION; + + return needs_clear != drm_buddy_block_is_clear(block); } static struct drm_buddy_block * -alloc_range_bias(struct drm_buddy *mm, - u64 start, u64 end, - unsigned int order) +__alloc_range_bias(struct drm_buddy *mm, + u64 start, u64 end, + unsigned int order, + unsigned long flags, + bool fallback) { u64 req_size = mm->chunk_size << order; struct drm_buddy_block *block; @@ -379,6 +525,9 @@ alloc_range_bias(struct drm_buddy *mm, if (contains(start, end, block_start, block_end) && order == drm_buddy_block_order(block)) { + if (!fallback && block_incompatible(block, flags)) + continue; + /* * Find the free block within the range. */ @@ -410,30 +559,57 @@ err_undo: if (buddy && (drm_buddy_block_is_free(block) && drm_buddy_block_is_free(buddy))) - __drm_buddy_free(mm, block); + __drm_buddy_free(mm, block, false); return ERR_PTR(err); } static struct drm_buddy_block * -get_maxblock(struct drm_buddy *mm, unsigned int order) +__drm_buddy_alloc_range_bias(struct drm_buddy *mm, + u64 start, u64 end, + unsigned int order, + unsigned long flags) +{ + struct drm_buddy_block *block; + bool fallback = false; + + block = __alloc_range_bias(mm, start, end, order, + flags, fallback); + if (IS_ERR(block) && mm->clear_avail) + return __alloc_range_bias(mm, start, end, order, + flags, !fallback); + + return block; +} + +static struct drm_buddy_block * +get_maxblock(struct drm_buddy *mm, unsigned int order, + unsigned long flags) { - struct drm_buddy_block *max_block = NULL, *node; + struct drm_buddy_block *max_block = NULL, *block = NULL; unsigned int i; for (i = order; i <= mm->max_order; ++i) { - if (!list_empty(&mm->free_list[i])) { - node = list_last_entry(&mm->free_list[i], - struct drm_buddy_block, - link); - if (!max_block) { - max_block = node; + struct drm_buddy_block *tmp_block; + + list_for_each_entry_reverse(tmp_block, &mm->free_list[i], link) { + if (block_incompatible(tmp_block, flags)) continue; - } - if (drm_buddy_block_offset(node) > - drm_buddy_block_offset(max_block)) { - max_block = node; - } + block = tmp_block; + break; + } + + if (!block) + continue; + + if (!max_block) { + max_block = block; + continue; + } + + if (drm_buddy_block_offset(block) > + drm_buddy_block_offset(max_block)) { + max_block = block; } } @@ -450,11 +626,29 @@ alloc_from_freelist(struct drm_buddy *mm, int err; if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) { - block = get_maxblock(mm, order); + block = get_maxblock(mm, order, flags); if (block) /* Store the obtained block order */ tmp = drm_buddy_block_order(block); } else { + for (tmp = order; tmp <= mm->max_order; ++tmp) { + struct drm_buddy_block *tmp_block; + + list_for_each_entry_reverse(tmp_block, &mm->free_list[tmp], link) { + if (block_incompatible(tmp_block, flags)) + continue; + + block = tmp_block; + break; + } + + if (block) + break; + } + } + + if (!block) { + /* Fallback method */ for (tmp = order; tmp <= mm->max_order; ++tmp) { if (!list_empty(&mm->free_list[tmp])) { block = list_last_entry(&mm->free_list[tmp], @@ -464,10 +658,10 @@ alloc_from_freelist(struct drm_buddy *mm, break; } } - } - if (!block) - return ERR_PTR(-ENOSPC); + if (!block) + return ERR_PTR(-ENOSPC); + } BUG_ON(!drm_buddy_block_is_free(block)); @@ -483,7 +677,7 @@ alloc_from_freelist(struct drm_buddy *mm, err_undo: if (tmp != order) - __drm_buddy_free(mm, block); + __drm_buddy_free(mm, block, false); return ERR_PTR(err); } @@ -526,16 +720,18 @@ static int __alloc_range(struct drm_buddy *mm, } if (contains(start, end, block_start, block_end)) { - if (!drm_buddy_block_is_free(block)) { + if (drm_buddy_block_is_free(block)) { + mark_allocated(block); + total_allocated += drm_buddy_block_size(mm, block); + mm->avail -= drm_buddy_block_size(mm, block); + if (drm_buddy_block_is_clear(block)) + mm->clear_avail -= drm_buddy_block_size(mm, block); + list_add_tail(&block->link, &allocated); + continue; + } else if (!mm->clear_avail) { err = -ENOSPC; goto err_free; } - - mark_allocated(block); - total_allocated += drm_buddy_block_size(mm, block); - mm->avail -= drm_buddy_block_size(mm, block); - list_add_tail(&block->link, &allocated); - continue; } if (!drm_buddy_block_is_split(block)) { @@ -567,14 +763,14 @@ err_undo: if (buddy && (drm_buddy_block_is_free(block) && drm_buddy_block_is_free(buddy))) - __drm_buddy_free(mm, block); + __drm_buddy_free(mm, block, false); err_free: if (err == -ENOSPC && total_allocated_on_err) { list_splice_tail(&allocated, blocks); *total_allocated_on_err = total_allocated; } else { - drm_buddy_free_list(mm, &allocated); + drm_buddy_free_list_internal(mm, &allocated); } return err; @@ -640,11 +836,11 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm, list_splice(&blocks_lhs, blocks); return 0; } else if (err != -ENOSPC) { - drm_buddy_free_list(mm, blocks); + drm_buddy_free_list_internal(mm, blocks); return err; } /* Free blocks for the next iteration */ - drm_buddy_free_list(mm, blocks); + drm_buddy_free_list_internal(mm, blocks); } return -ENOSPC; @@ -700,6 +896,8 @@ int drm_buddy_block_trim(struct drm_buddy *mm, list_del(&block->link); mark_free(mm, block); mm->avail += drm_buddy_block_size(mm, block); + if (drm_buddy_block_is_clear(block)) + mm->clear_avail += drm_buddy_block_size(mm, block); /* Prevent recursively freeing this node */ parent = block->parent; @@ -711,6 +909,8 @@ int drm_buddy_block_trim(struct drm_buddy *mm, if (err) { mark_allocated(block); mm->avail -= drm_buddy_block_size(mm, block); + if (drm_buddy_block_is_clear(block)) + mm->clear_avail -= drm_buddy_block_size(mm, block); list_add(&block->link, blocks); } @@ -719,13 +919,28 @@ int drm_buddy_block_trim(struct drm_buddy *mm, } EXPORT_SYMBOL(drm_buddy_block_trim); +static struct drm_buddy_block * +__drm_buddy_alloc_blocks(struct drm_buddy *mm, + u64 start, u64 end, + unsigned int order, + unsigned long flags) +{ + if (flags & DRM_BUDDY_RANGE_ALLOCATION) + /* Allocate traversing within the range */ + return __drm_buddy_alloc_range_bias(mm, start, end, + order, flags); + else + /* Allocate from freelist */ + return alloc_from_freelist(mm, order, flags); +} + /** * drm_buddy_alloc_blocks - allocate power-of-two blocks * * @mm: DRM buddy manager to allocate from * @start: start of the allowed range for this block * @end: end of the allowed range for this block - * @size: size of the allocation + * @size: size of the allocation in bytes * @min_block_size: alignment of the allocation * @blocks: output list head to add allocated blocks * @flags: DRM_BUDDY_*_ALLOCATION flags @@ -800,23 +1015,33 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, BUG_ON(order < min_order); do { - if (flags & DRM_BUDDY_RANGE_ALLOCATION) - /* Allocate traversing within the range */ - block = alloc_range_bias(mm, start, end, order); - else - /* Allocate from freelist */ - block = alloc_from_freelist(mm, order, flags); - + block = __drm_buddy_alloc_blocks(mm, start, + end, + order, + flags); if (!IS_ERR(block)) break; if (order-- == min_order) { + /* Try allocation through force merge method */ + if (mm->clear_avail && + !__force_merge(mm, start, end, min_order)) { + block = __drm_buddy_alloc_blocks(mm, start, + end, + min_order, + flags); + if (!IS_ERR(block)) { + order = min_order; + break; + } + } + + /* + * Try contiguous block allocation through + * try harder method. + */ if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION && !(flags & DRM_BUDDY_RANGE_ALLOCATION)) - /* - * Try contiguous block allocation through - * try harder method - */ return __alloc_contig_try_harder(mm, original_size, original_min_size, @@ -828,6 +1053,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, mark_allocated(block); mm->avail -= drm_buddy_block_size(mm, block); + if (drm_buddy_block_is_clear(block)) + mm->clear_avail -= drm_buddy_block_size(mm, block); kmemleak_update_trace(block); list_add_tail(&block->link, &allocated); @@ -866,7 +1093,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, return 0; err_free: - drm_buddy_free_list(mm, &allocated); + drm_buddy_free_list_internal(mm, &allocated); return err; } EXPORT_SYMBOL(drm_buddy_alloc_blocks); @@ -899,8 +1126,8 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p) { int order; - drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB\n", - mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20); + drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n", + mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20); for (order = mm->max_order; order >= 0; order--) { struct drm_buddy_block *block; diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c index 0d735d5c2b35..942345548bc3 100644 --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c @@ -126,7 +126,7 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man, return 0; err_free_blocks: - drm_buddy_free_list(mm, &bman_res->blocks); + drm_buddy_free_list(mm, &bman_res->blocks, 0); mutex_unlock(&bman->lock); err_free_res: ttm_resource_fini(man, &bman_res->base); @@ -141,7 +141,7 @@ static void i915_ttm_buddy_man_free(struct ttm_resource_manager *man, struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); mutex_lock(&bman->lock); - drm_buddy_free_list(&bman->mm, &bman_res->blocks); + drm_buddy_free_list(&bman->mm, &bman_res->blocks, 0); bman->visible_avail += bman_res->used_visible_size; mutex_unlock(&bman->lock); @@ -345,7 +345,7 @@ int i915_ttm_buddy_man_fini(struct ttm_device *bdev, unsigned int type) ttm_set_driver_manager(bdev, type, NULL); mutex_lock(&bman->lock); - drm_buddy_free_list(mm, &bman->reserved); + drm_buddy_free_list(mm, &bman->reserved, 0); drm_buddy_fini(mm); bman->visible_avail += bman->visible_reserved; WARN_ON_ONCE(bman->visible_avail != bman->visible_size); diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c index e48863a44556..4621a860cb05 100644 --- a/drivers/gpu/drm/tests/drm_buddy_test.c +++ b/drivers/gpu/drm/tests/drm_buddy_test.c @@ -103,7 +103,7 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test) DRM_BUDDY_RANGE_ALLOCATION), "buddy_alloc i failed with bias(%x-%x), size=%u, ps=%u\n", bias_start, bias_end, bias_size, bias_size); - drm_buddy_free_list(&mm, &tmp); + drm_buddy_free_list(&mm, &tmp, 0); /* single page with internal round_up */ KUNIT_ASSERT_FALSE_MSG(test, @@ -113,7 +113,7 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test) DRM_BUDDY_RANGE_ALLOCATION), "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", bias_start, bias_end, ps, bias_size); - drm_buddy_free_list(&mm, &tmp); + drm_buddy_free_list(&mm, &tmp, 0); /* random size within */ size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); @@ -153,14 +153,14 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test) * unallocated, and ideally not always on the bias * boundaries. */ - drm_buddy_free_list(&mm, &tmp); + drm_buddy_free_list(&mm, &tmp, 0); } else { list_splice_tail(&tmp, &allocated); } } kfree(order); - drm_buddy_free_list(&mm, &allocated); + drm_buddy_free_list(&mm, &allocated, 0); drm_buddy_fini(&mm); /* @@ -220,7 +220,7 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test) "buddy_alloc passed with bias(%x-%x), size=%u\n", bias_start, bias_end, ps); - drm_buddy_free_list(&mm, &allocated); + drm_buddy_free_list(&mm, &allocated, 0); drm_buddy_fini(&mm); } @@ -269,7 +269,7 @@ static void drm_test_buddy_alloc_contiguous(struct kunit *test) DRM_BUDDY_CONTIGUOUS_ALLOCATION), "buddy_alloc didn't error size=%lu\n", 3 * ps); - drm_buddy_free_list(&mm, &middle); + drm_buddy_free_list(&mm, &middle, 0); KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, 3 * ps, ps, &allocated, DRM_BUDDY_CONTIGUOUS_ALLOCATION), @@ -279,7 +279,7 @@ static void drm_test_buddy_alloc_contiguous(struct kunit *test) DRM_BUDDY_CONTIGUOUS_ALLOCATION), "buddy_alloc didn't error size=%lu\n", 2 * ps); - drm_buddy_free_list(&mm, &right); + drm_buddy_free_list(&mm, &right, 0); KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, 3 * ps, ps, &allocated, DRM_BUDDY_CONTIGUOUS_ALLOCATION), @@ -294,7 +294,7 @@ static void drm_test_buddy_alloc_contiguous(struct kunit *test) DRM_BUDDY_CONTIGUOUS_ALLOCATION), "buddy_alloc hit an error size=%lu\n", 2 * ps); - drm_buddy_free_list(&mm, &left); + drm_buddy_free_list(&mm, &left, 0); KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, 3 * ps, ps, &allocated, DRM_BUDDY_CONTIGUOUS_ALLOCATION), @@ -306,7 +306,7 @@ static void drm_test_buddy_alloc_contiguous(struct kunit *test) KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3); - drm_buddy_free_list(&mm, &allocated); + drm_buddy_free_list(&mm, &allocated, 0); drm_buddy_fini(&mm); } @@ -375,7 +375,7 @@ static void drm_test_buddy_alloc_pathological(struct kunit *test) top, max_order); } - drm_buddy_free_list(&mm, &holes); + drm_buddy_free_list(&mm, &holes, 0); /* Nothing larger than blocks of chunk_size now available */ for (order = 1; order <= max_order; order++) { @@ -387,7 +387,7 @@ static void drm_test_buddy_alloc_pathological(struct kunit *test) } list_splice_tail(&holes, &blocks); - drm_buddy_free_list(&mm, &blocks); + drm_buddy_free_list(&mm, &blocks, 0); drm_buddy_fini(&mm); } @@ -482,7 +482,7 @@ static void drm_test_buddy_alloc_pessimistic(struct kunit *test) list_del(&block->link); drm_buddy_free_block(&mm, block); - drm_buddy_free_list(&mm, &blocks); + drm_buddy_free_list(&mm, &blocks, 0); drm_buddy_fini(&mm); } @@ -528,7 +528,7 @@ static void drm_test_buddy_alloc_optimistic(struct kunit *test) size, size, &tmp, flags), "buddy_alloc unexpectedly succeeded, it should be full!"); - drm_buddy_free_list(&mm, &blocks); + drm_buddy_free_list(&mm, &blocks, 0); drm_buddy_fini(&mm); } @@ -563,7 +563,7 @@ static void drm_test_buddy_alloc_limit(struct kunit *test) drm_buddy_block_size(&mm, block), BIT_ULL(mm.max_order) * PAGE_SIZE); - drm_buddy_free_list(&mm, &allocated); + drm_buddy_free_list(&mm, &allocated, 0); drm_buddy_fini(&mm); } diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index 115ec745e502..1ad678b62c4a 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -196,7 +196,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, return 0; error_free_blocks: - drm_buddy_free_list(mm, &vres->blocks); + drm_buddy_free_list(mm, &vres->blocks, 0); mutex_unlock(&mgr->lock); error_fini: ttm_resource_fini(man, &vres->base); @@ -214,7 +214,7 @@ static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man, struct drm_buddy *mm = &mgr->mm; mutex_lock(&mgr->lock); - drm_buddy_free_list(mm, &vres->blocks); + drm_buddy_free_list(mm, &vres->blocks, 0); mgr->visible_avail += vres->used_visible_size; mutex_unlock(&mgr->lock); diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h index a5b39fc01003..82570f77e817 100644 --- a/include/drm/drm_buddy.h +++ b/include/drm/drm_buddy.h @@ -25,6 +25,8 @@ #define DRM_BUDDY_RANGE_ALLOCATION BIT(0) #define DRM_BUDDY_TOPDOWN_ALLOCATION BIT(1) #define DRM_BUDDY_CONTIGUOUS_ALLOCATION BIT(2) +#define DRM_BUDDY_CLEAR_ALLOCATION BIT(3) +#define DRM_BUDDY_CLEARED BIT(4) struct drm_buddy_block { #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) @@ -32,8 +34,9 @@ struct drm_buddy_block { #define DRM_BUDDY_ALLOCATED (1 << 10) #define DRM_BUDDY_FREE (2 << 10) #define DRM_BUDDY_SPLIT (3 << 10) +#define DRM_BUDDY_HEADER_CLEAR GENMASK_ULL(9, 9) /* Free to be used, if needed in the future */ -#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(9, 6) +#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6) #define DRM_BUDDY_HEADER_ORDER GENMASK_ULL(5, 0) u64 header; @@ -86,6 +89,7 @@ struct drm_buddy { u64 chunk_size; u64 size; u64 avail; + u64 clear_avail; }; static inline u64 @@ -112,6 +116,12 @@ drm_buddy_block_is_allocated(struct drm_buddy_block *block) return drm_buddy_block_state(block) == DRM_BUDDY_ALLOCATED; } +static inline bool +drm_buddy_block_is_clear(struct drm_buddy_block *block) +{ + return block->header & DRM_BUDDY_HEADER_CLEAR; +} + static inline bool drm_buddy_block_is_free(struct drm_buddy_block *block) { @@ -150,7 +160,9 @@ int drm_buddy_block_trim(struct drm_buddy *mm, void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block); -void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects); +void drm_buddy_free_list(struct drm_buddy *mm, + struct list_head *objects, + unsigned int flags); void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p); void drm_buddy_block_print(struct drm_buddy *mm, -- cgit v1.2.3 From a68c7eaa7a8ffdec9287ba1561a668d674c20a13 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Fri, 19 Apr 2024 12:05:37 +0530 Subject: drm/amdgpu: Enable clear page functionality MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add clear page support in vram memory region. v1(Christian): - Dont handle clear page as TTM flag since when moving the BO back in from GTT again we don't need that. - Make a specialized version of amdgpu_fill_buffer() which only clears the VRAM areas which are not already cleared - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in amdgpu_object.c v2: - Modify the function name amdgpu_ttm_* (Alex) - Drop the delayed parameter (Christian) - handle amdgpu_res_cleared(&cursor) just above the size calculation (Christian) - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers in the free path to properly wait for fences etc.. (Christian) v3(Christian): - Remove buffer clear code in VRAM manager instead change the AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set the DRM_BUDDY_CLEARED flag. - Remove ! from amdgpu_res_cleared(&cursor) check. v4(Christian): - vres flag setting move to vram manager file - use dma_fence_get_stub in amdgpu_ttm_clear_buffer function - make fence a mandatory parameter and drop the if and the get/put dance Signed-off-by: Arunpravin Paneer Selvam Suggested-by: Christian König Acked-by: Felix Kuehling Link: https://patchwork.freedesktop.org/patch/msgid/20240419063538.11957-2-Arunpravin.PaneerSelvam@amd.com Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 9 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h | 25 +++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 70 +++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 5 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 6 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h | 10 ++++ 6 files changed, 116 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 8bc79924d171..e011a326fe86 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -39,6 +39,7 @@ #include "amdgpu.h" #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_vram_mgr.h" /** * DOC: amdgpu_object @@ -601,8 +602,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, if (!amdgpu_bo_support_uswc(bo->flags)) bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; - if (adev->ras_enabled) - bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; + bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; bo->tbo.bdev = &adev->mman.bdev; if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA | @@ -634,7 +634,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, bo->tbo.resource->mem_type == TTM_PL_VRAM) { struct dma_fence *fence; - r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true); + r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence); if (unlikely(r)) goto fail_unreserve; @@ -1365,8 +1365,9 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv))) return; - r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true); + r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true); if (!WARN_ON(r)) { + amdgpu_vram_mgr_set_cleared(bo->resource); amdgpu_bo_fence(abo, fence, false); dma_fence_put(fence); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h index 381101d2bf05..50fcd86e1033 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h @@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size) } } +/** + * amdgpu_res_cleared - check if blocks are cleared + * + * @cur: the cursor to extract the block + * + * Check if the @cur block is cleared + */ +static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur) +{ + struct drm_buddy_block *block; + + switch (cur->mem_type) { + case TTM_PL_VRAM: + block = cur->node; + + if (!amdgpu_vram_mgr_is_cleared(block)) + return false; + break; + default: + return false; + } + + return true; +} + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index fc418e670fda..f0b42b567357 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -378,11 +378,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) { struct dma_fence *wipe_fence = NULL; - r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence, - false); + r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence, + false); if (r) { goto error; } else if (wipe_fence) { + amdgpu_vram_mgr_set_cleared(bo->resource); dma_fence_put(fence); fence = wipe_fence; } @@ -2215,6 +2216,71 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, return 0; } +/** + * amdgpu_ttm_clear_buffer - clear memory buffers + * @bo: amdgpu buffer object + * @resv: reservation object + * @fence: dma_fence associated with the operation + * + * Clear the memory buffer resource. + * + * Returns: + * 0 for success or a negative error code on failure. + */ +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, + struct dma_resv *resv, + struct dma_fence **fence) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; + struct amdgpu_res_cursor cursor; + u64 addr; + int r; + + if (!adev->mman.buffer_funcs_enabled) + return -EINVAL; + + if (!fence) + return -EINVAL; + + *fence = dma_fence_get_stub(); + + amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor); + + mutex_lock(&adev->mman.gtt_window_lock); + while (cursor.remaining) { + struct dma_fence *next = NULL; + u64 size; + + if (amdgpu_res_cleared(&cursor)) { + amdgpu_res_next(&cursor, cursor.size); + continue; + } + + /* Never clear more than 256MiB at once to avoid timeouts */ + size = min(cursor.size, 256ULL << 20); + + r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor, + 1, ring, false, &size, &addr); + if (r) + goto err; + + r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv, + &next, true, true); + if (r) + goto err; + + dma_fence_put(*fence); + *fence = next; + + amdgpu_res_next(&cursor, size); + } +err: + mutex_unlock(&adev->mman.gtt_window_lock); + + return r; +} + int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t src_data, struct dma_resv *resv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 65ec82141a8e..b404d89d52e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -38,8 +38,6 @@ #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512 #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 -#define AMDGPU_POISON 0xd0bed0be - extern const struct attribute_group amdgpu_vram_mgr_attr_group; extern const struct attribute_group amdgpu_gtt_mgr_attr_group; @@ -155,6 +153,9 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, uint64_t size, bool tmz, struct dma_resv *resv, struct dma_fence **f); +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, + struct dma_resv *resv, + struct dma_fence **fence); int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t src_data, struct dma_resv *resv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index c0c851409241..e494f5bf136a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -450,6 +450,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, { struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); u64 vis_usage = 0, max_bytes, min_block_size; struct amdgpu_vram_mgr_resource *vres; u64 size, remaining_size, lpfn, fpfn; @@ -501,6 +502,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, if (place->flags & TTM_PL_FLAG_CONTIGUOUS) vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION; + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED) + vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION; + if (fpfn || lpfn != mgr->mm.size) /* Allocate blocks in desired range */ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; @@ -604,7 +608,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, amdgpu_vram_mgr_do_reserve(man); - drm_buddy_free_list(mm, &vres->blocks, 0); + drm_buddy_free_list(mm, &vres->blocks, vres->flags); mutex_unlock(&mgr->lock); atomic64_sub(vis_usage, &mgr->vis_usage); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h index 0e04e42cf809..b256cbc2bc27 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h @@ -53,10 +53,20 @@ static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block) return (u64)PAGE_SIZE << drm_buddy_block_order(block); } +static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block) +{ + return drm_buddy_block_is_clear(block); +} + static inline struct amdgpu_vram_mgr_resource * to_amdgpu_vram_mgr_resource(struct ttm_resource *res) { return container_of(res, struct amdgpu_vram_mgr_resource, base); } +static inline void amdgpu_vram_mgr_set_cleared(struct ttm_resource *res) +{ + to_amdgpu_vram_mgr_resource(res)->flags |= DRM_BUDDY_CLEARED; +} + #endif -- cgit v1.2.3 From 939c4751819b37eb5f2734223db1a00de42c65f4 Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Fri, 12 Apr 2024 10:36:25 +0800 Subject: drm/amdgpu: Support setting reset_method at runtime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to support more test cases, support user change reset_method at runtime. Signed-off-by: Stanley.Yang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index c512f70b8272..ea14f1c8f430 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -925,7 +925,7 @@ module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444); * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco) */ MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco/bamaco)"); -module_param_named(reset_method, amdgpu_reset_method, int, 0444); +module_param_named(reset_method, amdgpu_reset_method, int, 0644); /** * DOC: bad_page_threshold (int) Bad page threshold is specifies the -- cgit v1.2.3 From 7b19f1f3466fc56c48e531b5e8f9dea8de089adb Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 10 Apr 2024 19:30:46 +0530 Subject: drm/amdgpu: Assign correct bits for SDMA HDP flush HDP Flush request bit can be kept unique per AID, and doesn't need to be unique SOC-wide. Assign only bits 10-13 for SDMA v4.4.2. Signed-off-by: Lijo Lazar Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 7bb0652159ac..20909843bcd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -368,7 +368,8 @@ static void sdma_v4_4_2_ring_emit_hdp_flush(struct amdgpu_ring *ring) u32 ref_and_mask = 0; const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; - ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 + << (ring->me % adev->sdma.num_inst_per_aid); sdma_v4_4_2_wait_reg_mem(ring, 0, 1, adev->nbio.funcs->get_hdp_flush_done_offset(adev), -- cgit v1.2.3 From 2476c6bd950e696558145a5f41344b000e1af01f Mon Sep 17 00:00:00 2001 From: Peyton Lee Date: Fri, 19 Apr 2024 14:07:39 +0800 Subject: drm/amdgpu/vpe: fix vpe dpm setup failed The vpe dpm settings should be done before firmware is loaded. Otherwise, the frequency cannot be successfully raised. Signed-off-by: Peyton Lee Reviewed-by: Lang Yu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c | 2 +- drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index 6695481f870f..c23d97d34b7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -205,7 +205,7 @@ disable_dpm: dpm_ctl &= 0xfffffffe; /* Disable DPM */ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl); dev_dbg(adev->dev, "%s: disable vpe dpm\n", __func__); - return 0; + return -EINVAL; } int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c index 769eb8f7bb3c..09315dd5a1ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c @@ -144,6 +144,12 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) WREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL), ret); } + /* setup collaborate mode */ + vpe_v6_1_set_collaborate_mode(vpe, true); + /* setup DPM */ + if (amdgpu_vpe_configure_dpm(vpe)) + dev_warn(adev->dev, "VPE failed to enable DPM\n"); + /* * For VPE 6.1.1, still only need to add master's offset, and psp will apply it to slave as well. * Here use instance 0 as master. @@ -159,11 +165,7 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) adev->vpe.cmdbuf_cpu_addr[0] = f32_offset; adev->vpe.cmdbuf_cpu_addr[1] = f32_cntl; - amdgpu_vpe_psp_update_sram(adev); - vpe_v6_1_set_collaborate_mode(vpe, true); - amdgpu_vpe_configure_dpm(vpe); - - return 0; + return amdgpu_vpe_psp_update_sram(adev); } vpe_hdr = (const struct vpe_firmware_header_v1_0 *)adev->vpe.fw->data; @@ -196,8 +198,6 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) } vpe_v6_1_halt(vpe, false); - vpe_v6_1_set_collaborate_mode(vpe, true); - amdgpu_vpe_configure_dpm(vpe); return 0; } -- cgit v1.2.3 From 3f0664110a40033785e7fed56ed2e4287b14c1f9 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sat, 30 Mar 2024 09:46:49 -0400 Subject: drm/amdgpu/mes11: print MES opcodes rather than numbers Makes it easier to review the logs when there are MES errors. v2: use dbg for emitted, add helpers for fetching strings v3: fix missing commas (Harish) v4: drop command prefixes (Felix) v5: squash in bounds fix (Jun) Acked-by: Felix Kuehling Reviewed by Shaoyun.liu (v2) Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 78 ++++++++++++++++++++++++++++++++-- 1 file changed, 74 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 81833395324a..fbe31afad1d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -100,18 +100,72 @@ static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = { .insert_nop = amdgpu_ring_insert_nop, }; +static const char *mes_v11_0_opcodes[] = { + "SET_HW_RSRC", + "SET_SCHEDULING_CONFIG", + "ADD_QUEUE", + "REMOVE_QUEUE", + "PERFORM_YIELD", + "SET_GANG_PRIORITY_LEVEL", + "SUSPEND", + "RESUME", + "RESET", + "SET_LOG_BUFFER", + "CHANGE_GANG_PRORITY", + "QUERY_SCHEDULER_STATUS", + "PROGRAM_GDS", + "SET_DEBUG_VMID", + "MISC", + "UPDATE_ROOT_PAGE_TABLE", + "AMD_LOG", +}; + +static const char *mes_v11_0_misc_opcodes[] = { + "WRITE_REG", + "INV_GART", + "QUERY_STATUS", + "READ_REG", + "WAIT_REG_MEM", + "SET_SHADER_DEBUGGER", +}; + +static const char *mes_v11_0_get_op_string(union MESAPI__MISC *x_pkt) +{ + const char *op_str = NULL; + + if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes)) + op_str = mes_v11_0_opcodes[x_pkt->header.opcode]; + + return op_str; +} + +static const char *mes_v11_0_get_misc_op_string(union MESAPI__MISC *x_pkt) +{ + const char *op_str = NULL; + + if ((x_pkt->header.opcode == MES_SCH_API_MISC) && + (x_pkt->opcode < ARRAY_SIZE(mes_v11_0_misc_opcodes))) + op_str = mes_v11_0_misc_opcodes[x_pkt->opcode]; + + return op_str; +} + static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, void *pkt, int size, int api_status_off) { int ndw = size / 4; signed long r; - union MESAPI__ADD_QUEUE *x_pkt = pkt; + union MESAPI__MISC *x_pkt = pkt; struct MES_API_STATUS *api_status; struct amdgpu_device *adev = mes->adev; struct amdgpu_ring *ring = &mes->ring; unsigned long flags; signed long timeout = 3000000; /* 3000 ms */ + const char *op_str, *misc_op_str; + + if (x_pkt->header.opcode >= MES_SCH_API_MAX) + return -EINVAL; if (amdgpu_emu_mode) { timeout *= 100; @@ -135,13 +189,29 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, amdgpu_ring_commit(ring); spin_unlock_irqrestore(&mes->ring_lock, flags); - DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode); + op_str = mes_v11_0_get_op_string(x_pkt); + misc_op_str = mes_v11_0_get_misc_op_string(x_pkt); + + if (misc_op_str) + dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, misc_op_str); + else if (op_str) + dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str); + else + dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode); r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout); if (r < 1) { - DRM_ERROR("MES failed to response msg=%d\n", - x_pkt->header.opcode); + + if (misc_op_str) + dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n", + op_str, misc_op_str); + else if (op_str) + dev_err(adev->dev, "MES failed to respond to msg=%s\n", + op_str); + else + dev_err(adev->dev, "MES failed to respond to msg=%d\n", + x_pkt->header.opcode); while (halt_if_hws_hang) schedule(); -- cgit v1.2.3 From 6e042cee748fb353d510eec2938591a995db3401 Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Thu, 18 Apr 2024 20:06:08 +0200 Subject: drm/amdgpu/vcn: fix unitialized variable warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid returning an uninitialized value if we never enter the loop. This case should never be hit in practice, but returning 0 doesn't hurt. The same fix is applied to the 4 places using the same pattern. v2: - fixed typos in commit message (Alex) - use "return 0;" before the done label instead of initializing r to 0 Signed-off-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 1 + 4 files changed, 4 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 8f82fb887e9c..26e63f01250a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -359,6 +359,7 @@ static int vcn_v3_0_hw_init(void *handle) } } + return 0; done: if (!r) DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 832d15f7b5f6..aff1a4d8d393 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -288,6 +288,7 @@ static int vcn_v4_0_hw_init(void *handle) } } + return 0; done: if (!r) DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 501e53e69f2a..8f2bcce13339 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -237,6 +237,7 @@ static int vcn_v4_0_5_hw_init(void *handle) goto done; } + return 0; done: if (!r) DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index bc60c554eb32..b226306164bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -203,6 +203,7 @@ static int vcn_v5_0_0_hw_init(void *handle) goto done; } + return 0; done: if (!r) DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", -- cgit v1.2.3 From e76691f45a60bf71a3210c4fb2abd492b26bd4a6 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 18 Apr 2024 13:56:42 -0400 Subject: drm/amdgpu: Update BO eviction priorities MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make SVM BOs more likely to get evicted than other BOs. These BOs opportunistically use available VRAM, but can fall back relatively seamlessly to system memory. It also avoids SVM migrations evicting other, more important BOs as they will evict other SVM allocations first. Signed-off-by: Felix Kuehling Acked-by: Mukul Joshi Tested-by: Mukul Joshi Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 92af057dbf6d..154624ca97f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -611,6 +611,8 @@ int amdgpu_bo_create(struct amdgpu_device *adev, else amdgpu_bo_placement_from_domain(bo, bp->domain); if (bp->type == ttm_bo_type_kernel) + bo->tbo.priority = 2; + else if (!(bp->flags & AMDGPU_GEM_CREATE_DISCARDABLE)) bo->tbo.priority = 1; if (!bp->destroy) -- cgit v1.2.3 From a522ec528cc74377e541e49555ba8739c4e5d4be Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Fri, 19 Apr 2024 15:40:08 +0800 Subject: drm/amdgpu/umsch: don't execute umsch test when GPU is in reset/suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit umsch test needs full GPU functionality(e.g., VM update, TLB flush, possibly buffer moving under memory pressure) which may be not ready under these states. Just skip it to avoid potential issues. Signed-off-by: Lang Yu Reviewed-by: Christian König Reviewed-by: Veerabadhran Gopalakrishnan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index 0df97c3e3a70..f7c73533e336 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -774,6 +774,9 @@ static int umsch_mm_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_in_reset(adev) || adev->in_s0ix || adev->in_suspend) + return 0; + return umsch_mm_test(adev); } -- cgit v1.2.3 From 26de73bc0a73edeead58d76596a70706c37b3049 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Mon, 25 Mar 2024 15:33:34 +0800 Subject: drm/amdgpu: Fix the ring buffer size for queue VM flush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Here are the corrections needed for the queue ring buffer size calculation for the following cases: - Remove the KIQ VM flush ring usage. - Add the invalidate TLBs packet for gfx10 and gfx11 queue. - There's no VM flush and PFP sync, so remove the gfx9 real ring and compute ring buffer usage. Signed-off-by: Prike Liang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 3 +-- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 3 +-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 -- 3 files changed, 2 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index d5b924222903..591e8ed4b491 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -9186,7 +9186,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { 7 + /* PIPELINE_SYNC */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* VM_FLUSH */ + 4 + /* VM_FLUSH */ 8 + /* FENCE for VM_FLUSH */ 20 + /* GDS switch */ 4 + /* double SWITCH_BUFFER, @@ -9276,7 +9276,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { 7 + /* gfx_v10_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* gfx_v10_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v10_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */ .emit_ib = gfx_v10_0_ring_emit_ib_compute, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 5dbfef49dd5d..3c01d89e7a1b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -6187,7 +6187,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { 7 + /* PIPELINE_SYNC */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* VM_FLUSH */ + 4 + /* VM_FLUSH */ 8 + /* FENCE for VM_FLUSH */ 20 + /* GDS switch */ 5 + /* COND_EXEC */ @@ -6273,7 +6273,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = { 7 + /* gfx_v11_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* gfx_v11_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */ .emit_ib = gfx_v11_0_ring_emit_ib_compute, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 71b555993b7a..eff33569b300 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -6981,7 +6981,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* gfx_v9_0_ring_emit_vm_flush */ 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 7 + /* gfx_v9_0_emit_mem_sync */ 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */ @@ -7019,7 +7018,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* gfx_v9_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ .emit_fence = gfx_v9_0_ring_emit_fence_kiq, -- cgit v1.2.3 From 92ed1e9cd5f6cc4f8c9a9ba6c4d2d2bbc6221296 Mon Sep 17 00:00:00 2001 From: Le Ma Date: Wed, 17 Apr 2024 17:57:52 +0800 Subject: drm/amdgpu: init microcode chip name from ip versions To adapt to different gc versions in gfx_v9_4_3.c file. Signed-off-by: Le Ma Reviewed-by: Hawking Zhang Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index fc33354f1d3d..622889f79f10 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -431,16 +431,16 @@ out: static int gfx_v9_4_3_init_microcode(struct amdgpu_device *adev) { - const char *chip_name; + char ucode_prefix[30]; int r; - chip_name = "gc_9_4_3"; + amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - r = gfx_v9_4_3_init_rlc_microcode(adev, chip_name); + r = gfx_v9_4_3_init_rlc_microcode(adev, ucode_prefix); if (r) return r; - r = gfx_v9_4_3_init_cp_compute_microcode(adev, chip_name); + r = gfx_v9_4_3_init_cp_compute_microcode(adev, ucode_prefix); if (r) return r; -- cgit v1.2.3 From ea9238a81b3ab8dcec99b1322bab5a30043b320a Mon Sep 17 00:00:00 2001 From: Frank Min Date: Wed, 10 Apr 2024 21:13:25 +0800 Subject: drm/amdgpu: replace tmz flag into buffer flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace tmz flag into buffer flag to make it easier to understand and extend Signed-off-by: Likun Gao Signed-off-by: Frank Min Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 18 +++++++++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 4 +++- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/si_dma.c | 4 ++-- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 4 ++-- 15 files changed, 42 insertions(+), 36 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index edc6377ec5ff..199693369c7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -39,7 +39,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, for (i = 0; i < n; i++) { struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence, - false, false, false); + false, false, 0); if (r) goto exit_do_move; r = dma_fence_wait(fence, false); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 154624ca97f9..69513e34f97b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -765,7 +765,7 @@ int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence) return amdgpu_copy_buffer(ring, shadow_addr, parent_addr, amdgpu_bo_size(shadow), NULL, fence, - true, false, false); + true, false, 0); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 173a2a308078..b51a82e711df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -132,7 +132,7 @@ struct amdgpu_buffer_funcs { uint64_t dst_offset, /* number of byte to transfer */ uint32_t byte_count, - bool tmz); + uint32_t copy_flags); /* maximum bytes in a single operation */ uint32_t fill_max_bytes; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 1d71729e3f6b..7805ea4d82f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -236,7 +236,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo); dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8; amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, - dst_addr, num_bytes, false); + dst_addr, num_bytes, 0); amdgpu_ring_pad_ib(ring, &job->ibs[0]); WARN_ON(job->ibs[0].length_dw > num_dw); @@ -296,6 +296,8 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, struct dma_fence *fence = NULL; int r = 0; + uint32_t copy_flags = 0; + if (!adev->mman.buffer_funcs_enabled) { DRM_ERROR("Trying to move memory with ring turned off.\n"); return -EINVAL; @@ -323,8 +325,11 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, if (r) goto error; - r = amdgpu_copy_buffer(ring, from, to, cur_size, - resv, &next, false, true, tmz); + if (tmz) + copy_flags |= AMDGPU_COPY_FLAGS_TMZ; + + r = amdgpu_copy_buffer(ring, from, to, cur_size, resv, + &next, false, true, copy_flags); if (r) goto error; @@ -1488,7 +1493,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo, swap(src_addr, dst_addr); amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, - PAGE_SIZE, false); + PAGE_SIZE, 0); amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]); WARN_ON(job->ibs[0].length_dw > num_dw); @@ -2139,7 +2144,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, struct dma_resv *resv, struct dma_fence **fence, bool direct_submit, - bool vm_needs_flush, bool tmz) + bool vm_needs_flush, uint32_t copy_flags) { struct amdgpu_device *adev = ring->adev; unsigned int num_loops, num_dw; @@ -2165,8 +2170,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint32_t cur_size_in_bytes = min(byte_count, max_bytes); amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset, - dst_offset, cur_size_in_bytes, tmz); - + dst_offset, cur_size_in_bytes, copy_flags); src_offset += cur_size_in_bytes; dst_offset += cur_size_in_bytes; byte_count -= cur_size_in_bytes; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 32cf6b6f6efd..53d5a5990c31 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -111,6 +111,8 @@ struct amdgpu_copy_mem { unsigned long offset; }; +#define AMDGPU_COPY_FLAGS_TMZ (1 << 0) + int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size); void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev); int amdgpu_preempt_mgr_init(struct amdgpu_device *adev); @@ -151,7 +153,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, struct dma_resv *resv, struct dma_fence **fence, bool direct_submit, - bool vm_needs_flush, bool tmz); + bool vm_needs_flush, uint32_t copy_flags); int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, const struct amdgpu_copy_mem *src, const struct amdgpu_copy_mem *dst, diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index a3fccc4c1f43..76b9b0a31862 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -1290,7 +1290,7 @@ static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: is this a secure operation + * @copy_flags: unused * * Copy GPU buffers using the DMA engine (CIK). * Used by the amdgpu ttm implementation to move pages if @@ -1300,7 +1300,7 @@ static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0); ib->ptr[ib->length_dw++] = byte_count; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 07e19caf2bc1..bed84676f2bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -1176,7 +1176,7 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: unused + * @copy_flags: unused * * Copy GPU buffers using the DMA engine (VI). * Used by the amdgpu ttm implementation to move pages if @@ -1186,7 +1186,7 @@ static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 2ad615be4bb3..89e7accc209a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -1616,7 +1616,7 @@ static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: unused + * @copy_flags: unused * * Copy GPU buffers using the DMA engine (VI). * Used by the amdgpu ttm implementation to move pages if @@ -1626,7 +1626,7 @@ static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 43775cb67ff5..e2e3856938ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -2448,7 +2448,7 @@ static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: if a secure copy should be used + * @copy_flags: copy flags for the buffers * * Copy GPU buffers using the DMA engine (VEGA10/12). * Used by the amdgpu ttm implementation to move pages if @@ -2458,11 +2458,11 @@ static void sdma_v4_0_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | - SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0); + SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0); ib->ptr[ib->length_dw++] = byte_count - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 20909843bcd2..341b24d8320b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -1945,7 +1945,7 @@ static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: if a secure copy should be used + * @copy_flags: copy flags for the buffers * * Copy GPU buffers using the DMA engine. * Used by the amdgpu ttm implementation to move pages if @@ -1955,11 +1955,11 @@ static void sdma_v4_4_2_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | - SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0); + SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0); ib->ptr[ib->length_dw++] = byte_count - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 883e8a1b8a40..45a2d0a5a2d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1805,7 +1805,7 @@ static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: if a secure copy should be used + * @copy_flags: copy flags for the buffers * * Copy GPU buffers using the DMA engine (NAVI10). * Used by the amdgpu ttm implementation to move pages if @@ -1815,11 +1815,11 @@ static void sdma_v5_0_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | - SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0); + SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0); ib->ptr[ib->length_dw++] = byte_count - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 42f4bd250def..2b601cddcae0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1747,7 +1747,7 @@ static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: if a secure copy should be used + * @copy_flags: copy flags for the buffers * * Copy GPU buffers using the DMA engine. * Used by the amdgpu ttm implementation to move pages if @@ -1757,11 +1757,11 @@ static void sdma_v5_2_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | - SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0); + SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0); ib->ptr[ib->length_dw++] = byte_count - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 67a4d8b1512b..1f4877195213 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1574,7 +1574,7 @@ static void sdma_v6_0_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: if a secure copy should be used + * @copy_flags: copy flags for the buffers * * Copy GPU buffers using the DMA engine. * Used by the amdgpu ttm implementation to move pages if @@ -1584,11 +1584,11 @@ static void sdma_v6_0_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | - SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0); + SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0); ib->ptr[ib->length_dw++] = byte_count - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 9aa0e11ee673..db7389644794 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -761,7 +761,7 @@ static void si_dma_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: is this a secure operation + * @copy_flags: unused * * Copy GPU buffers using the DMA engine (VI). * Used by the amdgpu ttm implementation to move pages if @@ -771,7 +771,7 @@ static void si_dma_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, byte_count); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 5c8d81bfce7a..4bcfbeac48fb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -77,7 +77,7 @@ svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages, dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo); amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, - dst_addr, num_bytes, false); + dst_addr, num_bytes, 0); amdgpu_ring_pad_ib(ring, &job->ibs[0]); WARN_ON(job->ibs[0].length_dw > num_dw); @@ -153,7 +153,7 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys, } r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE, - NULL, &next, false, true, false); + NULL, &next, false, true, 0); if (r) { dev_err(adev->dev, "fail %d to copy memory\n", r); goto out_unlock; -- cgit v1.2.3 From a16b95158644224025fbb81778b6f0cfb4c38f67 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Sun, 21 Apr 2024 21:06:26 -0400 Subject: drm/amdgpu: Update CGCG settings for GFXIP 9.4.3 Tune coarse grain clock gating idle threshold and rlc idle timeout to achieve better kernel launch latency. Reviewed-by: Lijo Lazar Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 622889f79f10..4e6ed8061858 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -2404,10 +2404,10 @@ gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(struct amdgpu_device *adev, if (def != data) WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data); - /* enable cgcg FSM(0x0000363F) */ + /* CGCG Hysteresis: 400us */ def = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL); - data = (0x36 + data = (0x2710 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) @@ -2416,10 +2416,10 @@ gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(struct amdgpu_device *adev, if (def != data) WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, data); - /* set IDLE_POLL_COUNT(0x00900100) */ + /* set IDLE_POLL_COUNT(0x33450100)*/ def = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL); data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | - (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); + (0x3345 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); if (def != data) WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL, data); } else { -- cgit v1.2.3 From e0a9bbeea00234c468607b369816547ca8c86458 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sun, 14 Apr 2024 21:20:56 -0400 Subject: drm/amdgpu/sdma5.2: use legacy HDP flush for SDMA2/3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This avoids a potential conflict with firmwares with the newer HDP flush mechanism. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 2b601cddcae0..43e64b2da575 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -280,17 +280,21 @@ static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring) u32 ref_and_mask = 0; const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; - ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; - - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | - SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | - SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ - amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2); - amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2); - amdgpu_ring_write(ring, ref_and_mask); /* reference */ - amdgpu_ring_write(ring, ref_and_mask); /* mask */ - amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | - SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ + if (ring->me > 1) { + amdgpu_asic_flush_hdp(adev, ring); + } else { + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; + + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2); + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2); + amdgpu_ring_write(ring, ref_and_mask); /* reference */ + amdgpu_ring_write(ring, ref_and_mask); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ + } } /** -- cgit v1.2.3 From f88da7fbf665ffdcbf5b439922a231bec6c0cf01 Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Mon, 22 Apr 2024 16:22:54 +0800 Subject: drm/amdgpu/mes: fix use-after-free issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Delete fence fallback timer to fix the ramdom use-after-free issue. v2: move to amdgpu_mes.c Signed-off-by: Jack Xiao Acked-by: Lijo Lazar Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 7d215f280aab..310c8f0c21da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1128,6 +1128,7 @@ void amdgpu_mes_remove_ring(struct amdgpu_device *adev, return; amdgpu_mes_remove_hw_queue(adev, ring->hw_queue_id); + del_timer_sync(&ring->fence_drv.fallback_timer); amdgpu_ring_fini(ring); kfree(ring); } -- cgit v1.2.3 From 60c448439f3b5db9431e13f7f361b4074d0e8594 Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Mon, 22 Apr 2024 14:47:52 +0800 Subject: drm/amdgpu: Fix uninitialized variable warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit return 0 to avoid returning an uninitialized variable r Signed-off-by: Ma Jun Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/aldebaran.c | 2 +- drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c index 576067d66bb9..d0a8da67dc2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c @@ -97,7 +97,7 @@ static int aldebaran_mode2_suspend_ip(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = false; } - return r; + return 0; } static int diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c index 93f6772d1b24..481217c32d85 100644 --- a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c +++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c @@ -92,7 +92,7 @@ static int sienna_cichlid_mode2_suspend_ip(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = false; } - return r; + return 0; } static int -- cgit v1.2.3 From af730e08203522dbf5a03853576c5b43c9d1afea Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Fri, 29 Mar 2024 14:42:59 +0800 Subject: drm/amdgpu: Add interface to reserve bad page MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add interface to reserve bad page. Signed-off-by: YiPeng Chai Reviewed-by: Christian König Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 19 +++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 4 ++++ 2 files changed, 23 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 352ce16a0963..fe4fdb079bd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2792,6 +2792,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) } } + mutex_init(&con->page_rsv_lock); mutex_init(&con->page_retirement_lock); init_waitqueue_head(&con->page_retirement_wq); atomic_set(&con->page_retirement_req_cnt, 0); @@ -2842,6 +2843,8 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) atomic_set(&con->page_retirement_req_cnt, 0); + mutex_destroy(&con->page_rsv_lock); + cancel_work_sync(&con->recovery_work); mutex_lock(&con->recovery_lock); @@ -4280,3 +4283,19 @@ void amdgpu_ras_query_boot_status(struct amdgpu_device *adev, u32 num_instances) amdgpu_ras_boot_time_error_reporting(adev, i, boot_error); } } + +int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; + uint64_t start = pfn << AMDGPU_GPU_PAGE_SHIFT; + int ret = 0; + + mutex_lock(&con->page_rsv_lock); + ret = amdgpu_vram_mgr_query_page_status(mgr, start); + if (ret == -ENOENT) + ret = amdgpu_vram_mgr_reserve_range(mgr, start, AMDGPU_GPU_PAGE_SIZE); + mutex_unlock(&con->page_rsv_lock); + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 8d26989c75c8..ab5bf573378e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -500,6 +500,7 @@ struct amdgpu_ras { wait_queue_head_t page_retirement_wq; struct mutex page_retirement_lock; atomic_t page_retirement_req_cnt; + struct mutex page_rsv_lock; /* Fatal error detected flag */ atomic_t fed; @@ -909,4 +910,7 @@ bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev); bool amdgpu_ras_event_id_is_valid(struct amdgpu_device *adev, u64 id); u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type); + +int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn); + #endif -- cgit v1.2.3 From e21d253bd74bd422347d202ea2205cdc7623eed2 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 1 Apr 2024 15:58:38 +0530 Subject: drm/amdgpu: add prototype for ip dump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the prototype to dump ip registers for all ips of different asics and set them to NULL for now. Based on the requirement add a function pointer for each of them. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c | 1 + drivers/gpu/drm/amd/amdgpu/cik.c | 1 + drivers/gpu/drm/amd/amdgpu/cik_ih.c | 1 + drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 1 + drivers/gpu/drm/amd/amdgpu/cz_ih.c | 1 + drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 1 + drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 1 + drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 1 + drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 1 + drivers/gpu/drm/amd/amdgpu/iceland_ih.c | 1 + drivers/gpu/drm/amd/amdgpu/ih_v6_0.c | 1 + drivers/gpu/drm/amd/amdgpu/ih_v6_1.c | 1 + drivers/gpu/drm/amd/amdgpu/ih_v7_0.c | 1 + drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 1 + drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 2 ++ drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c | 1 + drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c | 1 + drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 1 + drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c | 1 + drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c | 1 + drivers/gpu/drm/amd/amdgpu/mes_v10_1.c | 1 + drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 1 + drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 1 + drivers/gpu/drm/amd/amdgpu/nv.c | 1 + drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 1 + drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 1 + drivers/gpu/drm/amd/amdgpu/si.c | 1 + drivers/gpu/drm/amd/amdgpu/si_dma.c | 1 + drivers/gpu/drm/amd/amdgpu/si_ih.c | 1 + drivers/gpu/drm/amd/amdgpu/soc15.c | 1 + drivers/gpu/drm/amd/amdgpu/soc21.c | 1 + drivers/gpu/drm/amd/amdgpu/tonga_ih.c | 1 + drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c | 1 + drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 1 + drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 1 + drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 2 ++ drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vi.c | 1 + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + drivers/gpu/drm/amd/include/amd_shared.h | 1 + drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c | 1 + drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 1 + drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c | 1 + 64 files changed, 66 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index 6d72355ac492..34a62033a388 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -637,6 +637,7 @@ static const struct amd_ip_funcs acp_ip_funcs = { .soft_reset = acp_soft_reset, .set_clockgating_state = acp_set_clockgating_state, .set_powergating_state = acp_set_powergating_state, + .dump_ip_state = NULL, }; const struct amdgpu_ip_block_version acp_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index f7c73533e336..d877d96e4afe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -878,6 +878,7 @@ static const struct amd_ip_funcs umsch_mm_v4_0_ip_funcs = { .hw_fini = umsch_mm_hw_fini, .suspend = umsch_mm_suspend, .resume = umsch_mm_resume, + .dump_ip_state = NULL, }; const struct amdgpu_ip_block_version umsch_mm_v4_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index 8baa2e0935cc..d1dc91009c0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -658,6 +658,7 @@ static const struct amd_ip_funcs amdgpu_vkms_ip_funcs = { .soft_reset = amdgpu_vkms_soft_reset, .set_clockgating_state = amdgpu_vkms_set_clockgating_state, .set_powergating_state = amdgpu_vkms_set_powergating_state, + .dump_ip_state = NULL, }; const struct amdgpu_ip_block_version amdgpu_vkms_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index fdbc26346b54..884de42553a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -2210,6 +2210,7 @@ static const struct amd_ip_funcs cik_common_ip_funcs = { .soft_reset = cik_common_soft_reset, .set_clockgating_state = cik_common_set_clockgating_state, .set_powergating_state = cik_common_set_powergating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ip_block_version cik_common_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index f24e34dc33d1..676f3f612fde 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -435,6 +435,7 @@ static const struct amd_ip_funcs cik_ih_ip_funcs = { .soft_reset = cik_ih_soft_reset, .set_clockgating_state = cik_ih_set_clockgating_state, .set_powergating_state = cik_ih_set_powergating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ih_funcs cik_ih_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 76b9b0a31862..36b386e9c4f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -1228,6 +1228,7 @@ static const struct amd_ip_funcs cik_sdma_ip_funcs = { .soft_reset = cik_sdma_soft_reset, .set_clockgating_state = cik_sdma_set_clockgating_state, .set_powergating_state = cik_sdma_set_powergating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index c19681492efa..958c84a6af7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -433,6 +433,7 @@ static const struct amd_ip_funcs cz_ih_ip_funcs = { .soft_reset = cz_ih_soft_reset, .set_clockgating_state = cz_ih_set_clockgating_state, .set_powergating_state = cz_ih_set_powergating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ih_funcs cz_ih_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 221af054d874..7a32ca7d6fc4 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -3333,6 +3333,7 @@ static const struct amd_ip_funcs dce_v10_0_ip_funcs = { .soft_reset = dce_v10_0_soft_reset, .set_clockgating_state = dce_v10_0_set_clockgating_state, .set_powergating_state = dce_v10_0_set_powergating_state, + .dump_ip_state = NULL, }; static void diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 69e8b0db6cf7..67c01e137fac 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -3464,6 +3464,7 @@ static const struct amd_ip_funcs dce_v11_0_ip_funcs = { .soft_reset = dce_v11_0_soft_reset, .set_clockgating_state = dce_v11_0_set_clockgating_state, .set_powergating_state = dce_v11_0_set_powergating_state, + .dump_ip_state = NULL, }; static void diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 60d40201fdd1..209cd44bbcec 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -3154,6 +3154,7 @@ static const struct amd_ip_funcs dce_v6_0_ip_funcs = { .soft_reset = dce_v6_0_soft_reset, .set_clockgating_state = dce_v6_0_set_clockgating_state, .set_powergating_state = dce_v6_0_set_powergating_state, + .dump_ip_state = NULL, }; static void diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 5a5fcc45e452..fff7f4f766b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -3242,6 +3242,7 @@ static const struct amd_ip_funcs dce_v8_0_ip_funcs = { .soft_reset = dce_v8_0_soft_reset, .set_clockgating_state = dce_v8_0_set_clockgating_state, .set_powergating_state = dce_v8_0_set_powergating_state, + .dump_ip_state = NULL, }; static void diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 591e8ed4b491..29384a2c03ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -9170,6 +9170,7 @@ static const struct amd_ip_funcs gfx_v10_0_ip_funcs = { .set_clockgating_state = gfx_v10_0_set_clockgating_state, .set_powergating_state = gfx_v10_0_set_powergating_state, .get_clockgating_state = gfx_v10_0_get_clockgating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 3c01d89e7a1b..e9e5b0eb536e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -6169,6 +6169,7 @@ static const struct amd_ip_funcs gfx_v11_0_ip_funcs = { .set_clockgating_state = gfx_v11_0_set_clockgating_state, .set_powergating_state = gfx_v11_0_set_powergating_state, .get_clockgating_state = gfx_v11_0_get_clockgating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 34f9211b2679..559250c8a147 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -3457,6 +3457,7 @@ static const struct amd_ip_funcs gfx_v6_0_ip_funcs = { .soft_reset = gfx_v6_0_soft_reset, .set_clockgating_state = gfx_v6_0_set_clockgating_state, .set_powergating_state = gfx_v6_0_set_powergating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 86a4865b1ae5..81f7ab0dc135 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -4977,6 +4977,7 @@ static const struct amd_ip_funcs gfx_v7_0_ip_funcs = { .soft_reset = gfx_v7_0_soft_reset, .set_clockgating_state = gfx_v7_0_set_clockgating_state, .set_powergating_state = gfx_v7_0_set_powergating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 202ddda57f98..522cbd45dd46 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6878,6 +6878,7 @@ static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { .set_clockgating_state = gfx_v8_0_set_clockgating_state, .set_powergating_state = gfx_v8_0_set_powergating_state, .get_clockgating_state = gfx_v8_0_get_clockgating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index eff33569b300..d3fc04a0281a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -6856,6 +6856,7 @@ static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { .set_clockgating_state = gfx_v9_0_set_clockgating_state, .set_powergating_state = gfx_v9_0_set_powergating_state, .get_clockgating_state = gfx_v9_0_get_clockgating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 4e6ed8061858..6190f51bf489 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -4016,6 +4016,7 @@ static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { .set_clockgating_state = gfx_v9_4_3_set_clockgating_state, .set_powergating_state = gfx_v9_4_3_set_powergating_state, .get_clockgating_state = gfx_v9_4_3_get_clockgating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 23b478639921..060e54b8ffff 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -1115,6 +1115,7 @@ static const struct amd_ip_funcs gmc_v6_0_ip_funcs = { .soft_reset = gmc_v6_0_soft_reset, .set_clockgating_state = gmc_v6_0_set_clockgating_state, .set_powergating_state = gmc_v6_0_set_powergating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_gmc_funcs gmc_v6_0_gmc_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 3da7b6a2b00d..534825022ddd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -1354,6 +1354,7 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = { .soft_reset = gmc_v7_0_soft_reset, .set_clockgating_state = gmc_v7_0_set_clockgating_state, .set_powergating_state = gmc_v7_0_set_powergating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index d20e5f20ee31..aba787e1386a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1717,6 +1717,7 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = { .set_clockgating_state = gmc_v8_0_set_clockgating_state, .set_powergating_state = gmc_v8_0_set_powergating_state, .get_clockgating_state = gmc_v8_0_get_clockgating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index 2c02ae69883d..2d6f969266b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -425,6 +425,7 @@ static const struct amd_ip_funcs iceland_ih_ip_funcs = { .soft_reset = iceland_ih_soft_reset, .set_clockgating_state = iceland_ih_set_clockgating_state, .set_powergating_state = iceland_ih_set_powergating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ih_funcs iceland_ih_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index c757ef99e3c5..77b5068b7be5 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -770,6 +770,7 @@ static const struct amd_ip_funcs ih_v6_0_ip_funcs = { .set_clockgating_state = ih_v6_0_set_clockgating_state, .set_powergating_state = ih_v6_0_set_powergating_state, .get_clockgating_state = ih_v6_0_get_clockgating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ih_funcs ih_v6_0_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c index 29ed78798070..fc2c27a199c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c @@ -775,6 +775,7 @@ static const struct amd_ip_funcs ih_v6_1_ip_funcs = { .set_clockgating_state = ih_v6_1_set_clockgating_state, .set_powergating_state = ih_v6_1_set_powergating_state, .get_clockgating_state = ih_v6_1_get_clockgating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ih_funcs ih_v6_1_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c index 7aed96fa10a9..31ed5030169b 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c @@ -749,6 +749,7 @@ static const struct amd_ip_funcs ih_v7_0_ip_funcs = { .set_clockgating_state = ih_v7_0_set_clockgating_state, .set_powergating_state = ih_v7_0_set_powergating_state, .get_clockgating_state = ih_v7_0_get_clockgating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ih_funcs ih_v7_0_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 1c8116d75f63..698c5d4b7484 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -759,6 +759,7 @@ static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = jpeg_v2_0_set_clockgating_state, .set_powergating_state = jpeg_v2_0_set_powergating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 99cd49ee8ef6..0a9a2d58e3ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -632,6 +632,7 @@ static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = jpeg_v2_5_set_clockgating_state, .set_powergating_state = jpeg_v2_5_set_powergating_state, + .dump_ip_state = NULL, }; static const struct amd_ip_funcs jpeg_v2_6_ip_funcs = { @@ -652,6 +653,7 @@ static const struct amd_ip_funcs jpeg_v2_6_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = jpeg_v2_5_set_clockgating_state, .set_powergating_state = jpeg_v2_5_set_powergating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index a92481da60cd..e03d46151ae3 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -557,6 +557,7 @@ static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = jpeg_v3_0_set_clockgating_state, .set_powergating_state = jpeg_v3_0_set_powergating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index 88ea58d5c4ab..f142cb200552 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -719,6 +719,7 @@ static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = jpeg_v4_0_set_clockgating_state, .set_powergating_state = jpeg_v4_0_set_powergating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 32caeb37cef9..bc3a6f16f4bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -1053,6 +1053,7 @@ static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = jpeg_v4_0_3_set_clockgating_state, .set_powergating_state = jpeg_v4_0_3_set_powergating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index edf5bcdd2bc9..ee29c97721ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -762,6 +762,7 @@ static const struct amd_ip_funcs jpeg_v4_0_5_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = jpeg_v4_0_5_set_clockgating_state, .set_powergating_state = jpeg_v4_0_5_set_powergating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index e70200f97555..f5664c92d10d 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -513,6 +513,7 @@ static const struct amd_ip_funcs jpeg_v5_0_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = jpeg_v5_0_0_set_clockgating_state, .set_powergating_state = jpeg_v5_0_0_set_powergating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c index 1e5ad1e08d2a..4ed0429cf4f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c @@ -1176,6 +1176,7 @@ static const struct amd_ip_funcs mes_v10_1_ip_funcs = { .hw_fini = mes_v10_1_hw_fini, .suspend = mes_v10_1_suspend, .resume = mes_v10_1_resume, + .dump_ip_state = NULL, }; const struct amdgpu_ip_block_version mes_v10_1_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index fbe31afad1d4..bb7318c2d652 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -1404,6 +1404,7 @@ static const struct amd_ip_funcs mes_v11_0_ip_funcs = { .hw_fini = mes_v11_0_hw_fini, .suspend = mes_v11_0_suspend, .resume = mes_v11_0_resume, + .dump_ip_state = NULL, }; const struct amdgpu_ip_block_version mes_v11_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index 4178f4e5dad7..550c5ca4ea03 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -713,6 +713,7 @@ static const struct amd_ip_funcs navi10_ih_ip_funcs = { .set_clockgating_state = navi10_ih_set_clockgating_state, .set_powergating_state = navi10_ih_set_powergating_state, .get_clockgating_state = navi10_ih_get_clockgating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ih_funcs navi10_ih_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 4d7976b77767..d7d3b972392b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -1131,4 +1131,5 @@ static const struct amd_ip_funcs nv_common_ip_funcs = { .set_clockgating_state = nv_common_set_clockgating_state, .set_powergating_state = nv_common_set_powergating_state, .get_clockgating_state = nv_common_get_clockgating_state, + .dump_ip_state = NULL, }; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index bed84676f2bf..0fd46ed1f67d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -1113,6 +1113,7 @@ static const struct amd_ip_funcs sdma_v2_4_ip_funcs = { .soft_reset = sdma_v2_4_soft_reset, .set_clockgating_state = sdma_v2_4_set_clockgating_state, .set_powergating_state = sdma_v2_4_set_powergating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 89e7accc209a..a3b8f1d807ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -1553,6 +1553,7 @@ static const struct amd_ip_funcs sdma_v3_0_ip_funcs = { .set_clockgating_state = sdma_v3_0_set_clockgating_state, .set_powergating_state = sdma_v3_0_set_powergating_state, .get_clockgating_state = sdma_v3_0_get_clockgating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 67e179c7e347..b5b15dba9a76 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -2706,6 +2706,7 @@ static const struct amd_ip_funcs si_common_ip_funcs = { .soft_reset = si_common_soft_reset, .set_clockgating_state = si_common_set_clockgating_state, .set_powergating_state = si_common_set_powergating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ip_block_version si_common_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index db7389644794..fd8f28fdee1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -708,6 +708,7 @@ static const struct amd_ip_funcs si_dma_ip_funcs = { .soft_reset = si_dma_soft_reset, .set_clockgating_state = si_dma_set_clockgating_state, .set_powergating_state = si_dma_set_powergating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ring_funcs si_dma_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index cada9f300a7f..3b7427f5a6c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -296,6 +296,7 @@ static const struct amd_ip_funcs si_ih_ip_funcs = { .soft_reset = si_ih_soft_reset, .set_clockgating_state = si_ih_set_clockgating_state, .set_powergating_state = si_ih_set_powergating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ih_funcs si_ih_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index c8abbf5da736..6ba6c96c91c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1501,4 +1501,5 @@ static const struct amd_ip_funcs soc15_common_ip_funcs = { .set_clockgating_state = soc15_common_set_clockgating_state, .set_powergating_state = soc15_common_set_powergating_state, .get_clockgating_state= soc15_common_get_clockgating_state, + .dump_ip_state = NULL, }; diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 43ca63fe85ac..40e7ab0716cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -985,4 +985,5 @@ static const struct amd_ip_funcs soc21_common_ip_funcs = { .set_clockgating_state = soc21_common_set_clockgating_state, .set_powergating_state = soc21_common_set_powergating_state, .get_clockgating_state = soc21_common_get_clockgating_state, + .dump_ip_state = NULL, }; diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index 450b6e831509..794a1f7bc2ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -486,6 +486,7 @@ static const struct amd_ip_funcs tonga_ih_ip_funcs = { .post_soft_reset = tonga_ih_post_soft_reset, .set_clockgating_state = tonga_ih_set_clockgating_state, .set_powergating_state = tonga_ih_set_powergating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ih_funcs tonga_ih_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c index a6006f231c65..1e232ed23102 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c @@ -819,6 +819,7 @@ static const struct amd_ip_funcs uvd_v3_1_ip_funcs = { .soft_reset = uvd_v3_1_soft_reset, .set_clockgating_state = uvd_v3_1_set_clockgating_state, .set_powergating_state = uvd_v3_1_set_powergating_state, + .dump_ip_state = NULL, }; const struct amdgpu_ip_block_version uvd_v3_1_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 1aa09ad7bbe3..48bcf41e5558 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -769,6 +769,7 @@ static const struct amd_ip_funcs uvd_v4_2_ip_funcs = { .soft_reset = uvd_v4_2_soft_reset, .set_clockgating_state = uvd_v4_2_set_clockgating_state, .set_powergating_state = uvd_v4_2_set_powergating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index f8b229b75435..838b7d720c52 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -877,6 +877,7 @@ static const struct amd_ip_funcs uvd_v5_0_ip_funcs = { .set_clockgating_state = uvd_v5_0_set_clockgating_state, .set_powergating_state = uvd_v5_0_set_powergating_state, .get_clockgating_state = uvd_v5_0_get_clockgating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index a9a6880f44e3..036378f5f53f 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -1545,6 +1545,7 @@ static const struct amd_ip_funcs uvd_v6_0_ip_funcs = { .set_clockgating_state = uvd_v6_0_set_clockgating_state, .set_powergating_state = uvd_v6_0_set_powergating_state, .get_clockgating_state = uvd_v6_0_get_clockgating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index a08e7abca423..2178cf5a27b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -626,6 +626,7 @@ static const struct amd_ip_funcs vce_v2_0_ip_funcs = { .soft_reset = vce_v2_0_soft_reset, .set_clockgating_state = vce_v2_0_set_clockgating_state, .set_powergating_state = vce_v2_0_set_powergating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index f4760748d349..0f0d4b0d50cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -913,6 +913,7 @@ static const struct amd_ip_funcs vce_v3_0_ip_funcs = { .set_clockgating_state = vce_v3_0_set_clockgating_state, .set_powergating_state = vce_v3_0_set_powergating_state, .get_clockgating_state = vce_v3_0_get_clockgating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index aaceecd558cf..7ff5d0574454 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -1902,6 +1902,7 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .post_soft_reset = NULL /* vcn_v1_0_post_soft_reset */, .set_clockgating_state = vcn_v1_0_set_clockgating_state, .set_powergating_state = vcn_v1_0_set_powergating_state, + .dump_ip_state = NULL, }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index e357d8cf0c01..4df1b75f971a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -2008,6 +2008,7 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v2_0_set_clockgating_state, .set_powergating_state = vcn_v2_0_set_powergating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 1cd8a94b0fbc..d91c3154641b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -1901,6 +1901,7 @@ static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v2_5_set_clockgating_state, .set_powergating_state = vcn_v2_5_set_powergating_state, + .dump_ip_state = NULL, }; static const struct amd_ip_funcs vcn_v2_6_ip_funcs = { @@ -1921,6 +1922,7 @@ static const struct amd_ip_funcs vcn_v2_6_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v2_5_set_clockgating_state, .set_powergating_state = vcn_v2_5_set_powergating_state, + .dump_ip_state = NULL, }; const struct amdgpu_ip_block_version vcn_v2_5_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 26e63f01250a..98b7d2d0536e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -2231,6 +2231,7 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v3_0_set_clockgating_state, .set_powergating_state = vcn_v3_0_set_powergating_state, + .dump_ip_state = NULL, }; const struct amdgpu_ip_block_version vcn_v3_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index aff1a4d8d393..122fec3be96f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -2131,6 +2131,7 @@ static const struct amd_ip_funcs vcn_v4_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v4_0_set_clockgating_state, .set_powergating_state = vcn_v4_0_set_powergating_state, + .dump_ip_state = NULL, }; const struct amdgpu_ip_block_version vcn_v4_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 203fa988322b..599b6466183e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -1660,6 +1660,7 @@ static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v4_0_3_set_clockgating_state, .set_powergating_state = vcn_v4_0_3_set_powergating_state, + .dump_ip_state = NULL, }; const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 8f2bcce13339..8b1e5bc9fb69 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -1753,6 +1753,7 @@ static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v4_0_5_set_clockgating_state, .set_powergating_state = vcn_v4_0_5_set_powergating_state, + .dump_ip_state = NULL, }; const struct amdgpu_ip_block_version vcn_v4_0_5_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index b226306164bc..6d6aa02af4f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -1329,6 +1329,7 @@ static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v5_0_0_set_clockgating_state, .set_powergating_state = vcn_v5_0_0_set_powergating_state, + .dump_ip_state = NULL, }; const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 2415355b037c..a04aa6833fc5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -2058,6 +2058,7 @@ static const struct amd_ip_funcs vi_common_ip_funcs = { .set_clockgating_state = vi_common_set_clockgating_state, .set_powergating_state = vi_common_set_powergating_state, .get_clockgating_state = vi_common_get_clockgating_state, + .dump_ip_state = NULL, }; static const struct amdgpu_ip_block_version vi_common_ip_block = diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0d67fc56249b..fd58d0acc47a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3121,6 +3121,7 @@ static const struct amd_ip_funcs amdgpu_dm_funcs = { .soft_reset = dm_soft_reset, .set_clockgating_state = dm_set_clockgating_state, .set_powergating_state = dm_set_powergating_state, + .dump_ip_state = NULL, }; const struct amdgpu_ip_block_version dm_ip_block = { diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index b0a6256e89f4..9884f6c48a7d 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -321,6 +321,7 @@ struct amd_ip_funcs { int (*set_powergating_state)(void *handle, enum amd_powergating_state state); void (*get_clockgating_state)(void *handle, u64 *flags); + void (*dump_ip_state)(void *handle); }; diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c index 5cb4725c773f..8c07f8c7f3ab 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c @@ -3316,6 +3316,7 @@ static const struct amd_ip_funcs kv_dpm_ip_funcs = { .soft_reset = kv_dpm_soft_reset, .set_clockgating_state = kv_dpm_set_clockgating_state, .set_powergating_state = kv_dpm_set_powergating_state, + .dump_ip_state = NULL, }; const struct amdgpu_ip_block_version kv_smu_ip_block = { diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index eb4da3666e05..c312b9332326 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -8060,6 +8060,7 @@ static const struct amd_ip_funcs si_dpm_ip_funcs = { .soft_reset = si_dpm_soft_reset, .set_clockgating_state = si_dpm_set_clockgating_state, .set_powergating_state = si_dpm_set_powergating_state, + .dump_ip_state = NULL, }; const struct amdgpu_ip_block_version si_smu_ip_block = diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index 133d1ee6e67c..c63474ee17a7 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -302,6 +302,7 @@ static const struct amd_ip_funcs pp_ip_funcs = { .soft_reset = pp_sw_reset, .set_clockgating_state = pp_set_clockgating_state, .set_powergating_state = pp_set_powergating_state, + .dump_ip_state = NULL, }; const struct amdgpu_ip_block_version pp_smu_ip_block = -- cgit v1.2.3 From c395dbb68b294d1de9a5ac6c9faaf8ac081123c3 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 16 Apr 2024 16:14:12 +0530 Subject: drm/amdgpu: add support of gfx10 register dump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding gfx10 gc registers to be used for register dump via devcoredump during a gpu reset. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 + drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 130 ++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/soc15.h | 2 + .../drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h | 12 ++ 5 files changed, 155 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e0d7f4ee7e16..cac0ca64367b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -139,6 +139,14 @@ enum amdgpu_ss { AMDGPU_SS_DRV_UNLOAD }; +struct amdgpu_hwip_reg_entry { + u32 hwip; + u32 inst; + u32 seg; + u32 reg_offset; + const char *reg_name; +}; + struct amdgpu_watchdog_timer { bool timeout_fatal_disable; uint32_t period; /* maxCycles = (1 << period), the number of cycles before a timeout */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 04a86dff71e6..64f197bbc866 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -433,6 +433,10 @@ struct amdgpu_gfx { uint32_t num_xcc_per_xcp; struct mutex partition_mutex; bool mcbp; /* mid command buffer preemption */ + + /* IP reg dump */ + uint32_t *ip_dump; + uint32_t reg_count; }; struct amdgpu_gfx_ras_reg_entry { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 29384a2c03ec..c0cc3dd32f46 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -276,6 +276,99 @@ MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec2.bin"); MODULE_FIRMWARE("amdgpu/gc_10_3_7_rlc.bin"); +static const struct amdgpu_hwip_reg_entry gc_reg_list_10_1[] = { + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2), + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS3), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HPD_STATUS0), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT), + SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT), + SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS_2), + SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL0_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL0_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSQG_UTCL0_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL0_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmGCVM_L2_PROTECTION_FAULT_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_DEBUG_INTERRUPT_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_3), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_4), + SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_RLCS_GPM_STAT_2), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SPP_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_RLCS_BOOTLOAD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_A), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_B), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_LX6_CORE_PDEBUG_INST) +}; + static const struct soc15_reg_golden golden_settings_gc_10_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100), @@ -4490,6 +4583,22 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, hw_prio, NULL); } +static void gfx_v10_0_alloc_dump_mem(struct amdgpu_device *adev) +{ + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1); + uint32_t *ptr; + + ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr == NULL) { + DRM_ERROR("Failed to allocate memory for IP Dump\n"); + adev->gfx.ip_dump = NULL; + adev->gfx.reg_count = 0; + } else { + adev->gfx.ip_dump = ptr; + adev->gfx.reg_count = reg_count; + } +} + static int gfx_v10_0_sw_init(void *handle) { int i, j, k, r, ring_id = 0; @@ -4642,6 +4751,8 @@ static int gfx_v10_0_sw_init(void *handle) gfx_v10_0_gpu_early_init(adev); + gfx_v10_0_alloc_dump_mem(adev); + return 0; } @@ -4694,6 +4805,8 @@ static int gfx_v10_0_sw_fini(void *handle) gfx_v10_0_free_microcode(adev); + kfree(adev->gfx.ip_dump); + return 0; } @@ -9154,6 +9267,21 @@ static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ } +static void gfx_v10_ip_dump(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t i; + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1); + + if (!adev->gfx.ip_dump) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < reg_count; i++) + adev->gfx.ip_dump[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_10_1[i])); + amdgpu_gfx_off_ctrl(adev, true); +} + static const struct amd_ip_funcs gfx_v10_0_ip_funcs = { .name = "gfx_v10_0", .early_init = gfx_v10_0_early_init, @@ -9170,7 +9298,7 @@ static const struct amd_ip_funcs gfx_v10_0_ip_funcs = { .set_clockgating_state = gfx_v10_0_set_clockgating_state, .set_powergating_state = gfx_v10_0_set_powergating_state, .get_clockgating_state = gfx_v10_0_get_clockgating_state, - .dump_ip_state = NULL, + .dump_ip_state = gfx_v10_ip_dump, }; static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index 1444b7765e4b..282584a48be0 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -88,6 +88,8 @@ struct soc15_ras_field_entry { }; #define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg +#define SOC15_REG_ENTRY_STR(ip, inst, reg) \ + { ip##_HWIP, inst, reg##_BASE_IDX, reg, #reg } #define SOC15_REG_ENTRY_OFFSET(entry) (adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset) diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h index 4908044f7409..4c8e7fdb6976 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h @@ -4830,6 +4830,8 @@ #define mmCP_ECC_FIRSTOCCURRENCE_RING2_BASE_IDX 0 #define mmGB_EDC_MODE 0x1e1e #define mmGB_EDC_MODE_BASE_IDX 0 +#define mmCP_DEBUG 0x1e1f +#define mmCP_DEBUG_BASE_IDX 0 #define mmCP_FETCHER_SOURCE 0x1e22 #define mmCP_FETCHER_SOURCE_BASE_IDX 0 #define mmCP_PQ_WPTR_POLL_CNTL 0x1e23 @@ -7778,6 +7780,8 @@ #define mmCP_MES_DOORBELL_CONTROL5_BASE_IDX 1 #define mmCP_MES_DOORBELL_CONTROL6 0x2841 #define mmCP_MES_DOORBELL_CONTROL6_BASE_IDX 1 +#define mmCP_MES_DEBUG_INTERRUPT_INSTR_PNTR 0x2842 +#define mmCP_MES_DEBUG_INTERRUPT_INSTR_PNTR_BASE_IDX 1 #define mmCP_MES_GP0_LO 0x2843 #define mmCP_MES_GP0_LO_BASE_IDX 1 #define mmCP_MES_GP0_HI 0x2844 @@ -9332,10 +9336,16 @@ #define mmRLC_LB_CNTR_INIT_1_BASE_IDX 1 #define mmRLC_LB_CNTR_1 0x4c1c #define mmRLC_LB_CNTR_1_BASE_IDX 1 +#define mmRLC_GPM_DEBUG_INST_ADDR 0x4c1d +#define mmRLC_GPM_DEBUG_INST_ADDR_BASE_IDX 1 #define mmRLC_JUMP_TABLE_RESTORE 0x4c1e #define mmRLC_JUMP_TABLE_RESTORE_BASE_IDX 1 #define mmRLC_PG_DELAY_2 0x4c1f #define mmRLC_PG_DELAY_2_BASE_IDX 1 +#define mmRLC_GPM_DEBUG_INST_A 0x4c22 +#define mmRLC_GPM_DEBUG_INST_A_BASE_IDX 1 +#define mmRLC_GPM_DEBUG_INST_B 0x4c23 +#define mmRLC_GPM_DEBUG_INST_B_BASE_IDX 1 #define mmRLC_GPU_CLOCK_COUNT_LSB 0x4c24 #define mmRLC_GPU_CLOCK_COUNT_LSB_BASE_IDX 1 #define mmRLC_GPU_CLOCK_COUNT_MSB 0x4c25 @@ -9720,6 +9730,8 @@ #define mmRLC_SPM_THREAD_TRACE_CTRL_BASE_IDX 1 #define mmRLC_LB_CNTR_2 0x4de7 #define mmRLC_LB_CNTR_2_BASE_IDX 1 +#define mmRLC_LX6_CORE_PDEBUG_INST 0x4deb +#define mmRLC_LX6_CORE_PDEBUG_INST_BASE_IDX 1 #define mmRLC_CPAXI_DOORBELL_MON_CTRL 0x4df1 #define mmRLC_CPAXI_DOORBELL_MON_CTRL_BASE_IDX 1 #define mmRLC_CPAXI_DOORBELL_MON_STAT 0x4df2 -- cgit v1.2.3 From 40356542c36160bddee9fdf25b9248e0c9e6503b Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 16 Apr 2024 16:30:50 +0530 Subject: drm/amdgpu: add protype for print ip state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the protoype for print ip state to be used to print the registers in devcoredump during a gpu reset. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c | 1 + drivers/gpu/drm/amd/amdgpu/cik.c | 1 + drivers/gpu/drm/amd/amdgpu/cik_ih.c | 1 + drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 1 + drivers/gpu/drm/amd/amdgpu/cz_ih.c | 1 + drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 1 + drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 1 + drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 1 + drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 1 + drivers/gpu/drm/amd/amdgpu/iceland_ih.c | 1 + drivers/gpu/drm/amd/amdgpu/ih_v6_0.c | 1 + drivers/gpu/drm/amd/amdgpu/ih_v6_1.c | 1 + drivers/gpu/drm/amd/amdgpu/ih_v7_0.c | 1 + drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 1 + drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 2 ++ drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c | 1 + drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c | 1 + drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 1 + drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c | 1 + drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c | 1 + drivers/gpu/drm/amd/amdgpu/mes_v10_1.c | 1 + drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 1 + drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 1 + drivers/gpu/drm/amd/amdgpu/nv.c | 1 + drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 1 + drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 1 + drivers/gpu/drm/amd/amdgpu/si.c | 1 + drivers/gpu/drm/amd/amdgpu/si_dma.c | 1 + drivers/gpu/drm/amd/amdgpu/si_ih.c | 1 + drivers/gpu/drm/amd/amdgpu/soc15.c | 1 + drivers/gpu/drm/amd/amdgpu/soc21.c | 1 + drivers/gpu/drm/amd/amdgpu/tonga_ih.c | 1 + drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c | 1 + drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 1 + drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 1 + drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 2 ++ drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vi.c | 1 + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + drivers/gpu/drm/amd/include/amd_shared.h | 2 ++ drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c | 1 + drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 1 + drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c | 1 + 64 files changed, 67 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index 34a62033a388..bf6c4a0d0525 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -638,6 +638,7 @@ static const struct amd_ip_funcs acp_ip_funcs = { .set_clockgating_state = acp_set_clockgating_state, .set_powergating_state = acp_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version acp_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index d877d96e4afe..e01c1c8e64c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -879,6 +879,7 @@ static const struct amd_ip_funcs umsch_mm_v4_0_ip_funcs = { .suspend = umsch_mm_suspend, .resume = umsch_mm_resume, .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version umsch_mm_v4_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index d1dc91009c0e..e30eecd02ae1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -659,6 +659,7 @@ static const struct amd_ip_funcs amdgpu_vkms_ip_funcs = { .set_clockgating_state = amdgpu_vkms_set_clockgating_state, .set_powergating_state = amdgpu_vkms_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version amdgpu_vkms_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 884de42553a6..cf1d5d462b67 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -2211,6 +2211,7 @@ static const struct amd_ip_funcs cik_common_ip_funcs = { .set_clockgating_state = cik_common_set_clockgating_state, .set_powergating_state = cik_common_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ip_block_version cik_common_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index 676f3f612fde..576baa9dbb0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -436,6 +436,7 @@ static const struct amd_ip_funcs cik_ih_ip_funcs = { .set_clockgating_state = cik_ih_set_clockgating_state, .set_powergating_state = cik_ih_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ih_funcs cik_ih_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 36b386e9c4f1..6948ebda0fa2 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -1229,6 +1229,7 @@ static const struct amd_ip_funcs cik_sdma_ip_funcs = { .set_clockgating_state = cik_sdma_set_clockgating_state, .set_powergating_state = cik_sdma_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index 958c84a6af7e..072643787384 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -434,6 +434,7 @@ static const struct amd_ip_funcs cz_ih_ip_funcs = { .set_clockgating_state = cz_ih_set_clockgating_state, .set_powergating_state = cz_ih_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ih_funcs cz_ih_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 7a32ca7d6fc4..b44fce44c066 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -3334,6 +3334,7 @@ static const struct amd_ip_funcs dce_v10_0_ip_funcs = { .set_clockgating_state = dce_v10_0_set_clockgating_state, .set_powergating_state = dce_v10_0_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static void diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 67c01e137fac..80b2e7f79acf 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -3465,6 +3465,7 @@ static const struct amd_ip_funcs dce_v11_0_ip_funcs = { .set_clockgating_state = dce_v11_0_set_clockgating_state, .set_powergating_state = dce_v11_0_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static void diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 209cd44bbcec..db20012600f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -3155,6 +3155,7 @@ static const struct amd_ip_funcs dce_v6_0_ip_funcs = { .set_clockgating_state = dce_v6_0_set_clockgating_state, .set_powergating_state = dce_v6_0_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static void diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index fff7f4f766b2..5b56100ec902 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -3243,6 +3243,7 @@ static const struct amd_ip_funcs dce_v8_0_ip_funcs = { .set_clockgating_state = dce_v8_0_set_clockgating_state, .set_powergating_state = dce_v8_0_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static void diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index c0cc3dd32f46..78019c2d21e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -9299,6 +9299,7 @@ static const struct amd_ip_funcs gfx_v10_0_ip_funcs = { .set_powergating_state = gfx_v10_0_set_powergating_state, .get_clockgating_state = gfx_v10_0_get_clockgating_state, .dump_ip_state = gfx_v10_ip_dump, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index e9e5b0eb536e..ad6431013c73 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -6170,6 +6170,7 @@ static const struct amd_ip_funcs gfx_v11_0_ip_funcs = { .set_powergating_state = gfx_v11_0_set_powergating_state, .get_clockgating_state = gfx_v11_0_get_clockgating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 559250c8a147..d0992ce9fb47 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -3458,6 +3458,7 @@ static const struct amd_ip_funcs gfx_v6_0_ip_funcs = { .set_clockgating_state = gfx_v6_0_set_clockgating_state, .set_powergating_state = gfx_v6_0_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 81f7ab0dc135..541dbd70d8c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -4978,6 +4978,7 @@ static const struct amd_ip_funcs gfx_v7_0_ip_funcs = { .set_clockgating_state = gfx_v7_0_set_clockgating_state, .set_powergating_state = gfx_v7_0_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 522cbd45dd46..2f0e72caee1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6879,6 +6879,7 @@ static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { .set_powergating_state = gfx_v8_0_set_powergating_state, .get_clockgating_state = gfx_v8_0_get_clockgating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index d3fc04a0281a..3c8c5abf35ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -6857,6 +6857,7 @@ static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { .set_powergating_state = gfx_v9_0_set_powergating_state, .get_clockgating_state = gfx_v9_0_get_clockgating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 6190f51bf489..0e429b7ed036 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -4017,6 +4017,7 @@ static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { .set_powergating_state = gfx_v9_4_3_set_powergating_state, .get_clockgating_state = gfx_v9_4_3_get_clockgating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 060e54b8ffff..3e38d8bfcb69 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -1116,6 +1116,7 @@ static const struct amd_ip_funcs gmc_v6_0_ip_funcs = { .set_clockgating_state = gmc_v6_0_set_clockgating_state, .set_powergating_state = gmc_v6_0_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_gmc_funcs gmc_v6_0_gmc_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 534825022ddd..85df8fc81065 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -1355,6 +1355,7 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = { .set_clockgating_state = gmc_v7_0_set_clockgating_state, .set_powergating_state = gmc_v7_0_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index aba787e1386a..fc97757e33d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1718,6 +1718,7 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = { .set_powergating_state = gmc_v8_0_set_powergating_state, .get_clockgating_state = gmc_v8_0_get_clockgating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index 2d6f969266b8..07984f7c3ae7 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -426,6 +426,7 @@ static const struct amd_ip_funcs iceland_ih_ip_funcs = { .set_clockgating_state = iceland_ih_set_clockgating_state, .set_powergating_state = iceland_ih_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ih_funcs iceland_ih_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index 77b5068b7be5..3cb64c8f7175 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -771,6 +771,7 @@ static const struct amd_ip_funcs ih_v6_0_ip_funcs = { .set_powergating_state = ih_v6_0_set_powergating_state, .get_clockgating_state = ih_v6_0_get_clockgating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ih_funcs ih_v6_0_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c index fc2c27a199c6..0fbf5fa7b0f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c @@ -776,6 +776,7 @@ static const struct amd_ip_funcs ih_v6_1_ip_funcs = { .set_powergating_state = ih_v6_1_set_powergating_state, .get_clockgating_state = ih_v6_1_get_clockgating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ih_funcs ih_v6_1_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c index 31ed5030169b..aa6235dd4f2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c @@ -750,6 +750,7 @@ static const struct amd_ip_funcs ih_v7_0_ip_funcs = { .set_powergating_state = ih_v7_0_set_powergating_state, .get_clockgating_state = ih_v7_0_get_clockgating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ih_funcs ih_v7_0_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 698c5d4b7484..ef3e42f6b841 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -760,6 +760,7 @@ static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = { .set_clockgating_state = jpeg_v2_0_set_clockgating_state, .set_powergating_state = jpeg_v2_0_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 0a9a2d58e3ee..afeaf3c64e27 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -633,6 +633,7 @@ static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = { .set_clockgating_state = jpeg_v2_5_set_clockgating_state, .set_powergating_state = jpeg_v2_5_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amd_ip_funcs jpeg_v2_6_ip_funcs = { @@ -654,6 +655,7 @@ static const struct amd_ip_funcs jpeg_v2_6_ip_funcs = { .set_clockgating_state = jpeg_v2_5_set_clockgating_state, .set_powergating_state = jpeg_v2_5_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index e03d46151ae3..1c7cf4800bf7 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -558,6 +558,7 @@ static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = { .set_clockgating_state = jpeg_v3_0_set_clockgating_state, .set_powergating_state = jpeg_v3_0_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index f142cb200552..237fe5df5a8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -720,6 +720,7 @@ static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = { .set_clockgating_state = jpeg_v4_0_set_clockgating_state, .set_powergating_state = jpeg_v4_0_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index bc3a6f16f4bf..d66af11aa66c 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -1054,6 +1054,7 @@ static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = { .set_clockgating_state = jpeg_v4_0_3_set_clockgating_state, .set_powergating_state = jpeg_v4_0_3_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index ee29c97721ec..da6bb9022b80 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -763,6 +763,7 @@ static const struct amd_ip_funcs jpeg_v4_0_5_ip_funcs = { .set_clockgating_state = jpeg_v4_0_5_set_clockgating_state, .set_powergating_state = jpeg_v4_0_5_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index f5664c92d10d..64c856bfe0cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -514,6 +514,7 @@ static const struct amd_ip_funcs jpeg_v5_0_0_ip_funcs = { .set_clockgating_state = jpeg_v5_0_0_set_clockgating_state, .set_powergating_state = jpeg_v5_0_0_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c index 4ed0429cf4f7..a626bf904926 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c @@ -1177,6 +1177,7 @@ static const struct amd_ip_funcs mes_v10_1_ip_funcs = { .suspend = mes_v10_1_suspend, .resume = mes_v10_1_resume, .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version mes_v10_1_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index bb7318c2d652..5c08ad234439 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -1405,6 +1405,7 @@ static const struct amd_ip_funcs mes_v11_0_ip_funcs = { .suspend = mes_v11_0_suspend, .resume = mes_v11_0_resume, .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version mes_v11_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index 550c5ca4ea03..b281462093f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -714,6 +714,7 @@ static const struct amd_ip_funcs navi10_ih_ip_funcs = { .set_powergating_state = navi10_ih_set_powergating_state, .get_clockgating_state = navi10_ih_get_clockgating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ih_funcs navi10_ih_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index d7d3b972392b..b5303cd3cb53 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -1132,4 +1132,5 @@ static const struct amd_ip_funcs nv_common_ip_funcs = { .set_powergating_state = nv_common_set_powergating_state, .get_clockgating_state = nv_common_get_clockgating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 0fd46ed1f67d..ac8a9b9b3e52 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -1114,6 +1114,7 @@ static const struct amd_ip_funcs sdma_v2_4_ip_funcs = { .set_clockgating_state = sdma_v2_4_set_clockgating_state, .set_powergating_state = sdma_v2_4_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index a3b8f1d807ed..b8ebdc4ae6f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -1554,6 +1554,7 @@ static const struct amd_ip_funcs sdma_v3_0_ip_funcs = { .set_powergating_state = sdma_v3_0_set_powergating_state, .get_clockgating_state = sdma_v3_0_get_clockgating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index b5b15dba9a76..85235470e872 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -2707,6 +2707,7 @@ static const struct amd_ip_funcs si_common_ip_funcs = { .set_clockgating_state = si_common_set_clockgating_state, .set_powergating_state = si_common_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ip_block_version si_common_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index fd8f28fdee1a..11db5b755832 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -709,6 +709,7 @@ static const struct amd_ip_funcs si_dma_ip_funcs = { .set_clockgating_state = si_dma_set_clockgating_state, .set_powergating_state = si_dma_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs si_dma_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 3b7427f5a6c9..5237395e4fab 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -297,6 +297,7 @@ static const struct amd_ip_funcs si_ih_ip_funcs = { .set_clockgating_state = si_ih_set_clockgating_state, .set_powergating_state = si_ih_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ih_funcs si_ih_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 6ba6c96c91c8..a5d3c66b5868 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1502,4 +1502,5 @@ static const struct amd_ip_funcs soc15_common_ip_funcs = { .set_powergating_state = soc15_common_set_powergating_state, .get_clockgating_state= soc15_common_get_clockgating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 40e7ab0716cd..27c26e42120e 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -986,4 +986,5 @@ static const struct amd_ip_funcs soc21_common_ip_funcs = { .set_powergating_state = soc21_common_set_powergating_state, .get_clockgating_state = soc21_common_get_clockgating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index 794a1f7bc2ca..24d49d813607 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -487,6 +487,7 @@ static const struct amd_ip_funcs tonga_ih_ip_funcs = { .set_clockgating_state = tonga_ih_set_clockgating_state, .set_powergating_state = tonga_ih_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ih_funcs tonga_ih_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c index 1e232ed23102..805d6662c88b 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c @@ -820,6 +820,7 @@ static const struct amd_ip_funcs uvd_v3_1_ip_funcs = { .set_clockgating_state = uvd_v3_1_set_clockgating_state, .set_powergating_state = uvd_v3_1_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version uvd_v3_1_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 48bcf41e5558..3f19c606f4de 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -770,6 +770,7 @@ static const struct amd_ip_funcs uvd_v4_2_ip_funcs = { .set_clockgating_state = uvd_v4_2_set_clockgating_state, .set_powergating_state = uvd_v4_2_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 838b7d720c52..efd903c21d48 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -878,6 +878,7 @@ static const struct amd_ip_funcs uvd_v5_0_ip_funcs = { .set_powergating_state = uvd_v5_0_set_powergating_state, .get_clockgating_state = uvd_v5_0_get_clockgating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 036378f5f53f..495de5068455 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -1546,6 +1546,7 @@ static const struct amd_ip_funcs uvd_v6_0_ip_funcs = { .set_powergating_state = uvd_v6_0_set_powergating_state, .get_clockgating_state = uvd_v6_0_get_clockgating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index 2178cf5a27b7..66fada199bda 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -627,6 +627,7 @@ static const struct amd_ip_funcs vce_v2_0_ip_funcs = { .set_clockgating_state = vce_v2_0_set_clockgating_state, .set_powergating_state = vce_v2_0_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 0f0d4b0d50cb..32517c364cf7 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -914,6 +914,7 @@ static const struct amd_ip_funcs vce_v3_0_ip_funcs = { .set_powergating_state = vce_v3_0_set_powergating_state, .get_clockgating_state = vce_v3_0_get_clockgating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 7ff5d0574454..cb253bd3a2a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -1903,6 +1903,7 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .set_clockgating_state = vcn_v1_0_set_clockgating_state, .set_powergating_state = vcn_v1_0_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 4df1b75f971a..f18fd61c435e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -2009,6 +2009,7 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { .set_clockgating_state = vcn_v2_0_set_clockgating_state, .set_powergating_state = vcn_v2_0_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index d91c3154641b..baec14bde2a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -1902,6 +1902,7 @@ static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { .set_clockgating_state = vcn_v2_5_set_clockgating_state, .set_powergating_state = vcn_v2_5_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amd_ip_funcs vcn_v2_6_ip_funcs = { @@ -1923,6 +1924,7 @@ static const struct amd_ip_funcs vcn_v2_6_ip_funcs = { .set_clockgating_state = vcn_v2_5_set_clockgating_state, .set_powergating_state = vcn_v2_5_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version vcn_v2_5_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 98b7d2d0536e..6b31cf4b8aac 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -2232,6 +2232,7 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .set_clockgating_state = vcn_v3_0_set_clockgating_state, .set_powergating_state = vcn_v3_0_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version vcn_v3_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 122fec3be96f..ac1b8ead03b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -2132,6 +2132,7 @@ static const struct amd_ip_funcs vcn_v4_0_ip_funcs = { .set_clockgating_state = vcn_v4_0_set_clockgating_state, .set_powergating_state = vcn_v4_0_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version vcn_v4_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 599b6466183e..2279d8fce03d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -1661,6 +1661,7 @@ static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = { .set_clockgating_state = vcn_v4_0_3_set_clockgating_state, .set_powergating_state = vcn_v4_0_3_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 8b1e5bc9fb69..81fb99729f37 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -1754,6 +1754,7 @@ static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = { .set_clockgating_state = vcn_v4_0_5_set_clockgating_state, .set_powergating_state = vcn_v4_0_5_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version vcn_v4_0_5_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index 6d6aa02af4f5..b9455b6efa17 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -1330,6 +1330,7 @@ static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .set_clockgating_state = vcn_v5_0_0_set_clockgating_state, .set_powergating_state = vcn_v5_0_0_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index a04aa6833fc5..d39c670f6220 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -2059,6 +2059,7 @@ static const struct amd_ip_funcs vi_common_ip_funcs = { .set_powergating_state = vi_common_set_powergating_state, .get_clockgating_state = vi_common_get_clockgating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ip_block_version vi_common_ip_block = diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index fd58d0acc47a..f174b6376168 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3122,6 +3122,7 @@ static const struct amd_ip_funcs amdgpu_dm_funcs = { .set_clockgating_state = dm_set_clockgating_state, .set_powergating_state = dm_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version dm_ip_block = { diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 9884f6c48a7d..7536c173a546 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -24,6 +24,7 @@ #define __AMD_SHARED_H__ #include +#include #define AMD_MAX_USEC_TIMEOUT 1000000 /* 1000 ms */ @@ -322,6 +323,7 @@ struct amd_ip_funcs { enum amd_powergating_state state); void (*get_clockgating_state)(void *handle, u64 *flags); void (*dump_ip_state)(void *handle); + void (*print_ip_state)(void *handle, struct drm_printer *p); }; diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c index 8c07f8c7f3ab..6bb42d04b247 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c @@ -3317,6 +3317,7 @@ static const struct amd_ip_funcs kv_dpm_ip_funcs = { .set_clockgating_state = kv_dpm_set_clockgating_state, .set_powergating_state = kv_dpm_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version kv_smu_ip_block = { diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index c312b9332326..f245fc0bc6d3 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -8061,6 +8061,7 @@ static const struct amd_ip_funcs si_dpm_ip_funcs = { .set_clockgating_state = si_dpm_set_clockgating_state, .set_powergating_state = si_dpm_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version si_smu_ip_block = diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index c63474ee17a7..5fb21a0508cd 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -303,6 +303,7 @@ static const struct amd_ip_funcs pp_ip_funcs = { .set_clockgating_state = pp_set_clockgating_state, .set_powergating_state = pp_set_powergating_state, .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version pp_smu_ip_block = -- cgit v1.2.3 From c8732c80debb276c36de395a1a8f40c33cf10830 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 16 Apr 2024 16:48:01 +0530 Subject: drm/amdgpu: add support for gfx v10 print MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support to print ip information to be used to print registers in devcoredump buffer. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 78019c2d21e4..536287ddd2ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -9267,6 +9267,21 @@ static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ } +static void gfx_v10_ip_print(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t i; + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1); + + if (!adev->gfx.ip_dump) + return; + + for (i = 0; i < reg_count; i++) + drm_printf(p, "%-50s \t 0x%08x\n", + gc_reg_list_10_1[i].reg_name, + adev->gfx.ip_dump[i]); +} + static void gfx_v10_ip_dump(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -9299,7 +9314,7 @@ static const struct amd_ip_funcs gfx_v10_0_ip_funcs = { .set_powergating_state = gfx_v10_0_set_powergating_state, .get_clockgating_state = gfx_v10_0_get_clockgating_state, .dump_ip_state = gfx_v10_ip_dump, - .print_ip_state = NULL, + .print_ip_state = gfx_v10_ip_print, }; static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { -- cgit v1.2.3 From e043a35dc244b72809cf3a1b8ff315dcb941c63a Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 16 Apr 2024 16:56:23 +0530 Subject: drm/amdgpu: dump ip state before reset for each ip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Invoke the dump_ip_state function for each ip before the asic resets and save the register values for debugging via devcoredump. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f3b7cb18fd46..f8a34db5d9e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5353,6 +5353,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, struct amdgpu_device *tmp_adev = NULL; bool need_full_reset, skip_hw_reset, vram_lost = false; int r = 0; + uint32_t i; /* Try reset handler method first */ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, @@ -5361,6 +5362,12 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) amdgpu_reset_reg_dumps(tmp_adev); + /* Trigger ip dump before we reset the asic */ + for (i = 0; i < tmp_adev->num_ip_blocks; i++) + if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state) + tmp_adev->ip_blocks[i].version->funcs->dump_ip_state( + (void *)tmp_adev); + reset_context->reset_device_list = device_list_handle; r = amdgpu_reset_perform_reset(tmp_adev, reset_context); /* If reset handler not implemented, continue; otherwise return */ -- cgit v1.2.3 From af8644121e3e76d7f8d77f7712becba303dfb8fe Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 16 Apr 2024 17:19:41 +0530 Subject: drm/amdgpu: add ip dump for each ip in devcoredump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add ip dump for each ip of the asic in the devcoredump for all the ips where a callback is registered for register dump. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c index 64fe564b8036..c1cb62683695 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c @@ -262,6 +262,20 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, drm_printf(&p, "Faulty page starting at address: 0x%016llx\n", fault_info->addr); drm_printf(&p, "Protection fault status register: 0x%x\n\n", fault_info->status); + /* dump the ip state for each ip */ + drm_printf(&p, "IP Dump\n"); + for (int i = 0; i < coredump->adev->num_ip_blocks; i++) { + if (coredump->adev->ip_blocks[i].version->funcs->print_ip_state) { + drm_printf(&p, "IP: %s\n", + coredump->adev->ip_blocks[i] + .version->funcs->name); + coredump->adev->ip_blocks[i] + .version->funcs->print_ip_state( + (void *)coredump->adev, &p); + drm_printf(&p, "\n"); + } + } + /* Add ring buffer information */ drm_printf(&p, "Ring buffer information\n"); for (int i = 0; i < coredump->adev->num_rings; i++) { -- cgit v1.2.3 From c551316e150bc0e25ec0609fb396cc37fc8e6fc9 Mon Sep 17 00:00:00 2001 From: Sathishkumar S Date: Fri, 12 Apr 2024 01:13:06 +0530 Subject: drm/amdgpu: update jpeg max decode resolution jpeg ip version v2.1 and higher supports 16kx16k resolution decode Signed-off-by: Sathishkumar S Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/soc15.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/soc21.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index b5303cd3cb53..12e54047bf79 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -110,7 +110,7 @@ static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn0[] {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; @@ -121,7 +121,7 @@ static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn1[] {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; @@ -199,7 +199,7 @@ static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index a5d3c66b5868..170f02e96717 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -143,7 +143,7 @@ static const struct amdgpu_video_codec_info rn_video_codecs_decode_array[] = {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; @@ -156,7 +156,7 @@ static const struct amdgpu_video_codecs rn_video_codecs_decode = static const struct amdgpu_video_codec_info vcn_4_0_3_video_codecs_decode_array[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 27c26e42120e..fb6797467571 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -72,7 +72,7 @@ static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn1 = { static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_vcn0[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; @@ -80,7 +80,7 @@ static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_ static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_vcn1[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; -- cgit v1.2.3 From 8e1d1905951dffe4980ed73a330b770281ebac85 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Tue, 23 Apr 2024 14:40:37 -0400 Subject: drm/amdgpu: Fix VRAM memory accounting Subtract the VRAM pinned memory when checking for available memory in amdgpu_amdkfd_reserve_mem_limit function since that memory is not available for use. Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 2131de36e3da..e4d4e55c08ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -220,7 +220,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > kfd_mem_limit.max_ttm_mem_limit) || (adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed > - vram_size - reserved_for_pt)) { + vram_size - reserved_for_pt - atomic64_read(&adev->vram_pin_size))) { ret = -ENOMEM; goto release; } -- cgit v1.2.3 From 754c366e41d237ca90708c9e1b803a7471358622 Mon Sep 17 00:00:00 2001 From: Sonny Jiang Date: Tue, 23 Apr 2024 12:52:25 -0400 Subject: drm/amdgpu: update fw_share for VCN5 kmd_fw_shared changed in VCN5 Signed-off-by: Sonny Jiang Reviewed-by: Ruijing Dong Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 5 ++++- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 10 ++++++++++ drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 14 +++++++------- 3 files changed, 21 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index bb85772b1374..677eb141554e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -185,7 +185,10 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); - if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0)) { + if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(5, 0, 0)) { + fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)); + log_offset = offsetof(struct amdgpu_vcn5_fw_shared, fw_log); + } else if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0)) { fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)); log_offset = offsetof(struct amdgpu_vcn4_fw_shared, fw_log); } else { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index a418393d89ec..9f06def236fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -454,6 +454,16 @@ struct amdgpu_vcn_rb_metadata { uint8_t pad[26]; }; +struct amdgpu_vcn5_fw_shared { + uint32_t present_flag_0; + uint8_t pad[12]; + struct amdgpu_fw_shared_unified_queue_struct sq; + uint8_t pad1[8]; + struct amdgpu_fw_shared_fw_logging fw_log; + struct amdgpu_fw_shared_rb_setup rb_setup; + uint8_t pad2[4]; +}; + #define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80 #define VCN_BLOCK_DECODE_DISABLE_MASK 0x40 #define VCN_BLOCK_QUEUE_DISABLE_MASK 0xC0 diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index b9455b6efa17..851975b5ce29 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -95,7 +95,7 @@ static int vcn_v5_0_0_sw_init(void *handle) return r; for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + volatile struct amdgpu_vcn5_fw_shared *fw_shared; if (adev->vcn.harvest_config & (1 << i)) continue; @@ -154,7 +154,7 @@ static int vcn_v5_0_0_sw_fini(void *handle) if (drm_dev_enter(adev_to_drm(adev), &idx)) { for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + volatile struct amdgpu_vcn5_fw_shared *fw_shared; if (adev->vcn.harvest_config & (1 << i)) continue; @@ -335,7 +335,7 @@ static void vcn_v5_0_0_mc_resume(struct amdgpu_device *adev, int inst) upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr)); WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_OFFSET0, 0); WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_SIZE0, - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared))); + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared))); } /** @@ -439,7 +439,7 @@ static void vcn_v5_0_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i VCN, inst_idx, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( VCN, inst_idx, regUVD_VCPU_NONCACHE_SIZE0), - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)), 0, indirect); + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)), 0, indirect); /* VCN global tiling registers */ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( @@ -616,7 +616,7 @@ static void vcn_v5_0_0_enable_clock_gating(struct amdgpu_device *adev, int inst) */ static int vcn_v5_0_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + volatile struct amdgpu_vcn5_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t tmp; @@ -713,7 +713,7 @@ static int vcn_v5_0_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b */ static int vcn_v5_0_0_start(struct amdgpu_device *adev) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + volatile struct amdgpu_vcn5_fw_shared *fw_shared; struct amdgpu_ring *ring; uint32_t tmp; int i, j, k, r; @@ -894,7 +894,7 @@ static void vcn_v5_0_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) */ static int vcn_v5_0_0_stop(struct amdgpu_device *adev) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + volatile struct amdgpu_vcn5_fw_shared *fw_shared; uint32_t tmp; int i, r = 0; -- cgit v1.2.3 From 497d7cee24572db59cbfc4875d0c9270cee01e7f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 19 Apr 2024 18:03:32 -0400 Subject: drm/amdgpu: add a spinlock to wb allocation As we use wb slots more dynamically, we need to lock access to avoid racing on allocation or free. Reviewed-by: Shaoyun.liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ++++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index cac0ca64367b..f87d53e183c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -502,6 +502,7 @@ struct amdgpu_wb { uint64_t gpu_addr; u32 num_wb; /* Number of wb slots actually reserved for amdgpu. */ unsigned long used[DIV_ROUND_UP(AMDGPU_MAX_WB, BITS_PER_LONG)]; + spinlock_t lock; }; int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f8a34db5d9e3..869256394136 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1482,13 +1482,17 @@ static int amdgpu_device_wb_init(struct amdgpu_device *adev) */ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb) { - unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb); + unsigned long flags, offset; + spin_lock_irqsave(&adev->wb.lock, flags); + offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb); if (offset < adev->wb.num_wb) { __set_bit(offset, adev->wb.used); + spin_unlock_irqrestore(&adev->wb.lock, flags); *wb = offset << 3; /* convert to dw offset */ return 0; } else { + spin_unlock_irqrestore(&adev->wb.lock, flags); return -EINVAL; } } @@ -1503,9 +1507,13 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb) */ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb) { + unsigned long flags; + wb >>= 3; + spin_lock_irqsave(&adev->wb.lock, flags); if (wb < adev->wb.num_wb) __clear_bit(wb, adev->wb.used); + spin_unlock_irqrestore(&adev->wb.lock, flags); } /** @@ -4061,6 +4069,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, spin_lock_init(&adev->se_cac_idx_lock); spin_lock_init(&adev->audio_endpt_idx_lock); spin_lock_init(&adev->mm_stats.lock); + spin_lock_init(&adev->wb.lock); INIT_LIST_HEAD(&adev->shadow_list); mutex_init(&adev->shadow_list_lock); -- cgit v1.2.3 From eef016ba89862ba8317916269f9f369f317cd264 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 18 Apr 2024 23:30:46 -0400 Subject: drm/amdgpu/mes11: Use a separate fence per transaction We can't use a shared fence location because each transaction should be considered independently. Reviewed-by: Shaoyun.liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 12 ++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 4 ++++ drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 21 +++++++++++++++++---- 3 files changed, 33 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 310c8f0c21da..21d1d508ee10 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -32,6 +32,18 @@ #define AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS 1024 #define AMDGPU_ONE_DOORBELL_SIZE 8 +signed long amdgpu_mes_fence_wait_polling(u64 *fence, + u64 wait_seq, + signed long timeout) +{ + + while ((s64)(wait_seq - *fence) > 0 && timeout > 0) { + udelay(2); + timeout -= 2; + } + return timeout > 0 ? timeout : 0; +} + int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev) { return roundup(AMDGPU_ONE_DOORBELL_SIZE * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 6b3e1844eac5..b99a2b3cffe3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -340,6 +340,10 @@ struct amdgpu_mes_funcs { #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev)) #define amdgpu_mes_kiq_hw_fini(adev) (adev)->mes.kiq_hw_fini((adev)) +signed long amdgpu_mes_fence_wait_polling(u64 *fence, + u64 wait_seq, + signed long timeout); + int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs); int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 5c08ad234439..0d1407f25005 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -163,6 +163,10 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, unsigned long flags; signed long timeout = 3000000; /* 3000 ms */ const char *op_str, *misc_op_str; + u32 fence_offset; + u64 fence_gpu_addr; + u64 *fence_ptr; + int ret; if (x_pkt->header.opcode >= MES_SCH_API_MAX) return -EINVAL; @@ -175,15 +179,24 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, } BUG_ON(size % 4 != 0); + ret = amdgpu_device_wb_get(adev, &fence_offset); + if (ret) + return ret; + fence_gpu_addr = + adev->wb.gpu_addr + (fence_offset * 4); + fence_ptr = (u64 *)&adev->wb.wb[fence_offset]; + *fence_ptr = 0; + spin_lock_irqsave(&mes->ring_lock, flags); if (amdgpu_ring_alloc(ring, ndw)) { spin_unlock_irqrestore(&mes->ring_lock, flags); + amdgpu_device_wb_free(adev, fence_offset); return -ENOMEM; } api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); - api_status->api_completion_fence_addr = mes->ring.fence_drv.gpu_addr; - api_status->api_completion_fence_value = ++mes->ring.fence_drv.sync_seq; + api_status->api_completion_fence_addr = fence_gpu_addr; + api_status->api_completion_fence_value = 1; amdgpu_ring_write_multiple(ring, pkt, ndw); amdgpu_ring_commit(ring); @@ -199,8 +212,8 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, else dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode); - r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, - timeout); + r = amdgpu_mes_fence_wait_polling(fence_ptr, (u64)1, timeout); + amdgpu_device_wb_free(adev, fence_offset); if (r < 1) { if (misc_op_str) -- cgit v1.2.3 From 88a9a467c548d0b3c7761b4fd54a68e70f9c0944 Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Wed, 24 Apr 2024 17:10:46 +0800 Subject: drm/amdgpu: Using uninitialized value *size when calling amdgpu_vce_cs_reloc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initialize the size before calling amdgpu_vce_cs_reloc, such as case 0x03000001. V2: To really improve the handling we would actually need to have a separate value of 0xffffffff.(Christian) Signed-off-by: Jesse Zhang Suggested-by: Christian König Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 59acf424a078..968ca2c84ef7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -743,7 +743,8 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t created = 0; uint32_t allocated = 0; uint32_t tmp, handle = 0; - uint32_t *size = &tmp; + uint32_t dummy = 0xffffffff; + uint32_t *size = &dummy; unsigned int idx; int i, r = 0; -- cgit v1.2.3 From 98b5bc878d4b522c035309c8f6d3247d54050369 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Tue, 19 Mar 2024 09:57:58 +0800 Subject: drm/amdgpu: add message fifo to handle RAS poison events Add message fifo to handle RAS poison events. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 35 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 18 +++++++++++++++++ 2 files changed, 53 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index fe4fdb079bd1..6641f27cb35c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2703,6 +2703,40 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev, } } +int amdgpu_ras_put_poison_req(struct amdgpu_device *adev, + enum amdgpu_ras_block block, uint16_t pasid, + pasid_notify pasid_fn, void *data, uint32_t reset) +{ + int ret = 0; + struct ras_poison_msg poison_msg; + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + memset(&poison_msg, 0, sizeof(poison_msg)); + poison_msg.block = block; + poison_msg.pasid = pasid; + poison_msg.reset = reset; + poison_msg.pasid_fn = pasid_fn; + poison_msg.data = data; + + ret = kfifo_put(&con->poison_fifo, poison_msg); + if (!ret) { + dev_err(adev->dev, "Poison message fifo is full!\n"); + return -ENOSPC; + } + + return 0; +} + +#ifdef PRE_DEFINED_FUNCTION +static int amdgpu_ras_get_poison_req(struct amdgpu_device *adev, + struct ras_poison_msg *poison_msg) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + return kfifo_get(&con->poison_fifo, poison_msg); +} +#endif + static int amdgpu_ras_page_retirement_thread(void *param) { struct amdgpu_device *adev = (struct amdgpu_device *)param; @@ -2793,6 +2827,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) } mutex_init(&con->page_rsv_lock); + INIT_KFIFO(con->poison_fifo); mutex_init(&con->page_retirement_lock); init_waitqueue_head(&con->page_retirement_wq); atomic_set(&con->page_retirement_req_cnt, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index ab5bf573378e..2b15996f1ede 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -26,6 +26,7 @@ #include #include +#include #include "ta_ras_if.h" #include "amdgpu_ras_eeprom.h" #include "amdgpu_smuio.h" @@ -442,6 +443,17 @@ struct ras_query_context { u64 event_id; }; +typedef int (*pasid_notify)(struct amdgpu_device *adev, + uint16_t pasid, void *data); + +struct ras_poison_msg { + enum amdgpu_ras_block block; + uint16_t pasid; + uint32_t reset; + pasid_notify pasid_fn; + void *data; +}; + struct amdgpu_ras { /* ras infrastructure */ /* for ras itself. */ @@ -501,6 +513,8 @@ struct amdgpu_ras { struct mutex page_retirement_lock; atomic_t page_retirement_req_cnt; struct mutex page_rsv_lock; + DECLARE_KFIFO(poison_fifo, struct ras_poison_msg, 128); + /* Fatal error detected flag */ atomic_t fed; @@ -913,4 +927,8 @@ u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn); +int amdgpu_ras_put_poison_req(struct amdgpu_device *adev, + enum amdgpu_ras_block block, uint16_t pasid, + pasid_notify pasid_fn, void *data, uint32_t reset); + #endif -- cgit v1.2.3 From f493dd64ee6680dc5bb46d7c800346eadb18049a Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 22 Apr 2024 17:37:36 +0800 Subject: drm/amdgpu: prepare for logging ecc errors Prepare for logging ecc errors. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 32 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 23 +++++++++++++++++++++++ 2 files changed, 55 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 6641f27cb35c..fb2f88005cf3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2737,6 +2737,35 @@ static int amdgpu_ras_get_poison_req(struct amdgpu_device *adev, } #endif +static void amdgpu_ras_ecc_log_init(struct ras_ecc_log_info *ecc_log) +{ + mutex_init(&ecc_log->lock); + + /* Set any value as siphash key */ + memset(&ecc_log->ecc_key, 0xad, sizeof(ecc_log->ecc_key)); + + INIT_RADIX_TREE(&ecc_log->de_page_tree, GFP_KERNEL); + ecc_log->de_updated = false; +} + +static void amdgpu_ras_ecc_log_fini(struct ras_ecc_log_info *ecc_log) +{ + struct radix_tree_iter iter; + void __rcu **slot; + struct ras_ecc_err *ecc_err; + + mutex_lock(&ecc_log->lock); + radix_tree_for_each_slot(slot, &ecc_log->de_page_tree, &iter, 0) { + ecc_err = radix_tree_deref_slot(slot); + kfree(ecc_err->err_pages.pfn); + kfree(ecc_err); + radix_tree_iter_delete(&ecc_log->de_page_tree, &iter, slot); + } + mutex_unlock(&ecc_log->lock); + + mutex_destroy(&ecc_log->lock); + ecc_log->de_updated = false; +} static int amdgpu_ras_page_retirement_thread(void *param) { struct amdgpu_device *adev = (struct amdgpu_device *)param; @@ -2838,6 +2867,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) dev_warn(adev->dev, "Failed to create umc_page_retirement thread!!!\n"); } + amdgpu_ras_ecc_log_init(&con->umc_ecc_log); #ifdef CONFIG_X86_MCE_AMD if ((adev->asic_type == CHIP_ALDEBARAN) && (adev->gmc.xgmi.connected_to_cpu)) @@ -2882,6 +2912,8 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) cancel_work_sync(&con->recovery_work); + amdgpu_ras_ecc_log_fini(&con->umc_ecc_log); + mutex_lock(&con->recovery_lock); con->eh_data = NULL; kfree(data->bps); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 2b15996f1ede..634654cf2634 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -27,6 +27,8 @@ #include #include #include +#include +#include #include "ta_ras_if.h" #include "amdgpu_ras_eeprom.h" #include "amdgpu_smuio.h" @@ -454,6 +456,26 @@ struct ras_poison_msg { void *data; }; +struct ras_err_pages { + uint32_t count; + uint64_t *pfn; +}; + +struct ras_ecc_err { + u64 hash_index; + uint64_t status; + uint64_t ipid; + uint64_t addr; + struct ras_err_pages err_pages; +}; + +struct ras_ecc_log_info { + struct mutex lock; + siphash_key_t ecc_key; + struct radix_tree_root de_page_tree; + bool de_updated; +}; + struct amdgpu_ras { /* ras infrastructure */ /* for ras itself. */ @@ -514,6 +536,7 @@ struct amdgpu_ras { atomic_t page_retirement_req_cnt; struct mutex page_rsv_lock; DECLARE_KFIFO(poison_fifo, struct ras_poison_msg, 128); + struct ras_ecc_log_info umc_ecc_log; /* Fatal error detected flag */ atomic_t fed; -- cgit v1.2.3 From a734adfbcdb0e8c382fc41d3fe8b7d194c6535f6 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 11 Mar 2024 17:02:40 +0800 Subject: drm/amdgpu: add poison creation handler Add poison creation handler. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 76 ++++++++++++++++++++++++++++++--- 1 file changed, 69 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index fb2f88005cf3..66186e28ab92 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2076,6 +2076,17 @@ static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj { dev_info(obj->adev->dev, "Poison is created\n"); + + if (amdgpu_ip_version(obj->adev, UMC_HWIP, 0) >= IP_VERSION(12, 0, 0)) { + struct amdgpu_ras *con = amdgpu_ras_get_context(obj->adev); + + amdgpu_ras_put_poison_req(obj->adev, + AMDGPU_RAS_BLOCK__UMC, 0, NULL, NULL, false); + + atomic_inc(&con->page_retirement_req_cnt); + + wake_up(&con->page_retirement_wq); + } } static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj, @@ -2727,7 +2738,6 @@ int amdgpu_ras_put_poison_req(struct amdgpu_device *adev, return 0; } -#ifdef PRE_DEFINED_FUNCTION static int amdgpu_ras_get_poison_req(struct amdgpu_device *adev, struct ras_poison_msg *poison_msg) { @@ -2735,7 +2745,6 @@ static int amdgpu_ras_get_poison_req(struct amdgpu_device *adev, return kfifo_get(&con->poison_fifo, poison_msg); } -#endif static void amdgpu_ras_ecc_log_init(struct ras_ecc_log_info *ecc_log) { @@ -2766,10 +2775,54 @@ static void amdgpu_ras_ecc_log_fini(struct ras_ecc_log_info *ecc_log) mutex_destroy(&ecc_log->lock); ecc_log->de_updated = false; } + +static int amdgpu_ras_query_ecc_status(struct amdgpu_device *adev, + enum amdgpu_ras_block ras_block, uint32_t timeout_ms) +{ + int ret = 0; + struct ras_ecc_log_info *ecc_log; + struct ras_query_if info; + uint32_t timeout = timeout_ms; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + memset(&info, 0, sizeof(info)); + info.head.block = ras_block; + + ecc_log = &ras->umc_ecc_log; + ecc_log->de_updated = false; + do { + ret = amdgpu_ras_query_error_status(adev, &info); + if (ret) { + dev_err(adev->dev, "Failed to query ras error! ret:%d\n", ret); + return ret; + } + + if (timeout && !ecc_log->de_updated) { + msleep(1); + timeout--; + } + } while (timeout && !ecc_log->de_updated); + + if (timeout_ms && !timeout) { + dev_warn(adev->dev, "Can't find deferred error\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static void amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev, + uint32_t timeout) +{ + amdgpu_ras_query_ecc_status(adev, AMDGPU_RAS_BLOCK__UMC, timeout); +} + static int amdgpu_ras_page_retirement_thread(void *param) { struct amdgpu_device *adev = (struct amdgpu_device *)param; struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_poison_msg poison_msg; + enum amdgpu_ras_block ras_block; while (!kthread_should_stop()) { @@ -2780,13 +2833,22 @@ static int amdgpu_ras_page_retirement_thread(void *param) if (kthread_should_stop()) break; - dev_info(adev->dev, "Start processing page retirement. request:%d\n", - atomic_read(&con->page_retirement_req_cnt)); - atomic_dec(&con->page_retirement_req_cnt); - amdgpu_umc_bad_page_polling_timeout(adev, - 0, MAX_UMC_POISON_POLLING_TIME_ASYNC); + if (!amdgpu_ras_get_poison_req(adev, &poison_msg)) + continue; + + ras_block = poison_msg.block; + + dev_info(adev->dev, "Start processing ras block %s(%d)\n", + ras_block_str(ras_block), ras_block); + + if (ras_block == AMDGPU_RAS_BLOCK__UMC) + amdgpu_ras_poison_creation_handler(adev, + MAX_UMC_POISON_POLLING_TIME_ASYNC); + else + amdgpu_umc_bad_page_polling_timeout(adev, + false, MAX_UMC_POISON_POLLING_TIME_ASYNC); } return 0; -- cgit v1.2.3 From 95b4063de4f418135984b33528c44eeb4f9f4baa Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Tue, 19 Mar 2024 10:14:16 +0800 Subject: drm/amdgpu: add interface to update umc v12_0 ecc status Add interface to update umc v12_0 ecc status. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 9 ++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 6 ++++++ drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 24 ++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/umc_v12_0.h | 3 +++ .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 5 +++++ 6 files changed, 49 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 66186e28ab92..63b5723e26ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -4215,6 +4215,8 @@ void amdgpu_ras_add_mca_err_addr(struct ras_err_info *err_info, struct ras_err_a { struct ras_err_addr *mca_err_addr; + /* This function will be retired. */ + return; mca_err_addr = kzalloc(sizeof(*mca_err_addr), GFP_KERNEL); if (!mca_err_addr) return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index f486510fc94c..7006a57277ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -437,3 +437,12 @@ int amdgpu_umc_loop_channels(struct amdgpu_device *adev, return 0; } + +int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev, + uint64_t status, uint64_t ipid, uint64_t addr) +{ + if (adev->umc.ras->update_ecc_status) + return adev->umc.ras->update_ecc_status(adev, + status, ipid, addr); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 563b0249247e..4f3834fa10a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -66,6 +66,8 @@ struct amdgpu_umc_ras { void *ras_error_status); bool (*check_ecc_err_status)(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, void *ras_error_status); + int (*update_ecc_status)(struct amdgpu_device *adev, + uint64_t status, uint64_t ipid, uint64_t addr); }; struct amdgpu_umc_funcs { @@ -122,4 +124,8 @@ int amdgpu_umc_loop_channels(struct amdgpu_device *adev, int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev, uint32_t reset, uint32_t timeout_ms); + +int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev, + uint64_t status, uint64_t ipid, uint64_t addr); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index a0122b22eda4..81435533c4a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -479,6 +479,29 @@ static int umc_v12_0_ras_late_init(struct amdgpu_device *adev, struct ras_common return 0; } +static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, + uint64_t status, uint64_t ipid, uint64_t addr) +{ + uint16_t hwid, mcatype; + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID); + mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType); + + if ((hwid != MCA_UMC_HWID_V12_0) || (mcatype != MCA_UMC_MCATYPE_V12_0)) + return 0; + + if (!status) + return 0; + + if (!umc_v12_0_is_deferred_error(adev, status)) + return 0; + + con->umc_ecc_log.de_updated = true; + + return 0; +} + struct amdgpu_umc_ras umc_v12_0_ras = { .ras_block = { .hw_ops = &umc_v12_0_ras_hw_ops, @@ -489,5 +512,6 @@ struct amdgpu_umc_ras umc_v12_0_ras = { .ecc_info_query_ras_error_count = umc_v12_0_ecc_info_query_ras_error_count, .ecc_info_query_ras_error_address = umc_v12_0_ecc_info_query_ras_error_address, .check_ecc_err_status = umc_v12_0_check_ecc_err_status, + .update_ecc_status = umc_v12_0_update_ecc_status, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h index 1d5f44dcffdd..5c2d7e127608 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h @@ -62,6 +62,9 @@ /* row bits in SOC physical address */ #define UMC_V12_0_PA_R13_BIT 35 +#define MCA_UMC_HWID_V12_0 0x96 +#define MCA_UMC_MCATYPE_V12_0 0x0 + #define MCA_IPID_LO_2_UMC_CH(_ipid_lo) (((((_ipid_lo) >> 20) & 0x1) * 4) + \ (((_ipid_lo) >> 12) & 0xF)) #define MCA_IPID_LO_2_UMC_INST(_ipid_lo) (((_ipid_lo) >> 21) & 0x7) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 7a9f85dc06b5..4d3eca2fc3f1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -2716,6 +2716,11 @@ static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct umc_v12_0_is_correctable_error(adev, status0)) *count = (ext_error_code == 0) ? odecc_err_cnt : 1; + amdgpu_umc_update_ecc_status(adev, + entry->regs[MCA_REG_IDX_STATUS], + entry->regs[MCA_REG_IDX_IPID], + entry->regs[MCA_REG_IDX_ADDR]); + return 0; } -- cgit v1.2.3 From b2aa6b108dd3bf081f0848f07ba74ad73ec635be Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 11 Mar 2024 17:29:26 +0800 Subject: drm/amdgpu: umc v12_0 converts error address Umc v12_0 converts error address. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 94 +++++++++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/umc_v12_0.h | 12 +++++ 2 files changed, 105 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 81435533c4a7..085dcfe16b5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -222,6 +222,66 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, } } +static int umc_v12_0_convert_err_addr(struct amdgpu_device *adev, + struct ta_ras_query_address_input *addr_in, + uint64_t *pfns, int len) +{ + uint32_t col, row, row_xor, bank, channel_index; + uint64_t soc_pa, retired_page, column, err_addr; + struct ta_ras_query_address_output addr_out; + uint32_t pos = 0; + + err_addr = addr_in->ma.err_addr; + addr_in->addr_type = TA_RAS_MCA_TO_PA; + if (psp_ras_query_address(&adev->psp, addr_in, &addr_out)) { + dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx", + err_addr); + return 0; + } + + soc_pa = addr_out.pa.pa; + bank = addr_out.pa.bank; + channel_index = addr_out.pa.channel_idx; + + col = (err_addr >> 1) & 0x1fULL; + row = (err_addr >> 10) & 0x3fffULL; + row_xor = row ^ (0x1ULL << 13); + /* clear [C3 C2] in soc physical address */ + soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT); + /* clear [C4] in soc physical address */ + soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT); + + /* loop for all possibilities of [C4 C3 C2] */ + for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { + retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); + retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT); + + if (pos >= len) + return 0; + pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT; + + /* include column bit 0 and 1 */ + col &= 0x3; + col |= (column << 2); + dev_info(adev->dev, + "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", + retired_page, row, col, bank, channel_index); + + /* shift R13 bit */ + retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); + + if (pos >= len) + return 0; + pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT; + + dev_info(adev->dev, + "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", + retired_page, row_xor, col, bank, channel_index); + } + + return pos; +} + static int umc_v12_0_query_error_address(struct amdgpu_device *adev, uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst, void *data) @@ -482,8 +542,12 @@ static int umc_v12_0_ras_late_init(struct amdgpu_device *adev, struct ras_common static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, uint64_t status, uint64_t ipid, uint64_t addr) { - uint16_t hwid, mcatype; struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + uint16_t hwid, mcatype; + struct ta_ras_query_address_input addr_in; + uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL]; + uint64_t err_addr; + int count; hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID); mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType); @@ -497,6 +561,34 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, if (!umc_v12_0_is_deferred_error(adev, status)) return 0; + err_addr = REG_GET_FIELD(addr, + MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + + dev_info(adev->dev, + "UMC:IPID:0x%llx, socket:%llu, aid:%llu, inst:%llu, ch:%llu, err_addr:0x%llx\n", + ipid, + MCA_IPID_2_SOCKET_ID(ipid), + MCA_IPID_2_DIE_ID(ipid), + MCA_IPID_2_UMC_INST(ipid), + MCA_IPID_2_UMC_CH(ipid), + err_addr); + + memset(page_pfn, 0, sizeof(page_pfn)); + + memset(&addr_in, 0, sizeof(addr_in)); + addr_in.ma.err_addr = err_addr; + addr_in.ma.ch_inst = MCA_IPID_2_UMC_CH(ipid); + addr_in.ma.umc_inst = MCA_IPID_2_UMC_INST(ipid); + addr_in.ma.node_inst = MCA_IPID_2_DIE_ID(ipid); + addr_in.ma.socket_id = MCA_IPID_2_SOCKET_ID(ipid); + + count = umc_v12_0_convert_err_addr(adev, + &addr_in, page_pfn, ARRAY_SIZE(page_pfn)); + if (count <= 0) { + dev_warn(adev->dev, "Fail to convert error address! count:%d\n", count); + return 0; + } + con->umc_ecc_log.de_updated = true; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h index 5c2d7e127608..b4974793850b 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h @@ -69,6 +69,18 @@ (((_ipid_lo) >> 12) & 0xF)) #define MCA_IPID_LO_2_UMC_INST(_ipid_lo) (((_ipid_lo) >> 21) & 0x7) +#define MCA_IPID_2_DIE_ID(ipid) ((REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi) >> 2) & 0x03) + +#define MCA_IPID_2_UMC_CH(ipid) \ + (MCA_IPID_LO_2_UMC_CH(REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo))) + +#define MCA_IPID_2_UMC_INST(ipid) \ + (MCA_IPID_LO_2_UMC_INST(REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo))) + +#define MCA_IPID_2_SOCKET_ID(ipid) \ + (((REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo) & 0x1) << 2) | \ + (REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi) & 0x03)) + bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status); bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); -- cgit v1.2.3 From f27defca68824e8e97218b8816249f258d3d5d32 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 18 Mar 2024 11:48:07 +0800 Subject: drm/amdgpu: umc v12_0 logs ecc errors 1. umc v12_0 logs ecc errors. 2. Reserve newly detected ecc error pages. 3. Add tag for bad pages, so that they can be retired later. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 67 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 7 +++- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 41 +++++++++++++++++++- 3 files changed, 113 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 7006a57277ef..0f2d765c4e2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -21,10 +21,13 @@ * */ +#include #include "amdgpu.h" #include "umc_v6_7.h" #define MAX_UMC_POISON_POLLING_TIME_SYNC 20 //ms +#define MAX_UMC_HASH_STRING_SIZE 256 + static int amdgpu_umc_convert_error_address(struct amdgpu_device *adev, struct ras_err_data *err_data, uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst) @@ -446,3 +449,67 @@ int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev, status, ipid, addr); return 0; } + +static int amdgpu_umc_uint64_cmp(const void *a, const void *b) +{ + uint64_t *addr_a = (uint64_t *)a; + uint64_t *addr_b = (uint64_t *)b; + + if (*addr_a > *addr_b) + return 1; + else if (*addr_a < *addr_b) + return -1; + else + return 0; +} + +/* Use string hash to avoid logging the same bad pages repeatedly */ +int amdgpu_umc_build_pages_hash(struct amdgpu_device *adev, + uint64_t *pfns, int len, uint64_t *val) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + char buf[MAX_UMC_HASH_STRING_SIZE] = {0}; + int offset = 0, i = 0; + uint64_t hash_val; + + if (!pfns || !len) + return -EINVAL; + + sort(pfns, len, sizeof(uint64_t), amdgpu_umc_uint64_cmp, NULL); + + for (i = 0; i < len; i++) + offset += snprintf(&buf[offset], sizeof(buf) - offset, "%llx", pfns[i]); + + hash_val = siphash(buf, offset, &con->umc_ecc_log.ecc_key); + + *val = hash_val; + + return 0; +} + +int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev, + struct radix_tree_root *ecc_tree, struct ras_ecc_err *ecc_err) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_ecc_log_info *ecc_log; + int ret; + + ecc_log = &con->umc_ecc_log; + + mutex_lock(&ecc_log->lock); + ret = radix_tree_insert(ecc_tree, ecc_err->hash_index, ecc_err); + if (!ret) { + struct ras_err_pages *err_pages = &ecc_err->err_pages; + int i; + + /* Reserve memory */ + for (i = 0; i < err_pages->count; i++) + amdgpu_ras_reserve_page(adev, err_pages->pfn[i]); + + radix_tree_tag_set(ecc_tree, + ecc_err->hash_index, UMC_ECC_NEW_DETECTED_TAG); + } + mutex_unlock(&ecc_log->lock); + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 4f3834fa10a8..c83d24097c5c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -52,6 +52,8 @@ #define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) \ LOOP_UMC_NODE_INST((node_inst)) LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst)) +/* Page retirement tag */ +#define UMC_ECC_NEW_DETECTED_TAG 0x1 typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst, void *data); @@ -127,5 +129,8 @@ int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev, int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev, uint64_t status, uint64_t ipid, uint64_t addr); - +int amdgpu_umc_build_pages_hash(struct amdgpu_device *adev, + uint64_t *pfns, int len, uint64_t *val); +int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev, + struct radix_tree_root *ecc_tree, struct ras_ecc_err *ecc_err); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 085dcfe16b5e..6c2b61ef5b57 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -546,8 +546,10 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, uint16_t hwid, mcatype; struct ta_ras_query_address_input addr_in; uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL]; - uint64_t err_addr; + uint64_t err_addr, hash_val = 0; + struct ras_ecc_err *ecc_err; int count; + int ret; hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID); mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType); @@ -589,6 +591,43 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, return 0; } + ret = amdgpu_umc_build_pages_hash(adev, + page_pfn, count, &hash_val); + if (ret) { + dev_err(adev->dev, "Fail to build error pages hash\n"); + return ret; + } + + ecc_err = kzalloc(sizeof(*ecc_err), GFP_KERNEL); + if (!ecc_err) + return -ENOMEM; + + ecc_err->err_pages.pfn = kcalloc(count, sizeof(*ecc_err->err_pages.pfn), GFP_KERNEL); + if (!ecc_err->err_pages.pfn) { + kfree(ecc_err); + return -ENOMEM; + } + + memcpy(ecc_err->err_pages.pfn, page_pfn, count * sizeof(*ecc_err->err_pages.pfn)); + ecc_err->err_pages.count = count; + + ecc_err->hash_index = hash_val; + ecc_err->status = status; + ecc_err->ipid = ipid; + ecc_err->addr = addr; + + ret = amdgpu_umc_logs_ecc_err(adev, &con->umc_ecc_log.de_page_tree, ecc_err); + if (ret) { + if (ret == -EEXIST) + con->umc_ecc_log.de_updated = true; + else + dev_err(adev->dev, "Fail to log ecc error! ret:%d\n", ret); + + kfree(ecc_err->err_pages.pfn); + kfree(ecc_err); + return ret; + } + con->umc_ecc_log.de_updated = true; return 0; -- cgit v1.2.3 From 2cf8e50ec381e6a6be3835a421f279d88fcb5ba4 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 22 Apr 2024 17:38:54 +0800 Subject: drm/amdgpu: Add delay work to retire bad pages Add delay work to retire bad pages. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 36 ++++++++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 3 +++ 4 files changed, 40 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 63b5723e26ea..72daa51f8beb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -122,6 +122,8 @@ const char *get_ras_block_str(struct ras_common_if *ras_block) #define MAX_UMC_POISON_POLLING_TIME_ASYNC 100 //ms +#define AMDGPU_RAS_RETIRE_PAGE_INTERVAL 100 //ms + enum amdgpu_ras_retire_page_reservation { AMDGPU_RAS_RETIRE_PAGE_RESERVED, AMDGPU_RAS_RETIRE_PAGE_PENDING, @@ -2776,6 +2778,30 @@ static void amdgpu_ras_ecc_log_fini(struct ras_ecc_log_info *ecc_log) ecc_log->de_updated = false; } +static void amdgpu_ras_do_page_retirement(struct work_struct *work) +{ + struct amdgpu_ras *con = container_of(work, struct amdgpu_ras, + page_retirement_dwork.work); + struct amdgpu_device *adev = con->adev; + struct ras_err_data err_data; + + if (amdgpu_in_reset(adev) || atomic_read(&con->in_recovery)) + return; + + amdgpu_ras_error_data_init(&err_data); + + amdgpu_umc_handle_bad_pages(adev, &err_data); + + amdgpu_ras_error_data_fini(&err_data); + + mutex_lock(&con->umc_ecc_log.lock); + if (radix_tree_tagged(&con->umc_ecc_log.de_page_tree, + UMC_ECC_NEW_DETECTED_TAG)) + schedule_delayed_work(&con->page_retirement_dwork, + msecs_to_jiffies(AMDGPU_RAS_RETIRE_PAGE_INTERVAL)); + mutex_unlock(&con->umc_ecc_log.lock); +} + static int amdgpu_ras_query_ecc_status(struct amdgpu_device *adev, enum amdgpu_ras_block ras_block, uint32_t timeout_ms) { @@ -2814,7 +2840,12 @@ static int amdgpu_ras_query_ecc_status(struct amdgpu_device *adev, static void amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev, uint32_t timeout) { - amdgpu_ras_query_ecc_status(adev, AMDGPU_RAS_BLOCK__UMC, timeout); + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + int ret; + + ret = amdgpu_ras_query_ecc_status(adev, AMDGPU_RAS_BLOCK__UMC, timeout); + if (!ret) + schedule_delayed_work(&con->page_retirement_dwork, 0); } static int amdgpu_ras_page_retirement_thread(void *param) @@ -2929,6 +2960,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) dev_warn(adev->dev, "Failed to create umc_page_retirement thread!!!\n"); } + INIT_DELAYED_WORK(&con->page_retirement_dwork, amdgpu_ras_do_page_retirement); amdgpu_ras_ecc_log_init(&con->umc_ecc_log); #ifdef CONFIG_X86_MCE_AMD if ((adev->asic_type == CHIP_ALDEBARAN) && @@ -2974,6 +3006,8 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) cancel_work_sync(&con->recovery_work); + cancel_delayed_work_sync(&con->page_retirement_dwork); + amdgpu_ras_ecc_log_fini(&con->umc_ecc_log); mutex_lock(&con->recovery_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 634654cf2634..cb5a0f31d201 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -537,6 +537,7 @@ struct amdgpu_ras { struct mutex page_rsv_lock; DECLARE_KFIFO(poison_fifo, struct ras_poison_msg, 128); struct ras_ecc_log_info umc_ecc_log; + struct delayed_work page_retirement_dwork; /* Fatal error detected flag */ atomic_t fed; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 0f2d765c4e2d..2bd88218c20e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -89,7 +89,7 @@ out_fini_err_data: return ret; } -static void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, +void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, void *ras_error_status) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index c83d24097c5c..2d08d076f7c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -133,4 +133,7 @@ int amdgpu_umc_build_pages_hash(struct amdgpu_device *adev, uint64_t *pfns, int len, uint64_t *val); int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev, struct radix_tree_root *ecc_tree, struct ras_ecc_err *ecc_err); + +void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, + void *ras_error_status); #endif -- cgit v1.2.3 From e74313be5a71df63e307ad98b6ab202b8a222817 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Fri, 22 Mar 2024 12:06:01 +0800 Subject: drm/amdgpu: add condition check for amdgpu_umc_fill_error_record Add condition check for amdgpu_umc_fill_error_record. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 20 +++++++++++++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 2 +- 3 files changed, 19 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index cb5a0f31d201..c8980d5f6540 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -579,6 +579,7 @@ struct ras_err_data { unsigned long de_count; unsigned long err_addr_cnt; struct eeprom_table_record *err_addr; + unsigned long err_addr_len; u32 err_list_count; struct list_head err_node_list; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 2bd88218c20e..dcda3d24bee3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -66,6 +66,8 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, goto out_fini_err_data; } + err_data.err_addr_len = adev->umc.max_ras_err_cnt_per_query; + /* * Translate UMC channel address to Physical address */ @@ -121,6 +123,8 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, if(!err_data->err_addr) dev_warn(adev->dev, "Failed to alloc memory for " "umc error address record!\n"); + else + err_data->err_addr_len = adev->umc.max_ras_err_cnt_per_query; /* umc query_ras_error_address is also responsible for clearing * error status @@ -146,6 +150,8 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, if(!err_data->err_addr) dev_warn(adev->dev, "Failed to alloc memory for " "umc error address record!\n"); + else + err_data->err_addr_len = adev->umc.max_ras_err_cnt_per_query; /* umc query_ras_error_address is also responsible for clearing * error status @@ -389,14 +395,20 @@ int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, return 0; } -void amdgpu_umc_fill_error_record(struct ras_err_data *err_data, +int amdgpu_umc_fill_error_record(struct ras_err_data *err_data, uint64_t err_addr, uint64_t retired_page, uint32_t channel_index, uint32_t umc_inst) { - struct eeprom_table_record *err_rec = - &err_data->err_addr[err_data->err_addr_cnt]; + struct eeprom_table_record *err_rec; + + if (!err_data || + !err_data->err_addr || + (err_data->err_addr_cnt >= err_data->err_addr_len)) + return -EINVAL; + + err_rec = &err_data->err_addr[err_data->err_addr_cnt]; err_rec->address = err_addr; /* page frame address is saved */ @@ -408,6 +420,8 @@ void amdgpu_umc_fill_error_record(struct ras_err_data *err_data, err_rec->mcumc_id = umc_inst; err_data->err_addr_cnt++; + + return 0; } int amdgpu_umc_loop_channels(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 2d08d076f7c9..9e77e6d48e3b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -109,7 +109,7 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev, int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); -void amdgpu_umc_fill_error_record(struct ras_err_data *err_data, +int amdgpu_umc_fill_error_record(struct ras_err_data *err_data, uint64_t err_addr, uint64_t retired_page, uint32_t channel_index, -- cgit v1.2.3 From 314c38cde6870a3189d241b6c6c189661243bc91 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 18 Mar 2024 14:24:13 +0800 Subject: drm/amdgpu: retire bad pages for umc v12_0 Retire bad pages for umc v12_0. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 59 ++++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 6c2b61ef5b57..21dc51c013d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -28,6 +28,8 @@ #include "umc/umc_12_0_0_sh_mask.h" #include "mp/mp_13_0_6_sh_mask.h" +#define MAX_ECC_NUM_PER_RETIREMENT 32 + static inline uint64_t get_umc_v12_0_reg_offset(struct amdgpu_device *adev, uint32_t node_inst, uint32_t umc_inst, @@ -374,6 +376,7 @@ static int umc_v12_0_err_cnt_init_per_channel(struct amdgpu_device *adev, return 0; } +#ifdef TO_BE_REMOVED static void umc_v12_0_ecc_info_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { @@ -442,6 +445,7 @@ static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *ade } } } +#endif static bool umc_v12_0_check_ecc_err_status(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, void *ras_error_status) @@ -633,6 +637,58 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, return 0; } +static int umc_v12_0_fill_error_record(struct amdgpu_device *adev, + struct ras_ecc_err *ecc_err, void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + uint32_t i = 0; + int ret = 0; + + if (!err_data || !ecc_err) + return -EINVAL; + + for (i = 0; i < ecc_err->err_pages.count; i++) { + ret = amdgpu_umc_fill_error_record(err_data, + ecc_err->addr, + ecc_err->err_pages.pfn[i] << AMDGPU_GPU_PAGE_SHIFT, + MCA_IPID_2_UMC_CH(ecc_err->ipid), + MCA_IPID_2_UMC_INST(ecc_err->ipid)); + if (ret) + break; + } + + err_data->de_count++; + + return ret; +} + +static void umc_v12_0_query_ras_ecc_err_addr(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_ecc_err *entries[MAX_ECC_NUM_PER_RETIREMENT]; + struct radix_tree_root *ecc_tree; + int new_detected, ret, i; + + ecc_tree = &con->umc_ecc_log.de_page_tree; + + mutex_lock(&con->umc_ecc_log.lock); + new_detected = radix_tree_gang_lookup_tag(ecc_tree, (void **)entries, + 0, ARRAY_SIZE(entries), UMC_ECC_NEW_DETECTED_TAG); + for (i = 0; i < new_detected; i++) { + if (!entries[i]) + continue; + + ret = umc_v12_0_fill_error_record(adev, entries[i], ras_error_status); + if (ret) { + dev_err(adev->dev, "Fail to fill umc error record, ret:%d\n", ret); + break; + } + radix_tree_tag_clear(ecc_tree, entries[i]->hash_index, UMC_ECC_NEW_DETECTED_TAG); + } + mutex_unlock(&con->umc_ecc_log.lock); +} + struct amdgpu_umc_ras umc_v12_0_ras = { .ras_block = { .hw_ops = &umc_v12_0_ras_hw_ops, @@ -640,8 +696,7 @@ struct amdgpu_umc_ras umc_v12_0_ras = { }, .err_cnt_init = umc_v12_0_err_cnt_init, .query_ras_poison_mode = umc_v12_0_query_ras_poison_mode, - .ecc_info_query_ras_error_count = umc_v12_0_ecc_info_query_ras_error_count, - .ecc_info_query_ras_error_address = umc_v12_0_ecc_info_query_ras_error_address, + .ecc_info_query_ras_error_address = umc_v12_0_query_ras_ecc_err_addr, .check_ecc_err_status = umc_v12_0_check_ecc_err_status, .update_ecc_status = umc_v12_0_update_ecc_status, }; -- cgit v1.2.3 From bfa579b38b865879223f61f6ae295c939d4f7d11 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 22 Apr 2024 17:40:03 +0800 Subject: drm/amdgpu: prepare to handle pasid poison consumption Prepare to handle pasid poison consumption. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 9 ++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 5 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 20 +++++++++++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 3 +++ drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 3 ++- 5 files changed, 31 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 3b4591f554f1..7ba05f030dd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -747,10 +747,17 @@ bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev) return amdgpu_ras_get_fed_status(adev); } +void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *adev, + enum amdgpu_ras_block block, uint16_t pasid, + pasid_notify pasid_fn, void *data, uint32_t reset) +{ + amdgpu_umc_pasid_poison_handler(adev, block, pasid, pasid_fn, data, reset); +} + void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, enum amdgpu_ras_block block, uint32_t reset) { - amdgpu_umc_poison_handler(adev, block, reset); + amdgpu_umc_pasid_poison_handler(adev, block, 0, NULL, NULL, reset); } int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index c51954c9052e..1de021ebdd46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -337,6 +337,11 @@ int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev, struct tile_config *config); void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, enum amdgpu_ras_block block, uint32_t reset); + +void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *adev, + enum amdgpu_ras_block block, uint16_t pasid, + pasid_notify pasid_fn, void *data, uint32_t reset); + bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev); bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem); void amdgpu_amdkfd_block_mmu_notifications(void *p); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index dcda3d24bee3..8ebbca9e2e22 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -252,8 +252,9 @@ int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev, return 0; } -int amdgpu_umc_poison_handler(struct amdgpu_device *adev, - enum amdgpu_ras_block block, uint32_t reset) +int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev, + enum amdgpu_ras_block block, uint16_t pasid, + pasid_notify pasid_fn, void *data, uint32_t reset) { int ret = AMDGPU_RAS_SUCCESS; @@ -291,16 +292,14 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev, amdgpu_ras_error_data_fini(&err_data); } else { - if (reset) { - amdgpu_umc_bad_page_polling_timeout(adev, - reset, MAX_UMC_POISON_POLLING_TIME_SYNC); - } else { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + amdgpu_ras_put_poison_req(adev, + block, pasid, pasid_fn, data, reset); + atomic_inc(&con->page_retirement_req_cnt); wake_up(&con->page_retirement_wq); - } } } else { if (adev->virt.ops && adev->virt.ops->ras_poison_handler) @@ -313,6 +312,13 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev, return ret; } +int amdgpu_umc_poison_handler(struct amdgpu_device *adev, + enum amdgpu_ras_block block, uint32_t reset) +{ + return amdgpu_umc_pasid_poison_handler(adev, + block, 0, NULL, NULL, reset); +} + int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, void *ras_error_status, struct amdgpu_iv_entry *entry) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 9e77e6d48e3b..5f50c69c3cec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -106,6 +106,9 @@ int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev); int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); int amdgpu_umc_poison_handler(struct amdgpu_device *adev, enum amdgpu_ras_block block, uint32_t reset); +int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev, + enum amdgpu_ras_block block, uint16_t pasid, + pasid_notify pasid_fn, void *data, uint32_t reset); int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index c3beb872adf8..e1c21d250611 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -190,7 +190,8 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev, dev_warn(dev->adev->dev, "poison is consumed by client %d, kick off gpu reset flow\n", client_id); - amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, reset); + amdgpu_amdkfd_ras_pasid_poison_consumption_handler(dev->adev, + block, pasid, NULL, NULL, reset); } static bool context_id_expected(struct kfd_dev *dev) -- cgit v1.2.3 From 370fbff4cc6fe02ddeb1aeff43fea3e32b828e6a Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 11 Mar 2024 17:52:37 +0800 Subject: drm/amdgpu: add poison consumption handler Add poison consumption handler. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 43 ++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 72daa51f8beb..2b6018e7b696 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2848,12 +2848,35 @@ static void amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev, schedule_delayed_work(&con->page_retirement_dwork, 0); } +static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev, + struct ras_poison_msg *poison_msg) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + uint32_t reset = poison_msg->reset; + uint16_t pasid = poison_msg->pasid; + + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + + if (poison_msg->pasid_fn) + poison_msg->pasid_fn(adev, pasid, poison_msg->data); + + if (reset) { + flush_delayed_work(&con->page_retirement_dwork); + + con->gpu_reset_flags |= reset; + amdgpu_ras_reset_gpu(adev); + } + + return 0; +} + static int amdgpu_ras_page_retirement_thread(void *param) { struct amdgpu_device *adev = (struct amdgpu_device *)param; struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_poison_msg poison_msg; enum amdgpu_ras_block ras_block; + bool poison_creation_is_handled = false; while (!kthread_should_stop()) { @@ -2874,12 +2897,24 @@ static int amdgpu_ras_page_retirement_thread(void *param) dev_info(adev->dev, "Start processing ras block %s(%d)\n", ras_block_str(ras_block), ras_block); - if (ras_block == AMDGPU_RAS_BLOCK__UMC) + if (ras_block == AMDGPU_RAS_BLOCK__UMC) { amdgpu_ras_poison_creation_handler(adev, MAX_UMC_POISON_POLLING_TIME_ASYNC); - else - amdgpu_umc_bad_page_polling_timeout(adev, - false, MAX_UMC_POISON_POLLING_TIME_ASYNC); + poison_creation_is_handled = true; + } else { + /* poison_creation_is_handled: + * false: no poison creation interrupt, but it has poison + * consumption interrupt. + * true: It has poison creation interrupt at the beginning, + * but it has no poison creation interrupt later. + */ + amdgpu_ras_poison_creation_handler(adev, + poison_creation_is_handled ? + 0 : MAX_UMC_POISON_POLLING_TIME_ASYNC); + + amdgpu_ras_poison_consumption_handler(adev, &poison_msg); + poison_creation_is_handled = false; + } } return 0; -- cgit v1.2.3 From e02387408117c5bccbcb123c50519b8a05444ac5 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Fri, 22 Mar 2024 15:03:07 +0800 Subject: drm/amdgpu: support ACA logging ecc errors support ACA logging ecc errors. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 21dc51c013d1..bfe61d86ee6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -510,6 +510,11 @@ static int umc_v12_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank if (ret) return ret; + amdgpu_umc_update_ecc_status(adev, + bank->regs[ACA_REG_IDX_STATUS], + bank->regs[ACA_REG_IDX_IPID], + bank->regs[ACA_REG_IDX_ADDR]); + ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status); count = ext_error_code == 0 ? ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]) : 1ULL; -- cgit v1.2.3 From bcc093488503226f0d5519d2f0561c497b15cb39 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Wed, 3 Apr 2024 14:11:41 +0800 Subject: drm/amdgpu: Fix address translation defect retired_page is page frame and should be expanded to the full address when querying status. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 2b6018e7b696..0e6a18ec48bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2399,7 +2399,7 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, .flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED, }; status = amdgpu_vram_mgr_query_page_status(&adev->mman.vram_mgr, - data->bps[i].retired_page); + data->bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT); if (status == -EBUSY) (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING; else if (status == -ENOENT) -- cgit v1.2.3 From 48fa90718b2ae1d0f17ba94f84e4f93d9f6068cd Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 8 Apr 2024 14:48:50 +0800 Subject: drm/amdgpu: Use new interface to reserve bad page Use new interface to reserve bad page. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 0e6a18ec48bd..8c58fd8a08de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2558,9 +2558,7 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, goto out; } - amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr, - bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT, - AMDGPU_GPU_PAGE_SIZE); + amdgpu_ras_reserve_page(adev, bps[i].retired_page); memcpy(&data->bps[data->count], &bps[i], sizeof(*data->bps)); data->count++; -- cgit v1.2.3 From 2e55bcf3d742a4946d862b86e39e75a95cc6f1c0 Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Mon, 22 Apr 2024 10:07:51 +0800 Subject: drm/amdgpu: Initialize timestamp for some legacy SOCs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initialize the interrupt timestamp for some legacy SOCs to fix the coverity issue "Uninitialized scalar variable" Signed-off-by: Ma Jun Suggested-by: Christian König Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 7e6d09730e6d..665c63f55278 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -445,6 +445,14 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, entry.ih = ih; entry.iv_entry = (const uint32_t *)&ih->ring[ring_index]; + + /* + * timestamp is not supported on some legacy SOCs (cik, cz, iceland, + * si and tonga), so initialize timestamp and timestamp_src to 0 + */ + entry.timestamp = 0; + entry.timestamp_src = 0; + amdgpu_ih_decode_iv(adev, &entry); trace_amdgpu_iv(ih - &adev->irq.ih, &entry); -- cgit v1.2.3 From 8b2faf1a4f3b6c748c0da36cda865a226534d520 Mon Sep 17 00:00:00 2001 From: Bob Zhou Date: Tue, 23 Apr 2024 16:58:11 +0800 Subject: drm/amdgpu: add error handle to avoid out-of-bounds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit if the sdma_v4_0_irq_id_to_seq return -EINVAL, the process should be stop to avoid out-of-bounds read, so directly return -EINVAL. Signed-off-by: Bob Zhou Acked-by: Christian König Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index e2e3856938ed..101038395c3b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -2021,6 +2021,9 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev, DRM_DEBUG("IH: SDMA trap\n"); instance = sdma_v4_0_irq_id_to_seq(entry->client_id); + if (instance < 0) + return instance; + switch (entry->ring_id) { case 0: amdgpu_fence_process(&adev->sdma.instance[instance].ring); -- cgit v1.2.3 From cd48b97ce7787cf271f56ed4ea2037e1680cb29a Mon Sep 17 00:00:00 2001 From: Bob Zhou Date: Wed, 24 Apr 2024 15:24:05 +0800 Subject: drm/amdgpu: add return result for amdgpu_i2c_{get/put}_byte MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After amdgpu_i2c_get_byte fail, amdgpu_i2c_put_byte shouldn't be conducted to put wrong value. So return and check the i2c transfer result. Signed-off-by: Bob Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c | 47 ++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 19 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c index d79cb13e1aa8..00d6211e0fbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c @@ -279,7 +279,7 @@ amdgpu_i2c_lookup(struct amdgpu_device *adev, return NULL; } -static void amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus, +static int amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus, u8 slave_addr, u8 addr, u8 *val) @@ -304,16 +304,18 @@ static void amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus, out_buf[0] = addr; out_buf[1] = 0; - if (i2c_transfer(&i2c_bus->adapter, msgs, 2) == 2) { - *val = in_buf[0]; - DRM_DEBUG("val = 0x%02x\n", *val); - } else { - DRM_DEBUG("i2c 0x%02x 0x%02x read failed\n", - addr, *val); + if (i2c_transfer(&i2c_bus->adapter, msgs, 2) != 2) { + DRM_DEBUG("i2c 0x%02x read failed\n", addr); + return -EIO; } + + *val = in_buf[0]; + DRM_DEBUG("val = 0x%02x\n", *val); + + return 0; } -static void amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus, +static int amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus, u8 slave_addr, u8 addr, u8 val) @@ -329,9 +331,12 @@ static void amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus, out_buf[0] = addr; out_buf[1] = val; - if (i2c_transfer(&i2c_bus->adapter, &msg, 1) != 1) - DRM_DEBUG("i2c 0x%02x 0x%02x write failed\n", - addr, val); + if (i2c_transfer(&i2c_bus->adapter, &msg, 1) != 1) { + DRM_DEBUG("i2c 0x%02x 0x%02x write failed\n", addr, val); + return -EIO; + } + + return 0; } /* ddc router switching */ @@ -346,16 +351,18 @@ amdgpu_i2c_router_select_ddc_port(const struct amdgpu_connector *amdgpu_connecto if (!amdgpu_connector->router_bus) return; - amdgpu_i2c_get_byte(amdgpu_connector->router_bus, + if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus, amdgpu_connector->router.i2c_addr, - 0x3, &val); + 0x3, &val)) + return; val &= ~amdgpu_connector->router.ddc_mux_control_pin; amdgpu_i2c_put_byte(amdgpu_connector->router_bus, amdgpu_connector->router.i2c_addr, 0x3, val); - amdgpu_i2c_get_byte(amdgpu_connector->router_bus, + if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus, amdgpu_connector->router.i2c_addr, - 0x1, &val); + 0x1, &val)) + return; val &= ~amdgpu_connector->router.ddc_mux_control_pin; val |= amdgpu_connector->router.ddc_mux_state; amdgpu_i2c_put_byte(amdgpu_connector->router_bus, @@ -375,16 +382,18 @@ amdgpu_i2c_router_select_cd_port(const struct amdgpu_connector *amdgpu_connector if (!amdgpu_connector->router_bus) return; - amdgpu_i2c_get_byte(amdgpu_connector->router_bus, + if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus, amdgpu_connector->router.i2c_addr, - 0x3, &val); + 0x3, &val)) + return; val &= ~amdgpu_connector->router.cd_mux_control_pin; amdgpu_i2c_put_byte(amdgpu_connector->router_bus, amdgpu_connector->router.i2c_addr, 0x3, val); - amdgpu_i2c_get_byte(amdgpu_connector->router_bus, + if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus, amdgpu_connector->router.i2c_addr, - 0x1, &val); + 0x1, &val)) + return; val &= ~amdgpu_connector->router.cd_mux_control_pin; val |= amdgpu_connector->router.cd_mux_state; amdgpu_i2c_put_byte(amdgpu_connector->router_bus, -- cgit v1.2.3 From 2d10c3dbde073ac005303b313d3e2cb99381eb6f Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Wed, 24 Apr 2024 12:59:22 +0800 Subject: drm/amdgpu: add check before free wb entry Check if ring is not a mes queue before freeing the wb entry, because we only allocate a wb entry when it's not a mes queue. Signed-off-by: Jesse Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 3 ++- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 3 ++- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 45a2d0a5a2d7..b7d33d78bce0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -999,7 +999,8 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 20); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_device_wb_free(adev, index); + if (!ring->is_mes_queue) + amdgpu_device_wb_free(adev, index); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 43e64b2da575..cc9e961f0078 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -839,7 +839,8 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 20); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_device_wb_free(adev, index); + if (!ring->is_mes_queue) + amdgpu_device_wb_free(adev, index); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 1f4877195213..c833b6b8373b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -861,7 +861,8 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 5); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_device_wb_free(adev, index); + if (!ring->is_mes_queue) + amdgpu_device_wb_free(adev, index); return r; } -- cgit v1.2.3 From 7bfd16d0ec374629ab4346affe1e644a503ba44c Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Thu, 25 Apr 2024 10:04:08 +0800 Subject: drm/amdgpu: initialize the last_jump_jiffies in atom_exec_context The parameter "last_jump_jiffies" should be initialized before being used in the function atom_op_jump. Signed-off-by: Jesse Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/atom.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index 72362df352f6..d552e013354c 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -1243,6 +1243,7 @@ static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, ectx.ps_size = params_size; ectx.abort = false; ectx.last_jump = 0; + ectx.last_jump_jiffies = 0; if (ws) { ectx.ws = kcalloc(4, ws, GFP_KERNEL); ectx.ws_size = ws; -- cgit v1.2.3 From 506c245f3f1cd989cb89811a7f06e04ff8813a0d Mon Sep 17 00:00:00 2001 From: Bob Zhou Date: Tue, 23 Apr 2024 13:32:39 +0800 Subject: drm/amdgpu: fix double free err_addr pointer warnings In amdgpu_umc_bad_page_polling_timeout, the amdgpu_umc_handle_bad_pages will be run many times so that double free err_addr in some special case. So set the err_addr to NULL to avoid the warnings. Signed-off-by: Bob Zhou Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 8ebbca9e2e22..540e0f066b26 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -179,6 +179,7 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, } kfree(err_data->err_addr); + err_data->err_addr = NULL; mutex_unlock(&con->page_retirement_lock); } -- cgit v1.2.3 From 6f3b69139c3c1f7880ef52cc29571cb74ce8220a Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Wed, 24 Apr 2024 10:21:30 +0800 Subject: drm/amdgpu: Fix ras mode2 reset failure in ras aca mode Fix ras mode2 reset failure in ras aca mode. Signed-off-by: YiPeng Chai Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 8c58fd8a08de..1adc81a55734 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1250,6 +1250,10 @@ int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk, { struct ras_manager *obj; + /* in resume phase, no need to create aca fs node */ + if (adev->in_suspend || amdgpu_in_reset(adev)) + return 0; + obj = get_ras_manager(adev, blk); if (!obj) return -EINVAL; -- cgit v1.2.3 From acce6479e30f73ab0872e93a75aed1fb791d04ec Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 25 Apr 2024 11:22:32 +0530 Subject: drm/amdgpu: Fix buffer size in gfx_v9_4_3_init_ cp_compute_microcode() and rlc_microcode() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function gfx_v9_4_3_init_microcode in gfx_v9_4_3.c was generating about potential truncation of output when using the snprintf function. The issue was due to the size of the buffer 'ucode_prefix' being too small to accommodate the maximum possible length of the string being written into it. The string being written is "amdgpu/%s_mec.bin" or "amdgpu/%s_rlc.bin", where %s is replaced by the value of 'chip_name'. The length of this string without the %s is 16 characters. The warning message indicated that 'chip_name' could be up to 29 characters long, resulting in a total of 45 characters, which exceeds the buffer size of 30 characters. To resolve this issue, the size of the 'ucode_prefix' buffer has been reduced from 30 to 15. This ensures that the maximum possible length of the string being written into the buffer will not exceed its size, thus preventing potential buffer overflow and truncation issues. Fixes the below with gcc W=1: drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c: In function ‘gfx_v9_4_3_early_init’: drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c:379:52: warning: ‘%s’ directive output may be truncated writing up to 29 bytes into a region of size 23 [-Wformat-truncation=] 379 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); | ^~ ...... 439 | r = gfx_v9_4_3_init_rlc_microcode(adev, ucode_prefix); | ~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c:379:9: note: ‘snprintf’ output between 16 and 45 bytes into a destination of size 30 379 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c:413:52: warning: ‘%s’ directive output may be truncated writing up to 29 bytes into a region of size 23 [-Wformat-truncation=] 413 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); | ^~ ...... 443 | r = gfx_v9_4_3_init_cp_compute_microcode(adev, ucode_prefix); | ~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c:413:9: note: ‘snprintf’ output between 16 and 45 bytes into a destination of size 30 413 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fixes: 86301129698b ("drm/amdgpu: split gc v9_4_3 functionality from gc v9_0") Cc: Hawking Zhang Cc: Christian König Cc: Alex Deucher Cc: Lijo Lazar Signed-off-by: Srinivasan Shanmugam Suggested-by: Lijo Lazar Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 0e429b7ed036..7b16e8cca86a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -431,7 +431,7 @@ out: static int gfx_v9_4_3_init_microcode(struct amdgpu_device *adev) { - char ucode_prefix[30]; + char ucode_prefix[15]; int r; amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); -- cgit v1.2.3 From bd31e5026dc39e7ca46ffb763c513130f405b1a8 Mon Sep 17 00:00:00 2001 From: Lancelot SIX Date: Wed, 3 Apr 2024 10:21:24 +0100 Subject: drm/amdkfd: Enable SQ watchpoint for gfx10 There are new control registers introduced in gfx10 used to configure hardware watchpoints triggered by SMEM instructions: SQ_WATCH{0,1,2,3}_{CNTL_ADDR_HI,ADDR_L}. Those registers work in a similar way as the TCP_WATCH* registers currently used for gfx9 and above. This patch adds support to program the SQ_WATCH registers for gfx10. The SQ_WATCH?_CNTL.MASK field has one bit more than TCP_WATCH?_CNTL.MASK, so SQ watchpoints can have a finer granularity than TCP_WATCH watchpoints. In this patch, we keep the capabilities advertised to the debugger unchanged (HSA_DBG_WATCH_ADDR_MASK_*_BIT_GFX10) as this reflects what both TCP and SQ watchpoints can do and both watchpoints are configured together. Signed-off-by: Lancelot SIX Reviewed-by: Jonathan Kim Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 71 ++++++++++++++++++---- 1 file changed, 58 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 69810b3f1c63..3ab6c3aa0ad1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -881,6 +881,7 @@ uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev, } #define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H) +#define SQ_WATCH_STRIDE (mmSQ_WATCH1_ADDR_H - mmSQ_WATCH0_ADDR_H) uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev, uint64_t watch_address, uint32_t watch_address_mask, @@ -889,55 +890,93 @@ uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev, uint32_t debug_vmid, uint32_t inst) { + /* SQ_WATCH?_ADDR_* and TCP_WATCH?_ADDR_* are programmed with the + * same values. + */ uint32_t watch_address_high; uint32_t watch_address_low; - uint32_t watch_address_cntl; - - watch_address_cntl = 0; + uint32_t tcp_watch_address_cntl; + uint32_t sq_watch_address_cntl; watch_address_low = lower_32_bits(watch_address); watch_address_high = upper_32_bits(watch_address) & 0xffff; - watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + tcp_watch_address_cntl = 0; + tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl, TCP_WATCH0_CNTL, VMID, debug_vmid); - watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl, TCP_WATCH0_CNTL, MODE, watch_mode); - watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl, TCP_WATCH0_CNTL, MASK, watch_address_mask >> 7); + sq_watch_address_cntl = 0; + sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl, + SQ_WATCH0_CNTL, + VMID, + debug_vmid); + sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl, + SQ_WATCH0_CNTL, + MODE, + watch_mode); + sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl, + SQ_WATCH0_CNTL, + MASK, + watch_address_mask >> 6); + /* Turning off this watch point until we set all the registers */ - watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl, TCP_WATCH0_CNTL, VALID, 0); - WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) + (watch_id * TCP_WATCH_STRIDE)), - watch_address_cntl); + tcp_watch_address_cntl); + + sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl, + SQ_WATCH0_CNTL, + VALID, + 0); + WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) + + (watch_id * SQ_WATCH_STRIDE)), + sq_watch_address_cntl); + /* Program {TCP,SQ}_WATCH?_ADDR* */ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) + (watch_id * TCP_WATCH_STRIDE)), watch_address_high); - WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) + (watch_id * TCP_WATCH_STRIDE)), watch_address_low); + WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_ADDR_H) + + (watch_id * SQ_WATCH_STRIDE)), + watch_address_high); + WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_ADDR_L) + + (watch_id * SQ_WATCH_STRIDE)), + watch_address_low); + /* Enable the watch point */ - watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl, TCP_WATCH0_CNTL, VALID, 1); - WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) + (watch_id * TCP_WATCH_STRIDE)), - watch_address_cntl); + tcp_watch_address_cntl); + + sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl, + SQ_WATCH0_CNTL, + VALID, + 1); + WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) + + (watch_id * SQ_WATCH_STRIDE)), + sq_watch_address_cntl); return 0; } @@ -953,8 +992,14 @@ uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev, (watch_id * TCP_WATCH_STRIDE)), watch_address_cntl); + WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) + + (watch_id * SQ_WATCH_STRIDE)), + watch_address_cntl); + return 0; } +#undef TCP_WATCH_STRIDE +#undef SQ_WATCH_STRIDE /* kgd_gfx_v10_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values -- cgit v1.2.3 From e362b7c8f8c7af00d06f0ab609629101aebae993 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Thu, 25 Apr 2024 21:05:55 +0530 Subject: drm/amdgpu: Modify the contiguous flags behaviour MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now we have two flags for contiguous VRAM buffer allocation. If the application request for AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, it would set the ttm place TTM_PL_FLAG_CONTIGUOUS flag in the buffer's placement function. This patch will change the default behaviour of the two flags. When we set AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS - This means contiguous is not mandatory. - we will try to allocate the contiguous buffer. Say if the allocation fails, we fallback to allocate the individual pages. When we setTTM_PL_FLAG_CONTIGUOUS - This means contiguous allocation is mandatory. - we are setting this in amdgpu_bo_pin_restricted() before bo validation and check this flag in the vram manager file. - if this is set, we should allocate the buffer pages contiguously. the allocation fails, we return -ENOSPC. v2: - keep the mem_flags and bo->flags check as is(Christian) - place the TTM_PL_FLAG_CONTIGUOUS flag setting into the amdgpu_bo_pin_restricted function placement range iteration loop(Christian) - rename find_pages with amdgpu_vram_mgr_calculate_pages_per_block (Christian) - Keep the kernel BO allocation as is(Christain) - If BO pin vram allocation failed, we need to return -ENOSPC as RDMA cannot work with scattered VRAM pages(Philip) v3(Christian): - keep contiguous flag handling outside of pages_per_block calculation - remove the hacky implementation in contiguous flag error handling code v4(Christian): - use any variable and return value for non-contiguous fallback v5: rebase to amd-staging-drm-next branch Signed-off-by: Arunpravin Paneer Selvam Suggested-by: Christian König Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 8 +++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 23 +++++++++++++++++------ 2 files changed, 24 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 69513e34f97b..706345ea1430 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -153,8 +153,10 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) else places[c].flags |= TTM_PL_FLAG_TOPDOWN; - if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) + if (abo->tbo.type == ttm_bo_type_kernel && + flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) places[c].flags |= TTM_PL_FLAG_CONTIGUOUS; + c++; } @@ -967,6 +969,10 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, if (!bo->placements[i].lpfn || (lpfn && lpfn < bo->placements[i].lpfn)) bo->placements[i].lpfn = lpfn; + + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && + bo->placements[i].mem_type == TTM_PL_VRAM) + bo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; } r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 8db880244324..f23002ed2b42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -450,6 +450,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, { struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); u64 vis_usage = 0, max_bytes, min_block_size; struct amdgpu_vram_mgr_resource *vres; u64 size, remaining_size, lpfn, fpfn; @@ -468,7 +469,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, if (tbo->type != ttm_bo_type_kernel) max_bytes -= AMDGPU_VM_RESERVED_VRAM; - if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) { pages_per_block = ~0ul; } else { #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -477,7 +478,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, /* default to 2MB */ pages_per_block = 2UL << (20UL - PAGE_SHIFT); #endif - pages_per_block = max_t(uint32_t, pages_per_block, + pages_per_block = max_t(u32, pages_per_block, tbo->page_alignment); } @@ -498,7 +499,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, if (place->flags & TTM_PL_FLAG_TOPDOWN) vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION; - if (place->flags & TTM_PL_FLAG_CONTIGUOUS) + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION; if (fpfn || lpfn != mgr->mm.size) @@ -514,21 +515,31 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, else min_block_size = mgr->default_page_size; - BUG_ON(min_block_size < mm->chunk_size); - /* Limit maximum size to 2GiB due to SG table limitations */ size = min(remaining_size, 2ULL << 30); if ((size >= (u64)pages_per_block << PAGE_SHIFT) && - !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) + !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) min_block_size = (u64)pages_per_block << PAGE_SHIFT; + BUG_ON(min_block_size < mm->chunk_size); + r = drm_buddy_alloc_blocks(mm, fpfn, lpfn, size, min_block_size, &vres->blocks, vres->flags); + + if (unlikely(r == -ENOSPC) && pages_per_block == ~0ul && + !(place->flags & TTM_PL_FLAG_CONTIGUOUS)) { + vres->flags &= ~DRM_BUDDY_CONTIGUOUS_ALLOCATION; + pages_per_block = max_t(u32, 2UL << (20UL - PAGE_SHIFT), + tbo->page_alignment); + + continue; + } + if (unlikely(r)) goto error_free_blocks; -- cgit v1.2.3 From 2a8f7464d33c759c3848737399d155a6c83c1ffa Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 25 Apr 2024 15:45:56 +0530 Subject: drm/amdgpu: skip ip dump if devcoredump flag is set Do not dump the ip registers during driver reload in passthrough environment. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 869256394136..861ccff78af9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5368,14 +5368,15 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, reset_list); - if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) + if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) { amdgpu_reset_reg_dumps(tmp_adev); - /* Trigger ip dump before we reset the asic */ - for (i = 0; i < tmp_adev->num_ip_blocks; i++) - if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state) - tmp_adev->ip_blocks[i].version->funcs->dump_ip_state( - (void *)tmp_adev); + /* Trigger ip dump before we reset the asic */ + for (i = 0; i < tmp_adev->num_ip_blocks; i++) + if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state) + tmp_adev->ip_blocks[i].version->funcs + ->dump_ip_state((void *)tmp_adev); + } reset_context->reset_device_list = device_list_handle; r = amdgpu_reset_perform_reset(tmp_adev, reset_context); -- cgit v1.2.3 From 9a5f15d2a29d06ce5bd50919da7221cda92afb69 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Tue, 23 Apr 2024 14:06:28 +0800 Subject: drm/amdgpu: fix uninitialized scalar variable warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clear warning that uses uninitialized value fw_size. Signed-off-by: Tim Huang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 55d5508987ff..1d955652f3ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1206,7 +1206,8 @@ void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes); break; default: - break; + dev_err(adev->dev, "Invalid ucode id %u\n", ucode_id); + return; } if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { -- cgit v1.2.3 From 029c2b03892bfd7bcf2fc8053ff1641aa73ff58c Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Fri, 1 Mar 2024 17:33:01 +0800 Subject: drm/amdgpu/mes: add mes mapping legacy queue support Add mes mapping legacy queue framework support. Signed-off-by: Jack Xiao Reviewed-by: Hawking Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 22 ++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 14 ++++++++++++++ 2 files changed, 36 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 21d1d508ee10..5ca5c47ab54e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -786,6 +786,28 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id) return 0; } +int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + struct mes_map_legacy_queue_input queue_input; + int r; + + memset(&queue_input, 0, sizeof(queue_input)); + + queue_input.queue_type = ring->funcs->type; + queue_input.doorbell_offset = ring->doorbell_index; + queue_input.pipe_id = ring->pipe; + queue_input.queue_id = ring->queue; + queue_input.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + queue_input.wptr_addr = ring->wptr_gpu_addr; + + r = adev->mes.funcs->map_legacy_queue(&adev->mes, &queue_input); + if (r) + DRM_ERROR("failed to map legacy queue\n"); + + return r; +} + int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring, enum amdgpu_unmap_queues_action action, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index b99a2b3cffe3..df9f0404d842 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -248,6 +248,15 @@ struct mes_remove_queue_input { uint64_t gang_context_addr; }; +struct mes_map_legacy_queue_input { + uint32_t queue_type; + uint32_t doorbell_offset; + uint32_t pipe_id; + uint32_t queue_id; + uint64_t mqd_addr; + uint64_t wptr_addr; +}; + struct mes_unmap_legacy_queue_input { enum amdgpu_unmap_queues_action action; uint32_t queue_type; @@ -324,6 +333,9 @@ struct amdgpu_mes_funcs { int (*remove_hw_queue)(struct amdgpu_mes *mes, struct mes_remove_queue_input *input); + int (*map_legacy_queue)(struct amdgpu_mes *mes, + struct mes_map_legacy_queue_input *input); + int (*unmap_legacy_queue)(struct amdgpu_mes *mes, struct mes_unmap_legacy_queue_input *input); @@ -367,6 +379,8 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, int *queue_id); int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id); +int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, + struct amdgpu_ring *ring); int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring, enum amdgpu_unmap_queues_action action, -- cgit v1.2.3 From ea686fef5489ef7a2450a9fdbcc732b837fb46a8 Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Thu, 25 Apr 2024 15:16:40 +0800 Subject: drm/amdgpu: fix the warning about the expression (int)size - len Converting size from size_t to int may overflow. v2: keep reverse xmas tree order (Christian) Signed-off-by: Jesse Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index f5d0fa207a88..b62ae3c91a9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -2065,12 +2065,13 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f, struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; char reg_offset[11]; uint32_t *new = NULL, *tmp = NULL; - int ret, i = 0, len = 0; + unsigned int len = 0; + int ret, i = 0; do { memset(reg_offset, 0, 11); if (copy_from_user(reg_offset, buf + len, - min(10, ((int)size-len)))) { + min(10, (size-len)))) { ret = -EFAULT; goto error_free; } -- cgit v1.2.3 From d7c128cb775ef21c29c3ad7113f5bd4ba886efa9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 22 Apr 2024 12:30:33 +0200 Subject: Revert "drm: Switch DRM_DISPLAY_HDMI_HELPER to depends on" This reverts commit f6d2dc03fa8546b284dd8c1af027d9fac5725921, as helper code should always be selected by the driver that needs it, for the convenience of the final user configuring a kernel. The user who configures a kernel should not need to know which helpers are needed for the driver he is interested in. Making a driver depend on helper code means that the user needs to know which helpers to enable first, which is very user-unfriendly. Signed-off-by: Geert Uytterhoeven Acked-by: Arnd Bergmann Link: https://patchwork.freedesktop.org/patch/msgid/bd288a5943dab8609f2d1f2bf413595a61df727a.1713780345.git.geert+renesas@glider.be Signed-off-by: Maxime Ripard --- drivers/gpu/drm/amd/amdgpu/Kconfig | 2 +- drivers/gpu/drm/bridge/synopsys/Kconfig | 2 +- drivers/gpu/drm/display/Kconfig | 1 - drivers/gpu/drm/i915/Kconfig | 2 +- drivers/gpu/drm/nouveau/Kconfig | 2 +- drivers/gpu/drm/tegra/Kconfig | 2 +- drivers/gpu/drm/vc4/Kconfig | 2 +- drivers/gpu/drm/xe/Kconfig | 2 +- 8 files changed, 7 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index b0365cc1374e..1662dc49f18e 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -5,12 +5,12 @@ config DRM_AMDGPU depends on DRM depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HDCP_HELPER - depends on DRM_DISPLAY_HDMI_HELPER depends on DRM_DISPLAY_HELPER depends on MMU depends on PCI depends on !UML select FW_LOADER + select DRM_DISPLAY_HDMI_HELPER select DRM_KMS_HELPER select DRM_SCHED select DRM_TTM diff --git a/drivers/gpu/drm/bridge/synopsys/Kconfig b/drivers/gpu/drm/bridge/synopsys/Kconfig index 387f5bd86089..f366ece47146 100644 --- a/drivers/gpu/drm/bridge/synopsys/Kconfig +++ b/drivers/gpu/drm/bridge/synopsys/Kconfig @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_DW_HDMI tristate - depends on DRM_DISPLAY_HDMI_HELPER depends on DRM_DISPLAY_HELPER + select DRM_DISPLAY_HDMI_HELPER select DRM_KMS_HELPER select REGMAP_MMIO select CEC_CORE if CEC_NOTIFIER diff --git a/drivers/gpu/drm/display/Kconfig b/drivers/gpu/drm/display/Kconfig index 01f2a231aa5f..d65f1a37c08c 100644 --- a/drivers/gpu/drm/display/Kconfig +++ b/drivers/gpu/drm/display/Kconfig @@ -74,6 +74,5 @@ config DRM_DISPLAY_HDCP_HELPER config DRM_DISPLAY_HDMI_HELPER bool "DRM HDMI Helpers" depends on DRM_DISPLAY_HELPER - default y help DRM display helpers for HDMI. diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 4f0d18a16b0f..87ef8c4d72a5 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -4,7 +4,6 @@ config DRM_I915 depends on DRM depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HDCP_HELPER - depends on DRM_DISPLAY_HDMI_HELPER depends on DRM_DISPLAY_HELPER depends on X86 && PCI depends on !PREEMPT_RT @@ -14,6 +13,7 @@ config DRM_I915 # the shmem_readpage() which depends upon tmpfs select SHMEM select TMPFS + select DRM_DISPLAY_HDMI_HELPER select DRM_KMS_HELPER select DRM_PANEL select DRM_MIPI_DSI diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index 4c10b400658c..7cc305b2826d 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -3,12 +3,12 @@ config DRM_NOUVEAU tristate "Nouveau (NVIDIA) cards" depends on DRM depends on DRM_DISPLAY_DP_HELPER - depends on DRM_DISPLAY_HDMI_HELPER depends on DRM_DISPLAY_HELPER depends on PCI depends on MMU select IOMMU_API select FW_LOADER + select DRM_DISPLAY_HDMI_HELPER select DRM_KMS_HELPER select DRM_TTM select DRM_TTM_HELPER diff --git a/drivers/gpu/drm/tegra/Kconfig b/drivers/gpu/drm/tegra/Kconfig index 6974caa99ece..bb6e35261f11 100644 --- a/drivers/gpu/drm/tegra/Kconfig +++ b/drivers/gpu/drm/tegra/Kconfig @@ -6,9 +6,9 @@ config DRM_TEGRA depends on DRM depends on DRM_DISPLAY_DP_AUX_BUS depends on DRM_DISPLAY_DP_HELPER - depends on DRM_DISPLAY_HDMI_HELPER depends on DRM_DISPLAY_HELPER depends on OF + select DRM_DISPLAY_HDMI_HELPER select DRM_KMS_HELPER select DRM_MIPI_DSI select DRM_PANEL diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig index 4801f8b64d3d..98772a6b5bf0 100644 --- a/drivers/gpu/drm/vc4/Kconfig +++ b/drivers/gpu/drm/vc4/Kconfig @@ -4,13 +4,13 @@ config DRM_VC4 depends on ARCH_BCM || ARCH_BCM2835 || COMPILE_TEST depends on COMMON_CLK depends on DRM - depends on DRM_DISPLAY_HDMI_HELPER depends on DRM_DISPLAY_HELPER depends on PM # Make sure not 'y' when RASPBERRYPI_FIRMWARE is 'm'. This can only # happen when COMPILE_TEST=y, hence the added !RASPBERRYPI_FIRMWARE. depends on RASPBERRYPI_FIRMWARE || (COMPILE_TEST && !RASPBERRYPI_FIRMWARE) depends on SND && SND_SOC + select DRM_DISPLAY_HDMI_HELPER select DRM_KMS_HELPER select DRM_GEM_DMA_HELPER select DRM_PANEL_BRIDGE diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 782934be0a77..ce2f6635ff74 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -5,7 +5,6 @@ config DRM_XE depends on DRM depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HDCP_HELPER - depends on DRM_DISPLAY_HDMI_HELPER depends on DRM_DISPLAY_HELPER depends on MMU depends on PCI @@ -20,6 +19,7 @@ config DRM_XE select DRM_KUNIT_TEST_HELPERS if DRM_XE_KUNIT_TEST != n select DRM_PANEL select DRM_SUBALLOC_HELPER + select DRM_DISPLAY_HDMI_HELPER select DRM_MIPI_DSI select RELAY select IRQ_WORK -- cgit v1.2.3 From 95734469533ce6fec8d9677e15e29ea82f26f590 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 22 Apr 2024 12:30:34 +0200 Subject: Revert "drm: Switch DRM_DISPLAY_HDCP_HELPER to depends on" This reverts commit 3166e7e6d935caaef07605a5c90773fbf9ffeaf4, as helper code should always be selected by the driver that needs it, for the convenience of the final user configuring a kernel. The user who configures a kernel should not need to know which helpers are needed for the driver he is interested in. Making a driver depend on helper code means that the user needs to know which helpers to enable first, which is very user-unfriendly. Signed-off-by: Geert Uytterhoeven Acked-by: Arnd Bergmann Link: https://patchwork.freedesktop.org/patch/msgid/a40e70a0abd3d841c23c107d452a43fdd70ef37a.1713780345.git.geert+renesas@glider.be Signed-off-by: Maxime Ripard --- drivers/gpu/drm/amd/amdgpu/Kconfig | 2 +- drivers/gpu/drm/bridge/Kconfig | 2 +- drivers/gpu/drm/bridge/analogix/Kconfig | 2 +- drivers/gpu/drm/bridge/cadence/Kconfig | 2 +- drivers/gpu/drm/display/Kconfig | 1 - drivers/gpu/drm/i915/Kconfig | 2 +- drivers/gpu/drm/xe/Kconfig | 2 +- 7 files changed, 6 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 1662dc49f18e..ba09121e7deb 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -4,13 +4,13 @@ config DRM_AMDGPU tristate "AMD GPU" depends on DRM depends on DRM_DISPLAY_DP_HELPER - depends on DRM_DISPLAY_HDCP_HELPER depends on DRM_DISPLAY_HELPER depends on MMU depends on PCI depends on !UML select FW_LOADER select DRM_DISPLAY_HDMI_HELPER + select DRM_DISPLAY_HDCP_HELPER select DRM_KMS_HELPER select DRM_SCHED select DRM_TTM diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index 30a17876ff50..801d5b7d3e5d 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -94,9 +94,9 @@ config DRM_ITE_IT6505 tristate "ITE IT6505 DisplayPort bridge" depends on DRM_DISPLAY_DP_AUX_BUS depends on DRM_DISPLAY_DP_HELPER - depends on DRM_DISPLAY_HDCP_HELPER depends on DRM_DISPLAY_HELPER depends on OF + select DRM_DISPLAY_HDCP_HELPER select DRM_KMS_HELPER select EXTCON select CRYPTO diff --git a/drivers/gpu/drm/bridge/analogix/Kconfig b/drivers/gpu/drm/bridge/analogix/Kconfig index 12bfea53bf24..9659df6718de 100644 --- a/drivers/gpu/drm/bridge/analogix/Kconfig +++ b/drivers/gpu/drm/bridge/analogix/Kconfig @@ -35,9 +35,9 @@ config DRM_ANALOGIX_ANX7625 depends on DRM depends on DRM_DISPLAY_DP_AUX_BUS depends on DRM_DISPLAY_DP_HELPER - depends on DRM_DISPLAY_HDCP_HELPER depends on DRM_DISPLAY_HELPER depends on OF + select DRM_DISPLAY_HDCP_HELPER select DRM_MIPI_DSI help ANX7625 is an ultra-low power 4K mobile HD transmitter diff --git a/drivers/gpu/drm/bridge/cadence/Kconfig b/drivers/gpu/drm/bridge/cadence/Kconfig index 7817f6f56607..3480fd4d0a5f 100644 --- a/drivers/gpu/drm/bridge/cadence/Kconfig +++ b/drivers/gpu/drm/bridge/cadence/Kconfig @@ -24,9 +24,9 @@ endif config DRM_CDNS_MHDP8546 tristate "Cadence DPI/DP bridge" depends on DRM_DISPLAY_DP_HELPER - depends on DRM_DISPLAY_HDCP_HELPER depends on DRM_DISPLAY_HELPER depends on OF + select DRM_DISPLAY_HDCP_HELPER select DRM_KMS_HELPER select DRM_PANEL_BRIDGE help diff --git a/drivers/gpu/drm/display/Kconfig b/drivers/gpu/drm/display/Kconfig index d65f1a37c08c..9801f47a3704 100644 --- a/drivers/gpu/drm/display/Kconfig +++ b/drivers/gpu/drm/display/Kconfig @@ -67,7 +67,6 @@ config DRM_DISPLAY_DP_TUNNEL_STATE_DEBUG config DRM_DISPLAY_HDCP_HELPER bool "DRM HDCD Helpers" depends on DRM_DISPLAY_HELPER - default y help DRM display helpers for HDCP. diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 87ef8c4d72a5..dbde4e29d93a 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -3,7 +3,6 @@ config DRM_I915 tristate "Intel 8xx/9xx/G3x/G4x/HD Graphics" depends on DRM depends on DRM_DISPLAY_DP_HELPER - depends on DRM_DISPLAY_HDCP_HELPER depends on DRM_DISPLAY_HELPER depends on X86 && PCI depends on !PREEMPT_RT @@ -13,6 +12,7 @@ config DRM_I915 # the shmem_readpage() which depends upon tmpfs select SHMEM select TMPFS + select DRM_DISPLAY_HDCP_HELPER select DRM_DISPLAY_HDMI_HELPER select DRM_KMS_HELPER select DRM_PANEL diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index ce2f6635ff74..d97b3f4af5ed 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -4,7 +4,6 @@ config DRM_XE depends on (m || (y && KUNIT=y)) depends on DRM depends on DRM_DISPLAY_DP_HELPER - depends on DRM_DISPLAY_HDCP_HELPER depends on DRM_DISPLAY_HELPER depends on MMU depends on PCI @@ -19,6 +18,7 @@ config DRM_XE select DRM_KUNIT_TEST_HELPERS if DRM_XE_KUNIT_TEST != n select DRM_PANEL select DRM_SUBALLOC_HELPER + select DRM_DISPLAY_HDCP_HELPER select DRM_DISPLAY_HDMI_HELPER select DRM_MIPI_DSI select RELAY -- cgit v1.2.3 From 7fe302ae198a35ac071d3a9e78ebd8e14b0958eb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 22 Apr 2024 12:30:35 +0200 Subject: Revert "drm: Switch DRM_DISPLAY_DP_HELPER to depends on" This reverts commit 0323287de87d7e6e9c22c57d7440aa353a2298d0, as helper code should always be selected by the driver that needs it, for the convenience of the final user configuring a kernel. The user who configures a kernel should not need to know which helpers are needed for the driver he is interested in. Making a driver depend on helper code means that the user needs to know which helpers to enable first, which is very user-unfriendly. Signed-off-by: Geert Uytterhoeven Acked-by: Arnd Bergmann Link: https://patchwork.freedesktop.org/patch/msgid/89ac456805746b6d0c888f10c5120b11aacd3319.1713780345.git.geert+renesas@glider.be Signed-off-by: Maxime Ripard --- drivers/gpu/drm/Kconfig | 2 +- drivers/gpu/drm/amd/amdgpu/Kconfig | 2 +- drivers/gpu/drm/bridge/Kconfig | 10 +++++----- drivers/gpu/drm/bridge/analogix/Kconfig | 6 +++--- drivers/gpu/drm/bridge/cadence/Kconfig | 2 +- drivers/gpu/drm/display/Kconfig | 1 - drivers/gpu/drm/exynos/Kconfig | 2 +- drivers/gpu/drm/i915/Kconfig | 2 +- drivers/gpu/drm/mediatek/Kconfig | 2 +- drivers/gpu/drm/msm/Kconfig | 2 +- drivers/gpu/drm/nouveau/Kconfig | 2 +- drivers/gpu/drm/panel/Kconfig | 8 ++++---- drivers/gpu/drm/radeon/Kconfig | 2 +- drivers/gpu/drm/rockchip/Kconfig | 4 ++-- drivers/gpu/drm/tegra/Kconfig | 2 +- drivers/gpu/drm/xe/Kconfig | 2 +- drivers/gpu/drm/xlnx/Kconfig | 2 +- 17 files changed, 26 insertions(+), 27 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 959b19a04101..33792ca3eeb7 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -75,11 +75,11 @@ config DRM_KUNIT_TEST_HELPERS config DRM_KUNIT_TEST tristate "KUnit tests for DRM" if !KUNIT_ALL_TESTS depends on DRM - depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on KUNIT depends on MMU select DRM_BUDDY + select DRM_DISPLAY_DP_HELPER select DRM_EXEC select DRM_EXPORT_FOR_TESTS if m select DRM_GEM_SHMEM_HELPER diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index ba09121e7deb..cf931b94a188 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -3,12 +3,12 @@ config DRM_AMDGPU tristate "AMD GPU" depends on DRM - depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on MMU depends on PCI depends on !UML select FW_LOADER + select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDMI_HELPER select DRM_DISPLAY_HDCP_HELPER select DRM_KMS_HELPER diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index 801d5b7d3e5d..7d1da1780415 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -93,9 +93,9 @@ config DRM_FSL_LDB config DRM_ITE_IT6505 tristate "ITE IT6505 DisplayPort bridge" depends on DRM_DISPLAY_DP_AUX_BUS - depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on OF + select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDCP_HELPER select DRM_KMS_HELPER select EXTCON @@ -233,9 +233,9 @@ config DRM_PARADE_PS8622 config DRM_PARADE_PS8640 tristate "Parade PS8640 MIPI DSI to eDP Converter" depends on DRM_DISPLAY_DP_AUX_BUS - depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on OF + select DRM_DISPLAY_DP_HELPER select DRM_KMS_HELPER select DRM_MIPI_DSI select DRM_PANEL @@ -319,9 +319,9 @@ config DRM_TOSHIBA_TC358764 config DRM_TOSHIBA_TC358767 tristate "Toshiba TC358767 eDP bridge" - depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on OF + select DRM_DISPLAY_DP_HELPER select DRM_KMS_HELPER select REGMAP_I2C select DRM_MIPI_DSI @@ -342,9 +342,9 @@ config DRM_TOSHIBA_TC358768 config DRM_TOSHIBA_TC358775 tristate "Toshiba TC358775 DSI/LVDS bridge" - depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on OF + select DRM_DISPLAY_DP_HELPER select DRM_KMS_HELPER select REGMAP_I2C select DRM_PANEL @@ -388,9 +388,9 @@ config DRM_TI_SN65DSI83 config DRM_TI_SN65DSI86 tristate "TI SN65DSI86 DSI to eDP bridge" depends on DRM_DISPLAY_DP_AUX_BUS - depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on OF + select DRM_DISPLAY_DP_HELPER select DRM_KMS_HELPER select REGMAP_I2C select DRM_PANEL diff --git a/drivers/gpu/drm/bridge/analogix/Kconfig b/drivers/gpu/drm/bridge/analogix/Kconfig index 9659df6718de..ec98c9453573 100644 --- a/drivers/gpu/drm/bridge/analogix/Kconfig +++ b/drivers/gpu/drm/bridge/analogix/Kconfig @@ -1,10 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_ANALOGIX_ANX6345 tristate "Analogix ANX6345 bridge" - depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on OF select DRM_ANALOGIX_DP + select DRM_DISPLAY_DP_HELPER select DRM_KMS_HELPER select REGMAP_I2C help @@ -15,9 +15,9 @@ config DRM_ANALOGIX_ANX6345 config DRM_ANALOGIX_ANX78XX tristate "Analogix ANX78XX bridge" - depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER select DRM_ANALOGIX_DP + select DRM_DISPLAY_DP_HELPER select DRM_KMS_HELPER select REGMAP_I2C help @@ -34,9 +34,9 @@ config DRM_ANALOGIX_ANX7625 tristate "Analogix Anx7625 MIPI to DP interface support" depends on DRM depends on DRM_DISPLAY_DP_AUX_BUS - depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on OF + select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDCP_HELPER select DRM_MIPI_DSI help diff --git a/drivers/gpu/drm/bridge/cadence/Kconfig b/drivers/gpu/drm/bridge/cadence/Kconfig index 3480fd4d0a5f..20143afded40 100644 --- a/drivers/gpu/drm/bridge/cadence/Kconfig +++ b/drivers/gpu/drm/bridge/cadence/Kconfig @@ -23,9 +23,9 @@ endif config DRM_CDNS_MHDP8546 tristate "Cadence DPI/DP bridge" - depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on OF + select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDCP_HELPER select DRM_KMS_HELPER select DRM_PANEL_BRIDGE diff --git a/drivers/gpu/drm/display/Kconfig b/drivers/gpu/drm/display/Kconfig index 9801f47a3704..0cd439691422 100644 --- a/drivers/gpu/drm/display/Kconfig +++ b/drivers/gpu/drm/display/Kconfig @@ -39,7 +39,6 @@ config DRM_DISPLAY_DP_AUX_CHARDEV config DRM_DISPLAY_DP_HELPER bool "DRM DisplayPort Helpers" depends on DRM_DISPLAY_HELPER - default y help DRM display helpers for DisplayPort. diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig index 6a26a0b8eff2..4b0183bf221c 100644 --- a/drivers/gpu/drm/exynos/Kconfig +++ b/drivers/gpu/drm/exynos/Kconfig @@ -67,9 +67,9 @@ config DRM_EXYNOS_DSI config DRM_EXYNOS_DP bool "Exynos specific extensions for Analogix DP driver" depends on DRM_EXYNOS_FIMD || DRM_EXYNOS7_DECON - depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER select DRM_ANALOGIX_DP + select DRM_DISPLAY_DP_HELPER default DRM_EXYNOS select DRM_PANEL help diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index dbde4e29d93a..43183a68a095 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -2,7 +2,6 @@ config DRM_I915 tristate "Intel 8xx/9xx/G3x/G4x/HD Graphics" depends on DRM - depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on X86 && PCI depends on !PREEMPT_RT @@ -12,6 +11,7 @@ config DRM_I915 # the shmem_readpage() which depends upon tmpfs select SHMEM select TMPFS + select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDCP_HELPER select DRM_DISPLAY_HDMI_HELPER select DRM_KMS_HELPER diff --git a/drivers/gpu/drm/mediatek/Kconfig b/drivers/gpu/drm/mediatek/Kconfig index 6caab8d4d4e0..2add54486ac4 100644 --- a/drivers/gpu/drm/mediatek/Kconfig +++ b/drivers/gpu/drm/mediatek/Kconfig @@ -23,10 +23,10 @@ config DRM_MEDIATEK config DRM_MEDIATEK_DP tristate "DRM DPTX Support for MediaTek SoCs" depends on DRM_DISPLAY_DP_AUX_BUS - depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on DRM_MEDIATEK select PHY_MTK_DP + select DRM_DISPLAY_DP_HELPER help DRM/KMS Display Port driver for MediaTek SoCs. diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index f7708590583e..28a898722ace 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -6,7 +6,6 @@ config DRM_MSM depends on COMMON_CLK depends on DRM depends on DRM_DISPLAY_DP_AUX_BUS - depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on IOMMU_SUPPORT depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n @@ -17,6 +16,7 @@ config DRM_MSM select IOMMU_IO_PGTABLE select QCOM_MDT_LOADER if ARCH_QCOM select REGULATOR + select DRM_DISPLAY_DP_HELPER select DRM_EXEC select DRM_KMS_HELPER select DRM_PANEL diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index 7cc305b2826d..5ac852b816db 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -2,12 +2,12 @@ config DRM_NOUVEAU tristate "Nouveau (NVIDIA) cards" depends on DRM - depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on PCI depends on MMU select IOMMU_API select FW_LOADER + select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDMI_HELPER select DRM_KMS_HELPER select DRM_TTM diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index e54f6f5604ed..0a0d97464f02 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -545,10 +545,10 @@ config DRM_PANEL_RAYDIUM_RM68200 config DRM_PANEL_RAYDIUM_RM692E5 tristate "Raydium RM692E5-based DSI panel" depends on BACKLIGHT_CLASS_DEVICE - depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on DRM_MIPI_DSI depends on OF + select DRM_DISPLAY_DP_HELPER help Say Y here if you want to enable support for Raydium RM692E5-based display panels, such as the one found in the Fairphone 5 smartphone. @@ -584,10 +584,10 @@ config DRM_PANEL_SAMSUNG_ATNA33XC20 tristate "Samsung ATNA33XC20 eDP panel" depends on BACKLIGHT_CLASS_DEVICE depends on DRM_DISPLAY_DP_AUX_BUS - depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on OF depends on PM + select DRM_DISPLAY_DP_HELPER help DRM panel driver for the Samsung ATNA33XC20 panel. This panel can't be handled by the DRM_PANEL_SIMPLE driver because its power @@ -824,11 +824,11 @@ config DRM_PANEL_EDP tristate "support for simple Embedded DisplayPort panels" depends on BACKLIGHT_CLASS_DEVICE depends on DRM_DISPLAY_DP_AUX_BUS - depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on OF depends on PM select VIDEOMODE_HELPERS + select DRM_DISPLAY_DP_HELPER select DRM_KMS_HELPER help DRM panel driver for dumb eDP panels that need at most a regulator and @@ -903,10 +903,10 @@ config DRM_PANEL_TRULY_NT35597_WQXGA config DRM_PANEL_VISIONOX_R66451 tristate "Visionox R66451" depends on BACKLIGHT_CLASS_DEVICE - depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on DRM_MIPI_DSI depends on OF + select DRM_DISPLAY_DP_HELPER help Say Y here if you want to enable support for Visionox R66451 1080x2340 AMOLED DSI panel. diff --git a/drivers/gpu/drm/radeon/Kconfig b/drivers/gpu/drm/radeon/Kconfig index 18c867219a70..07d330450f05 100644 --- a/drivers/gpu/drm/radeon/Kconfig +++ b/drivers/gpu/drm/radeon/Kconfig @@ -4,11 +4,11 @@ config DRM_RADEON tristate "ATI Radeon" depends on AGP || !AGP depends on DRM - depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on PCI depends on MMU select FW_LOADER + select DRM_DISPLAY_DP_HELPER select DRM_KMS_HELPER select DRM_SUBALLOC_HELPER select DRM_TTM diff --git a/drivers/gpu/drm/rockchip/Kconfig b/drivers/gpu/drm/rockchip/Kconfig index 4b49a14758fe..b72c0bbf346d 100644 --- a/drivers/gpu/drm/rockchip/Kconfig +++ b/drivers/gpu/drm/rockchip/Kconfig @@ -35,9 +35,9 @@ config ROCKCHIP_VOP2 config ROCKCHIP_ANALOGIX_DP bool "Rockchip specific extensions for Analogix DP driver" - depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on ROCKCHIP_VOP + select DRM_DISPLAY_DP_HELPER help This selects support for Rockchip SoC specific extensions for the Analogix Core DP driver. If you want to enable DP @@ -45,9 +45,9 @@ config ROCKCHIP_ANALOGIX_DP config ROCKCHIP_CDN_DP bool "Rockchip cdn DP" - depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on EXTCON=y || (EXTCON=m && DRM_ROCKCHIP=m) + select DRM_DISPLAY_DP_HELPER help This selects support for Rockchip SoC specific extensions for the cdn DP driver. If you want to enable Dp on diff --git a/drivers/gpu/drm/tegra/Kconfig b/drivers/gpu/drm/tegra/Kconfig index bb6e35261f11..e0385d175ec6 100644 --- a/drivers/gpu/drm/tegra/Kconfig +++ b/drivers/gpu/drm/tegra/Kconfig @@ -5,9 +5,9 @@ config DRM_TEGRA depends on COMMON_CLK depends on DRM depends on DRM_DISPLAY_DP_AUX_BUS - depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on OF + select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDMI_HELPER select DRM_KMS_HELPER select DRM_MIPI_DSI diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index d97b3f4af5ed..4407e14338fc 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -3,7 +3,6 @@ config DRM_XE tristate "Intel Xe Graphics" depends on (m || (y && KUNIT=y)) depends on DRM - depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on MMU depends on PCI @@ -18,6 +17,7 @@ config DRM_XE select DRM_KUNIT_TEST_HELPERS if DRM_XE_KUNIT_TEST != n select DRM_PANEL select DRM_SUBALLOC_HELPER + select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDCP_HELPER select DRM_DISPLAY_HDMI_HELPER select DRM_MIPI_DSI diff --git a/drivers/gpu/drm/xlnx/Kconfig b/drivers/gpu/drm/xlnx/Kconfig index 41d753b14ccd..7a14a8c2e7be 100644 --- a/drivers/gpu/drm/xlnx/Kconfig +++ b/drivers/gpu/drm/xlnx/Kconfig @@ -4,12 +4,12 @@ config DRM_ZYNQMP_DPSUB depends on COMMON_CLK depends on DMADEVICES depends on DRM - depends on DRM_DISPLAY_DP_HELPER depends on DRM_DISPLAY_HELPER depends on OF depends on PHY_XILINX_ZYNQMP depends on XILINX_ZYNQMP_DPDMA select DMA_ENGINE + select DRM_DISPLAY_DP_HELPER select DRM_GEM_DMA_HELPER select DRM_KMS_HELPER select GENERIC_PHY -- cgit v1.2.3 From 05b8b6dd225d541b16145a0578ed93d91e43f0c1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 22 Apr 2024 12:30:37 +0200 Subject: Revert "drm: Switch DRM_DISPLAY_HELPER to depends on" This reverts commit e075e496f516bf92bc0cbaf94d64e8d4a6b58321, as helper code should always be selected by the driver that needs it, for the convenience of the final user configuring a kernel. The user who configures a kernel should not need to know which helpers are needed for the driver he is interested in. Making a driver depend on helper code means that the user needs to know which helpers to enable first, which is very user-unfriendly. Signed-off-by: Geert Uytterhoeven Acked-by: Arnd Bergmann Link: https://patchwork.freedesktop.org/patch/msgid/1ba76cc4d96a8afefff5d1bc42fb1e1329c5da68.1713780345.git.geert+renesas@glider.be Signed-off-by: Maxime Ripard --- drivers/gpu/drm/Kconfig | 6 ++---- drivers/gpu/drm/amd/amdgpu/Kconfig | 6 ++---- drivers/gpu/drm/bridge/Kconfig | 10 +++++----- drivers/gpu/drm/bridge/analogix/Kconfig | 6 +++--- drivers/gpu/drm/bridge/cadence/Kconfig | 4 ++-- drivers/gpu/drm/bridge/synopsys/Kconfig | 2 +- drivers/gpu/drm/display/Kconfig | 1 - drivers/gpu/drm/exynos/Kconfig | 2 +- drivers/gpu/drm/i915/Kconfig | 2 +- drivers/gpu/drm/mediatek/Kconfig | 2 +- drivers/gpu/drm/msm/Kconfig | 4 ++-- drivers/gpu/drm/nouveau/Kconfig | 6 ++---- drivers/gpu/drm/panel/Kconfig | 20 ++++++++++---------- drivers/gpu/drm/radeon/Kconfig | 6 ++---- drivers/gpu/drm/rockchip/Kconfig | 4 ++-- drivers/gpu/drm/tegra/Kconfig | 2 +- drivers/gpu/drm/vc4/Kconfig | 8 ++++---- drivers/gpu/drm/xe/Kconfig | 7 ++----- drivers/gpu/drm/xlnx/Kconfig | 6 ++---- 19 files changed, 45 insertions(+), 59 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 33792ca3eeb7..bf4020915e29 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -74,12 +74,10 @@ config DRM_KUNIT_TEST_HELPERS config DRM_KUNIT_TEST tristate "KUnit tests for DRM" if !KUNIT_ALL_TESTS - depends on DRM - depends on DRM_DISPLAY_HELPER - depends on KUNIT - depends on MMU + depends on DRM && KUNIT && MMU select DRM_BUDDY select DRM_DISPLAY_DP_HELPER + select DRM_DISPLAY_HELPER select DRM_EXEC select DRM_EXPORT_FOR_TESTS if m select DRM_GEM_SHMEM_HELPER diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index cf931b94a188..22d88f8ef527 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -2,15 +2,13 @@ config DRM_AMDGPU tristate "AMD GPU" - depends on DRM - depends on DRM_DISPLAY_HELPER - depends on MMU - depends on PCI + depends on DRM && PCI && MMU depends on !UML select FW_LOADER select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDMI_HELPER select DRM_DISPLAY_HDCP_HELPER + select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select DRM_SCHED select DRM_TTM diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index 8c0883bc3f2a..c621be1a99a8 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -92,10 +92,10 @@ config DRM_FSL_LDB config DRM_ITE_IT6505 tristate "ITE IT6505 DisplayPort bridge" - depends on DRM_DISPLAY_HELPER depends on OF select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDCP_HELPER + select DRM_DISPLAY_HELPER select DRM_DISPLAY_DP_AUX_BUS select DRM_KMS_HELPER select EXTCON @@ -232,9 +232,9 @@ config DRM_PARADE_PS8622 config DRM_PARADE_PS8640 tristate "Parade PS8640 MIPI DSI to eDP Converter" - depends on DRM_DISPLAY_HELPER depends on OF select DRM_DISPLAY_DP_HELPER + select DRM_DISPLAY_HELPER select DRM_DISPLAY_DP_AUX_BUS select DRM_KMS_HELPER select DRM_MIPI_DSI @@ -319,9 +319,9 @@ config DRM_TOSHIBA_TC358764 config DRM_TOSHIBA_TC358767 tristate "Toshiba TC358767 eDP bridge" - depends on DRM_DISPLAY_HELPER depends on OF select DRM_DISPLAY_DP_HELPER + select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select REGMAP_I2C select DRM_MIPI_DSI @@ -342,9 +342,9 @@ config DRM_TOSHIBA_TC358768 config DRM_TOSHIBA_TC358775 tristate "Toshiba TC358775 DSI/LVDS bridge" - depends on DRM_DISPLAY_HELPER depends on OF select DRM_DISPLAY_DP_HELPER + select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select REGMAP_I2C select DRM_PANEL @@ -387,9 +387,9 @@ config DRM_TI_SN65DSI83 config DRM_TI_SN65DSI86 tristate "TI SN65DSI86 DSI to eDP bridge" - depends on DRM_DISPLAY_HELPER depends on OF select DRM_DISPLAY_DP_HELPER + select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select REGMAP_I2C select DRM_PANEL diff --git a/drivers/gpu/drm/bridge/analogix/Kconfig b/drivers/gpu/drm/bridge/analogix/Kconfig index 16d18dde483a..4846b2e9be7c 100644 --- a/drivers/gpu/drm/bridge/analogix/Kconfig +++ b/drivers/gpu/drm/bridge/analogix/Kconfig @@ -1,10 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_ANALOGIX_ANX6345 tristate "Analogix ANX6345 bridge" - depends on DRM_DISPLAY_HELPER depends on OF select DRM_ANALOGIX_DP select DRM_DISPLAY_DP_HELPER + select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select REGMAP_I2C help @@ -15,9 +15,9 @@ config DRM_ANALOGIX_ANX6345 config DRM_ANALOGIX_ANX78XX tristate "Analogix ANX78XX bridge" - depends on DRM_DISPLAY_HELPER select DRM_ANALOGIX_DP select DRM_DISPLAY_DP_HELPER + select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select REGMAP_I2C help @@ -33,10 +33,10 @@ config DRM_ANALOGIX_DP config DRM_ANALOGIX_ANX7625 tristate "Analogix Anx7625 MIPI to DP interface support" depends on DRM - depends on DRM_DISPLAY_HELPER depends on OF select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDCP_HELPER + select DRM_DISPLAY_HELPER select DRM_DISPLAY_DP_AUX_BUS select DRM_MIPI_DSI help diff --git a/drivers/gpu/drm/bridge/cadence/Kconfig b/drivers/gpu/drm/bridge/cadence/Kconfig index 20143afded40..cced81633ddc 100644 --- a/drivers/gpu/drm/bridge/cadence/Kconfig +++ b/drivers/gpu/drm/bridge/cadence/Kconfig @@ -23,12 +23,12 @@ endif config DRM_CDNS_MHDP8546 tristate "Cadence DPI/DP bridge" - depends on DRM_DISPLAY_HELPER - depends on OF select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDCP_HELPER + select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select DRM_PANEL_BRIDGE + depends on OF help Support Cadence DPI to DP bridge. This is an internal bridge and is meant to be directly embedded in a SoC. diff --git a/drivers/gpu/drm/bridge/synopsys/Kconfig b/drivers/gpu/drm/bridge/synopsys/Kconfig index f366ece47146..15fc182d05ef 100644 --- a/drivers/gpu/drm/bridge/synopsys/Kconfig +++ b/drivers/gpu/drm/bridge/synopsys/Kconfig @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_DW_HDMI tristate - depends on DRM_DISPLAY_HELPER select DRM_DISPLAY_HDMI_HELPER + select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select REGMAP_MMIO select CEC_CORE if CEC_NOTIFIER diff --git a/drivers/gpu/drm/display/Kconfig b/drivers/gpu/drm/display/Kconfig index cffa2acdbc6c..c77e7f85bd67 100644 --- a/drivers/gpu/drm/display/Kconfig +++ b/drivers/gpu/drm/display/Kconfig @@ -3,7 +3,6 @@ config DRM_DISPLAY_HELPER tristate "DRM Display Helpers" depends on DRM - default y help DRM helpers for display adapters. diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig index 4b0183bf221c..733b109a5095 100644 --- a/drivers/gpu/drm/exynos/Kconfig +++ b/drivers/gpu/drm/exynos/Kconfig @@ -4,6 +4,7 @@ config DRM_EXYNOS depends on OF && DRM && COMMON_CLK depends on ARCH_S3C64XX || ARCH_S5PV210 || ARCH_EXYNOS || COMPILE_TEST depends on MMU + select DRM_DISPLAY_HELPER if DRM_EXYNOS_DP select DRM_KMS_HELPER select VIDEOMODE_HELPERS select FB_DMAMEM_HELPERS if DRM_FBDEV_EMULATION @@ -67,7 +68,6 @@ config DRM_EXYNOS_DSI config DRM_EXYNOS_DP bool "Exynos specific extensions for Analogix DP driver" depends on DRM_EXYNOS_FIMD || DRM_EXYNOS7_DECON - depends on DRM_DISPLAY_HELPER select DRM_ANALOGIX_DP select DRM_DISPLAY_DP_HELPER default DRM_EXYNOS diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 43183a68a095..5932024f8f95 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -2,7 +2,6 @@ config DRM_I915 tristate "Intel 8xx/9xx/G3x/G4x/HD Graphics" depends on DRM - depends on DRM_DISPLAY_HELPER depends on X86 && PCI depends on !PREEMPT_RT select INTEL_GTT if X86 @@ -14,6 +13,7 @@ config DRM_I915 select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDCP_HELPER select DRM_DISPLAY_HDMI_HELPER + select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select DRM_PANEL select DRM_MIPI_DSI diff --git a/drivers/gpu/drm/mediatek/Kconfig b/drivers/gpu/drm/mediatek/Kconfig index 50bb28327f65..96cbe020f493 100644 --- a/drivers/gpu/drm/mediatek/Kconfig +++ b/drivers/gpu/drm/mediatek/Kconfig @@ -22,9 +22,9 @@ config DRM_MEDIATEK config DRM_MEDIATEK_DP tristate "DRM DPTX Support for MediaTek SoCs" - depends on DRM_DISPLAY_HELPER depends on DRM_MEDIATEK select PHY_MTK_DP + select DRM_DISPLAY_HELPER select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_DP_AUX_BUS help diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index 2055266506e5..27d72ed8b389 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -2,10 +2,9 @@ config DRM_MSM tristate "MSM DRM" + depends on DRM depends on ARCH_QCOM || SOC_IMX5 || COMPILE_TEST depends on COMMON_CLK - depends on DRM - depends on DRM_DISPLAY_HELPER depends on IOMMU_SUPPORT depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n depends on QCOM_OCMEM || QCOM_OCMEM=n @@ -17,6 +16,7 @@ config DRM_MSM select REGULATOR select DRM_DISPLAY_DP_AUX_BUS select DRM_DISPLAY_DP_HELPER + select DRM_DISPLAY_HELPER select DRM_EXEC select DRM_KMS_HELPER select DRM_PANEL diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index 5ac852b816db..ceef470c9fbf 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -1,14 +1,12 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_NOUVEAU tristate "Nouveau (NVIDIA) cards" - depends on DRM - depends on DRM_DISPLAY_HELPER - depends on PCI - depends on MMU + depends on DRM && PCI && MMU select IOMMU_API select FW_LOADER select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDMI_HELPER + select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select DRM_TTM select DRM_TTM_HELPER diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index 108ced2e8794..a979e9bb0905 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -544,11 +544,11 @@ config DRM_PANEL_RAYDIUM_RM68200 config DRM_PANEL_RAYDIUM_RM692E5 tristate "Raydium RM692E5-based DSI panel" - depends on BACKLIGHT_CLASS_DEVICE - depends on DRM_DISPLAY_HELPER - depends on DRM_MIPI_DSI depends on OF + depends on DRM_MIPI_DSI + depends on BACKLIGHT_CLASS_DEVICE select DRM_DISPLAY_DP_HELPER + select DRM_DISPLAY_HELPER help Say Y here if you want to enable support for Raydium RM692E5-based display panels, such as the one found in the Fairphone 5 smartphone. @@ -582,11 +582,11 @@ config DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 config DRM_PANEL_SAMSUNG_ATNA33XC20 tristate "Samsung ATNA33XC20 eDP panel" - depends on BACKLIGHT_CLASS_DEVICE - depends on DRM_DISPLAY_HELPER depends on OF + depends on BACKLIGHT_CLASS_DEVICE depends on PM select DRM_DISPLAY_DP_HELPER + select DRM_DISPLAY_HELPER select DRM_DISPLAY_DP_AUX_BUS help DRM panel driver for the Samsung ATNA33XC20 panel. This panel can't @@ -822,12 +822,12 @@ config DRM_PANEL_STARTEK_KD070FHFID015 config DRM_PANEL_EDP tristate "support for simple Embedded DisplayPort panels" - depends on BACKLIGHT_CLASS_DEVICE - depends on DRM_DISPLAY_HELPER depends on OF + depends on BACKLIGHT_CLASS_DEVICE depends on PM select VIDEOMODE_HELPERS select DRM_DISPLAY_DP_HELPER + select DRM_DISPLAY_HELPER select DRM_DISPLAY_DP_AUX_BUS select DRM_KMS_HELPER help @@ -902,11 +902,11 @@ config DRM_PANEL_TRULY_NT35597_WQXGA config DRM_PANEL_VISIONOX_R66451 tristate "Visionox R66451" - depends on BACKLIGHT_CLASS_DEVICE - depends on DRM_DISPLAY_HELPER - depends on DRM_MIPI_DSI depends on OF + depends on DRM_MIPI_DSI + depends on BACKLIGHT_CLASS_DEVICE select DRM_DISPLAY_DP_HELPER + select DRM_DISPLAY_HELPER help Say Y here if you want to enable support for Visionox R66451 1080x2340 AMOLED DSI panel. diff --git a/drivers/gpu/drm/radeon/Kconfig b/drivers/gpu/drm/radeon/Kconfig index 07d330450f05..f98356be0af2 100644 --- a/drivers/gpu/drm/radeon/Kconfig +++ b/drivers/gpu/drm/radeon/Kconfig @@ -2,13 +2,11 @@ config DRM_RADEON tristate "ATI Radeon" + depends on DRM && PCI && MMU depends on AGP || !AGP - depends on DRM - depends on DRM_DISPLAY_HELPER - depends on PCI - depends on MMU select FW_LOADER select DRM_DISPLAY_DP_HELPER + select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select DRM_SUBALLOC_HELPER select DRM_TTM diff --git a/drivers/gpu/drm/rockchip/Kconfig b/drivers/gpu/drm/rockchip/Kconfig index b72c0bbf346d..0d5260e10f27 100644 --- a/drivers/gpu/drm/rockchip/Kconfig +++ b/drivers/gpu/drm/rockchip/Kconfig @@ -35,8 +35,8 @@ config ROCKCHIP_VOP2 config ROCKCHIP_ANALOGIX_DP bool "Rockchip specific extensions for Analogix DP driver" - depends on DRM_DISPLAY_HELPER depends on ROCKCHIP_VOP + select DRM_DISPLAY_HELPER select DRM_DISPLAY_DP_HELPER help This selects support for Rockchip SoC specific extensions @@ -45,8 +45,8 @@ config ROCKCHIP_ANALOGIX_DP config ROCKCHIP_CDN_DP bool "Rockchip cdn DP" - depends on DRM_DISPLAY_HELPER depends on EXTCON=y || (EXTCON=m && DRM_ROCKCHIP=m) + select DRM_DISPLAY_HELPER select DRM_DISPLAY_DP_HELPER help This selects support for Rockchip SoC specific extensions diff --git a/drivers/gpu/drm/tegra/Kconfig b/drivers/gpu/drm/tegra/Kconfig index 44381ee6ea9e..782f51d3044a 100644 --- a/drivers/gpu/drm/tegra/Kconfig +++ b/drivers/gpu/drm/tegra/Kconfig @@ -4,10 +4,10 @@ config DRM_TEGRA depends on ARCH_TEGRA || COMPILE_TEST depends on COMMON_CLK depends on DRM - depends on DRM_DISPLAY_HELPER depends on OF select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDMI_HELPER + select DRM_DISPLAY_HELPER select DRM_DISPLAY_DP_AUX_BUS select DRM_KMS_HELPER select DRM_MIPI_DSI diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig index 98772a6b5bf0..91dcf8d174d6 100644 --- a/drivers/gpu/drm/vc4/Kconfig +++ b/drivers/gpu/drm/vc4/Kconfig @@ -2,15 +2,15 @@ config DRM_VC4 tristate "Broadcom VC4 Graphics" depends on ARCH_BCM || ARCH_BCM2835 || COMPILE_TEST - depends on COMMON_CLK - depends on DRM - depends on DRM_DISPLAY_HELPER - depends on PM # Make sure not 'y' when RASPBERRYPI_FIRMWARE is 'm'. This can only # happen when COMPILE_TEST=y, hence the added !RASPBERRYPI_FIRMWARE. depends on RASPBERRYPI_FIRMWARE || (COMPILE_TEST && !RASPBERRYPI_FIRMWARE) + depends on DRM depends on SND && SND_SOC + depends on COMMON_CLK + depends on PM select DRM_DISPLAY_HDMI_HELPER + select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select DRM_GEM_DMA_HELPER select DRM_PANEL_BRIDGE diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 4407e14338fc..63f1e2d1649f 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -1,11 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_XE tristate "Intel Xe Graphics" - depends on (m || (y && KUNIT=y)) - depends on DRM - depends on DRM_DISPLAY_HELPER - depends on MMU - depends on PCI + depends on DRM && PCI && MMU && (m || (y && KUNIT=y)) select INTERVAL_TREE # we need shmfs for the swappable backing store, and in particular # the shmem_readpage() which depends upon tmpfs @@ -20,6 +16,7 @@ config DRM_XE select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HDCP_HELPER select DRM_DISPLAY_HDMI_HELPER + select DRM_DISPLAY_HELPER select DRM_MIPI_DSI select RELAY select IRQ_WORK diff --git a/drivers/gpu/drm/xlnx/Kconfig b/drivers/gpu/drm/xlnx/Kconfig index 7a14a8c2e7be..68ee897de9d7 100644 --- a/drivers/gpu/drm/xlnx/Kconfig +++ b/drivers/gpu/drm/xlnx/Kconfig @@ -1,15 +1,13 @@ config DRM_ZYNQMP_DPSUB tristate "ZynqMP DisplayPort Controller Driver" depends on ARCH_ZYNQMP || COMPILE_TEST - depends on COMMON_CLK + depends on COMMON_CLK && DRM && OF depends on DMADEVICES - depends on DRM - depends on DRM_DISPLAY_HELPER - depends on OF depends on PHY_XILINX_ZYNQMP depends on XILINX_ZYNQMP_DPDMA select DMA_ENGINE select DRM_DISPLAY_DP_HELPER + select DRM_DISPLAY_HELPER select DRM_GEM_DMA_HELPER select DRM_KMS_HELPER select GENERIC_PHY -- cgit v1.2.3