Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 378
1 file changed, 329 insertions, 49 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index a38553f38fdc..dc94d58d33a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -30,6 +30,8 @@
 #include "amdgpu_xcp.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_trace.h"
+#include "amdgpu_reset.h"
+#include "gc/gc_9_0_sh_mask.h"
 
 #include "sdma/sdma_4_4_2_offset.h"
 #include "sdma/sdma_4_4_2_sh_mask.h"
@@ -105,6 +107,8 @@ static void sdma_v4_4_2_set_buffer_funcs(struct amdgpu_device *adev);
 static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev);
 static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev);
 static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev);
+static void sdma_v4_4_2_set_engine_reset_funcs(struct amdgpu_device *adev);
+static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev);
 
 static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device *adev,
         u32 instance, u32 offset)
@@ -189,6 +193,7 @@ static int sdma_v4_4_2_init_microcode(struct amdgpu_device *adev)
 
     for (i = 0; i < adev->sdma.num_instances; i++) {
         if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2) ||
+            amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 4) ||
             amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5)) {
             ret = amdgpu_sdma_init_microcode(adev, 0, true);
             break;
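The microcode hunk above extends the list of SDMA IP versions that share a single firmware image (instance 0 loaded with the duplicate flag) to include 4.4.4. A minimal standalone sketch of that membership check follows; the IP_VERSION packing is assumed to mirror amdgpu's major/minor/revision encoding, and sdma_fw_is_shared() is a hypothetical helper, not a function from this patch.

    /* Minimal sketch (not driver code): model the "these SDMA IP versions
     * share one firmware image" check that the hunk above extends with 4.4.4.
     * IP_VERSION packing is assumed to mirror amdgpu's (major << 16 | minor << 8 | rev).
     */
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    #define IP_VERSION(maj, min, rev) (((maj) << 16) | ((min) << 8) | (rev))

    static bool sdma_fw_is_shared(uint32_t ip_ver)
    {
        static const uint32_t shared[] = {
            IP_VERSION(4, 4, 2),
            IP_VERSION(4, 4, 4),    /* newly added by this patch */
            IP_VERSION(4, 4, 5),
        };

        for (size_t i = 0; i < sizeof(shared) / sizeof(shared[0]); i++)
            if (ip_ver == shared[i])
                return true;
        return false;
    }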
@@ -667,11 +672,12 @@ static uint32_t sdma_v4_4_2_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl)
  *
  * @adev: amdgpu_device pointer
  * @i: instance to resume
+ * @restore: whether to restore the cached wptr on restart
  *
  * Set up the gfx DMA ring buffers and enable them.
  * Returns 0 for success, error for failure.
  */
-static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i)
+static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i, bool restore)
 {
     struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
     u32 rb_cntl, ib_cntl, wptr_poll_cntl;
@@ -679,6 +685,7 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i)
     u32 doorbell;
     u32 doorbell_offset;
     u64 wptr_gpu_addr;
+    u64 rwptr;
 
     wb_offset = (ring->rptr_offs * 4);
@@ -698,16 +705,32 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i)
     WREG32_SDMA(i, regSDMA_GFX_RB_BASE, ring->gpu_addr >> 8);
     WREG32_SDMA(i, regSDMA_GFX_RB_BASE_HI, ring->gpu_addr >> 40);
 
-    ring->wptr = 0;
+    if (!restore)
+        ring->wptr = 0;
 
     /* before programing wptr to a less value, need set minor_ptr_update first */
     WREG32_SDMA(i, regSDMA_GFX_MINOR_PTR_UPDATE, 1);
 
+    /* For the guilty queue, set RPTR to the current wptr to skip bad commands.
+     * If the queue is not guilty, restore the cached rptr and continue execution.
+     */
+    if (adev->sdma.instance[i].gfx_guilty)
+        rwptr = ring->wptr;
+    else
+        rwptr = ring->cached_rptr;
+
     /* Initialize the ring buffer's read and write pointers */
-    WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0);
-    WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0);
-    WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0);
-    WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0);
+    if (restore) {
+        WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, lower_32_bits(rwptr << 2));
+        WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, upper_32_bits(rwptr << 2));
+        WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, lower_32_bits(rwptr << 2));
+        WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, upper_32_bits(rwptr << 2));
+    } else {
+        WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0);
+        WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0);
+        WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0);
+        WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0);
+    }
 
     doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL);
     doorbell_offset = RREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET);
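In the gfx-queue restore path just above, ring->wptr and ring->cached_rptr are ring indices in dwords while the RB_RPTR/RB_WPTR registers take byte offsets, which is why the chosen value is shifted left by 2 and split into LO/HI halves. Below is a self-contained sketch of that selection and conversion; lower_32_bits()/upper_32_bits() are redefined locally as stand-ins for the kernel helpers, and restore_ring_pointers() is illustrative only.

    /* Standalone model of the restore path above: pick the pointer to program
     * (skip past bad packets on the guilty queue, otherwise resume from the
     * cached rptr), then convert dwords -> bytes and split into LO/HI halves.
     */
    #include <stdbool.h>
    #include <stdint.h>

    #define lower_32_bits(v) ((uint32_t)((v) & 0xffffffffU))
    #define upper_32_bits(v) ((uint32_t)((v) >> 32))

    struct rb_ptr_regs {
        uint32_t rptr_lo, rptr_hi, wptr_lo, wptr_hi;
    };

    static struct rb_ptr_regs restore_ring_pointers(bool guilty,
                                                    uint64_t wptr_dw,
                                                    uint64_t cached_rptr_dw)
    {
        /* dword index to resume from */
        uint64_t rwptr = guilty ? wptr_dw : cached_rptr_dw;
        /* registers are programmed in bytes, hence the << 2 */
        uint64_t bytes = rwptr << 2;

        return (struct rb_ptr_regs){
            .rptr_lo = lower_32_bits(bytes), .rptr_hi = upper_32_bits(bytes),
            .wptr_lo = lower_32_bits(bytes), .wptr_hi = upper_32_bits(bytes),
        };
    }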
@@ -755,11 +778,12 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i)
  *
  * @adev: amdgpu_device pointer
  * @i: instance to resume
+ * @restore: whether to restore the ring state instead of reinitializing it
  *
  * Set up the page DMA ring buffers and enable them.
  * Returns 0 for success, error for failure.
  */
-static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i)
+static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i, bool restore)
 {
     struct amdgpu_ring *ring = &adev->sdma.instance[i].page;
     u32 rb_cntl, ib_cntl, wptr_poll_cntl;
@@ -767,6 +791,7 @@ static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i)
     u32 doorbell;
     u32 doorbell_offset;
     u64 wptr_gpu_addr;
+    u64 rwptr;
 
     wb_offset = (ring->rptr_offs * 4);
@@ -774,11 +799,26 @@ static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i)
     rb_cntl = sdma_v4_4_2_rb_cntl(ring, rb_cntl);
     WREG32_SDMA(i, regSDMA_PAGE_RB_CNTL, rb_cntl);
 
+    /* For the guilty queue, set RPTR to the current wptr to skip bad commands.
+     * If the queue is not guilty, restore the cached rptr and continue execution.
+     */
+    if (adev->sdma.instance[i].page_guilty)
+        rwptr = ring->wptr;
+    else
+        rwptr = ring->cached_rptr;
+
     /* Initialize the ring buffer's read and write pointers */
-    WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, 0);
-    WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, 0);
-    WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, 0);
-    WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, 0);
+    if (restore) {
+        WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, lower_32_bits(rwptr << 2));
+        WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, upper_32_bits(rwptr << 2));
+        WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, lower_32_bits(rwptr << 2));
+        WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, upper_32_bits(rwptr << 2));
+    } else {
+        WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, 0);
+        WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, 0);
+        WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, 0);
+        WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, 0);
+    }
 
     /* set the wb address whether it's enabled or not */
     WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_ADDR_HI,
@@ -792,7 +832,8 @@
     WREG32_SDMA(i, regSDMA_PAGE_RB_BASE, ring->gpu_addr >> 8);
     WREG32_SDMA(i, regSDMA_PAGE_RB_BASE_HI, ring->gpu_addr >> 40);
 
-    ring->wptr = 0;
+    if (!restore)
+        ring->wptr = 0;
 
     /* before programing wptr to a less value, need set minor_ptr_update first */
     WREG32_SDMA(i, regSDMA_PAGE_MINOR_PTR_UPDATE, 1);
@@ -911,12 +952,13 @@ static int sdma_v4_4_2_inst_load_microcode(struct amdgpu_device *adev,
  *
  * @adev: amdgpu_device pointer
  * @inst_mask: mask of dma engine instances to be enabled
+ * @restore: whether to restore the ring state instead of reinitializing it
  *
  * Set up the DMA engines and enable them.
  * Returns 0 for success, error for failure.
  */
 static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev,
-                                  uint32_t inst_mask)
+                                  uint32_t inst_mask, bool restore)
 {
     struct amdgpu_ring *ring;
     uint32_t tmp_mask;
@@ -927,7 +969,7 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev,
         sdma_v4_4_2_inst_enable(adev, false, inst_mask);
     } else {
         /* bypass sdma microcode loading on Gopher */
-        if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP &&
+        if (!restore && adev->firmware.load_type != AMDGPU_FW_LOAD_PSP &&
             adev->sdma.instance[0].fw) {
             r = sdma_v4_4_2_inst_load_microcode(adev, inst_mask);
             if (r)
@@ -946,17 +988,19 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev,
         uint32_t temp;
 
         WREG32_SDMA(i, regSDMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
-        sdma_v4_4_2_gfx_resume(adev, i);
+        sdma_v4_4_2_gfx_resume(adev, i, restore);
         if (adev->sdma.has_page_queue)
-            sdma_v4_4_2_page_resume(adev, i);
+            sdma_v4_4_2_page_resume(adev, i, restore);
 
         /* set utc l1 enable flag always to 1 */
         temp = RREG32_SDMA(i, regSDMA_CNTL);
         temp = REG_SET_FIELD(temp, SDMA_CNTL, UTC_L1_ENABLE, 1);
-        /* enable context empty interrupt during initialization */
-        temp = REG_SET_FIELD(temp, SDMA_CNTL, CTXEMPTY_INT_ENABLE, 1);
-        WREG32_SDMA(i, regSDMA_CNTL, temp);
+        if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) < IP_VERSION(4, 4, 5)) {
+            /* enable context empty interrupt during initialization */
+            temp = REG_SET_FIELD(temp, SDMA_CNTL, CTXEMPTY_INT_ENABLE, 1);
+            WREG32_SDMA(i, regSDMA_CNTL, temp);
+        }
 
         if (!amdgpu_sriov_vf(adev)) {
             if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
                 /* unhalt engine */
@@ -1110,7 +1154,7 @@ static int sdma_v4_4_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
         r = -EINVAL;
 
 err1:
-    amdgpu_ib_free(adev, &ib, NULL);
+    amdgpu_ib_free(&ib, NULL);
     dma_fence_put(f);
 err0:
     amdgpu_device_wb_free(adev, index);
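Taken together, the resume hunks distinguish a cold bring-up from a post-reset restart: only the cold path may load microcode and zero the ring pointers, while the restore path reprograms the hardware pointers from cached values. The sketch below only summarizes that split; sdma_resume_policy() is an illustrative helper under those assumptions, not part of the patch.

    /* Summary sketch (not driver code) of what the new 'restore' flag changes
     * in sdma_v4_4_2_inst_start() and the resume helpers above.
     */
    #include <stdbool.h>

    struct sdma_resume_policy {
        bool load_microcode;        /* direct firmware-load path, cold init only */
        bool zero_ring_wptr;        /* reset the software copy of wptr */
        bool program_cached_ptrs;   /* write rptr/wptr from rwptr << 2 */
    };

    static struct sdma_resume_policy sdma_resume_policy(bool restore)
    {
        return (struct sdma_resume_policy){
            .load_microcode      = !restore,
            .zero_ring_wptr      = !restore,
            .program_cached_ptrs = restore,
        };
    }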
@@ -1247,21 +1291,71 @@ static void sdma_v4_4_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
                    seq, 0xffffffff, 4);
 }
 
-
-/**
- * sdma_v4_4_2_ring_emit_vm_flush - vm flush using sDMA
+/*
+ * sdma_v4_4_2_get_invalidate_req - Construct the VM_INVALIDATE_ENG0_REQ register value
+ * @vmid: The VMID to invalidate
+ * @flush_type: The type of flush (0 = legacy, 1 = lightweight, 2 = heavyweight)
  *
- * @ring: amdgpu_ring pointer
- * @vmid: vmid number to use
- * @pd_addr: address
+ * This function constructs the VM_INVALIDATE_ENG0_REQ register value for the specified VMID
+ * and flush type. It ensures that all relevant page table cache levels (L1 PTEs, L2 PTEs, and
+ * L2 PDEs) are invalidated.
+ */
+static uint32_t sdma_v4_4_2_get_invalidate_req(unsigned int vmid,
+                                               uint32_t flush_type)
+{
+    u32 req = 0;
+
+    req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
+                        PER_VMID_INVALIDATE_REQ, 1 << vmid);
+    req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
+    req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+    req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+    req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+    req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+    req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+    req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
+                        CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+    return req;
+}
+
+/*
+ * sdma_v4_4_2_ring_emit_vm_flush - Emit VM flush commands for SDMA
+ * @ring: The SDMA ring
+ * @vmid: The VMID to flush
+ * @pd_addr: The page directory address
  *
- * Update the page table base and flush the VM TLB
- * using sDMA.
+ * This function emits the necessary register writes and waits to perform a VM flush for the
+ * specified VMID. It updates the PTB address registers and issues a VM invalidation request
+ * using the specified VM invalidation engine.
  */
 static void sdma_v4_4_2_ring_emit_vm_flush(struct amdgpu_ring *ring,
-                                           unsigned vmid, uint64_t pd_addr)
+                                           unsigned int vmid, uint64_t pd_addr)
 {
-    amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+    struct amdgpu_device *adev = ring->adev;
+    uint32_t req = sdma_v4_4_2_get_invalidate_req(vmid, 0);
+    unsigned int eng = ring->vm_inv_eng;
+    struct amdgpu_vmhub *hub = &adev->vmhub[ring->vm_hub];
+
+    amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
+                          (hub->ctx_addr_distance * vmid),
+                          lower_32_bits(pd_addr));
+
+    amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
+                          (hub->ctx_addr_distance * vmid),
+                          upper_32_bits(pd_addr));
+    /*
+     * Construct and emit the VM invalidation packet
+     */
+    amdgpu_ring_write(ring,
+                      SDMA_PKT_VM_INVALIDATION_HEADER_OP(SDMA_OP_VM_INVALIDATE) |
+                      SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(SDMA_SUBOP_VM_INVALIDATE) |
+                      SDMA_PKT_VM_INVALIDATION_HEADER_XCC0_ENG_ID(0x1f) |
+                      SDMA_PKT_VM_INVALIDATION_HEADER_XCC1_ENG_ID(0x1f) |
+                      SDMA_PKT_VM_INVALIDATION_HEADER_MMHUB_ENG_ID(eng));
+    amdgpu_ring_write(ring, SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(req));
+    amdgpu_ring_write(ring, 0);
+    amdgpu_ring_write(ring, SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(BIT(vmid)));
 }
 
 static void sdma_v4_4_2_ring_emit_wreg(struct amdgpu_ring *ring,
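For the invalidation request built above, the only values computed at run time are per-VMID bits: the request field gets 1 << vmid and the packet's ack mask is BIT(vmid). A tiny standalone example of that arithmetic follows; the real field offsets come from the gc_9_0 register headers and are deliberately not reproduced here.

    /* Tiny standalone illustration (field offsets omitted on purpose): for a
     * given VMID the code above requests invalidation with bit (1 << vmid) and
     * waits on the same per-VMID ack bit, e.g. vmid 8 -> mask 0x100.
     */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int vmid = 8;
        uint32_t per_vmid_req = 1u << vmid;   /* PER_VMID_INVALIDATE_REQ field */
        uint32_t ack_mask     = 1u << vmid;   /* INVALIDATEACK bit, i.e. BIT(vmid) */

        printf("vmid %u: req bit 0x%x, ack mask 0x%x\n", vmid, per_vmid_req, ack_mask);
        return 0;
    }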
@@ -1308,6 +1402,7 @@ static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block)
     sdma_v4_4_2_set_vm_pte_funcs(adev);
     sdma_v4_4_2_set_irq_funcs(adev);
     sdma_v4_4_2_set_ras_funcs(adev);
+    sdma_v4_4_2_set_engine_reset_funcs(adev);
 
     return 0;
 }
@@ -1329,6 +1424,12 @@ static int sdma_v4_4_2_late_init(struct amdgpu_ip_block *ip_block)
     if (!amdgpu_persistent_edc_harvesting_supported(adev))
         amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__SDMA);
 
+    /* The initialization is done in the late_init stage to ensure that the SMU
+     * initialization and capability setup are completed before we check the SDMA
+     * reset capability
+     */
+    sdma_v4_4_2_update_reset_mask(adev);
+
     return 0;
 }
@@ -1384,9 +1485,20 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block)
                               &adev->sdma.srbm_write_irq);
         if (r)
             return r;
+
+        r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+                              SDMA0_4_0__SRCID__SDMA_CTXEMPTY,
+                              &adev->sdma.ctxt_empty_irq);
+        if (r)
+            return r;
     }
 
     for (i = 0; i < adev->sdma.num_instances; i++) {
+        mutex_init(&adev->sdma.instance[i].engine_reset_mutex);
+        /* Initialize guilty flags for GFX and PAGE queues */
+        adev->sdma.instance[i].gfx_guilty = false;
+        adev->sdma.instance[i].page_guilty = false;
+
         ring = &adev->sdma.instance[i].ring;
         ring->ring_obj = NULL;
         ring->use_doorbell = true;
@@ -1430,7 +1542,6 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block)
         }
     }
 
-    /* TODO: Add queue reset mask when FW fully supports it */
     adev->sdma.supported_reset =
         amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
@@ -1466,6 +1577,7 @@ static int sdma_v4_4_2_sw_fini(struct amdgpu_ip_block *ip_block)
     amdgpu_sdma_sysfs_reset_mask_fini(adev);
     if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2) ||
+        amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 4) ||
         amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5))
         amdgpu_sdma_destroy_inst_ctx(adev, true);
     else
@@ -1486,7 +1598,7 @@ static int sdma_v4_4_2_hw_init(struct amdgpu_ip_block *ip_block)
     if (!amdgpu_sriov_vf(adev))
         sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask);
 
-    r = sdma_v4_4_2_inst_start(adev, inst_mask);
+    r = sdma_v4_4_2_inst_start(adev, inst_mask, false);
 
     return r;
 }
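The ring_funcs hunks later in this patch replace the SOC15 TLB-flush estimate in emit_frame_size with "4 + 2 * 3". That appears to account for the two register writes of the page-directory address (assumed 3 dwords each, per sdma_v4_4_2_ring_emit_wreg) plus the 4-dword VM_INVALIDATION packet emitted by the new sdma_v4_4_2_ring_emit_vm_flush() above. A hedged restatement of that arithmetic:

    /* Hedged accounting sketch for the new emit_frame_size term "4 + 2 * 3":
     * two SDMA register writes (assumed 3 dwords each) for the PTB address,
     * plus the 4-dword VM_INVALIDATION packet (header, request, address-range
     * low, address-range high/ack).
     */
    enum {
        SDMA_WREG_DWORDS       = 3,   /* assumption: SRBM_WRITE-style packet */
        VM_INVALIDATION_DWORDS = 4,
        VM_FLUSH_FRAME_DWORDS  = VM_INVALIDATION_DWORDS + 2 * SDMA_WREG_DWORDS, /* = 10 */
    };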
@@ -1514,7 +1626,7 @@ static int sdma_v4_4_2_hw_fini(struct amdgpu_ip_block *ip_block)
     return 0;
 }
 
-static int sdma_v4_4_2_set_clockgating_state(void *handle,
+static int sdma_v4_4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
                                              enum amd_clockgating_state state);
 
 static int sdma_v4_4_2_suspend(struct amdgpu_ip_block *ip_block)
@@ -1522,7 +1634,7 @@ static int sdma_v4_4_2_suspend(struct amdgpu_ip_block *ip_block)
     struct amdgpu_device *adev = ip_block->adev;
 
     if (amdgpu_in_reset(adev))
-        sdma_v4_4_2_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
+        sdma_v4_4_2_set_clockgating_state(ip_block, AMD_CG_STATE_UNGATE);
 
     return sdma_v4_4_2_hw_fini(ip_block);
 }
@@ -1532,9 +1644,9 @@ static int sdma_v4_4_2_resume(struct amdgpu_ip_block *ip_block)
     return sdma_v4_4_2_hw_init(ip_block);
 }
 
-static bool sdma_v4_4_2_is_idle(void *handle)
+static bool sdma_v4_4_2_is_idle(struct amdgpu_ip_block *ip_block)
 {
-    struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+    struct amdgpu_device *adev = ip_block->adev;
     u32 i;
 
     for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1573,6 +1685,121 @@ static int sdma_v4_4_2_soft_reset(struct amdgpu_ip_block *ip_block)
     return 0;
 }
 
+static bool sdma_v4_4_2_is_queue_selected(struct amdgpu_device *adev, uint32_t instance_id, bool is_page_queue)
+{
+    uint32_t reg_offset = is_page_queue ? regSDMA_PAGE_CONTEXT_STATUS : regSDMA_GFX_CONTEXT_STATUS;
+    uint32_t context_status = RREG32(sdma_v4_4_2_get_reg_offset(adev, instance_id, reg_offset));
+
+    /* Check if the SELECTED bit is set */
+    return (context_status & SDMA_GFX_CONTEXT_STATUS__SELECTED_MASK) != 0;
+}
+
+static bool sdma_v4_4_2_ring_is_guilty(struct amdgpu_ring *ring)
+{
+    struct amdgpu_device *adev = ring->adev;
+    uint32_t instance_id = ring->me;
+
+    return sdma_v4_4_2_is_queue_selected(adev, instance_id, false);
+}
+
+static bool sdma_v4_4_2_page_ring_is_guilty(struct amdgpu_ring *ring)
+{
+    struct amdgpu_device *adev = ring->adev;
+    uint32_t instance_id = ring->me;
+
+    if (!adev->sdma.has_page_queue)
+        return false;
+
+    return sdma_v4_4_2_is_queue_selected(adev, instance_id, true);
+}
+
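The helpers above read the SELECTED bit of the GFX or PAGE CONTEXT_STATUS register to decide which queue caused the hang; sdma_v4_4_2_stop_queue() below latches the result into per-instance gfx_guilty/page_guilty flags that the resume path consumes. A standalone model of that bookkeeping; SELECTED_BIT is a placeholder for SDMA_GFX_CONTEXT_STATUS__SELECTED_MASK from the register headers, and latch_guilty_flags() is illustrative only.

    /* Standalone model of the guilty-queue bookkeeping done around a reset. */
    #include <stdbool.h>
    #include <stdint.h>

    #define SELECTED_BIT 0x1u   /* placeholder mask; see sdma_4_4_2_sh_mask.h */

    struct sdma_instance_state {
        bool gfx_guilty;
        bool page_guilty;
    };

    static void latch_guilty_flags(struct sdma_instance_state *inst,
                                   uint32_t gfx_context_status,
                                   uint32_t page_context_status,
                                   bool has_page_queue)
    {
        inst->gfx_guilty  = (gfx_context_status & SELECTED_BIT) != 0;
        inst->page_guilty = has_page_queue &&
                            (page_context_status & SELECTED_BIT) != 0;
    }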
+static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid)
+{
+    struct amdgpu_device *adev = ring->adev;
+    u32 id = GET_INST(SDMA0, ring->me);
+    int r;
+
+    if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
+        return -EOPNOTSUPP;
+
+    amdgpu_amdkfd_suspend(adev, false);
+    r = amdgpu_sdma_reset_engine(adev, id);
+    amdgpu_amdkfd_resume(adev, false);
+
+    return r;
+}
+
+static int sdma_v4_4_2_stop_queue(struct amdgpu_device *adev, uint32_t instance_id)
+{
+    u32 inst_mask;
+    uint64_t rptr;
+    struct amdgpu_ring *ring = &adev->sdma.instance[instance_id].ring;
+
+    if (amdgpu_sriov_vf(adev))
+        return -EINVAL;
+
+    /* Check if this queue is the guilty one */
+    adev->sdma.instance[instance_id].gfx_guilty =
+        sdma_v4_4_2_is_queue_selected(adev, instance_id, false);
+    if (adev->sdma.has_page_queue)
+        adev->sdma.instance[instance_id].page_guilty =
+            sdma_v4_4_2_is_queue_selected(adev, instance_id, true);
+
+    /* Cache the rptr before the reset; afterwards all of the
+     * registers will be reset to 0
+     */
+    rptr = amdgpu_ring_get_rptr(ring);
+    ring->cached_rptr = rptr;
+    /* Cache the rptr for the page queue if it exists */
+    if (adev->sdma.has_page_queue) {
+        struct amdgpu_ring *page_ring = &adev->sdma.instance[instance_id].page;
+        rptr = amdgpu_ring_get_rptr(page_ring);
+        page_ring->cached_rptr = rptr;
+    }
+
+    /* stop queue */
+    inst_mask = 1 << ring->me;
+    sdma_v4_4_2_inst_gfx_stop(adev, inst_mask);
+    if (adev->sdma.has_page_queue)
+        sdma_v4_4_2_inst_page_stop(adev, inst_mask);
+
+    return 0;
+}
+
+static int sdma_v4_4_2_restore_queue(struct amdgpu_device *adev, uint32_t instance_id)
+{
+    int i;
+    u32 inst_mask;
+    struct amdgpu_ring *ring = &adev->sdma.instance[instance_id].ring;
+
+    inst_mask = 1 << ring->me;
+    udelay(50);
+
+    for (i = 0; i < adev->usec_timeout; i++) {
+        if (!REG_GET_FIELD(RREG32_SDMA(ring->me, regSDMA_F32_CNTL), SDMA_F32_CNTL, HALT))
+            break;
+        udelay(1);
+    }
+
+    if (i == adev->usec_timeout) {
+        dev_err(adev->dev, "timed out waiting for SDMA%d unhalt after reset\n",
+            ring->me);
+        return -ETIMEDOUT;
+    }
+
+    return sdma_v4_4_2_inst_start(adev, inst_mask, true);
+}
+
+static struct sdma_on_reset_funcs sdma_v4_4_2_engine_reset_funcs = {
+    .pre_reset = sdma_v4_4_2_stop_queue,
+    .post_reset = sdma_v4_4_2_restore_queue,
+};
+
+static void sdma_v4_4_2_set_engine_reset_funcs(struct amdgpu_device *adev)
+{
+    amdgpu_sdma_register_on_reset_callbacks(adev, &sdma_v4_4_2_engine_reset_funcs);
+}
+
 static int sdma_v4_4_2_set_trap_irq_state(struct amdgpu_device *adev,
                                           struct amdgpu_irq_src *source,
                                           unsigned type,
@@ -1618,6 +1845,9 @@ static int sdma_v4_4_2_process_trap_irq(struct amdgpu_device *adev,
         case 0:
             amdgpu_fence_process(&adev->sdma.instance[i].ring);
             break;
+        case 1:
+            amdgpu_fence_process(&adev->sdma.instance[i].page);
+            break;
         default:
             break;
         }
@@ -1755,6 +1985,16 @@ static int sdma_v4_4_2_process_srbm_write_irq(struct amdgpu_device *adev,
     return 0;
 }
 
+static int sdma_v4_4_2_process_ctxt_empty_irq(struct amdgpu_device *adev,
+                                              struct amdgpu_irq_src *source,
+                                              struct amdgpu_iv_entry *entry)
+{
+    /* There is nothing useful to be done here, only kept for debug */
+    dev_dbg_ratelimited(adev->dev, "SDMA context empty interrupt");
+    sdma_v4_4_2_print_iv_entry(adev, entry);
+    return 0;
+}
+
 static void sdma_v4_4_2_inst_update_medium_grain_light_sleep(
     struct amdgpu_device *adev, bool enable, uint32_t inst_mask)
 {
@@ -1821,10 +2061,10 @@ static void sdma_v4_4_2_inst_update_medium_grain_clock_gating(
     }
 }
 
-static int sdma_v4_4_2_set_clockgating_state(void *handle,
+static int sdma_v4_4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
                                              enum amd_clockgating_state state)
 {
-    struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+    struct amdgpu_device *adev = ip_block->adev;
     uint32_t inst_mask;
 
     if (amdgpu_sriov_vf(adev))
@@ -1839,15 +2079,15 @@ static int sdma_v4_4_2_set_clockgating_state(void *handle,
     return 0;
 }
 
-static int sdma_v4_4_2_set_powergating_state(void *handle,
+static int sdma_v4_4_2_set_powergating_state(struct amdgpu_ip_block *ip_block,
                                              enum amd_powergating_state state)
 {
     return 0;
 }
 
-static void sdma_v4_4_2_get_clockgating_state(void *handle, u64 *flags)
+static void sdma_v4_4_2_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
 {
-    struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+    struct amdgpu_device *adev = ip_block->adev;
     int data;
 
     if (amdgpu_sriov_vf(adev))
@@ -1895,7 +2135,6 @@ static void sdma_v4_4_2_dump_ip_state(struct amdgpu_ip_block *ip_block)
     if (!adev->sdma.ip_dump)
         return;
 
-    amdgpu_gfx_off_ctrl(adev, false);
     for (i = 0; i < adev->sdma.num_instances; i++) {
         instance_offset = i * reg_count;
         for (j = 0; j < reg_count; j++)
@@ -1903,7 +2142,6 @@ static void sdma_v4_4_2_dump_ip_state(struct amdgpu_ip_block *ip_block)
            adev->sdma.ip_dump[instance_offset + j] =
                RREG32(sdma_v4_4_2_get_reg_offset(adev, i,
                       sdma_reg_list_4_4_2[j].reg_offset));
     }
-    amdgpu_gfx_off_ctrl(adev, true);
 }
 
 const struct amd_ip_funcs sdma_v4_4_2_ip_funcs = {
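The per-queue reset path above wires sdma_v4_4_2_stop_queue()/sdma_v4_4_2_restore_queue() in as pre_reset/post_reset callbacks and lets the common helper amdgpu_sdma_reset_engine() drive them. The sketch below is only a model of the assumed ordering (detect and stop, reset the engine, wait for unhalt and restart with restore semantics); model_sdma_reset_engine() and engine_reset() are stand-ins, not the real helper.

    /* Hedged model of the sequence this patch plugs into. */
    #include <stdint.h>

    struct sdma_reset_ops {
        int (*pre_reset)(void *adev, uint32_t instance_id);   /* stop_queue */
        int (*post_reset)(void *adev, uint32_t instance_id);  /* restore_queue */
    };

    static int model_sdma_reset_engine(void *adev, uint32_t instance_id,
                                       const struct sdma_reset_ops *ops,
                                       int (*engine_reset)(void *adev, uint32_t instance_id))
    {
        int r = ops->pre_reset(adev, instance_id);  /* latch guilty flags, cache rptr, stop */
        if (r)
            return r;
        r = engine_reset(adev, instance_id);        /* per-engine reset (assumed step) */
        if (r)
            return r;
        return ops->post_reset(adev, instance_id);  /* wait for unhalt, inst_start(restore=true) */
    }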
@@ -1939,8 +2177,7 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = {
         3 + /* hdp invalidate */
         6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */
         /* sdma_v4_4_2_ring_emit_vm_flush */
-        SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
-        SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+        4 + 2 * 3 +
         10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */
     .emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */
     .emit_ib = sdma_v4_4_2_ring_emit_ib,
@@ -1955,6 +2192,8 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = {
     .emit_wreg = sdma_v4_4_2_ring_emit_wreg,
     .emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait,
     .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+    .reset = sdma_v4_4_2_reset_queue,
+    .is_guilty = sdma_v4_4_2_ring_is_guilty,
 };
 
 static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = {
@@ -1970,8 +2209,7 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = {
         3 + /* hdp invalidate */
         6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */
         /* sdma_v4_4_2_ring_emit_vm_flush */
-        SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
-        SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+        4 + 2 * 3 +
         10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */
     .emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */
     .emit_ib = sdma_v4_4_2_ring_emit_ib,
@@ -1986,6 +2224,8 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = {
     .emit_wreg = sdma_v4_4_2_ring_emit_wreg,
     .emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait,
     .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+    .reset = sdma_v4_4_2_reset_queue,
+    .is_guilty = sdma_v4_4_2_page_ring_is_guilty,
 };
 
 static void sdma_v4_4_2_set_ring_funcs(struct amdgpu_device *adev)
@@ -2038,6 +2278,10 @@ static const struct amdgpu_irq_src_funcs sdma_v4_4_2_srbm_write_irq_funcs = {
     .process = sdma_v4_4_2_process_srbm_write_irq,
 };
 
+static const struct amdgpu_irq_src_funcs sdma_v4_4_2_ctxt_empty_irq_funcs = {
+    .process = sdma_v4_4_2_process_ctxt_empty_irq,
+};
+
 static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev)
 {
     adev->sdma.trap_irq.num_types = adev->sdma.num_instances;
@@ -2046,6 +2290,7 @@ static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev)
     adev->sdma.doorbell_invalid_irq.num_types = adev->sdma.num_instances;
     adev->sdma.pool_timeout_irq.num_types = adev->sdma.num_instances;
     adev->sdma.srbm_write_irq.num_types = adev->sdma.num_instances;
+    adev->sdma.ctxt_empty_irq.num_types = adev->sdma.num_instances;
 
     adev->sdma.trap_irq.funcs = &sdma_v4_4_2_trap_irq_funcs;
     adev->sdma.illegal_inst_irq.funcs = &sdma_v4_4_2_illegal_inst_irq_funcs;
@@ -2054,6 +2299,7 @@ static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev)
     adev->sdma.doorbell_invalid_irq.funcs = &sdma_v4_4_2_doorbell_invalid_irq_funcs;
     adev->sdma.pool_timeout_irq.funcs = &sdma_v4_4_2_pool_timeout_irq_funcs;
     adev->sdma.srbm_write_irq.funcs = &sdma_v4_4_2_srbm_write_irq_funcs;
+    adev->sdma.ctxt_empty_irq.funcs = &sdma_v4_4_2_ctxt_empty_irq_funcs;
 }
 
 /**
@@ -2151,6 +2397,38 @@ static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev)
     adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
 }
 
+/**
+ * sdma_v4_4_2_update_reset_mask - update reset mask for SDMA
+ * @adev: Pointer to the AMDGPU device structure
+ *
+ * This function updates the reset mask for SDMA and sets the supported
+ * reset types based on the IP version and firmware versions.
+ *
+ */
+static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev)
+{
+    /* per queue reset not supported for SRIOV */
+    if (amdgpu_sriov_vf(adev))
+        return;
+
+    /*
+     * The user queue relies on MEC FW and PMFW when the SDMA queue is reset.
+     * Check both firmware versions here so that old MEC and PMFW are skipped.
+     */
+    switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+    case IP_VERSION(9, 4, 3):
+    case IP_VERSION(9, 4, 4):
+        if ((adev->gfx.mec_fw_version >= 0xb0) && amdgpu_dpm_reset_sdma_is_supported(adev))
+            adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+        break;
+    case IP_VERSION(9, 5, 0):
+        /* TODO: enable the queue reset flag once the firmware supports it */
+    default:
+        break;
+    }
+
+}
+
 const struct amdgpu_ip_block_version sdma_v4_4_2_ip_block = {
     .type = AMD_IP_BLOCK_TYPE_SDMA,
     .major = 4,
@@ -2167,7 +2445,7 @@ static int sdma_v4_4_2_xcp_resume(void *handle, uint32_t inst_mask)
     if (!amdgpu_sriov_vf(adev))
         sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask);
 
-    r = sdma_v4_4_2_inst_start(adev, inst_mask);
+    r = sdma_v4_4_2_inst_start(adev, inst_mask, false);
 
     return r;
 }
@@ -2312,11 +2590,13 @@ static int sdma_v4_4_2_aca_bank_parser(struct aca_handle *handle, struct aca_bank
     misc0 = bank->regs[ACA_REG_IDX_MISC0];
     switch (type) {
     case ACA_SMU_TYPE_UE:
+        bank->aca_err_type = ACA_ERROR_TYPE_UE;
         ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
                                              1ULL);
         break;
     case ACA_SMU_TYPE_CE:
-        ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE,
+        bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank);
+        ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
                                              ACA_REG__MISC0__ERRCNT(misc0));
         break;
     default:
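sdma_v4_4_2_reset_queue() earlier in the patch refuses with -EOPNOTSUPP unless AMDGPU_RESET_TYPE_PER_QUEUE has been set by this mask update. A hedged sketch of that gating from a caller's perspective; the bit value and the fallback helper are illustrative only, not real amdgpu definitions.

    /* Hedged sketch of honouring the mask filled in by
     * sdma_v4_4_2_update_reset_mask(): fall back to a heavier reset when the
     * per-queue bit is absent.
     */
    #include <stdint.h>

    #define AMDGPU_RESET_TYPE_PER_QUEUE (1u << 2)   /* illustrative value only */

    static int try_queue_reset(uint32_t supported_reset,
                               int (*per_queue_reset)(void),
                               int (*full_engine_reset)(void))
    {
        if (supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
            return per_queue_reset();   /* e.g. ring->funcs->reset() */
        return full_engine_reset();     /* placeholder fallback path */
    }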