From 86301129698be52f8398f92ea8564168f6bfcae1 Mon Sep 17 00:00:00 2001 From: Le Ma <le.ma@amd.com> Date: Tue, 24 May 2022 10:09:39 +0800 Subject: drm/amdgpu: split gc v9_4_3 functionality from gc v9_0 To prepare for gc v9_4_3 specific feature. v2: fix exports (Alex) Signed-off-by: Le Ma <le.ma@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 2666 ++++++++++++++++++++++++++++++- 1 file changed, 2643 insertions(+), 23 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 5f8500577c02..212d48114da8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -25,16 +25,292 @@ #include "amdgpu.h" #include "amdgpu_gfx.h" #include "soc15.h" +#include "soc15d.h" #include "soc15_common.h" #include "vega10_enum.h" +#include "clearstate_gfx9.h" +#include "v9_structs.h" + +#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h" + #include "gc/gc_9_4_3_offset.h" #include "gc/gc_9_4_3_sh_mask.h" #include "gfx_v9_4_3.h" +MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin"); + +#define GFX9_MEC_HPD_SIZE 4096 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L +static const struct soc15_reg_golden golden_settings_gc_9_4_3[] = { + +}; + +static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev); +static void gfx_v9_4_3_set_irq_funcs(struct amdgpu_device *adev); +static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev); +static void gfx_v9_4_3_set_rlc_funcs(struct amdgpu_device *adev); +static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, + struct amdgpu_cu_info *cu_info); + +static void gfx_v9_4_3_kiq_set_resources(struct amdgpu_ring *kiq_ring, + uint64_t queue_mask) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); + amdgpu_ring_write(kiq_ring, + PACKET3_SET_RESOURCES_VMID_MASK(0) | + /* vmid_mask:0* queue_type:0 (KIQ) */ + PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); + amdgpu_ring_write(kiq_ring, + lower_32_bits(queue_mask)); /* queue mask lo */ + amdgpu_ring_write(kiq_ring, + upper_32_bits(queue_mask)); /* queue mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* oac mask */ + amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ +} + +static void gfx_v9_4_3_kiq_map_queues(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = kiq_ring->adev; + uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ + PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ + PACKET3_MAP_QUEUES_QUEUE(ring->queue) | + PACKET3_MAP_QUEUES_PIPE(ring->pipe) | + PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | + /*queue_type: normal compute queue */ + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | + /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | + PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) | + /* num_queues: must be 1 */ + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); + amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); +} + +static void gfx_v9_4_3_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + enum amdgpu_unmap_queues_action action, + u64 gpu_addr, u64 seq) +{ + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_UNMAP_QUEUES_ACTION(action) | + PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | + PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) | + PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); + + if (action == PREEMPT_QUEUES_NO_UNMAP) { + amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr)); + amdgpu_ring_write(kiq_ring, seq); + } else { + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + } +} + +static void gfx_v9_4_3_kiq_query_status(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + u64 addr, + u64 seq) +{ + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5)); + amdgpu_ring_write(kiq_ring, + PACKET3_QUERY_STATUS_CONTEXT_ID(0) | + PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) | + PACKET3_QUERY_STATUS_COMMAND(2)); + /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + amdgpu_ring_write(kiq_ring, + PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) | + PACKET3_QUERY_STATUS_ENG_SEL(eng_sel)); + amdgpu_ring_write(kiq_ring, lower_32_bits(addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(seq)); + amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); +} + +static void gfx_v9_4_3_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(kiq_ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); +} + +static const struct kiq_pm4_funcs gfx_v9_4_3_kiq_pm4_funcs = { + .kiq_set_resources = gfx_v9_4_3_kiq_set_resources, + .kiq_map_queues = gfx_v9_4_3_kiq_map_queues, + .kiq_unmap_queues = gfx_v9_4_3_kiq_unmap_queues, + .kiq_query_status = gfx_v9_4_3_kiq_query_status, + .kiq_invalidate_tlbs = gfx_v9_4_3_kiq_invalidate_tlbs, + .set_resources_size = 8, + .map_queues_size = 7, + .unmap_queues_size = 6, + .query_status_size = 7, + .invalidate_tlbs_size = 2, +}; + +static void gfx_v9_4_3_set_kiq_pm4_funcs(struct amdgpu_device *adev) +{ + adev->gfx.kiq[0].pmf = &gfx_v9_4_3_kiq_pm4_funcs; +} + +static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev) +{ + +} + +static void gfx_v9_4_3_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, + bool wc, uint32_t reg, uint32_t val) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) | + WRITE_DATA_DST_SEL(0) | + (wc ? WR_CONFIRM : 0)); + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, val); +} + +static void gfx_v9_4_3_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, + int mem_space, int opt, uint32_t addr0, + uint32_t addr1, uint32_t ref, uint32_t mask, + uint32_t inv) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + amdgpu_ring_write(ring, + /* memory (1) or register (0) */ + (WAIT_REG_MEM_MEM_SPACE(mem_space) | + WAIT_REG_MEM_OPERATION(opt) | /* wait */ + WAIT_REG_MEM_FUNCTION(3) | /* equal */ + WAIT_REG_MEM_ENGINE(eng_sel))); + + if (mem_space) + BUG_ON(addr0 & 0x3); /* Dword align */ + amdgpu_ring_write(ring, addr0); + amdgpu_ring_write(ring, addr1); + amdgpu_ring_write(ring, ref); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, inv); /* poll interval */ +} + +static int gfx_v9_4_3_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t tmp = 0; + unsigned i; + int r; + + WREG32_SOC15(GC, 0, regSCRATCH_REG0, 0xCAFEDEAD); + r = amdgpu_ring_alloc(ring, 3); + if (r) + return r; + + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); + amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0) - + PACKET3_SET_UCONFIG_REG_START); + amdgpu_ring_write(ring, 0xDEADBEEF); + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32_SOC15(GC, 0, regSCRATCH_REG0); + if (tmp == 0xDEADBEEF) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + return r; +} + +static int gfx_v9_4_3_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ib ib; + struct dma_fence *f = NULL; + + unsigned index; + uint64_t gpu_addr; + uint32_t tmp; + long r; + + r = amdgpu_device_wb_get(adev, &index); + if (r) + return r; + + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); + memset(&ib, 0, sizeof(ib)); + r = amdgpu_ib_get(adev, NULL, 16, + AMDGPU_IB_POOL_DIRECT, &ib); + if (r) + goto err1; + + ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); + ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; + ib.ptr[2] = lower_32_bits(gpu_addr); + ib.ptr[3] = upper_32_bits(gpu_addr); + ib.ptr[4] = 0xDEADBEEF; + ib.length_dw = 5; + + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + if (r) + goto err2; + + r = dma_fence_wait_timeout(f, false, timeout); + if (r == 0) { + r = -ETIMEDOUT; + goto err2; + } else if (r < 0) { + goto err2; + } + + tmp = adev->wb.wb[index]; + if (tmp == 0xDEADBEEF) + r = 0; + else + r = -EINVAL; + +err2: + amdgpu_ib_free(adev, &ib, NULL); + dma_fence_put(f); +err1: + amdgpu_device_wb_free(adev, index); + return r; +} + + +/* This value might differs per partition */ static uint64_t gfx_v9_4_3_get_gpu_clock_counter(struct amdgpu_device *adev) { uint64_t clock; @@ -50,6 +326,241 @@ static uint64_t gfx_v9_4_3_get_gpu_clock_counter(struct amdgpu_device *adev) return clock; } +static void gfx_v9_4_3_free_microcode(struct amdgpu_device *adev) +{ + amdgpu_ucode_release(&adev->gfx.pfp_fw); + amdgpu_ucode_release(&adev->gfx.me_fw); + amdgpu_ucode_release(&adev->gfx.ce_fw); + amdgpu_ucode_release(&adev->gfx.rlc_fw); + amdgpu_ucode_release(&adev->gfx.mec_fw); + amdgpu_ucode_release(&adev->gfx.mec2_fw); + + kfree(adev->gfx.rlc.register_list_format); +} + +static int gfx_v9_4_3_init_rlc_microcode(struct amdgpu_device *adev, + const char *chip_name) +{ + char fw_name[30]; + int err; + const struct rlc_firmware_header_v2_0 *rlc_hdr; + uint16_t version_major; + uint16_t version_minor; + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); + + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name); + if (err) + goto out; + rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + + version_major = le16_to_cpu(rlc_hdr->header.header_version_major); + version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); + err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor); +out: + if (err) + amdgpu_ucode_release(&adev->gfx.rlc_fw); + + return err; +} + +static bool gfx_v9_4_3_should_disable_gfxoff(struct pci_dev *pdev) +{ + return true; +} + +static void gfx_v9_4_3_check_if_need_gfxoff(struct amdgpu_device *adev) +{ + if (gfx_v9_4_3_should_disable_gfxoff(adev->pdev)) + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; +} + +static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev, + const char *chip_name) +{ + char fw_name[30]; + int err; + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); + + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name); + if (err) + goto out; + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); + + adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version; + adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version; + + gfx_v9_4_3_check_if_need_gfxoff(adev); + +out: + if (err) + amdgpu_ucode_release(&adev->gfx.mec_fw); + return err; +} + +static int gfx_v9_4_3_init_microcode(struct amdgpu_device *adev) +{ + const char *chip_name; + int r; + + chip_name = "gc_9_4_3"; + + r = gfx_v9_4_3_init_rlc_microcode(adev, chip_name); + if (r) + return r; + + r = gfx_v9_4_3_init_cp_compute_microcode(adev, chip_name); + if (r) + return r; + + return r; +} + +static u32 gfx_v9_4_3_get_csb_size(struct amdgpu_device *adev) +{ + u32 count = 0; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + + /* begin clear state */ + count += 2; + /* context control state */ + count += 3; + + for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) + count += 2 + ext->reg_count; + else + return 0; + } + } + + /* end clear state */ + count += 2; + /* clear state */ + count += 2; + + return count; +} + +static void gfx_v9_4_3_get_csb_buffer(struct amdgpu_device *adev, + volatile u32 *buffer) +{ + u32 count = 0, i; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + + if (adev->gfx.rlc.cs_data == NULL) + return; + if (buffer == NULL) + return; + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); + buffer[count++] = cpu_to_le32(0x80000000); + buffer[count++] = cpu_to_le32(0x80000000); + + for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) { + buffer[count++] = + cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); + buffer[count++] = cpu_to_le32(ext->reg_index - + PACKET3_SET_CONTEXT_REG_START); + for (i = 0; i < ext->reg_count; i++) + buffer[count++] = cpu_to_le32(ext->extent[i]); + } else { + return; + } + } + } + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); + buffer[count++] = cpu_to_le32(0); +} + +static void gfx_v9_4_3_mec_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); + amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); +} + +static int gfx_v9_4_3_mec_init(struct amdgpu_device *adev) +{ + int r, i; + u32 *hpd; + const __le32 *fw_data; + unsigned fw_size; + u32 *fw; + size_t mec_hpd_size; + + const struct gfx_firmware_header_v1_0 *mec_hdr; + + bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); + + /* take ownership of the relevant compute queues */ + amdgpu_gfx_compute_queue_acquire(adev); + mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; + if (mec_hpd_size) { + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.mec.hpd_eop_obj, + &adev->gfx.mec.hpd_eop_gpu_addr, + (void **)&hpd); + if (r) { + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); + gfx_v9_4_3_mec_fini(adev); + return r; + } + + if (amdgpu_emu_mode == 1) { + for (i = 0; i < mec_hpd_size / 4; i++) { + memset((void *)(hpd + i), 0, 4); + if (i % 50 == 0) + msleep(1); + } + } else { + memset(hpd, 0, mec_hpd_size); + } + + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); + } + + mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + + fw_data = (const __le32 *) + (adev->gfx.mec_fw->data + + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); + + r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.mec.mec_fw_obj, + &adev->gfx.mec.mec_fw_gpu_addr, + (void **)&fw); + if (r) { + dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); + gfx_v9_4_3_mec_fini(adev); + return r; + } + + memcpy(fw, fw_data, fw_size); + + amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); + amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); + + return 0; +} + static void gfx_v9_4_3_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, @@ -150,6 +661,400 @@ static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev, soc15_grbm_select(adev, me, pipe, q, vm); } +static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { + .get_gpu_clock_counter = &gfx_v9_4_3_get_gpu_clock_counter, + .select_se_sh = &gfx_v9_4_3_select_se_sh, + .read_wave_data = &gfx_v9_4_3_read_wave_data, + .read_wave_sgprs = &gfx_v9_4_3_read_wave_sgprs, + .read_wave_vgprs = &gfx_v9_4_3_read_wave_vgprs, + .select_me_pipe_q = &gfx_v9_4_3_select_me_pipe_q, +}; + +static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev) +{ + u32 gb_addr_config; + + adev->gfx.funcs = &gfx_v9_4_3_gfx_funcs; + + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(9, 4, 3): + adev->gfx.config.max_hw_contexts = 8; + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG); + break; + default: + BUG(); + break; + } + + adev->gfx.config.gb_addr_config = gb_addr_config; + + adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << + REG_GET_FIELD( + adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, + NUM_PIPES); + + adev->gfx.config.max_tile_pipes = + adev->gfx.config.gb_addr_config_fields.num_pipes; + + adev->gfx.config.gb_addr_config_fields.num_banks = 1 << + REG_GET_FIELD( + adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, + NUM_BANKS); + adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << + REG_GET_FIELD( + adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, + MAX_COMPRESSED_FRAGS); + adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << + REG_GET_FIELD( + adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, + NUM_RB_PER_SE); + adev->gfx.config.gb_addr_config_fields.num_se = 1 << + REG_GET_FIELD( + adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, + NUM_SHADER_ENGINES); + adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + + REG_GET_FIELD( + adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, + PIPE_INTERLEAVE_SIZE)); + + return 0; +} + +static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, + int mec, int pipe, int queue) +{ + unsigned irq_type; + struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; + unsigned int hw_prio; + + ring = &adev->gfx.compute_ring[ring_id]; + + /* mec0 is me1 */ + ring->me = mec + 1; + ring->pipe = pipe; + ring->queue = queue; + + ring->ring_obj = NULL; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; + ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + + (ring_id * GFX9_MEC_HPD_SIZE); + ring->vm_hub = AMDGPU_GFXHUB_0; + sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); + + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + + ring->pipe; + hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? + AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; + /* type-2 packets are deprecated on MEC, use type-3 instead */ + return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, + hw_prio, NULL); +} + +static int gfx_v9_4_3_sw_init(void *handle) +{ + int i, j, k, r, ring_id; + struct amdgpu_kiq *kiq; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->gfx.mec.num_mec = 2; + adev->gfx.mec.num_pipe_per_mec = 4; + adev->gfx.mec.num_queue_per_pipe = 8; + + /* EOP Event */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); + if (r) + return r; + + /* Privileged reg */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, + &adev->gfx.priv_reg_irq); + if (r) + return r; + + /* Privileged inst */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT, + &adev->gfx.priv_inst_irq); + if (r) + return r; + + adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; + + r = adev->gfx.rlc.funcs->init(adev); + if (r) { + DRM_ERROR("Failed to init rlc BOs!\n"); + return r; + } + + r = gfx_v9_4_3_mec_init(adev); + if (r) { + DRM_ERROR("Failed to init MEC BOs!\n"); + return r; + } + + /* set up the compute queues - allocate horizontally across pipes */ + ring_id = 0; + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, + k, j)) + continue; + + r = gfx_v9_4_3_compute_ring_init(adev, + ring_id, + i, k, j); + if (r) + return r; + + ring_id++; + } + } + } + + r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0); + if (r) { + DRM_ERROR("Failed to init KIQ BOs!\n"); + return r; + } + + kiq = &adev->gfx.kiq[0]; + r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0); + if (r) + return r; + + /* create MQD for all compute queues as wel as KIQ for SRIOV case */ + r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0); + if (r) + return r; + + r = gfx_v9_4_3_gpu_early_init(adev); + if (r) + return r; + + return 0; +} + +static int gfx_v9_4_3_sw_fini(void *handle) +{ + int i; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (i = 0; i < adev->gfx.num_compute_rings; i++) + amdgpu_ring_fini(&adev->gfx.compute_ring[i]); + + amdgpu_gfx_mqd_sw_fini(adev, 0); + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); + amdgpu_gfx_kiq_fini(adev, 0); + + gfx_v9_4_3_mec_fini(adev); + amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); + gfx_v9_4_3_free_microcode(adev); + + return 0; +} + +static u32 gfx_v9_4_3_get_rb_active_bitmap(struct amdgpu_device *adev) +{ + u32 data, mask; + + data = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE); + data |= RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE); + + data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; + data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; + + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se); + + return (~data) & mask; +} + +static void gfx_v9_4_3_setup_rb(struct amdgpu_device *adev) +{ + int i, j; + u32 data; + u32 active_rbs = 0; + u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se; + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff); + data = gfx_v9_4_3_get_rb_active_bitmap(adev); + active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * + rb_bitmap_width_per_sh); + } + } + gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + + adev->gfx.config.backend_enable_mask = active_rbs; + adev->gfx.config.num_rbs = hweight32(active_rbs); +} + +#define DEFAULT_SH_MEM_BASES (0x6000) +static void gfx_v9_4_3_init_compute_vmid(struct amdgpu_device *adev) +{ + int i; + uint32_t sh_mem_config; + uint32_t sh_mem_bases; + + /* + * Configure apertures: + * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) + * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) + * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) + */ + sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); + + sh_mem_config = SH_MEM_ADDRESS_MODE_64 | + SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; + + mutex_lock(&adev->srbm_mutex); + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { + soc15_grbm_select(adev, 0, 0, 0, i); + /* CP and shaders */ + WREG32_SOC15_RLC(GC, 0, regSH_MEM_CONFIG, sh_mem_config); + WREG32_SOC15_RLC(GC, 0, regSH_MEM_BASES, sh_mem_bases); + } + soc15_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + /* Initialize all compute VMIDs to have no GDS, GWS, or OA + acccess. These should be enabled by FW for target VMIDs. */ + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { + WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0); + WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0); + WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0); + WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0); + } +} + +static void gfx_v9_4_3_init_gds_vmid(struct amdgpu_device *adev) +{ + int vmid; + + /* + * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA + * access. Compute VMIDs should be enabled by FW for target VMIDs, + * the driver can enable them for graphics. VMID0 should maintain + * access so that HWS firmware can save/restore entries. + */ + for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { + WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0); + WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0); + WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0); + WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0); + } +} + +static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) +{ + u32 tmp; + int i; + + WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); + + gfx_v9_4_3_setup_rb(adev); + gfx_v9_4_3_get_cu_info(adev, &adev->gfx.cu_info); + adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, regDB_DEBUG2); + + /* XXX SH_MEM regs */ + /* where to put LDS, scratch, GPUVM in FSA64 space */ + mutex_lock(&adev->srbm_mutex); + for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { + soc15_grbm_select(adev, 0, 0, 0, i); + /* CP and shaders */ + if (i == 0) { + tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, + SH_MEM_ALIGNMENT_MODE_UNALIGNED); + tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, + !!adev->gmc.noretry); + WREG32_SOC15_RLC(GC, 0, regSH_MEM_CONFIG, tmp); + WREG32_SOC15_RLC(GC, 0, regSH_MEM_BASES, 0); + } else { + tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, + SH_MEM_ALIGNMENT_MODE_UNALIGNED); + tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, + !!adev->gmc.noretry); + WREG32_SOC15_RLC(GC, 0, regSH_MEM_CONFIG, tmp); + tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, + (adev->gmc.private_aperture_start >> 48)); + tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, + (adev->gmc.shared_aperture_start >> 48)); + WREG32_SOC15_RLC(GC, 0, regSH_MEM_BASES, tmp); + } + } + soc15_grbm_select(adev, 0, 0, 0, 0); + + mutex_unlock(&adev->srbm_mutex); + + gfx_v9_4_3_init_compute_vmid(adev); + gfx_v9_4_3_init_gds_vmid(adev); +} + +static void gfx_v9_4_3_enable_save_restore_machine(struct amdgpu_device *adev) +{ + WREG32_FIELD15_PREREG(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); +} + +static void gfx_v9_4_3_init_csb(struct amdgpu_device *adev) +{ + adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); + /* csib */ + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, regRLC_CSIB_ADDR_HI), + adev->gfx.rlc.clear_state_gpu_addr >> 32); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, regRLC_CSIB_ADDR_LO), + adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, regRLC_CSIB_LENGTH), + adev->gfx.rlc.clear_state_size); +} + +static void gfx_v9_4_3_init_pg(struct amdgpu_device *adev) +{ + gfx_v9_4_3_init_csb(adev); + + /* + * Rlc save restore list is workable since v2_1. + * And it's needed by gfxoff feature. + */ + if (adev->gfx.rlc.is_rlc_v2_1) + gfx_v9_4_3_enable_save_restore_machine(adev); + + if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_GFX_SMG | + AMD_PG_SUPPORT_GFX_DMG | + AMD_PG_SUPPORT_CP | + AMD_PG_SUPPORT_GDS | + AMD_PG_SUPPORT_RLC_SMU_HS)) { + WREG32_SOC15(GC, 0, regRLC_JUMP_TABLE_RESTORE, + adev->gfx.rlc.cp_table_gpu_addr >> 8); + } +} + +void gfx_v9_4_3_disable_gpa_mode(struct amdgpu_device *adev) +{ + uint32_t data; + + data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG); + data |= CPC_PSP_DEBUG__UTCL2IUGPAOVERRIDE_MASK; + WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data); +} + static bool gfx_v9_4_3_is_rlc_enabled(struct amdgpu_device *adev) { uint32_t rlc_setting; @@ -189,6 +1094,20 @@ static void gfx_v9_4_3_unset_safe_mode(struct amdgpu_device *adev) static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev) { + const struct cs_section_def *cs_data; + int r; + + adev->gfx.rlc.cs_data = gfx9_cs_data; + + cs_data = adev->gfx.rlc.cs_data; + + if (cs_data) { + /* init clear state block */ + r = amdgpu_gfx_rlc_init_csb(adev); + if (r) + return r; + } + /* init spm vmid with 0xf */ if (adev->gfx.rlc.funcs->update_spm_vmid) adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); @@ -246,8 +1165,6 @@ static void gfx_v9_4_3_enable_gui_idle_interrupt(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); - if (adev->gfx.num_gfx_rings) - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp); } @@ -339,8 +1256,7 @@ static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev) /* disable CG */ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0); - /* TODO: revisit pg function */ - /* gfx_v9_4_3_init_pg(adev);*/ + gfx_v9_4_3_init_pg(adev); if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { /* legacy rlc firmware loading */ @@ -407,24 +1323,1728 @@ static bool gfx_v9_4_3_is_rlcg_access_range(struct amdgpu_device *adev, u32 offs ARRAY_SIZE(rlcg_access_gc_9_4_3)); } -const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { - .get_gpu_clock_counter = &gfx_v9_4_3_get_gpu_clock_counter, - .select_se_sh = &gfx_v9_4_3_select_se_sh, - .read_wave_data = &gfx_v9_4_3_read_wave_data, - .read_wave_sgprs = &gfx_v9_4_3_read_wave_sgprs, - .read_wave_vgprs = &gfx_v9_4_3_read_wave_vgprs, - .select_me_pipe_q = &gfx_v9_4_3_select_me_pipe_q, -}; +static void gfx_v9_4_3_cp_compute_enable(struct amdgpu_device *adev, bool enable) +{ + if (enable) { + WREG32_SOC15_RLC(GC, 0, regCP_MEC_CNTL, 0); + } else { + WREG32_SOC15_RLC(GC, 0, regCP_MEC_CNTL, + (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); + adev->gfx.kiq[0].ring.sched.ready = false; + } + udelay(50); +} -const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs = { - .is_rlc_enabled = gfx_v9_4_3_is_rlc_enabled, - .set_safe_mode = gfx_v9_4_3_set_safe_mode, - .unset_safe_mode = gfx_v9_4_3_unset_safe_mode, - .init = gfx_v9_4_3_rlc_init, - .resume = gfx_v9_4_3_rlc_resume, - .stop = gfx_v9_4_3_rlc_stop, - .reset = gfx_v9_4_3_rlc_reset, - .start = gfx_v9_4_3_rlc_start, - .update_spm_vmid = gfx_v9_4_3_update_spm_vmid, - .is_rlcg_access_range = gfx_v9_4_3_is_rlcg_access_range, +static int gfx_v9_4_3_cp_compute_load_microcode(struct amdgpu_device *adev) +{ + const struct gfx_firmware_header_v1_0 *mec_hdr; + const __le32 *fw_data; + unsigned i; + u32 tmp; + u32 mec_ucode_addr_offset; + u32 mec_ucode_data_offset; + + if (!adev->gfx.mec_fw) + return -EINVAL; + + gfx_v9_4_3_cp_compute_enable(adev, false); + + mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); + + fw_data = (const __le32 *) + (adev->gfx.mec_fw->data + + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); + tmp = 0; + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); + WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); + + WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, + adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); + WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, + upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); + + mec_ucode_addr_offset = + SOC15_REG_OFFSET(GC, 0, regCP_MEC_ME1_UCODE_ADDR); + mec_ucode_data_offset = + SOC15_REG_OFFSET(GC, 0, regCP_MEC_ME1_UCODE_DATA); + + /* MEC1 */ + WREG32(mec_ucode_addr_offset, mec_hdr->jt_offset); + for (i = 0; i < mec_hdr->jt_size; i++) + WREG32(mec_ucode_data_offset, + le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); + + WREG32(mec_ucode_addr_offset, adev->gfx.mec_fw_version); + /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ + + return 0; +} + +/* KIQ functions */ +static void gfx_v9_4_3_kiq_setting(struct amdgpu_ring *ring) +{ + uint32_t tmp; + struct amdgpu_device *adev = ring->adev; + + /* tell RLC which is KIQ queue */ + tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); + tmp &= 0xffffff00; + tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); + WREG32_SOC15_RLC(GC, 0, regRLC_CP_SCHEDULERS, tmp); + tmp |= 0x80; + WREG32_SOC15_RLC(GC, 0, regRLC_CP_SCHEDULERS, tmp); +} + +static void gfx_v9_4_3_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { + if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) { + mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; + mqd->cp_hqd_queue_priority = + AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; + } + } +} + +static int gfx_v9_4_3_mqd_init(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct v9_mqd *mqd = ring->mqd_ptr; + uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; + uint32_t tmp; + + mqd->header = 0xC0310800; + mqd->compute_pipelinestat_enable = 0x00000001; + mqd->compute_static_thread_mgmt_se0 = 0xffffffff; + mqd->compute_static_thread_mgmt_se1 = 0xffffffff; + mqd->compute_static_thread_mgmt_se2 = 0xffffffff; + mqd->compute_static_thread_mgmt_se3 = 0xffffffff; + mqd->compute_misc_reserved = 0x00000003; + + mqd->dynamic_cu_mask_addr_lo = + lower_32_bits(ring->mqd_gpu_addr + + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); + mqd->dynamic_cu_mask_addr_hi = + upper_32_bits(ring->mqd_gpu_addr + + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); + + eop_base_addr = ring->eop_gpu_addr >> 8; + mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; + mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); + + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, + (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); + + mqd->cp_hqd_eop_control = tmp; + + /* enable doorbell? */ + tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); + + if (ring->use_doorbell) { + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_SOURCE, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_HIT, 0); + } else { + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 0); + } + + mqd->cp_hqd_pq_doorbell_control = tmp; + + /* disable the queue if it's active */ + ring->wptr = 0; + mqd->cp_hqd_dequeue_request = 0; + mqd->cp_hqd_pq_rptr = 0; + mqd->cp_hqd_pq_wptr_lo = 0; + mqd->cp_hqd_pq_wptr_hi = 0; + + /* set the pointer to the MQD */ + mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; + mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); + + /* set MQD vmid to 0 */ + tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); + mqd->cp_mqd_control = tmp; + + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ + hqd_gpu_addr = ring->gpu_addr >> 8; + mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; + mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); + + /* set up the HQD, this is similar to CP_RB0_CNTL */ + tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, + (order_base_2(ring->ring_size / 4) - 1)); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, + ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); +#ifdef __BIG_ENDIAN + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); +#endif + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + mqd->cp_hqd_pq_control = tmp; + + /* set the wb address whether it's enabled or not */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); + mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_rptr_report_addr_hi = + upper_32_bits(wb_gpu_addr) & 0xffff; + + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; + + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + ring->wptr = 0; + mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR); + + /* set the vmid for the queue */ + mqd->cp_hqd_vmid = 0; + + tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE); + tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); + mqd->cp_hqd_persistent_state = tmp; + + /* set MIN_IB_AVAIL_SIZE */ + tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); + mqd->cp_hqd_ib_control = tmp; + + /* set static priority for a queue/ring */ + gfx_v9_4_3_mqd_set_priority(ring, mqd); + mqd->cp_hqd_quantum = RREG32(regCP_HQD_QUANTUM); + + /* map_queues packet doesn't need activate the queue, + * so only kiq need set this field. + */ + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + mqd->cp_hqd_active = 1; + + return 0; +} + +static int gfx_v9_4_3_kiq_init_register(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct v9_mqd *mqd = ring->mqd_ptr; + int j; + + /* disable wptr polling */ + WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); + + WREG32_SOC15_RLC(GC, 0, regCP_HQD_EOP_BASE_ADDR, + mqd->cp_hqd_eop_base_addr_lo); + WREG32_SOC15_RLC(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI, + mqd->cp_hqd_eop_base_addr_hi); + + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + WREG32_SOC15_RLC(GC, 0, regCP_HQD_EOP_CONTROL, + mqd->cp_hqd_eop_control); + + /* enable doorbell? */ + WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, + mqd->cp_hqd_pq_doorbell_control); + + /* disable the queue if it's active */ + if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { + WREG32_SOC15_RLC(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1); + for (j = 0; j < adev->usec_timeout; j++) { + if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + WREG32_SOC15_RLC(GC, 0, regCP_HQD_DEQUEUE_REQUEST, + mqd->cp_hqd_dequeue_request); + WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_RPTR, + mqd->cp_hqd_pq_rptr); + WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_WPTR_LO, + mqd->cp_hqd_pq_wptr_lo); + WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_WPTR_HI, + mqd->cp_hqd_pq_wptr_hi); + } + + /* set the pointer to the MQD */ + WREG32_SOC15_RLC(GC, 0, regCP_MQD_BASE_ADDR, + mqd->cp_mqd_base_addr_lo); + WREG32_SOC15_RLC(GC, 0, regCP_MQD_BASE_ADDR_HI, + mqd->cp_mqd_base_addr_hi); + + /* set MQD vmid to 0 */ + WREG32_SOC15_RLC(GC, 0, regCP_MQD_CONTROL, + mqd->cp_mqd_control); + + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ + WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_BASE, + mqd->cp_hqd_pq_base_lo); + WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_BASE_HI, + mqd->cp_hqd_pq_base_hi); + + /* set up the HQD, this is similar to CP_RB0_CNTL */ + WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_CONTROL, + mqd->cp_hqd_pq_control); + + /* set the wb address whether it's enabled or not */ + WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR, + mqd->cp_hqd_pq_rptr_report_addr_lo); + WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, + mqd->cp_hqd_pq_rptr_report_addr_hi); + + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ + WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR, + mqd->cp_hqd_pq_wptr_poll_addr_lo); + WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI, + mqd->cp_hqd_pq_wptr_poll_addr_hi); + + /* enable the doorbell if requested */ + if (ring->use_doorbell) { + WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, + (adev->doorbell_index.kiq * 2) << 2); + WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, + (adev->doorbell_index.userqueue_end * 2) << 2); + } + + WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, + mqd->cp_hqd_pq_doorbell_control); + + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_WPTR_LO, + mqd->cp_hqd_pq_wptr_lo); + WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_WPTR_HI, + mqd->cp_hqd_pq_wptr_hi); + + /* set the vmid for the queue */ + WREG32_SOC15_RLC(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid); + + WREG32_SOC15_RLC(GC, 0, regCP_HQD_PERSISTENT_STATE, + mqd->cp_hqd_persistent_state); + + /* activate the queue */ + WREG32_SOC15_RLC(GC, 0, regCP_HQD_ACTIVE, + mqd->cp_hqd_active); + + if (ring->use_doorbell) + WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); + + return 0; +} + +static int gfx_v9_4_3_kiq_fini_register(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + int j; + + /* disable the queue if it's active */ + if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { + + WREG32_SOC15_RLC(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1); + + for (j = 0; j < adev->usec_timeout; j++) { + if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + + if (j == AMDGPU_MAX_USEC_TIMEOUT) { + DRM_DEBUG("KIQ dequeue request failed.\n"); + + /* Manual disable if dequeue request times out */ + WREG32_SOC15_RLC(GC, 0, regCP_HQD_ACTIVE, 0); + } + + WREG32_SOC15_RLC(GC, 0, regCP_HQD_DEQUEUE_REQUEST, + 0); + } + + WREG32_SOC15_RLC(GC, 0, regCP_HQD_IQ_TIMER, 0); + WREG32_SOC15_RLC(GC, 0, regCP_HQD_IB_CONTROL, 0); + WREG32_SOC15_RLC(GC, 0, regCP_HQD_PERSISTENT_STATE, 0); + WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); + WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 0); + WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_RPTR, 0); + WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_WPTR_HI, 0); + WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_WPTR_LO, 0); + + return 0; +} + +static int gfx_v9_4_3_kiq_init_queue(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct v9_mqd *mqd = ring->mqd_ptr; + struct v9_mqd *tmp_mqd; + + gfx_v9_4_3_kiq_setting(ring); + + /* GPU could be in bad state during probe, driver trigger the reset + * after load the SMU, in this case , the mqd is not be initialized. + * driver need to re-init the mqd. + * check mqd->cp_hqd_pq_control since this value should not be 0 + */ + tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup; + if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) { + /* for GPU_RESET case , reset MQD to a clean status */ + if (adev->gfx.kiq[0].mqd_backup) + memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation)); + + /* reset ring buffer */ + ring->wptr = 0; + amdgpu_ring_clear_ring(ring); + + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v9_4_3_kiq_init_register(ring); + soc15_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } else { + memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); + ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; + ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v9_4_3_mqd_init(ring); + gfx_v9_4_3_kiq_init_register(ring); + soc15_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + if (adev->gfx.kiq[0].mqd_backup) + memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation)); + } + + return 0; +} + +static int gfx_v9_4_3_kcq_init_queue(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct v9_mqd *mqd = ring->mqd_ptr; + int mqd_idx = ring - &adev->gfx.compute_ring[0]; + struct v9_mqd *tmp_mqd; + + /* Same as above kiq init, driver need to re-init the mqd if mqd->cp_hqd_pq_control + * is not be initialized before + */ + tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; + + if (!tmp_mqd->cp_hqd_pq_control || + (!amdgpu_in_reset(adev) && !adev->in_suspend)) { + memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); + ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; + ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v9_4_3_mqd_init(ring); + soc15_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); + } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ + /* reset MQD to a clean status */ + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); + + /* reset ring buffer */ + ring->wptr = 0; + atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0); + amdgpu_ring_clear_ring(ring); + } else { + amdgpu_ring_clear_ring(ring); + } + + return 0; +} + +static int gfx_v9_4_3_kiq_resume(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + int r; + + ring = &adev->gfx.kiq[0].ring; + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) + return r; + + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (unlikely(r != 0)) + return r; + + gfx_v9_4_3_kiq_init_queue(ring); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + amdgpu_bo_unreserve(ring->mqd_obj); + ring->sched.ready = true; + return 0; +} + +static int gfx_v9_4_3_kcq_resume(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = NULL; + int r = 0, i; + + gfx_v9_4_3_cp_compute_enable(adev, true); + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) + goto done; + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v9_4_3_kcq_init_queue(ring); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) + goto done; + } + + r = amdgpu_gfx_enable_kcq(adev, 0); +done: + return r; +} + +static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) +{ + int r, i; + struct amdgpu_ring *ring; + + gfx_v9_4_3_enable_gui_idle_interrupt(adev, false); + + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + gfx_v9_4_3_disable_gpa_mode(adev); + + r = gfx_v9_4_3_cp_compute_load_microcode(adev); + if (r) + return r; + } + + r = gfx_v9_4_3_kiq_resume(adev); + if (r) + return r; + + r = gfx_v9_4_3_kcq_resume(adev); + if (r) + return r; + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + amdgpu_ring_test_helper(ring); + } + + gfx_v9_4_3_enable_gui_idle_interrupt(adev, true); + + return 0; +} + +static void gfx_v9_4_3_cp_enable(struct amdgpu_device *adev, bool enable) +{ + gfx_v9_4_3_cp_compute_enable(adev, enable); +} + +static int gfx_v9_4_3_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + gfx_v9_4_3_init_golden_registers(adev); + + gfx_v9_4_3_constants_init(adev); + + r = adev->gfx.rlc.funcs->resume(adev); + if (r) + return r; + + r = gfx_v9_4_3_cp_resume(adev); + if (r) + return r; + + return r; +} + +static int gfx_v9_4_3_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + + if (amdgpu_gfx_disable_kcq(adev, 0)) + DRM_ERROR("KCQ disable failed\n"); + + /* Use deinitialize sequence from CAIL when unbinding device from driver, + * otherwise KIQ is hanging when binding back + */ + if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me, + adev->gfx.kiq[0].ring.pipe, + adev->gfx.kiq[0].ring.queue, 0); + gfx_v9_4_3_kiq_fini_register(&adev->gfx.kiq[0].ring); + soc15_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } + + gfx_v9_4_3_cp_enable(adev, false); + + /* Skip suspend with A+A reset */ + if (adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) { + dev_dbg(adev->dev, "Device in reset. Skipping RLC halt\n"); + return 0; + } + + adev->gfx.rlc.funcs->stop(adev); + return 0; +} + +static int gfx_v9_4_3_suspend(void *handle) +{ + return gfx_v9_4_3_hw_fini(handle); +} + +static int gfx_v9_4_3_resume(void *handle) +{ + return gfx_v9_4_3_hw_init(handle); +} + +static bool gfx_v9_4_3_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS), + GRBM_STATUS, GUI_ACTIVE)) + return false; + else + return true; +} + +static int gfx_v9_4_3_wait_for_idle(void *handle) +{ + unsigned i; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (i = 0; i < adev->usec_timeout; i++) { + if (gfx_v9_4_3_is_idle(handle)) + return 0; + udelay(1); + } + return -ETIMEDOUT; +} + +static int gfx_v9_4_3_soft_reset(void *handle) +{ + u32 grbm_soft_reset = 0; + u32 tmp; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* GRBM_STATUS */ + tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS); + if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | + GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | + GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | + GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | + GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | + GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, + GRBM_SOFT_RESET, SOFT_RESET_CP, 1); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, + GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); + } + + if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, + GRBM_SOFT_RESET, SOFT_RESET_CP, 1); + } + + /* GRBM_STATUS2 */ + tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS2); + if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, + GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); + + + if (grbm_soft_reset) { + /* stop the rlc */ + adev->gfx.rlc.funcs->stop(adev); + + /* Disable MEC parsing/prefetching */ + gfx_v9_4_3_cp_compute_enable(adev, false); + + if (grbm_soft_reset) { + tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); + tmp |= grbm_soft_reset; + dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); + + udelay(50); + + tmp &= ~grbm_soft_reset; + WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); + } + + /* Wait a little for things to settle down */ + udelay(50); + } + return 0; +} + +static void gfx_v9_4_3_ring_emit_gds_switch(struct amdgpu_ring *ring, + uint32_t vmid, + uint32_t gds_base, uint32_t gds_size, + uint32_t gws_base, uint32_t gws_size, + uint32_t oa_base, uint32_t oa_size) +{ + struct amdgpu_device *adev = ring->adev; + + /* GDS Base */ + gfx_v9_4_3_write_data_to_reg(ring, 0, false, + SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid, + gds_base); + + /* GDS Size */ + gfx_v9_4_3_write_data_to_reg(ring, 0, false, + SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid, + gds_size); + + /* GWS */ + gfx_v9_4_3_write_data_to_reg(ring, 0, false, + SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid, + gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); + + /* OA */ + gfx_v9_4_3_write_data_to_reg(ring, 0, false, + SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid, + (1 << (oa_size + oa_base)) - (1 << oa_base)); +} + +static int gfx_v9_4_3_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), + AMDGPU_MAX_COMPUTE_RINGS); + gfx_v9_4_3_set_kiq_pm4_funcs(adev); + gfx_v9_4_3_set_ring_funcs(adev); + gfx_v9_4_3_set_irq_funcs(adev); + gfx_v9_4_3_set_gds_init(adev); + gfx_v9_4_3_set_rlc_funcs(adev); + + return gfx_v9_4_3_init_microcode(adev); +} + +static int gfx_v9_4_3_late_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); + if (r) + return r; + + r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); + if (r) + return r; + + return 0; +} + +static void gfx_v9_4_3_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def; + + amdgpu_gfx_rlc_enter_safe_mode(adev); + + /* It is disabled by HW by default */ + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { + /* 1 - RLC_CGTT_MGCG_OVERRIDE */ + def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); + + data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); + + /* only for Vega10 & Raven1 */ + data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; + + if (def != data) + WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); + + /* MGLS is a global flag to control all MGLS in GFX */ + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { + /* 2 - RLC memory Light sleep */ + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { + def = data = RREG32_SOC15(GC, 0, regRLC_MEM_SLP_CNTL); + data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; + if (def != data) + WREG32_SOC15(GC, 0, regRLC_MEM_SLP_CNTL, data); + } + /* 3 - CP memory Light sleep */ + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { + def = data = RREG32_SOC15(GC, 0, regCP_MEM_SLP_CNTL); + data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; + if (def != data) + WREG32_SOC15(GC, 0, regCP_MEM_SLP_CNTL, data); + } + } + } else { + /* 1 - MGCG_OVERRIDE */ + def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); + + data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); + + if (def != data) + WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); + + /* 2 - disable MGLS in RLC */ + data = RREG32_SOC15(GC, 0, regRLC_MEM_SLP_CNTL); + if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { + data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; + WREG32_SOC15(GC, 0, regRLC_MEM_SLP_CNTL, data); + } + + /* 3 - disable MGLS in CP */ + data = RREG32_SOC15(GC, 0, regCP_MEM_SLP_CNTL); + if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { + data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; + WREG32_SOC15(GC, 0, regCP_MEM_SLP_CNTL, data); + } + } + + amdgpu_gfx_rlc_exit_safe_mode(adev); +} + +static void gfx_v9_4_3_update_coarse_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + amdgpu_gfx_rlc_enter_safe_mode(adev); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { + def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); + /* unset CGCG override */ + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; + else + data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; + /* update CGCG and CGLS override bits */ + if (def != data) + WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); + + /* enable cgcg FSM(0x0000363F) */ + def = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); + + if (adev->asic_type == CHIP_ARCTURUS) + data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + else + data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) + data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; + if (def != data) + WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); + + /* set IDLE_POLL_COUNT(0x00900100) */ + def = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL); + data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | + (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); + if (def != data) + WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data); + } else { + def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); + /* reset CGCG/CGLS bits */ + data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); + /* disable cgcg and cgls in FSM */ + if (def != data) + WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); + } + + amdgpu_gfx_rlc_exit_safe_mode(adev); +} + +static int gfx_v9_4_3_update_gfx_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + if (enable) { + /* CGCG/CGLS should be enabled after MGCG/MGLS + * === MGCG + MGLS === + */ + gfx_v9_4_3_update_medium_grain_clock_gating(adev, enable); + /* === CGCG + CGLS === */ + gfx_v9_4_3_update_coarse_grain_clock_gating(adev, enable); + } else { + /* CGCG/CGLS should be disabled before MGCG/MGLS + * === CGCG + CGLS === + */ + gfx_v9_4_3_update_coarse_grain_clock_gating(adev, enable); + /* === MGCG + MGLS === */ + gfx_v9_4_3_update_medium_grain_clock_gating(adev, enable); + } + return 0; +} + +static const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs = { + .is_rlc_enabled = gfx_v9_4_3_is_rlc_enabled, + .set_safe_mode = gfx_v9_4_3_set_safe_mode, + .unset_safe_mode = gfx_v9_4_3_unset_safe_mode, + .init = gfx_v9_4_3_rlc_init, + .get_csb_size = gfx_v9_4_3_get_csb_size, + .get_csb_buffer = gfx_v9_4_3_get_csb_buffer, + .resume = gfx_v9_4_3_rlc_resume, + .stop = gfx_v9_4_3_rlc_stop, + .reset = gfx_v9_4_3_rlc_reset, + .start = gfx_v9_4_3_rlc_start, + .update_spm_vmid = gfx_v9_4_3_update_spm_vmid, + .is_rlcg_access_range = gfx_v9_4_3_is_rlcg_access_range, +}; + +static int gfx_v9_4_3_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +static int gfx_v9_4_3_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + return 0; + + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(9, 4, 3): + gfx_v9_4_3_update_gfx_clock_gating(adev, + state == AMD_CG_STATE_GATE); + break; + default: + break; + } + return 0; +} + +static void gfx_v9_4_3_get_clockgating_state(void *handle, u64 *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int data; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + /* AMD_CG_SUPPORT_GFX_MGCG */ + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, regRLC_CGTT_MGCG_OVERRIDE)); + if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) + *flags |= AMD_CG_SUPPORT_GFX_MGCG; + + /* AMD_CG_SUPPORT_GFX_CGCG */ + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, regRLC_CGCG_CGLS_CTRL)); + if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_CGCG; + + /* AMD_CG_SUPPORT_GFX_CGLS */ + if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_CGLS; + + /* AMD_CG_SUPPORT_GFX_RLC_LS */ + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, regRLC_MEM_SLP_CNTL)); + if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; + + /* AMD_CG_SUPPORT_GFX_CP_LS */ + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, regCP_MEM_SLP_CNTL)); + if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; +} + +static void gfx_v9_4_3_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 ref_and_mask, reg_mem_engine; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; + + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { + switch (ring->me) { + case 1: + ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; + break; + case 2: + ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; + break; + default: + return; + } + reg_mem_engine = 0; + } else { + ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; + reg_mem_engine = 1; /* pfp */ + } + + gfx_v9_4_3_wait_reg_mem(ring, reg_mem_engine, 0, 1, + adev->nbio.funcs->get_hdp_flush_req_offset(adev), + adev->nbio.funcs->get_hdp_flush_done_offset(adev), + ref_and_mask, ref_and_mask, 0x20); +} + +static void gfx_v9_4_3_ring_emit_ib_compute(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); + + /* Currently, there is a high possibility to get wave ID mismatch + * between ME and GDS, leading to a hw deadlock, because ME generates + * different wave IDs than the GDS expects. This situation happens + * randomly when at least 5 compute pipes use GDS ordered append. + * The wave IDs generated by ME are also wrong after suspend/resume. + * Those are probably bugs somewhere else in the kernel driver. + * + * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and + * GDS to 0 for this ring (me/pipe). + */ + if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID); + amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); + BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + amdgpu_ring_write(ring, +#ifdef __BIG_ENDIAN + (2 << 0) | +#endif + lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, control); +} + +static void gfx_v9_4_3_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned flags) +{ + bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; + bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; + bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; + + /* RELEASE_MEM - flush caches, send int */ + amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); + amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | + EOP_TC_NC_ACTION_EN) : + (EOP_TCL1_ACTION_EN | + EOP_TC_ACTION_EN | + EOP_TC_WB_ACTION_EN | + EOP_TC_MD_ACTION_EN)) | + EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | + EVENT_INDEX(5))); + amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); + + /* + * the address should be Qword aligned if 64bit write, Dword + * aligned if only send 32bit data low (discard data high) + */ + if (write64bit) + BUG_ON(addr & 0x7); + else + BUG_ON(addr & 0x3); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + amdgpu_ring_write(ring, upper_32_bits(seq)); + amdgpu_ring_write(ring, 0); +} + +static void gfx_v9_4_3_ring_emit_pipeline_sync(struct amdgpu_ring *ring) +{ + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + uint32_t seq = ring->fence_drv.sync_seq; + uint64_t addr = ring->fence_drv.gpu_addr; + + gfx_v9_4_3_wait_reg_mem(ring, usepfp, 1, 0, + lower_32_bits(addr), upper_32_bits(addr), + seq, 0xffffffff, 4); +} + +static void gfx_v9_4_3_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); +} + +static u64 gfx_v9_4_3_ring_get_rptr_compute(struct amdgpu_ring *ring) +{ + return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */ +} + +static u64 gfx_v9_4_3_ring_get_wptr_compute(struct amdgpu_ring *ring) +{ + u64 wptr; + + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) + wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); + else + BUG(); + return wptr; +} + +static void gfx_v9_4_3_ring_set_wptr_compute(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); + } else { + BUG(); /* only DOORBELL method supported on gfx9 now */ + } +} + +static void gfx_v9_4_3_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned int flags) +{ + struct amdgpu_device *adev = ring->adev; + + /* we only allocate 32bit for each seq wb address */ + BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + /* write fence seq to the "addr" */ + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + + if (flags & AMDGPU_FENCE_FLAG_INT) { + /* set register to trigger INT */ + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); + amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ + } +} + +static void gfx_v9_4_3_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, + uint32_t reg_val_offs) +{ + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); + amdgpu_ring_write(ring, 0 | /* src: register*/ + (5 << 8) | /* dst: memory */ + (1 << 20)); /* write confirm */ + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + + reg_val_offs * 4)); + amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + + reg_val_offs * 4)); +} + +static void gfx_v9_4_3_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val) +{ + uint32_t cmd = 0; + + switch (ring->funcs->type) { + case AMDGPU_RING_TYPE_GFX: + cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; + break; + case AMDGPU_RING_TYPE_KIQ: + cmd = (1 << 16); /* no inc addr */ + break; + default: + cmd = WR_CONFIRM; + break; + } + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, cmd); + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, val); +} + +static void gfx_v9_4_3_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + gfx_v9_4_3_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); +} + +static void gfx_v9_4_3_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, + ref, mask); +} + +static void gfx_v9_4_3_set_compute_eop_interrupt_state(struct amdgpu_device *adev, + int me, int pipe, + enum amdgpu_interrupt_state state) +{ + u32 mec_int_cntl, mec_int_cntl_reg; + + /* + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other + * pipes' interrupts are set by amdkfd. + */ + + if (me == 1) { + switch (pipe) { + case 0: + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); + break; + case 1: + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); + break; + case 2: + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); + break; + case 3: + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); + break; + default: + DRM_DEBUG("invalid pipe %d\n", pipe); + return; + } + } else { + DRM_DEBUG("invalid me %d\n", me); + return; + } + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + TIME_STAMP_INT_ENABLE, 0); + WREG32(mec_int_cntl_reg, mec_int_cntl); + break; + case AMDGPU_IRQ_STATE_ENABLE: + mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + TIME_STAMP_INT_ENABLE, 1); + WREG32(mec_int_cntl_reg, mec_int_cntl); + break; + default: + break; + } +} + +static int gfx_v9_4_3_set_priv_reg_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + break; + default: + break; + } + + return 0; +} + +static int gfx_v9_4_3_set_priv_inst_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, + PRIV_INSTR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + break; + default: + break; + } + + return 0; +} + +static int gfx_v9_4_3_set_eop_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + switch (type) { + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: + gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 0, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: + gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 1, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: + gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 2, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: + gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 3, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: + gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 0, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: + gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 1, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: + gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 2, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: + gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 3, state); + break; + default: + break; + } + return 0; +} + +static int gfx_v9_4_3_eop_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + int i; + u8 me_id, pipe_id, queue_id; + struct amdgpu_ring *ring; + + DRM_DEBUG("IH: CP EOP\n"); + me_id = (entry->ring_id & 0x0c) >> 2; + pipe_id = (entry->ring_id & 0x03) >> 0; + queue_id = (entry->ring_id & 0x70) >> 4; + + switch (me_id) { + case 0: + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + /* Per-queue interrupt is supported for MEC starting from VI. + * The interrupt can only be enabled/disabled per pipe instead of per queue. + */ + if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) + amdgpu_fence_process(ring); + } + break; + } + return 0; +} + +static void gfx_v9_4_3_fault(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry) +{ + u8 me_id, pipe_id, queue_id; + struct amdgpu_ring *ring; + int i; + + me_id = (entry->ring_id & 0x0c) >> 2; + pipe_id = (entry->ring_id & 0x03) >> 0; + queue_id = (entry->ring_id & 0x70) >> 4; + + switch (me_id) { + case 0: + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + } +} + +static int gfx_v9_4_3_priv_reg_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal register access in command stream\n"); + gfx_v9_4_3_fault(adev, entry); + return 0; +} + +static int gfx_v9_4_3_priv_inst_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal instruction in command stream\n"); + gfx_v9_4_3_fault(adev, entry); + return 0; +} + +static void gfx_v9_4_3_emit_mem_sync(struct amdgpu_ring *ring) +{ + const unsigned int cp_coher_cntl = + PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) | + PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) | + PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) | + PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) | + PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1); + + /* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */ + amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5)); + amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */ + amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ + amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ + amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ +} + +static void gfx_v9_4_3_emit_wave_limit_cs(struct amdgpu_ring *ring, + uint32_t pipe, bool enable) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t val; + uint32_t wcl_cs_reg; + + /* regSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */ + val = enable ? 0x1 : 0x7f; + + switch (pipe) { + case 0: + wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, regSPI_WCL_PIPE_PERCENT_CS0); + break; + case 1: + wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, regSPI_WCL_PIPE_PERCENT_CS1); + break; + case 2: + wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, regSPI_WCL_PIPE_PERCENT_CS2); + break; + case 3: + wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, regSPI_WCL_PIPE_PERCENT_CS3); + break; + default: + DRM_DEBUG("invalid pipe %d\n", pipe); + return; + } + + amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val); + +} +static void gfx_v9_4_3_emit_wave_limit(struct amdgpu_ring *ring, bool enable) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t val; + int i; + + /* regSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit + * number of gfx waves. Setting 5 bit will make sure gfx only gets + * around 25% of gpu resources. + */ + val = enable ? 0x1f : 0x07ffffff; + amdgpu_ring_emit_wreg(ring, + SOC15_REG_OFFSET(GC, 0, regSPI_WCL_PIPE_PERCENT_GFX), + val); + + /* Restrict waves for normal/low priority compute queues as well + * to get best QoS for high priority compute jobs. + * + * amdgpu controls only 1st ME(0-3 CS pipes). + */ + for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { + if (i != ring->pipe) + gfx_v9_4_3_emit_wave_limit_cs(ring, i, enable); + + } +} + +static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { + .name = "gfx_v9_4_3", + .early_init = gfx_v9_4_3_early_init, + .late_init = gfx_v9_4_3_late_init, + .sw_init = gfx_v9_4_3_sw_init, + .sw_fini = gfx_v9_4_3_sw_fini, + .hw_init = gfx_v9_4_3_hw_init, + .hw_fini = gfx_v9_4_3_hw_fini, + .suspend = gfx_v9_4_3_suspend, + .resume = gfx_v9_4_3_resume, + .is_idle = gfx_v9_4_3_is_idle, + .wait_for_idle = gfx_v9_4_3_wait_for_idle, + .soft_reset = gfx_v9_4_3_soft_reset, + .set_clockgating_state = gfx_v9_4_3_set_clockgating_state, + .set_powergating_state = gfx_v9_4_3_set_powergating_state, + .get_clockgating_state = gfx_v9_4_3_get_clockgating_state, +}; + +static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { + .type = AMDGPU_RING_TYPE_COMPUTE, + .align_mask = 0xff, + .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = true, + .get_rptr = gfx_v9_4_3_ring_get_rptr_compute, + .get_wptr = gfx_v9_4_3_ring_get_wptr_compute, + .set_wptr = gfx_v9_4_3_ring_set_wptr_compute, + .emit_frame_size = + 20 + /* gfx_v9_4_3_ring_emit_gds_switch */ + 7 + /* gfx_v9_4_3_ring_emit_hdp_flush */ + 5 + /* hdp invalidate */ + 7 + /* gfx_v9_4_3_ring_emit_pipeline_sync */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + + 2 + /* gfx_v9_4_3_ring_emit_vm_flush */ + 8 + 8 + 8 + /* gfx_v9_4_3_ring_emit_fence x3 for user fence, vm fence */ + 7 + /* gfx_v9_4_3_emit_mem_sync */ + 5 + /* gfx_v9_4_3_emit_wave_limit for updating regSPI_WCL_PIPE_PERCENT_GFX register */ + 15, /* for updating 3 regSPI_WCL_PIPE_PERCENT_CS registers */ + .emit_ib_size = 7, /* gfx_v9_4_3_ring_emit_ib_compute */ + .emit_ib = gfx_v9_4_3_ring_emit_ib_compute, + .emit_fence = gfx_v9_4_3_ring_emit_fence, + .emit_pipeline_sync = gfx_v9_4_3_ring_emit_pipeline_sync, + .emit_vm_flush = gfx_v9_4_3_ring_emit_vm_flush, + .emit_gds_switch = gfx_v9_4_3_ring_emit_gds_switch, + .emit_hdp_flush = gfx_v9_4_3_ring_emit_hdp_flush, + .test_ring = gfx_v9_4_3_ring_test_ring, + .test_ib = gfx_v9_4_3_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, + .emit_wreg = gfx_v9_4_3_ring_emit_wreg, + .emit_reg_wait = gfx_v9_4_3_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v9_4_3_ring_emit_reg_write_reg_wait, + .emit_mem_sync = gfx_v9_4_3_emit_mem_sync, + .emit_wave_limit = gfx_v9_4_3_emit_wave_limit, +}; + +static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_kiq = { + .type = AMDGPU_RING_TYPE_KIQ, + .align_mask = 0xff, + .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = true, + .get_rptr = gfx_v9_4_3_ring_get_rptr_compute, + .get_wptr = gfx_v9_4_3_ring_get_wptr_compute, + .set_wptr = gfx_v9_4_3_ring_set_wptr_compute, + .emit_frame_size = + 20 + /* gfx_v9_4_3_ring_emit_gds_switch */ + 7 + /* gfx_v9_4_3_ring_emit_hdp_flush */ + 5 + /* hdp invalidate */ + 7 + /* gfx_v9_4_3_ring_emit_pipeline_sync */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + + 2 + /* gfx_v9_4_3_ring_emit_vm_flush */ + 8 + 8 + 8, /* gfx_v9_4_3_ring_emit_fence_kiq x3 for user fence, vm fence */ + .emit_ib_size = 7, /* gfx_v9_4_3_ring_emit_ib_compute */ + .emit_fence = gfx_v9_4_3_ring_emit_fence_kiq, + .test_ring = gfx_v9_4_3_ring_test_ring, + .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, + .emit_rreg = gfx_v9_4_3_ring_emit_rreg, + .emit_wreg = gfx_v9_4_3_ring_emit_wreg, + .emit_reg_wait = gfx_v9_4_3_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v9_4_3_ring_emit_reg_write_reg_wait, +}; + +static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + adev->gfx.kiq[0].ring.funcs = &gfx_v9_4_3_ring_funcs_kiq; + + for (i = 0; i < adev->gfx.num_compute_rings; i++) + adev->gfx.compute_ring[i].funcs = &gfx_v9_4_3_ring_funcs_compute; +} + +static const struct amdgpu_irq_src_funcs gfx_v9_4_3_eop_irq_funcs = { + .set = gfx_v9_4_3_set_eop_interrupt_state, + .process = gfx_v9_4_3_eop_irq, +}; + +static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_reg_irq_funcs = { + .set = gfx_v9_4_3_set_priv_reg_fault_state, + .process = gfx_v9_4_3_priv_reg_irq, +}; + +static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_inst_irq_funcs = { + .set = gfx_v9_4_3_set_priv_inst_fault_state, + .process = gfx_v9_4_3_priv_inst_irq, +}; + +static void gfx_v9_4_3_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; + adev->gfx.eop_irq.funcs = &gfx_v9_4_3_eop_irq_funcs; + + adev->gfx.priv_reg_irq.num_types = 1; + adev->gfx.priv_reg_irq.funcs = &gfx_v9_4_3_priv_reg_irq_funcs; + + adev->gfx.priv_inst_irq.num_types = 1; + adev->gfx.priv_inst_irq.funcs = &gfx_v9_4_3_priv_inst_irq_funcs; +} + +static void gfx_v9_4_3_set_rlc_funcs(struct amdgpu_device *adev) +{ + adev->gfx.rlc.funcs = &gfx_v9_4_3_rlc_funcs; +} + + +static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev) +{ + /* init asci gds info */ + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(9, 4, 3): + /* 9.4.3 removed all the GDS internal memory, + * only support GWS opcode in kernel, like barrier + * semaphore.etc */ + adev->gds.gds_size = 0; + break; + default: + adev->gds.gds_size = 0x10000; + break; + } + + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(9, 4, 3): + /* deprecated for 9.4.3, no usage at all */ + adev->gds.gds_compute_max_wave_id = 0; + break; + default: + /* this really depends on the chip */ + adev->gds.gds_compute_max_wave_id = 0x7ff; + break; + } + + adev->gds.gws_size = 64; + adev->gds.oa_size = 16; +} + +static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, + u32 bitmap) +{ + u32 data; + + if (!bitmap) + return; + + data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; + data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; + + WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data); +} + +static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev) +{ + u32 data, mask; + + data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG); + data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG); + + data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; + data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; + + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); + + return (~data) & mask; +} + +static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, + struct amdgpu_cu_info *cu_info) +{ + int i, j, k, counter, active_cu_number = 0; + u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; + unsigned disable_masks[4 * 4]; + + if (!adev || !cu_info) + return -EINVAL; + + /* + * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs + */ + if (adev->gfx.config.max_shader_engines * + adev->gfx.config.max_sh_per_se > 16) + return -EINVAL; + + amdgpu_gfx_parse_disable_cu(disable_masks, + adev->gfx.config.max_shader_engines, + adev->gfx.config.max_sh_per_se); + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + mask = 1; + ao_bitmap = 0; + counter = 0; + gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff); + gfx_v9_4_3_set_user_cu_inactive_bitmap( + adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); + bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev); + + /* + * The bitmap(and ao_cu_bitmap) in cu_info structure is + * 4x4 size array, and it's usually suitable for Vega + * ASICs which has 4*2 SE/SH layout. + * But for Arcturus, SE/SH layout is changed to 8*1. + * To mostly reduce the impact, we make it compatible + * with current bitmap array as below: + * SE4,SH0 --> bitmap[0][1] + * SE5,SH0 --> bitmap[1][1] + * SE6,SH0 --> bitmap[2][1] + * SE7,SH0 --> bitmap[3][1] + */ + cu_info->bitmap[i % 4][j + i / 4] = bitmap; + + for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { + if (bitmap & mask) { + if (counter < adev->gfx.config.max_cu_per_sh) + ao_bitmap |= mask; + counter++; + } + mask <<= 1; + } + active_cu_number += counter; + if (i < 2 && j < 2) + ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); + cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; + } + } + gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + + cu_info->number = active_cu_number; + cu_info->ao_cu_mask = ao_cu_mask; + cu_info->simd_per_cu = NUM_SIMD_PER_CU; + + return 0; +} + +const struct amdgpu_ip_block_version gfx_v9_4_3_ip_block = { + .type = AMD_IP_BLOCK_TYPE_GFX, + .major = 9, + .minor = 4, + .rev = 0, + .funcs = &gfx_v9_4_3_ip_funcs, }; -- cgit v1.2.3 From 5aa998baab3360d0f1b93d6aff3df924045f956c Mon Sep 17 00:00:00 2001 From: Le Ma <le.ma@amd.com> Date: Wed, 17 Nov 2021 16:28:51 +0800 Subject: drm/amdgpu: add xcc index argument to soc15_grbm_select To support grbm select for multiple XCD case. v2: unify naming style Signed-off-by: Le Ma <le.ma@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 10 ++++----- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 26 +++++++++++------------ drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 26 +++++++++++------------ drivers/gpu/drm/amd/amdgpu/soc15.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/soc15.h | 2 +- 6 files changed, 36 insertions(+), 36 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 03875b971ba6..ebb35633058c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -50,12 +50,12 @@ static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe, uint32_t queue, uint32_t vmid) { mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, mec, pipe, queue, vmid); + soc15_grbm_select(adev, mec, pipe, queue, vmid, 0); } static void unlock_srbm(struct amdgpu_device *adev) { - soc15_grbm_select(adev, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } @@ -700,7 +700,7 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, *wave_cnt = 0; pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe; queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe; - soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0); + soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, 0); reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, 0, mmSPI_CSQ_WF_ACTIVE_COUNT_0) + queue_slot); *wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK; @@ -772,7 +772,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, DECLARE_BITMAP(cp_queue_bitmap, KGD_MAX_QUEUES); lock_spi_csq_mutexes(adev); - soc15_grbm_select(adev, 1, 0, 0, 0); + soc15_grbm_select(adev, 1, 0, 0, 0, 0); /* * Iterate through the shader engines and arrays of the device @@ -821,7 +821,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, } amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); - soc15_grbm_select(adev, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, 0); unlock_spi_csq_mutexes(adev); /* Update the output parameters and return */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 62af92e5be51..4939fd61355b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1831,7 +1831,7 @@ static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, u32 me, u32 pipe, u32 q, u32 vm) { - soc15_grbm_select(adev, me, pipe, q, vm); + soc15_grbm_select(adev, me, pipe, q, vm, 0); } static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { @@ -2324,12 +2324,12 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) mutex_lock(&adev->srbm_mutex); for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { - soc15_grbm_select(adev, 0, 0, 0, i); + soc15_grbm_select(adev, 0, 0, 0, i, 0); /* CP and shaders */ WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases); } - soc15_grbm_select(adev, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); /* Initialize all compute VMIDs to have no GDS, GWS, or OA @@ -2394,7 +2394,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { - soc15_grbm_select(adev, 0, 0, 0, i); + soc15_grbm_select(adev, 0, 0, 0, i, 0); /* CP and shaders */ if (i == 0) { tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, @@ -2416,7 +2416,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); } } - soc15_grbm_select(adev, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); @@ -3540,9 +3540,9 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) amdgpu_ring_clear_ring(ring); mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); gfx_v9_0_kiq_init_register(ring); - soc15_grbm_select(adev, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } else { memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); @@ -3551,10 +3551,10 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) if (amdgpu_sriov_vf(adev) && adev->in_suspend) amdgpu_ring_clear_ring(ring); mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); gfx_v9_0_mqd_init(ring); gfx_v9_0_kiq_init_register(ring); - soc15_grbm_select(adev, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); if (adev->gfx.kiq[0].mqd_backup) @@ -3582,9 +3582,9 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); gfx_v9_0_mqd_init(ring); - soc15_grbm_select(adev, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); if (adev->gfx.mec.mqd_backup[mqd_idx]) @@ -3791,9 +3791,9 @@ static int gfx_v9_0_hw_fini(void *handle) mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me, adev->gfx.kiq[0].ring.pipe, - adev->gfx.kiq[0].ring.queue, 0); + adev->gfx.kiq[0].ring.queue, 0, 0); gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring); - soc15_grbm_select(adev, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index 3a797424579c..93438770ca1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -761,7 +761,7 @@ void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev, for (i = first_vmid; i < last_vmid; i++) { data = 0; - soc15_grbm_select(adev, 0, 0, 0, i); + soc15_grbm_select(adev, 0, 0, 0, i, 0); data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0); data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, @@ -769,7 +769,7 @@ void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev, WREG32(SOC15_REG_OFFSET(GC, 0, regSPI_GDBG_PER_VMID_CNTL), data); } - soc15_grbm_select(adev, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 212d48114da8..12185e7aac4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -658,7 +658,7 @@ static void gfx_v9_4_3_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev, u32 me, u32 pipe, u32 q, u32 vm) { - soc15_grbm_select(adev, me, pipe, q, vm); + soc15_grbm_select(adev, me, pipe, q, vm, 0); } static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { @@ -926,12 +926,12 @@ static void gfx_v9_4_3_init_compute_vmid(struct amdgpu_device *adev) mutex_lock(&adev->srbm_mutex); for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { - soc15_grbm_select(adev, 0, 0, 0, i); + soc15_grbm_select(adev, 0, 0, 0, i, 0); /* CP and shaders */ WREG32_SOC15_RLC(GC, 0, regSH_MEM_CONFIG, sh_mem_config); WREG32_SOC15_RLC(GC, 0, regSH_MEM_BASES, sh_mem_bases); } - soc15_grbm_select(adev, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); /* Initialize all compute VMIDs to have no GDS, GWS, or OA @@ -977,7 +977,7 @@ static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { - soc15_grbm_select(adev, 0, 0, 0, i); + soc15_grbm_select(adev, 0, 0, 0, i, 0); /* CP and shaders */ if (i == 0) { tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, @@ -999,7 +999,7 @@ static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) WREG32_SOC15_RLC(GC, 0, regSH_MEM_BASES, tmp); } } - soc15_grbm_select(adev, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); @@ -1706,19 +1706,19 @@ static int gfx_v9_4_3_kiq_init_queue(struct amdgpu_ring *ring) amdgpu_ring_clear_ring(ring); mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); gfx_v9_4_3_kiq_init_register(ring); - soc15_grbm_select(adev, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } else { memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); gfx_v9_4_3_mqd_init(ring); gfx_v9_4_3_kiq_init_register(ring); - soc15_grbm_select(adev, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); if (adev->gfx.kiq[0].mqd_backup) @@ -1746,9 +1746,9 @@ static int gfx_v9_4_3_kcq_init_queue(struct amdgpu_ring *ring) ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); gfx_v9_4_3_mqd_init(ring); - soc15_grbm_select(adev, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); if (adev->gfx.mec.mqd_backup[mqd_idx]) @@ -1896,9 +1896,9 @@ static int gfx_v9_4_3_hw_fini(void *handle) mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me, adev->gfx.kiq[0].ring.pipe, - adev->gfx.kiq[0].ring.queue, 0); + adev->gfx.kiq[0].ring.queue, 0, 0); gfx_v9_4_3_kiq_fini_register(&adev->gfx.kiq[0].ring); - soc15_grbm_select(adev, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index bc5dd80f10c1..4b79a8933476 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -311,7 +311,7 @@ static u32 soc15_get_xclk(struct amdgpu_device *adev) void soc15_grbm_select(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 queue, u32 vmid) + u32 me, u32 pipe, u32 queue, u32 vmid, int xcc_id) { u32 grbm_gfx_cntl = 0; grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, PIPEID, pipe); @@ -319,7 +319,7 @@ void soc15_grbm_select(struct amdgpu_device *adev, grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid); grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue); - WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_CNTL, grbm_gfx_cntl); + WREG32_SOC15_RLC_SHADOW(GC, xcc_id, mmGRBM_GFX_CNTL, grbm_gfx_cntl); } static void soc15_vga_set_state(struct amdgpu_device *adev, bool state) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index efc2a253e8db..2b41ee968dd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -100,7 +100,7 @@ struct soc15_ras_field_entry { #define SOC15_RAS_REG_FIELD_VAL(val, entry, field) SOC15_REG_FIELD_VAL((val), (entry).field##_count_mask, (entry).field##_count_shift) void soc15_grbm_select(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 queue, u32 vmid); + u32 me, u32 pipe, u32 queue, u32 vmid, int xcc_id); void soc15_set_virt_ops(struct amdgpu_device *adev); void soc15_program_register_sequence(struct amdgpu_device *adev, -- cgit v1.2.3 From 6f917fdc934518401ff2e166e6db1f6ac1ef1078 Mon Sep 17 00:00:00 2001 From: Le Ma <le.ma@amd.com> Date: Wed, 17 Nov 2021 17:24:02 +0800 Subject: drm/amdgpu: add multi-XCC initial support in gfx_v9_4_3.c Each XCD needs to be initialized respectively. The major changes are: 1. add iteration to do rlc/kiq/kcq init/fini for each xcd 2. load rlc/mec microcode to each xcd 3. add argument to specify xcc index in initialization functions Signed-off-by: Le Ma <le.ma@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 721 +++++++++++++++++--------------- 1 file changed, 394 insertions(+), 327 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 12185e7aac4e..56999bb7ac26 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -181,7 +181,9 @@ static const struct kiq_pm4_funcs gfx_v9_4_3_kiq_pm4_funcs = { static void gfx_v9_4_3_set_kiq_pm4_funcs(struct amdgpu_device *adev) { - adev->gfx.kiq[0].pmf = &gfx_v9_4_3_kiq_pm4_funcs; + int i; + for (i = 0; i < adev->gfx.num_xcd; i++) + adev->gfx.kiq[i].pmf = &gfx_v9_4_3_kiq_pm4_funcs; } static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev) @@ -504,7 +506,9 @@ static int gfx_v9_4_3_mec_init(struct amdgpu_device *adev) const struct gfx_firmware_header_v1_0 *mec_hdr; - bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); + for (i = 0; i < adev->gfx.num_xcd; i++) + bitmap_zero(adev->gfx.mec_bitmap[i].queue_bitmap, + AMDGPU_MAX_COMPUTE_QUEUES); /* take ownership of the relevant compute queues */ amdgpu_gfx_compute_queue_acquire(adev); @@ -731,7 +735,7 @@ static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev) } static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, - int mec, int pipe, int queue) + int xcc_id, int mec, int pipe, int queue) { unsigned irq_type; struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; @@ -740,6 +744,7 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring = &adev->gfx.compute_ring[ring_id]; /* mec0 is me1 */ + ring->xcc_id = xcc_id; ring->me = mec + 1; ring->pipe = pipe; ring->queue = queue; @@ -750,7 +755,8 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX9_MEC_HPD_SIZE); ring->vm_hub = AMDGPU_GFXHUB_0; - sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); + sprintf(ring->name, "comp_%d.%d.%d.%d", + ring->xcc_id, ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) @@ -764,7 +770,7 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, static int gfx_v9_4_3_sw_init(void *handle) { - int i, j, k, r, ring_id; + int i, j, k, r, ring_id, xcc_id; struct amdgpu_kiq *kiq; struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -805,39 +811,45 @@ static int gfx_v9_4_3_sw_init(void *handle) /* set up the compute queues - allocate horizontally across pipes */ ring_id = 0; - for (i = 0; i < adev->gfx.mec.num_mec; ++i) { - for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { - for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { - if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, - k, j)) - continue; - - r = gfx_v9_4_3_compute_ring_init(adev, - ring_id, - i, k, j); - if (r) - return r; - - ring_id++; + for (xcc_id = 0; xcc_id < adev->gfx.num_xcd; xcc_id++) { + + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; + k++) { + if (!amdgpu_gfx_is_mec_queue_enabled( + adev, xcc_id, i, k, j)) + continue; + + r = gfx_v9_4_3_compute_ring_init(adev, + ring_id, + xcc_id, + i, k, j); + if (r) + return r; + + ring_id++; + } } } - } - r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0); - if (r) { - DRM_ERROR("Failed to init KIQ BOs!\n"); - return r; - } + r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, xcc_id); + if (r) { + DRM_ERROR("Failed to init KIQ BOs!\n"); + return r; + } - kiq = &adev->gfx.kiq[0]; - r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0); - if (r) - return r; + kiq = &adev->gfx.kiq[xcc_id]; + r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, xcc_id); + if (r) + return r; - /* create MQD for all compute queues as wel as KIQ for SRIOV case */ - r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0); - if (r) - return r; + /* create MQD for all compute queues as wel as KIQ for SRIOV case */ + r = amdgpu_gfx_mqd_sw_init(adev, + sizeof(struct v9_mqd_allocation), xcc_id); + if (r) + return r; + } r = gfx_v9_4_3_gpu_early_init(adev); if (r) @@ -851,12 +863,15 @@ static int gfx_v9_4_3_sw_fini(void *handle) int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - for (i = 0; i < adev->gfx.num_compute_rings; i++) + for (i = 0; i < adev->gfx.num_compute_rings * + adev->gfx.num_xcd; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); - amdgpu_gfx_mqd_sw_fini(adev, 0); - amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); - amdgpu_gfx_kiq_fini(adev, 0); + for (i = 0; i < adev->gfx.num_xcd; i++) { + amdgpu_gfx_mqd_sw_fini(adev, i); + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[i].ring); + amdgpu_gfx_kiq_fini(adev, i); + } gfx_v9_4_3_mec_fini(adev); amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); @@ -881,7 +896,7 @@ static u32 gfx_v9_4_3_get_rb_active_bitmap(struct amdgpu_device *adev) return (~data) & mask; } -static void gfx_v9_4_3_setup_rb(struct amdgpu_device *adev) +static void gfx_v9_4_3_setup_rb(struct amdgpu_device *adev, int xcc_id) { int i, j; u32 data; @@ -906,7 +921,7 @@ static void gfx_v9_4_3_setup_rb(struct amdgpu_device *adev) } #define DEFAULT_SH_MEM_BASES (0x6000) -static void gfx_v9_4_3_init_compute_vmid(struct amdgpu_device *adev) +static void gfx_v9_4_3_init_compute_vmid(struct amdgpu_device *adev, int xcc_id) { int i; uint32_t sh_mem_config; @@ -926,25 +941,25 @@ static void gfx_v9_4_3_init_compute_vmid(struct amdgpu_device *adev) mutex_lock(&adev->srbm_mutex); for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { - soc15_grbm_select(adev, 0, 0, 0, i, 0); + soc15_grbm_select(adev, 0, 0, 0, i, xcc_id); /* CP and shaders */ - WREG32_SOC15_RLC(GC, 0, regSH_MEM_CONFIG, sh_mem_config); - WREG32_SOC15_RLC(GC, 0, regSH_MEM_BASES, sh_mem_bases); + WREG32_SOC15_RLC(GC, xcc_id, regSH_MEM_CONFIG, sh_mem_config); + WREG32_SOC15_RLC(GC, xcc_id, regSH_MEM_BASES, sh_mem_bases); } - soc15_grbm_select(adev, 0, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, xcc_id); mutex_unlock(&adev->srbm_mutex); /* Initialize all compute VMIDs to have no GDS, GWS, or OA acccess. These should be enabled by FW for target VMIDs. */ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { - WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0); - WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0); - WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0); - WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0); + WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_VMID0_BASE, 2 * i, 0); + WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_VMID0_SIZE, 2 * i, 0); + WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_GWS_VMID0, i, 0); + WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_OA_VMID0, i, 0); } } -static void gfx_v9_4_3_init_gds_vmid(struct amdgpu_device *adev) +static void gfx_v9_4_3_init_gds_vmid(struct amdgpu_device *adev, int xcc_id) { int vmid; @@ -955,21 +970,23 @@ static void gfx_v9_4_3_init_gds_vmid(struct amdgpu_device *adev) * access so that HWS firmware can save/restore entries. */ for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { - WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0); - WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0); - WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0); - WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0); + WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_VMID0_BASE, 2 * vmid, 0); + WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_VMID0_SIZE, 2 * vmid, 0); + WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_GWS_VMID0, vmid, 0); + WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_OA_VMID0, vmid, 0); } } static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) { u32 tmp; - int i; + int i, j; - WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); + for (i = 0; i < adev->gfx.num_xcd; i++) { + WREG32_FIELD15_PREREG(GC, i, GRBM_CNTL, READ_TIMEOUT, 0xff); + gfx_v9_4_3_setup_rb(adev, i); + } - gfx_v9_4_3_setup_rb(adev); gfx_v9_4_3_get_cu_info(adev, &adev->gfx.cu_info); adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, regDB_DEBUG2); @@ -977,63 +994,68 @@ static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { - soc15_grbm_select(adev, 0, 0, 0, i, 0); - /* CP and shaders */ - if (i == 0) { - tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, - SH_MEM_ALIGNMENT_MODE_UNALIGNED); - tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, - !!adev->gmc.noretry); - WREG32_SOC15_RLC(GC, 0, regSH_MEM_CONFIG, tmp); - WREG32_SOC15_RLC(GC, 0, regSH_MEM_BASES, 0); - } else { - tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, - SH_MEM_ALIGNMENT_MODE_UNALIGNED); - tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, - !!adev->gmc.noretry); - WREG32_SOC15_RLC(GC, 0, regSH_MEM_CONFIG, tmp); - tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, - (adev->gmc.private_aperture_start >> 48)); - tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, - (adev->gmc.shared_aperture_start >> 48)); - WREG32_SOC15_RLC(GC, 0, regSH_MEM_BASES, tmp); + for (j = 0; j < adev->gfx.num_xcd; j++) { + soc15_grbm_select(adev, 0, 0, 0, i, j); + /* CP and shaders */ + if (i == 0) { + tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, + SH_MEM_ALIGNMENT_MODE_UNALIGNED); + tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, + !!adev->gmc.noretry); + WREG32_SOC15_RLC(GC, j, regSH_MEM_CONFIG, tmp); + WREG32_SOC15_RLC(GC, j, regSH_MEM_BASES, 0); + } else { + tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, + SH_MEM_ALIGNMENT_MODE_UNALIGNED); + tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, + !!adev->gmc.noretry); + WREG32_SOC15_RLC(GC, j, regSH_MEM_CONFIG, tmp); + tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, + (adev->gmc.private_aperture_start >> 48)); + tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, + (adev->gmc.shared_aperture_start >> 48)); + WREG32_SOC15_RLC(GC, j, regSH_MEM_BASES, tmp); + } } } soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - gfx_v9_4_3_init_compute_vmid(adev); - gfx_v9_4_3_init_gds_vmid(adev); + for (i = 0; i < adev->gfx.num_xcd; i++) { + gfx_v9_4_3_init_compute_vmid(adev, i); + gfx_v9_4_3_init_gds_vmid(adev, i); + } } -static void gfx_v9_4_3_enable_save_restore_machine(struct amdgpu_device *adev) +static void gfx_v9_4_3_enable_save_restore_machine(struct amdgpu_device *adev, + int xcc_id) { - WREG32_FIELD15_PREREG(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); + WREG32_FIELD15_PREREG(GC, xcc_id, RLC_SRM_CNTL, SRM_ENABLE, 1); } -static void gfx_v9_4_3_init_csb(struct amdgpu_device *adev) +static void gfx_v9_4_3_init_csb(struct amdgpu_device *adev, int xcc_id) { adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* csib */ - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, regRLC_CSIB_ADDR_HI), + WREG32_RLC(SOC15_REG_OFFSET(GC, xcc_id, regRLC_CSIB_ADDR_HI), adev->gfx.rlc.clear_state_gpu_addr >> 32); - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, regRLC_CSIB_ADDR_LO), + WREG32_RLC(SOC15_REG_OFFSET(GC, xcc_id, regRLC_CSIB_ADDR_LO), adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, regRLC_CSIB_LENGTH), + WREG32_RLC(SOC15_REG_OFFSET(GC, xcc_id, regRLC_CSIB_LENGTH), adev->gfx.rlc.clear_state_size); } -static void gfx_v9_4_3_init_pg(struct amdgpu_device *adev) +static void gfx_v9_4_3_init_pg(struct amdgpu_device *adev, int xcc_id) { - gfx_v9_4_3_init_csb(adev); + gfx_v9_4_3_init_csb(adev, xcc_id); /* * Rlc save restore list is workable since v2_1. * And it's needed by gfxoff feature. */ if (adev->gfx.rlc.is_rlc_v2_1) - gfx_v9_4_3_enable_save_restore_machine(adev); + gfx_v9_4_3_enable_save_restore_machine(adev, xcc_id); if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG | @@ -1115,7 +1137,8 @@ static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev) return 0; } -static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev) +static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev, + int xcc_id) { u32 i, j, k; u32 mask; @@ -1171,17 +1194,25 @@ static void gfx_v9_4_3_enable_gui_idle_interrupt(struct amdgpu_device *adev, static void gfx_v9_4_3_rlc_stop(struct amdgpu_device *adev) { - WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); - gfx_v9_4_3_enable_gui_idle_interrupt(adev, false); - gfx_v9_4_3_wait_for_rlc_serdes(adev); + int i; + + for (i = 0; i < adev->gfx.num_xcd; i++) { + WREG32_FIELD15_PREREG(GC, i, RLC_CNTL, RLC_ENABLE_F32, 0); + gfx_v9_4_3_enable_gui_idle_interrupt(adev, false); + gfx_v9_4_3_wait_for_rlc_serdes(adev, i); + } } static void gfx_v9_4_3_rlc_reset(struct amdgpu_device *adev) { - WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); - udelay(50); - WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); - udelay(50); + int i; + + for (i = 0; i < adev->gfx.num_xcd; i++) { + WREG32_FIELD15_PREREG(GC, i, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); + udelay(50); + WREG32_FIELD15_PREREG(GC, i, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); + udelay(50); + } } static void gfx_v9_4_3_rlc_start(struct amdgpu_device *adev) @@ -1189,35 +1220,38 @@ static void gfx_v9_4_3_rlc_start(struct amdgpu_device *adev) #ifdef AMDGPU_RLC_DEBUG_RETRY u32 rlc_ucode_ver; #endif + int i; - WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); - udelay(50); - - /* carrizo do enable cp interrupt after cp inited */ - if (!(adev->flags & AMD_IS_APU)) { - gfx_v9_4_3_enable_gui_idle_interrupt(adev, true); + for (i = 0; i < adev->gfx.num_xcd; i++) { + WREG32_FIELD15_PREREG(GC, i, RLC_CNTL, RLC_ENABLE_F32, 1); udelay(50); - } + + /* carrizo do enable cp interrupt after cp inited */ + if (!(adev->flags & AMD_IS_APU)) { + gfx_v9_4_3_enable_gui_idle_interrupt(adev, true); + udelay(50); + } #ifdef AMDGPU_RLC_DEBUG_RETRY - /* RLC_GPM_GENERAL_6 : RLC Ucode version */ - rlc_ucode_ver = RREG32_SOC15(GC, 0, regRLC_GPM_GENERAL_6); - if (rlc_ucode_ver == 0x108) { - dev_info(adev->dev, - "Using rlc debug ucode. regRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", - rlc_ucode_ver, adev->gfx.rlc_fw_version); - /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, - * default is 0x9C4 to create a 100us interval */ - WREG32_SOC15(GC, 0, regRLC_GPM_TIMER_INT_3, 0x9C4); - /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr - * to disable the page fault retry interrupts, default is - * 0x100 (256) */ - WREG32_SOC15(GC, 0, regRLC_GPM_GENERAL_12, 0x100); - } + /* RLC_GPM_GENERAL_6 : RLC Ucode version */ + rlc_ucode_ver = RREG32_SOC15(GC, i, regRLC_GPM_GENERAL_6); + if (rlc_ucode_ver == 0x108) { + dev_info(adev->dev, + "Using rlc debug ucode. regRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", + rlc_ucode_ver, adev->gfx.rlc_fw_version); + /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, + * default is 0x9C4 to create a 100us interval */ + WREG32_SOC15(GC, i, regRLC_GPM_TIMER_INT_3, 0x9C4); + /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr + * to disable the page fault retry interrupts, default is + * 0x100 (256) */ + WREG32_SOC15(GC, i, regRLC_GPM_GENERAL_12, 0x100); + } #endif + } } -static int gfx_v9_4_3_rlc_load_microcode(struct amdgpu_device *adev) +static int gfx_v9_4_3_rlc_load_microcode(struct amdgpu_device *adev, int xcc_id) { const struct rlc_firmware_header_v2_0 *hdr; const __le32 *fw_data; @@ -1233,36 +1267,38 @@ static int gfx_v9_4_3_rlc_load_microcode(struct amdgpu_device *adev) le32_to_cpu(hdr->header.ucode_array_offset_bytes)); fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; - WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, + WREG32_SOC15(GC, xcc_id, regRLC_GPM_UCODE_ADDR, RLCG_UCODE_LOADING_START_ADDRESS); for (i = 0; i < fw_size; i++) { if (amdgpu_emu_mode == 1 && i % 100 == 0) { dev_info(adev->dev, "Write RLC ucode data %u DWs\n", i); msleep(1); } - WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); + WREG32_SOC15(GC, xcc_id, regRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); } - WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); + WREG32_SOC15(GC, xcc_id, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); return 0; } static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev) { - int r; + int r, i; adev->gfx.rlc.funcs->stop(adev); - /* disable CG */ - WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0); + for (i = 0; i < adev->gfx.num_xcd; i++) { + /* disable CG */ + WREG32_SOC15(GC, i, regRLC_CGCG_CGLS_CTRL, 0); - gfx_v9_4_3_init_pg(adev); + gfx_v9_4_3_init_pg(adev, i); - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - /* legacy rlc firmware loading */ - r = gfx_v9_4_3_rlc_load_microcode(adev); - if (r) - return r; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + /* legacy rlc firmware loading */ + r = gfx_v9_4_3_rlc_load_microcode(adev, i); + if (r) + return r; + } } adev->gfx.rlc.funcs->start(adev); @@ -1270,7 +1306,8 @@ static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev) return 0; } -static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) +static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, + unsigned vmid) { u32 reg, data; @@ -1323,19 +1360,21 @@ static bool gfx_v9_4_3_is_rlcg_access_range(struct amdgpu_device *adev, u32 offs ARRAY_SIZE(rlcg_access_gc_9_4_3)); } -static void gfx_v9_4_3_cp_compute_enable(struct amdgpu_device *adev, bool enable) +static void gfx_v9_4_3_cp_compute_enable(struct amdgpu_device *adev, + bool enable, int xcc_id) { if (enable) { - WREG32_SOC15_RLC(GC, 0, regCP_MEC_CNTL, 0); + WREG32_SOC15_RLC(GC, xcc_id, regCP_MEC_CNTL, 0); } else { - WREG32_SOC15_RLC(GC, 0, regCP_MEC_CNTL, + WREG32_SOC15_RLC(GC, xcc_id, regCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); - adev->gfx.kiq[0].ring.sched.ready = false; + adev->gfx.kiq[xcc_id].ring.sched.ready = false; } udelay(50); } -static int gfx_v9_4_3_cp_compute_load_microcode(struct amdgpu_device *adev) +static int gfx_v9_4_3_cp_compute_load_microcode(struct amdgpu_device *adev, + int xcc_id) { const struct gfx_firmware_header_v1_0 *mec_hdr; const __le32 *fw_data; @@ -1347,7 +1386,7 @@ static int gfx_v9_4_3_cp_compute_load_microcode(struct amdgpu_device *adev) if (!adev->gfx.mec_fw) return -EINVAL; - gfx_v9_4_3_cp_compute_enable(adev, false); + gfx_v9_4_3_cp_compute_enable(adev, false, xcc_id); mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); @@ -1358,17 +1397,17 @@ static int gfx_v9_4_3_cp_compute_load_microcode(struct amdgpu_device *adev) tmp = 0; tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); - WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); + WREG32_SOC15(GC, xcc_id, regCP_CPC_IC_BASE_CNTL, tmp); - WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, + WREG32_SOC15(GC, xcc_id, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); - WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, + WREG32_SOC15(GC, xcc_id, regCP_CPC_IC_BASE_HI, upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); mec_ucode_addr_offset = - SOC15_REG_OFFSET(GC, 0, regCP_MEC_ME1_UCODE_ADDR); + SOC15_REG_OFFSET(GC, xcc_id, regCP_MEC_ME1_UCODE_ADDR); mec_ucode_data_offset = - SOC15_REG_OFFSET(GC, 0, regCP_MEC_ME1_UCODE_DATA); + SOC15_REG_OFFSET(GC, xcc_id, regCP_MEC_ME1_UCODE_DATA); /* MEC1 */ WREG32(mec_ucode_addr_offset, mec_hdr->jt_offset); @@ -1383,18 +1422,18 @@ static int gfx_v9_4_3_cp_compute_load_microcode(struct amdgpu_device *adev) } /* KIQ functions */ -static void gfx_v9_4_3_kiq_setting(struct amdgpu_ring *ring) +static void gfx_v9_4_3_kiq_setting(struct amdgpu_ring *ring, int xcc_id) { uint32_t tmp; struct amdgpu_device *adev = ring->adev; /* tell RLC which is KIQ queue */ - tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); + tmp = RREG32_SOC15(GC, xcc_id, regRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15_RLC(GC, 0, regRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15_RLC(GC, xcc_id, regRLC_CP_SCHEDULERS, tmp); tmp |= 0x80; - WREG32_SOC15_RLC(GC, 0, regRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15_RLC(GC, xcc_id, regRLC_CP_SCHEDULERS, tmp); } static void gfx_v9_4_3_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd) @@ -1538,123 +1577,123 @@ static int gfx_v9_4_3_mqd_init(struct amdgpu_ring *ring) return 0; } -static int gfx_v9_4_3_kiq_init_register(struct amdgpu_ring *ring) +static int gfx_v9_4_3_kiq_init_register(struct amdgpu_ring *ring, int xcc_id) { struct amdgpu_device *adev = ring->adev; struct v9_mqd *mqd = ring->mqd_ptr; int j; /* disable wptr polling */ - WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); + WREG32_FIELD15_PREREG(GC, xcc_id, CP_PQ_WPTR_POLL_CNTL, EN, 0); - WREG32_SOC15_RLC(GC, 0, regCP_HQD_EOP_BASE_ADDR, + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo); - WREG32_SOC15_RLC(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI, + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi); /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - WREG32_SOC15_RLC(GC, 0, regCP_HQD_EOP_CONTROL, + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control); /* enable doorbell? */ - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); /* disable the queue if it's active */ - if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { - WREG32_SOC15_RLC(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1); + if (RREG32_SOC15(GC, xcc_id, regCP_HQD_ACTIVE) & 1) { + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_DEQUEUE_REQUEST, 1); for (j = 0; j < adev->usec_timeout; j++) { - if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) + if (!(RREG32_SOC15(GC, xcc_id, regCP_HQD_ACTIVE) & 1)) break; udelay(1); } - WREG32_SOC15_RLC(GC, 0, regCP_HQD_DEQUEUE_REQUEST, + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_RPTR, + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_WPTR_LO, + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_WPTR_HI, + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); } /* set the pointer to the MQD */ - WREG32_SOC15_RLC(GC, 0, regCP_MQD_BASE_ADDR, + WREG32_SOC15_RLC(GC, xcc_id, regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); - WREG32_SOC15_RLC(GC, 0, regCP_MQD_BASE_ADDR_HI, + WREG32_SOC15_RLC(GC, xcc_id, regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); /* set MQD vmid to 0 */ - WREG32_SOC15_RLC(GC, 0, regCP_MQD_CONTROL, + WREG32_SOC15_RLC(GC, xcc_id, regCP_MQD_CONTROL, mqd->cp_mqd_control); /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_BASE, + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_BASE_HI, + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); /* set up the HQD, this is similar to CP_RB0_CNTL */ - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_CONTROL, + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); /* set the wb address whether it's enabled or not */ - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR, + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_RPTR_REPORT_ADDR, mqd->cp_hqd_pq_rptr_report_addr_lo); - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, mqd->cp_hqd_pq_rptr_report_addr_hi); /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR, + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI, + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi); /* enable the doorbell if requested */ if (ring->use_doorbell) { - WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, + WREG32_SOC15(GC, xcc_id, regCP_MEC_DOORBELL_RANGE_LOWER, (adev->doorbell_index.kiq * 2) << 2); - WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, + WREG32_SOC15(GC, xcc_id, regCP_MEC_DOORBELL_RANGE_UPPER, (adev->doorbell_index.userqueue_end * 2) << 2); } - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_WPTR_LO, + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_WPTR_HI, + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); /* set the vmid for the queue */ - WREG32_SOC15_RLC(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid); + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_VMID, mqd->cp_hqd_vmid); - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PERSISTENT_STATE, + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); /* activate the queue */ - WREG32_SOC15_RLC(GC, 0, regCP_HQD_ACTIVE, + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_ACTIVE, mqd->cp_hqd_active); if (ring->use_doorbell) - WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); + WREG32_FIELD15_PREREG(GC, xcc_id, CP_PQ_STATUS, DOORBELL_ENABLE, 1); return 0; } -static int gfx_v9_4_3_kiq_fini_register(struct amdgpu_ring *ring) +static int gfx_v9_4_3_kiq_fini_register(struct amdgpu_ring *ring, int xcc_id) { struct amdgpu_device *adev = ring->adev; int j; /* disable the queue if it's active */ - if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { + if (RREG32_SOC15(GC, xcc_id, regCP_HQD_ACTIVE) & 1) { - WREG32_SOC15_RLC(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1); + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_DEQUEUE_REQUEST, 1); for (j = 0; j < adev->usec_timeout; j++) { - if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) + if (!(RREG32_SOC15(GC, xcc_id, regCP_HQD_ACTIVE) & 1)) break; udelay(1); } @@ -1663,72 +1702,71 @@ static int gfx_v9_4_3_kiq_fini_register(struct amdgpu_ring *ring) DRM_DEBUG("KIQ dequeue request failed.\n"); /* Manual disable if dequeue request times out */ - WREG32_SOC15_RLC(GC, 0, regCP_HQD_ACTIVE, 0); + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_ACTIVE, 0); } - WREG32_SOC15_RLC(GC, 0, regCP_HQD_DEQUEUE_REQUEST, + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_DEQUEUE_REQUEST, 0); } - WREG32_SOC15_RLC(GC, 0, regCP_HQD_IQ_TIMER, 0); - WREG32_SOC15_RLC(GC, 0, regCP_HQD_IB_CONTROL, 0); - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PERSISTENT_STATE, 0); - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 0); - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_RPTR, 0); - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_WPTR_HI, 0); - WREG32_SOC15_RLC(GC, 0, regCP_HQD_PQ_WPTR_LO, 0); + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_IQ_TIMER, 0); + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_IB_CONTROL, 0); + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PERSISTENT_STATE, 0); + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_DOORBELL_CONTROL, 0); + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_RPTR, 0); + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_HI, 0); + WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_LO, 0); return 0; } -static int gfx_v9_4_3_kiq_init_queue(struct amdgpu_ring *ring) +static int gfx_v9_4_3_kiq_init_queue(struct amdgpu_ring *ring, int xcc_id) { struct amdgpu_device *adev = ring->adev; struct v9_mqd *mqd = ring->mqd_ptr; struct v9_mqd *tmp_mqd; - gfx_v9_4_3_kiq_setting(ring); + gfx_v9_4_3_kiq_setting(ring, xcc_id); /* GPU could be in bad state during probe, driver trigger the reset * after load the SMU, in this case , the mqd is not be initialized. * driver need to re-init the mqd. * check mqd->cp_hqd_pq_control since this value should not be 0 */ - tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup; + tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[xcc_id].mqd_backup; if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) { /* for GPU_RESET case , reset MQD to a clean status */ - if (adev->gfx.kiq[0].mqd_backup) - memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation)); + if (adev->gfx.kiq[xcc_id].mqd_backup) + memcpy(mqd, adev->gfx.kiq[xcc_id].mqd_backup, sizeof(struct v9_mqd_allocation)); /* reset ring buffer */ ring->wptr = 0; amdgpu_ring_clear_ring(ring); - mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); - gfx_v9_4_3_kiq_init_register(ring); - soc15_grbm_select(adev, 0, 0, 0, 0, 0); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, xcc_id); + gfx_v9_4_3_kiq_init_register(ring, xcc_id); + soc15_grbm_select(adev, 0, 0, 0, 0, xcc_id); mutex_unlock(&adev->srbm_mutex); } else { memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, xcc_id); gfx_v9_4_3_mqd_init(ring); - gfx_v9_4_3_kiq_init_register(ring); - soc15_grbm_select(adev, 0, 0, 0, 0, 0); + gfx_v9_4_3_kiq_init_register(ring, xcc_id); + soc15_grbm_select(adev, 0, 0, 0, 0, xcc_id); mutex_unlock(&adev->srbm_mutex); - if (adev->gfx.kiq[0].mqd_backup) - memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation)); + if (adev->gfx.kiq[xcc_id].mqd_backup) + memcpy(adev->gfx.kiq[xcc_id].mqd_backup, mqd, sizeof(struct v9_mqd_allocation)); } return 0; } -static int gfx_v9_4_3_kcq_init_queue(struct amdgpu_ring *ring) +static int gfx_v9_4_3_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id) { struct amdgpu_device *adev = ring->adev; struct v9_mqd *mqd = ring->mqd_ptr; @@ -1746,9 +1784,9 @@ static int gfx_v9_4_3_kcq_init_queue(struct amdgpu_ring *ring) ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, xcc_id); gfx_v9_4_3_mqd_init(ring); - soc15_grbm_select(adev, 0, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, xcc_id); mutex_unlock(&adev->srbm_mutex); if (adev->gfx.mec.mqd_backup[mqd_idx]) @@ -1769,12 +1807,12 @@ static int gfx_v9_4_3_kcq_init_queue(struct amdgpu_ring *ring) return 0; } -static int gfx_v9_4_3_kiq_resume(struct amdgpu_device *adev) +static int gfx_v9_4_3_kiq_resume(struct amdgpu_device *adev, int xcc_id) { struct amdgpu_ring *ring; int r; - ring = &adev->gfx.kiq[0].ring; + ring = &adev->gfx.kiq[xcc_id].ring; r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)) @@ -1784,7 +1822,7 @@ static int gfx_v9_4_3_kiq_resume(struct amdgpu_device *adev) if (unlikely(r != 0)) return r; - gfx_v9_4_3_kiq_init_queue(ring); + gfx_v9_4_3_kiq_init_queue(ring, xcc_id); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; amdgpu_bo_unreserve(ring->mqd_obj); @@ -1792,22 +1830,22 @@ static int gfx_v9_4_3_kiq_resume(struct amdgpu_device *adev) return 0; } -static int gfx_v9_4_3_kcq_resume(struct amdgpu_device *adev) +static int gfx_v9_4_3_kcq_resume(struct amdgpu_device *adev, int xcc_id) { struct amdgpu_ring *ring = NULL; int r = 0, i; - gfx_v9_4_3_cp_compute_enable(adev, true); + gfx_v9_4_3_cp_compute_enable(adev, true, xcc_id); for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; + ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings]; r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)) goto done; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { - r = gfx_v9_4_3_kcq_init_queue(ring); + r = gfx_v9_4_3_kcq_init_queue(ring, xcc_id); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } @@ -1816,47 +1854,50 @@ static int gfx_v9_4_3_kcq_resume(struct amdgpu_device *adev) goto done; } - r = amdgpu_gfx_enable_kcq(adev, 0); + r = amdgpu_gfx_enable_kcq(adev, xcc_id); done: return r; } static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) { - int r, i; + int r, i, j; struct amdgpu_ring *ring; - gfx_v9_4_3_enable_gui_idle_interrupt(adev, false); + for (i = 0; i < adev->gfx.num_xcd; i++) { + gfx_v9_4_3_enable_gui_idle_interrupt(adev, false); + + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + gfx_v9_4_3_disable_gpa_mode(adev); - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - gfx_v9_4_3_disable_gpa_mode(adev); + r = gfx_v9_4_3_cp_compute_load_microcode(adev, i); + if (r) + return r; + } - r = gfx_v9_4_3_cp_compute_load_microcode(adev); + r = gfx_v9_4_3_kiq_resume(adev, i); if (r) return r; - } - r = gfx_v9_4_3_kiq_resume(adev); - if (r) - return r; + r = gfx_v9_4_3_kcq_resume(adev, i); + if (r) + return r; - r = gfx_v9_4_3_kcq_resume(adev); - if (r) - return r; + for (j = 0; j < adev->gfx.num_compute_rings; j++) { + ring = &adev->gfx.compute_ring[j + i * adev->gfx.num_compute_rings]; + amdgpu_ring_test_helper(ring); + } - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - amdgpu_ring_test_helper(ring); + gfx_v9_4_3_enable_gui_idle_interrupt(adev, true); } - gfx_v9_4_3_enable_gui_idle_interrupt(adev, true); - return 0; } -static void gfx_v9_4_3_cp_enable(struct amdgpu_device *adev, bool enable) +static void gfx_v9_4_3_cp_enable(struct amdgpu_device *adev, bool enable, + int xcc_id) { - gfx_v9_4_3_cp_compute_enable(adev, enable); + gfx_v9_4_3_cp_compute_enable(adev, enable, xcc_id); } static int gfx_v9_4_3_hw_init(void *handle) @@ -1882,27 +1923,30 @@ static int gfx_v9_4_3_hw_init(void *handle) static int gfx_v9_4_3_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i; amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); - if (amdgpu_gfx_disable_kcq(adev, 0)) - DRM_ERROR("KCQ disable failed\n"); + for (i = 0; i < adev->gfx.num_xcd; i++) { + if (amdgpu_gfx_disable_kcq(adev, i)) + DRM_ERROR("XCD %d KCQ disable failed\n", i); - /* Use deinitialize sequence from CAIL when unbinding device from driver, - * otherwise KIQ is hanging when binding back - */ - if (!amdgpu_in_reset(adev) && !adev->in_suspend) { - mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me, - adev->gfx.kiq[0].ring.pipe, - adev->gfx.kiq[0].ring.queue, 0, 0); - gfx_v9_4_3_kiq_fini_register(&adev->gfx.kiq[0].ring); - soc15_grbm_select(adev, 0, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - } + /* Use deinitialize sequence from CAIL when unbinding device + * from driver, otherwise KIQ is hanging when binding back + */ + if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, adev->gfx.kiq[i].ring.me, + adev->gfx.kiq[i].ring.pipe, + adev->gfx.kiq[i].ring.queue, 0, i); + gfx_v9_4_3_kiq_fini_register(&adev->gfx.kiq[i].ring, i); + soc15_grbm_select(adev, 0, 0, 0, 0, i); + mutex_unlock(&adev->srbm_mutex); + } - gfx_v9_4_3_cp_enable(adev, false); + gfx_v9_4_3_cp_enable(adev, false, i); + } /* Skip suspend with A+A reset */ if (adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) { @@ -1927,12 +1971,14 @@ static int gfx_v9_4_3_resume(void *handle) static bool gfx_v9_4_3_is_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i; - if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS), - GRBM_STATUS, GUI_ACTIVE)) - return false; - else - return true; + for (i = 0; i < adev->gfx.num_xcd; i++) { + if (REG_GET_FIELD(RREG32_SOC15(GC, i, regGRBM_STATUS), + GRBM_STATUS, GUI_ACTIVE)) + return false; + } + return true; } static int gfx_v9_4_3_wait_for_idle(void *handle) @@ -1985,7 +2031,7 @@ static int gfx_v9_4_3_soft_reset(void *handle) adev->gfx.rlc.funcs->stop(adev); /* Disable MEC parsing/prefetching */ - gfx_v9_4_3_cp_compute_enable(adev, false); + gfx_v9_4_3_cp_compute_enable(adev, false, 0); if (grbm_soft_reset) { tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); @@ -2040,6 +2086,11 @@ static int gfx_v9_4_3_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* hardcode in emulation phase */ + adev->gfx.num_xcd = 1; + adev->gfx.num_xcc_per_xcp = 1; + adev->gfx.partition_mode = AMDGPU_SPX_PARTITION_MODE; + adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), AMDGPU_MAX_COMPUTE_RINGS); gfx_v9_4_3_set_kiq_pm4_funcs(adev); @@ -2068,7 +2119,7 @@ static int gfx_v9_4_3_late_init(void *handle) } static void gfx_v9_4_3_update_medium_grain_clock_gating(struct amdgpu_device *adev, - bool enable) + bool enable, int xcc_id) { uint32_t data, def; @@ -2077,7 +2128,7 @@ static void gfx_v9_4_3_update_medium_grain_clock_gating(struct amdgpu_device *ad /* It is disabled by HW by default */ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { /* 1 - RLC_CGTT_MGCG_OVERRIDE */ - def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); + def = data = RREG32_SOC15(GC, xcc_id, regRLC_CGTT_MGCG_OVERRIDE); data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | @@ -2087,28 +2138,28 @@ static void gfx_v9_4_3_update_medium_grain_clock_gating(struct amdgpu_device *ad data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; if (def != data) - WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); + WREG32_SOC15(GC, xcc_id, regRLC_CGTT_MGCG_OVERRIDE, data); /* MGLS is a global flag to control all MGLS in GFX */ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { /* 2 - RLC memory Light sleep */ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { - def = data = RREG32_SOC15(GC, 0, regRLC_MEM_SLP_CNTL); + def = data = RREG32_SOC15(GC, xcc_id, regRLC_MEM_SLP_CNTL); data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; if (def != data) - WREG32_SOC15(GC, 0, regRLC_MEM_SLP_CNTL, data); + WREG32_SOC15(GC, xcc_id, regRLC_MEM_SLP_CNTL, data); } /* 3 - CP memory Light sleep */ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { - def = data = RREG32_SOC15(GC, 0, regCP_MEM_SLP_CNTL); + def = data = RREG32_SOC15(GC, xcc_id, regCP_MEM_SLP_CNTL); data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; if (def != data) - WREG32_SOC15(GC, 0, regCP_MEM_SLP_CNTL, data); + WREG32_SOC15(GC, xcc_id, regCP_MEM_SLP_CNTL, data); } } } else { /* 1 - MGCG_OVERRIDE */ - def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); + def = data = RREG32_SOC15(GC, xcc_id, regRLC_CGTT_MGCG_OVERRIDE); data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | @@ -2116,20 +2167,20 @@ static void gfx_v9_4_3_update_medium_grain_clock_gating(struct amdgpu_device *ad RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); if (def != data) - WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); + WREG32_SOC15(GC, xcc_id, regRLC_CGTT_MGCG_OVERRIDE, data); /* 2 - disable MGLS in RLC */ - data = RREG32_SOC15(GC, 0, regRLC_MEM_SLP_CNTL); + data = RREG32_SOC15(GC, xcc_id, regRLC_MEM_SLP_CNTL); if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; - WREG32_SOC15(GC, 0, regRLC_MEM_SLP_CNTL, data); + WREG32_SOC15(GC, xcc_id, regRLC_MEM_SLP_CNTL, data); } /* 3 - disable MGLS in CP */ - data = RREG32_SOC15(GC, 0, regCP_MEM_SLP_CNTL); + data = RREG32_SOC15(GC, xcc_id, regCP_MEM_SLP_CNTL); if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; - WREG32_SOC15(GC, 0, regCP_MEM_SLP_CNTL, data); + WREG32_SOC15(GC, xcc_id, regCP_MEM_SLP_CNTL, data); } } @@ -2137,14 +2188,14 @@ static void gfx_v9_4_3_update_medium_grain_clock_gating(struct amdgpu_device *ad } static void gfx_v9_4_3_update_coarse_grain_clock_gating(struct amdgpu_device *adev, - bool enable) + bool enable, int xcc_id) { uint32_t def, data; amdgpu_gfx_rlc_enter_safe_mode(adev); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { - def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); + def = data = RREG32_SOC15(GC, xcc_id, regRLC_CGTT_MGCG_OVERRIDE); /* unset CGCG override */ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) @@ -2153,10 +2204,10 @@ static void gfx_v9_4_3_update_coarse_grain_clock_gating(struct amdgpu_device *ad data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; /* update CGCG and CGLS override bits */ if (def != data) - WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); + WREG32_SOC15(GC, xcc_id, regRLC_CGTT_MGCG_OVERRIDE, data); /* enable cgcg FSM(0x0000363F) */ - def = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); + def = RREG32_SOC15(GC, xcc_id, regRLC_CGCG_CGLS_CTRL); if (adev->asic_type == CHIP_ARCTURUS) data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | @@ -2168,43 +2219,43 @@ static void gfx_v9_4_3_update_coarse_grain_clock_gating(struct amdgpu_device *ad data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; if (def != data) - WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); + WREG32_SOC15(GC, xcc_id, regRLC_CGCG_CGLS_CTRL, data); /* set IDLE_POLL_COUNT(0x00900100) */ - def = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL); + def = RREG32_SOC15(GC, xcc_id, regCP_RB_WPTR_POLL_CNTL); data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); if (def != data) - WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data); + WREG32_SOC15(GC, xcc_id, regCP_RB_WPTR_POLL_CNTL, data); } else { - def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); + def = data = RREG32_SOC15(GC, xcc_id, regRLC_CGCG_CGLS_CTRL); /* reset CGCG/CGLS bits */ data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); /* disable cgcg and cgls in FSM */ if (def != data) - WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); + WREG32_SOC15(GC, xcc_id, regRLC_CGCG_CGLS_CTRL, data); } amdgpu_gfx_rlc_exit_safe_mode(adev); } static int gfx_v9_4_3_update_gfx_clock_gating(struct amdgpu_device *adev, - bool enable) + bool enable, int xcc_id) { if (enable) { /* CGCG/CGLS should be enabled after MGCG/MGLS * === MGCG + MGLS === */ - gfx_v9_4_3_update_medium_grain_clock_gating(adev, enable); + gfx_v9_4_3_update_medium_grain_clock_gating(adev, enable, xcc_id); /* === CGCG + CGLS === */ - gfx_v9_4_3_update_coarse_grain_clock_gating(adev, enable); + gfx_v9_4_3_update_coarse_grain_clock_gating(adev, enable, xcc_id); } else { /* CGCG/CGLS should be disabled before MGCG/MGLS * === CGCG + CGLS === */ - gfx_v9_4_3_update_coarse_grain_clock_gating(adev, enable); + gfx_v9_4_3_update_coarse_grain_clock_gating(adev, enable, xcc_id); /* === MGCG + MGLS === */ - gfx_v9_4_3_update_medium_grain_clock_gating(adev, enable); + gfx_v9_4_3_update_medium_grain_clock_gating(adev, enable, xcc_id); } return 0; } @@ -2234,14 +2285,16 @@ static int gfx_v9_4_3_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i; if (amdgpu_sriov_vf(adev)) return 0; switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 4, 3): - gfx_v9_4_3_update_gfx_clock_gating(adev, - state == AMD_CG_STATE_GATE); + for (i = 0; i < adev->gfx.num_xcd; i++) + gfx_v9_4_3_update_gfx_clock_gating(adev, + state == AMD_CG_STATE_GATE, i); break; default: break; @@ -2509,8 +2562,9 @@ static void gfx_v9_4_3_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, } static void gfx_v9_4_3_set_compute_eop_interrupt_state(struct amdgpu_device *adev, - int me, int pipe, - enum amdgpu_interrupt_state state) + int me, int pipe, + enum amdgpu_interrupt_state state, + int xcc_id) { u32 mec_int_cntl, mec_int_cntl_reg; @@ -2523,16 +2577,16 @@ static void gfx_v9_4_3_set_compute_eop_interrupt_state(struct amdgpu_device *ade if (me == 1) { switch (pipe) { case 0: - mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, xcc_id, regCP_ME1_PIPE0_INT_CNTL); break; case 1: - mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, xcc_id, regCP_ME1_PIPE1_INT_CNTL); break; case 2: - mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, xcc_id, regCP_ME1_PIPE2_INT_CNTL); break; case 3: - mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, xcc_id, regCP_ME1_PIPE3_INT_CNTL); break; default: DRM_DEBUG("invalid pipe %d\n", pipe); @@ -2566,12 +2620,15 @@ static int gfx_v9_4_3_set_priv_reg_fault_state(struct amdgpu_device *adev, unsigned type, enum amdgpu_interrupt_state state) { + int i; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, - PRIV_REG_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.num_xcd; i++) + WREG32_FIELD15_PREREG(GC, i, CP_INT_CNTL_RING0, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); break; default: break; @@ -2585,12 +2642,15 @@ static int gfx_v9_4_3_set_priv_inst_fault_state(struct amdgpu_device *adev, unsigned type, enum amdgpu_interrupt_state state) { + int i; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, - PRIV_INSTR_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.num_xcd; i++) + WREG32_FIELD15_PREREG(GC, i, CP_INT_CNTL_RING0, + PRIV_INSTR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); break; default: break; @@ -2604,34 +2664,38 @@ static int gfx_v9_4_3_set_eop_interrupt_state(struct amdgpu_device *adev, unsigned type, enum amdgpu_interrupt_state state) { - switch (type) { - case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: - gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 0, state); - break; - case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: - gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 1, state); - break; - case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: - gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 2, state); - break; - case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: - gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 3, state); - break; - case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: - gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 0, state); - break; - case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: - gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 1, state); - break; - case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: - gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 2, state); - break; - case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: - gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 3, state); - break; - default: - break; + int i; + for (i = 0; i < adev->gfx.num_xcd; i++) { + switch (type) { + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: + gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 0, state, i); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: + gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 1, state, i); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: + gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 2, state, i); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: + gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 3, state, i); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: + gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 0, state, i); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: + gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 1, state, i); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: + gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 2, state, i); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: + gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 3, state, i); + break; + default: + break; + } } + return 0; } @@ -2871,12 +2935,15 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_kiq = { static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev) { - int i; + int i, j; - adev->gfx.kiq[0].ring.funcs = &gfx_v9_4_3_ring_funcs_kiq; + for (i = 0; i < adev->gfx.num_xcd; i++) { + adev->gfx.kiq[i].ring.funcs = &gfx_v9_4_3_ring_funcs_kiq; - for (i = 0; i < adev->gfx.num_compute_rings; i++) - adev->gfx.compute_ring[i].funcs = &gfx_v9_4_3_ring_funcs_compute; + for (j = 0; j < adev->gfx.num_compute_rings; j++) + adev->gfx.compute_ring[j + i * adev->gfx.num_compute_rings].funcs + = &gfx_v9_4_3_ring_funcs_compute; + } } static const struct amdgpu_irq_src_funcs gfx_v9_4_3_eop_irq_funcs = { -- cgit v1.2.3 From ec08571aca7c5e6bf2d1820db9c8aaa104eb9b68 Mon Sep 17 00:00:00 2001 From: Le Ma <le.ma@amd.com> Date: Wed, 17 Nov 2021 17:51:17 +0800 Subject: drm/amdgpu: add xcc index argument to gfx v9_4_3 functions Change those v9_4_3 interfaces which are exposed in gfx_v9_0.c. For some active single-xcc emu models, the code path in gfx_v9_0.c is better to keep reserved for a while. Signed-off-by: Le Ma <le.ma@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 56999bb7ac26..f1c04140e717 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1068,13 +1068,13 @@ static void gfx_v9_4_3_init_pg(struct amdgpu_device *adev, int xcc_id) } } -void gfx_v9_4_3_disable_gpa_mode(struct amdgpu_device *adev) +void gfx_v9_4_3_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id) { uint32_t data; - data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG); + data = RREG32_SOC15(GC, xcc_id, regCPC_PSP_DEBUG); data |= CPC_PSP_DEBUG__UTCL2IUGPAOVERRIDE_MASK; - WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data); + WREG32_SOC15(GC, xcc_id, regCPC_PSP_DEBUG, data); } static bool gfx_v9_4_3_is_rlc_enabled(struct amdgpu_device *adev) @@ -1177,19 +1177,19 @@ static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev, } static void gfx_v9_4_3_enable_gui_idle_interrupt(struct amdgpu_device *adev, - bool enable) + bool enable, int xcc_id) { u32 tmp; /* These interrupts should be enabled to drive DS clock */ - tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0); + tmp = RREG32_SOC15(GC, xcc_id, regCP_INT_CNTL_RING0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); - WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp); + WREG32_SOC15(GC, xcc_id, regCP_INT_CNTL_RING0, tmp); } static void gfx_v9_4_3_rlc_stop(struct amdgpu_device *adev) @@ -1198,7 +1198,7 @@ static void gfx_v9_4_3_rlc_stop(struct amdgpu_device *adev) for (i = 0; i < adev->gfx.num_xcd; i++) { WREG32_FIELD15_PREREG(GC, i, RLC_CNTL, RLC_ENABLE_F32, 0); - gfx_v9_4_3_enable_gui_idle_interrupt(adev, false); + gfx_v9_4_3_enable_gui_idle_interrupt(adev, false, i); gfx_v9_4_3_wait_for_rlc_serdes(adev, i); } } @@ -1228,7 +1228,7 @@ static void gfx_v9_4_3_rlc_start(struct amdgpu_device *adev) /* carrizo do enable cp interrupt after cp inited */ if (!(adev->flags & AMD_IS_APU)) { - gfx_v9_4_3_enable_gui_idle_interrupt(adev, true); + gfx_v9_4_3_enable_gui_idle_interrupt(adev, true, i); udelay(50); } @@ -1865,10 +1865,10 @@ static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) struct amdgpu_ring *ring; for (i = 0; i < adev->gfx.num_xcd; i++) { - gfx_v9_4_3_enable_gui_idle_interrupt(adev, false); + gfx_v9_4_3_enable_gui_idle_interrupt(adev, false, i); if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - gfx_v9_4_3_disable_gpa_mode(adev); + gfx_v9_4_3_disable_gpa_mode(adev, i); r = gfx_v9_4_3_cp_compute_load_microcode(adev, i); if (r) @@ -1888,7 +1888,7 @@ static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) amdgpu_ring_test_helper(ring); } - gfx_v9_4_3_enable_gui_idle_interrupt(adev, true); + gfx_v9_4_3_enable_gui_idle_interrupt(adev, true, i); } return 0; -- cgit v1.2.3 From d51ac6d0a23caf1005cb640f8533161c5d2dd0c0 Mon Sep 17 00:00:00 2001 From: Le Ma <le.ma@amd.com> Date: Tue, 24 May 2022 11:22:49 +0800 Subject: drm/amdgpu: add xcc index argument to select_sh_se function v2 v1: To support multiple XCD case (Le) v2: introduce xcc index to gfx_v11_0_select_sh_se (Hawking) Signed-off-by: Le Ma <le.ma@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 4 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 16 +++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 +-- drivers/gpu/drm/amd/amdgpu/cik.c | 4 +-- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 16 +++++------ drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 8 +++--- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 24 ++++++++--------- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 33 ++++++++++++----------- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 33 ++++++++++++----------- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 30 ++++++++++----------- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 20 +++++++------- drivers/gpu/drm/amd/amdgpu/nv.c | 4 +-- drivers/gpu/drm/amd/amdgpu/si.c | 4 +-- drivers/gpu/drm/amd/amdgpu/soc15.c | 4 +-- drivers/gpu/drm/amd/amdgpu/soc21.c | 4 +-- drivers/gpu/drm/amd/amdgpu/vi.c | 4 +-- 18 files changed, 110 insertions(+), 106 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index ebb35633058c..ae06d1f2af93 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -787,7 +787,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, for (se_idx = 0; se_idx < se_cnt; se_idx++) { for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) { - amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff, 0); queue_map = RREG32_SOC15(GC, 0, mmSPI_CSQ_WF_ACTIVE_STATUS); /* @@ -820,7 +820,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, } } - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); soc15_grbm_select(adev, 0, 0, 0, 0, 0); unlock_spi_csq_mutexes(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index f60753f97ac5..cc64ae550bc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -136,7 +136,7 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, } mutex_lock(&adev->grbm_idx_mutex); amdgpu_gfx_select_se_sh(adev, se_bank, - sh_bank, instance_bank); + sh_bank, instance_bank, 0); } else if (use_ring) { mutex_lock(&adev->srbm_mutex); amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue, vmid); @@ -169,7 +169,7 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, end: if (use_bank) { - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } else if (use_ring) { amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0); @@ -263,7 +263,7 @@ static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 off mutex_lock(&adev->grbm_idx_mutex); amdgpu_gfx_select_se_sh(adev, rd->id.grbm.se, rd->id.grbm.sh, - rd->id.grbm.instance); + rd->id.grbm.instance, 0); } if (rd->id.use_srbm) { @@ -295,7 +295,7 @@ static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 off } end: if (rd->id.use_grbm) { - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } @@ -907,13 +907,13 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, /* switch to the specific se/sh/cu */ mutex_lock(&adev->grbm_idx_mutex); - amdgpu_gfx_select_se_sh(adev, se, sh, cu); + amdgpu_gfx_select_se_sh(adev, se, sh, cu, 0); x = 0; if (adev->gfx.funcs->read_wave_data) adev->gfx.funcs->read_wave_data(adev, simd, wave, data, &x); - amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0); mutex_unlock(&adev->grbm_idx_mutex); pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); @@ -1001,7 +1001,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, /* switch to the specific se/sh/cu */ mutex_lock(&adev->grbm_idx_mutex); - amdgpu_gfx_select_se_sh(adev, se, sh, cu); + amdgpu_gfx_select_se_sh(adev, se, sh, cu, 0); if (bank == 0) { if (adev->gfx.funcs->read_wave_vgprs) @@ -1011,7 +1011,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, adev->gfx.funcs->read_wave_sgprs(adev, simd, wave, offset, size>>2, data); } - amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0); mutex_unlock(&adev->grbm_idx_mutex); pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index a9e41d7970ea..77e2f714e357 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -237,7 +237,7 @@ struct amdgpu_gfx_funcs { /* get the gpu clock counter */ uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev); void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num, - u32 sh_num, u32 instance); + u32 sh_num, u32 instance, int xcc_id); void (*read_wave_data)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields); void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd, @@ -386,7 +386,7 @@ struct amdgpu_gfx { }; #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev)) -#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance)) +#define amdgpu_gfx_select_se_sh(adev, se, sh, instance, xcc_id) ((adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance), (xcc_id))) #define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid)) #define amdgpu_gfx_init_spm_golden(adev) (adev)->gfx.funcs->init_spm_golden((adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index de6d10390ab2..5641cf05d856 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1141,12 +1141,12 @@ static uint32_t cik_get_register_value(struct amdgpu_device *adev, mutex_lock(&adev->grbm_idx_mutex); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0); val = RREG32(reg_offset); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); return val; } else { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index accc0a7251b9..323f5b8927ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3490,7 +3490,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev); static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, - u32 sh_num, u32 instance); + u32 sh_num, u32 instance, int xcc_id); static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev); static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev); @@ -4712,7 +4712,7 @@ static int gfx_v10_0_sw_fini(void *handle) } static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, - u32 sh_num, u32 instance) + u32 sh_num, u32 instance, int xcc_id) { u32 data; @@ -4772,13 +4772,13 @@ static void gfx_v10_0_setup_rb(struct amdgpu_device *adev) (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 6))) && ((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1)) continue; - gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0); data = gfx_v10_0_get_rb_active_bitmap(adev); active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * rb_bitmap_width_per_sh); } } - gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); adev->gfx.config.backend_enable_mask = active_rbs; @@ -4907,7 +4907,7 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0); wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev); /* * Set corresponding TCP bits for the inactive WGPs in @@ -4940,7 +4940,7 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev) } } - gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } @@ -9540,7 +9540,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, mask = 1; ao_bitmap = 0; counter = 0; - gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0); if (i < 4 && j < 2) gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh( adev, disable_masks[i * 2 + j]); @@ -9561,7 +9561,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, cu_info->ao_cu_bitmap[i][j] = ao_bitmap; } } - gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 4fbefe236fc7..50d0ff9ca259 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -112,7 +112,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev); static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, - u32 sh_num, u32 instance); + u32 sh_num, u32 instance, int xcc_id); static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev); static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); @@ -1484,7 +1484,7 @@ static int gfx_v11_0_sw_fini(void *handle) } static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, - u32 sh_num, u32 instance) + u32 sh_num, u32 instance, int xcc_id) { u32 data; @@ -6473,7 +6473,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { mask = 1; counter = 0; - gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0); if (i < 8 && j < 2) gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh( adev, disable_masks[i * 2 + j]); @@ -6505,7 +6505,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, active_cu_number += counter; } } - gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index d9ce4d1c50e4..7cb72bf1acdd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -1285,7 +1285,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) } static void gfx_v6_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, - u32 sh_num, u32 instance) + u32 sh_num, u32 instance, int xcc_id) { u32 data; @@ -1438,12 +1438,12 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev, } /* GRBM_GFX_INDEX has a different offset on SI */ - gfx_v6_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff); + gfx_v6_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0); WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se); } /* GRBM_GFX_INDEX has a different offset on SI */ - gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); } static void gfx_v6_0_setup_rb(struct amdgpu_device *adev) @@ -1459,14 +1459,14 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0); data = gfx_v6_0_get_rb_active_bitmap(adev); active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * rb_bitmap_width_per_sh); } } - gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); adev->gfx.config.backend_enable_mask = active_rbs; adev->gfx.config.num_rbs = hweight32(active_rbs); @@ -1487,7 +1487,7 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev) /* cache the values for userspace */ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0); adev->gfx.config.rb_config[i][j].rb_backend_disable = RREG32(mmCC_RB_BACKEND_DISABLE); adev->gfx.config.rb_config[i][j].user_rb_backend_disable = @@ -1496,7 +1496,7 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev) RREG32(mmPA_SC_RASTER_CONFIG); } } - gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } @@ -1535,7 +1535,7 @@ static void gfx_v6_0_setup_spi(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0); data = RREG32(mmSPI_STATIC_THREAD_MGMT_3); active_cu = gfx_v6_0_get_cu_enabled(adev); @@ -1550,7 +1550,7 @@ static void gfx_v6_0_setup_spi(struct amdgpu_device *adev) } } } - gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } @@ -2391,7 +2391,7 @@ static void gfx_v6_0_enable_lbpw(struct amdgpu_device *adev, bool enable) WREG32_FIELD(RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0); if (!enable) { - gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32(mmSPI_LB_CU_MASK, 0x00ff); } } @@ -3571,7 +3571,7 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev) mask = 1; ao_bitmap = 0; counter = 0; - gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0); if (i < 4 && j < 2) gfx_v6_0_set_user_cu_inactive_bitmap( adev, disable_masks[i * 2 + j]); @@ -3593,7 +3593,7 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev) } } - gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 46740ad9a80f..d055e44eee1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -1552,7 +1552,8 @@ static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev) * Select which SE, SH combinations to address. */ static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, - u32 se_num, u32 sh_num, u32 instance) + u32 se_num, u32 sh_num, u32 instance, + int xcc_id) { u32 data; @@ -1732,13 +1733,13 @@ gfx_v7_0_write_harvested_raster_configs(struct amdgpu_device *adev, } /* GRBM_GFX_INDEX has a different offset on CI+ */ - gfx_v7_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0); WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se); WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); } /* GRBM_GFX_INDEX has a different offset on CI+ */ - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); } /** @@ -1761,13 +1762,13 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0); data = gfx_v7_0_get_rb_active_bitmap(adev); active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * rb_bitmap_width_per_sh); } } - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); adev->gfx.config.backend_enable_mask = active_rbs; adev->gfx.config.num_rbs = hweight32(active_rbs); @@ -1790,7 +1791,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) /* cache the values for userspace */ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0); adev->gfx.config.rb_config[i][j].rb_backend_disable = RREG32(mmCC_RB_BACKEND_DISABLE); adev->gfx.config.rb_config[i][j].user_rb_backend_disable = @@ -1801,7 +1802,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) RREG32(mmPA_SC_RASTER_CONFIG_1); } } - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } @@ -1911,7 +1912,7 @@ static void gfx_v7_0_constants_init(struct amdgpu_device *adev) * making sure that the following register writes will be broadcasted * to all the shaders */ - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ @@ -3301,7 +3302,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0); for (k = 0; k < adev->usec_timeout; k++) { if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0) break; @@ -3309,7 +3310,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev) } } } - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | @@ -3474,7 +3475,7 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev) WREG32(mmRLC_LB_CNTR_MAX, 0x00008000); mutex_lock(&adev->grbm_idx_mutex); - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff); WREG32(mmRLC_LB_PARAMS, 0x00600408); WREG32(mmRLC_LB_CNTL, 0x80000004); @@ -3530,7 +3531,7 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable) tmp = gfx_v7_0_halt_rlc(adev); mutex_lock(&adev->grbm_idx_mutex); - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | @@ -3584,7 +3585,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable) tmp = gfx_v7_0_halt_rlc(adev); mutex_lock(&adev->grbm_idx_mutex); - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | @@ -3635,7 +3636,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable) tmp = gfx_v7_0_halt_rlc(adev); mutex_lock(&adev->grbm_idx_mutex); - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK; @@ -5115,7 +5116,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) mask = 1; ao_bitmap = 0; counter = 0; - gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0); if (i < 4 && j < 2) gfx_v7_0_set_user_cu_inactive_bitmap( adev, disable_masks[i * 2 + j]); @@ -5136,7 +5137,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) cu_info->ao_cu_bitmap[i][j] = ao_bitmap; } } - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 8a43e87de49f..b60480876149 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -3395,7 +3395,8 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) } static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, - u32 se_num, u32 sh_num, u32 instance) + u32 se_num, u32 sh_num, u32 instance, + int xcc_id) { u32 data; @@ -3579,13 +3580,13 @@ gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev, } /* GRBM_GFX_INDEX has a different offset on VI */ - gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff); + gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0); WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se); WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); } /* GRBM_GFX_INDEX has a different offset on VI */ - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); } static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) @@ -3601,13 +3602,13 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0); data = gfx_v8_0_get_rb_active_bitmap(adev); active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * rb_bitmap_width_per_sh); } } - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); adev->gfx.config.backend_enable_mask = active_rbs; adev->gfx.config.num_rbs = hweight32(active_rbs); @@ -3630,7 +3631,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) /* cache the values for userspace */ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0); adev->gfx.config.rb_config[i][j].rb_backend_disable = RREG32(mmCC_RB_BACKEND_DISABLE); adev->gfx.config.rb_config[i][j].user_rb_backend_disable = @@ -3641,7 +3642,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) RREG32(mmPA_SC_RASTER_CONFIG_1); } } - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } @@ -3788,7 +3789,7 @@ static void gfx_v8_0_constants_init(struct amdgpu_device *adev) * making sure that the following register writes will be broadcasted * to all the shaders */ - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32(mmPA_SC_FIFO_SIZE, (adev->gfx.config.sc_prim_fifo_size_frontend << @@ -3819,7 +3820,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0); for (k = 0; k < adev->usec_timeout; k++) { if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0) break; @@ -3827,7 +3828,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) } if (k == adev->usec_timeout) { gfx_v8_0_select_se_sh(adev, 0xffffffff, - 0xffffffff, 0xffffffff); + 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); DRM_INFO("Timeout wait for RLC serdes %u,%u\n", i, j); @@ -3835,7 +3836,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) } } } - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | @@ -5481,7 +5482,7 @@ static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, { uint32_t data; - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); @@ -6723,11 +6724,11 @@ static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data, */ if (from_wq) { mutex_lock(&adev->grbm_idx_mutex); - gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id); + gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id, 0); sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE); - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } @@ -7116,7 +7117,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) mask = 1; ao_bitmap = 0; counter = 0; - gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0); if (i < 4 && j < 2) gfx_v8_0_set_user_cu_inactive_bitmap( adev, disable_masks[i * 2 + j]); @@ -7137,7 +7138,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) cu_info->ao_cu_bitmap[i][j] = ao_bitmap; } } - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 4939fd61355b..2fa7adef18a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1504,7 +1504,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) mask = 1; cu_bitmap = 0; counter = 0; - amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { if (cu_info->bitmap[i][j] & mask) { @@ -1523,7 +1523,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) cu_info->ao_cu_bitmap[i][j] = cu_bitmap; } } - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } @@ -1545,7 +1545,7 @@ static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); /* set mmRLC_LB_PARAMS = 0x003F_1006 */ @@ -1594,7 +1594,7 @@ static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); /* set mmRLC_LB_PARAMS = 0x003F_1006 */ @@ -2241,7 +2241,7 @@ static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) } void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, - u32 instance) + u32 instance, int xcc_id) { u32 data; @@ -2290,13 +2290,13 @@ static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); data = gfx_v9_0_get_rb_active_bitmap(adev); active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * rb_bitmap_width_per_sh); } } - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); adev->gfx.config.backend_enable_mask = active_rbs; @@ -2433,7 +2433,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); for (k = 0; k < adev->usec_timeout; k++) { if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) break; @@ -2441,7 +2441,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) } if (k == adev->usec_timeout) { amdgpu_gfx_select_se_sh(adev, 0xffffffff, - 0xffffffff, 0xffffffff); + 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); DRM_INFO("Timeout wait for RLC serdes %u,%u\n", i, j); @@ -2449,7 +2449,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) } } } - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | @@ -6608,7 +6608,7 @@ static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev) for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { - amdgpu_gfx_select_se_sh(adev, j, 0x0, k); + amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0); RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); } } @@ -6670,7 +6670,7 @@ static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { - amdgpu_gfx_select_se_sh(adev, j, 0, k); + amdgpu_gfx_select_se_sh(adev, j, 0, k, 0); reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); if (reg_value) @@ -6685,7 +6685,7 @@ static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, err_data->ce_count += sec_count; err_data->ue_count += ded_count; - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); gfx_v9_0_query_utc_edc_status(adev, err_data); @@ -7145,7 +7145,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, mask = 1; ao_bitmap = 0; counter = 0; - amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); gfx_v9_0_set_user_cu_inactive_bitmap( adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); bitmap = gfx_v9_0_get_cu_active_bitmap(adev); @@ -7178,7 +7178,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; } } - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h index dfe8d4841f58..f9f6edc5e558 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h @@ -27,6 +27,6 @@ extern const struct amdgpu_ip_block_version gfx_v9_0_ip_block; void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, - u32 instance); + u32 instance, int xcc_id); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index 93438770ca1a..d648a29c33e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -777,7 +777,7 @@ void gfx_v9_4_2_set_power_brake_sequence(struct amdgpu_device *adev) { u32 tmp; - gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); tmp = 0; tmp = REG_SET_FIELD(tmp, GC_THROTTLE_CTRL, PATTERN_MODE, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index f1c04140e717..b67be666f38a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -568,7 +568,8 @@ static int gfx_v9_4_3_mec_init(struct amdgpu_device *adev) static void gfx_v9_4_3_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, - u32 instance) + u32 instance, + int xcc_id) { u32 data; @@ -591,7 +592,7 @@ static void gfx_v9_4_3_select_se_sh(struct amdgpu_device *adev, else data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); - WREG32_SOC15_RLC_SHADOW_EX(reg, GC, 0, regGRBM_GFX_INDEX, data); + WREG32_SOC15_RLC_SHADOW_EX(reg, GC, xcc_id, regGRBM_GFX_INDEX, data); } static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) @@ -907,13 +908,13 @@ static void gfx_v9_4_3_setup_rb(struct amdgpu_device *adev, int xcc_id) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff); + gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff, xcc_id); data = gfx_v9_4_3_get_rb_active_bitmap(adev); active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * rb_bitmap_width_per_sh); } } - gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, xcc_id); mutex_unlock(&adev->grbm_idx_mutex); adev->gfx.config.backend_enable_mask = active_rbs; @@ -1146,7 +1147,7 @@ static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev, mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff); + gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff, xcc_id); for (k = 0; k < adev->usec_timeout; k++) { if (RREG32_SOC15(GC, 0, regRLC_SERDES_CU_MASTER_BUSY) == 0) break; @@ -1154,7 +1155,8 @@ static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev, } if (k == adev->usec_timeout) { gfx_v9_4_3_select_se_sh(adev, 0xffffffff, - 0xffffffff, 0xffffffff); + 0xffffffff, 0xffffffff, + xcc_id); mutex_unlock(&adev->grbm_idx_mutex); DRM_INFO("Timeout wait for RLC serdes %u,%u\n", i, j); @@ -1162,7 +1164,7 @@ static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev, } } } - gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, xcc_id); mutex_unlock(&adev->grbm_idx_mutex); mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | @@ -3065,7 +3067,7 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, mask = 1; ao_bitmap = 0; counter = 0; - gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff); + gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff, 0); gfx_v9_4_3_set_user_cu_inactive_bitmap( adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev); @@ -3098,7 +3100,7 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; } } - gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 47420b403871..148049782f50 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -371,12 +371,12 @@ static uint32_t nv_read_indexed_register(struct amdgpu_device *adev, u32 se_num, mutex_lock(&adev->grbm_idx_mutex); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0); val = RREG32(reg_offset); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); return val; } diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 7f99e130acd0..f64b87b11b1b 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1181,12 +1181,12 @@ static uint32_t si_get_register_value(struct amdgpu_device *adev, mutex_lock(&adev->grbm_idx_mutex); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0); val = RREG32(reg_offset); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); return val; } else { diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 4b79a8933476..4d1487a9836c 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -363,12 +363,12 @@ static uint32_t soc15_read_indexed_register(struct amdgpu_device *adev, u32 se_n mutex_lock(&adev->grbm_idx_mutex); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0); val = RREG32(reg_offset); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); return val; } diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 514bfc705d5a..6ef4be9322d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -288,12 +288,12 @@ static uint32_t soc21_read_indexed_register(struct amdgpu_device *adev, u32 se_n mutex_lock(&adev->grbm_idx_mutex); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0); val = RREG32(reg_offset); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); return val; } diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 531f173ade2d..8e70581960fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -762,12 +762,12 @@ static uint32_t vi_get_register_value(struct amdgpu_device *adev, mutex_lock(&adev->grbm_idx_mutex); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0); val = RREG32(reg_offset); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); return val; } else { -- cgit v1.2.3 From 86b20703e4c5a3c39891def0a68e7438aeca9db9 Mon Sep 17 00:00:00 2001 From: Le Ma <le.ma@amd.com> Date: Wed, 27 Jul 2022 14:24:05 +0800 Subject: drm/amdgpu: add xcc index argument to rlc safe_mode func (v4) v1: To support multple XCD case (Le) v2: unify naming style (Le) v3: apply the changes to gc v11_0 (Hawking) v4: apply the changes to gc SOC21 (Morris) Signed-off-by: Le Ma <le.ma@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Morris Zhang <Shiwu.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c | 16 +++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h | 10 +++--- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 12 ++++---- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 20 ++++++------ drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 4 +-- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 20 ++++++------ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 20 ++++++------ drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 16 +++++----- drivers/gpu/drm/amd/amdgpu/nv.c | 4 +-- drivers/gpu/drm/amd/amdgpu/soc21.c | 4 +-- drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c | 6 ++-- .../drm/amd/pm/powerplay/hwmgr/smu7_powertune.c | 12 ++++---- .../drm/amd/pm/powerplay/hwmgr/vega10_powertune.c | 36 +++++++++++----------- 13 files changed, 90 insertions(+), 90 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c index 85fb730d9fc8..d3bed9a3e61f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c @@ -34,9 +34,9 @@ * * Set RLC enter into safe mode if RLC is enabled and haven't in safe mode. */ -void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev) +void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev, int xcc_id) { - if (adev->gfx.rlc.in_safe_mode) + if (adev->gfx.rlc.in_safe_mode[xcc_id]) return; /* if RLC is not enabled, do nothing */ @@ -46,8 +46,8 @@ void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev) if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_3D_CGCG)) { - adev->gfx.rlc.funcs->set_safe_mode(adev); - adev->gfx.rlc.in_safe_mode = true; + adev->gfx.rlc.funcs->set_safe_mode(adev, xcc_id); + adev->gfx.rlc.in_safe_mode[xcc_id] = true; } } @@ -58,9 +58,9 @@ void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev) * * Set RLC exit safe mode if RLC is enabled and have entered into safe mode. */ -void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev) +void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev, int xcc_id) { - if (!(adev->gfx.rlc.in_safe_mode)) + if (!(adev->gfx.rlc.in_safe_mode[xcc_id])) return; /* if RLC is not enabled, do nothing */ @@ -70,8 +70,8 @@ void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev) if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_3D_CGCG)) { - adev->gfx.rlc.funcs->unset_safe_mode(adev); - adev->gfx.rlc.in_safe_mode = false; + adev->gfx.rlc.funcs->unset_safe_mode(adev, xcc_id); + adev->gfx.rlc.in_safe_mode[xcc_id] = false; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index 23f060db9255..80b263646966 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -157,8 +157,8 @@ typedef struct _RLC_TABLE_OF_CONTENT { struct amdgpu_rlc_funcs { bool (*is_rlc_enabled)(struct amdgpu_device *adev); - void (*set_safe_mode)(struct amdgpu_device *adev); - void (*unset_safe_mode)(struct amdgpu_device *adev); + void (*set_safe_mode)(struct amdgpu_device *adev, int xcc_id); + void (*unset_safe_mode)(struct amdgpu_device *adev, int xcc_id); int (*init)(struct amdgpu_device *adev); u32 (*get_csb_size)(struct amdgpu_device *adev); void (*get_csb_buffer)(struct amdgpu_device *adev, volatile u32 *buffer); @@ -201,7 +201,7 @@ struct amdgpu_rlc { u32 cp_table_size; /* safe mode for updating CG/PG state */ - bool in_safe_mode; + bool in_safe_mode[8]; const struct amdgpu_rlc_funcs *funcs; /* for firmware data */ @@ -260,8 +260,8 @@ struct amdgpu_rlc { struct amdgpu_rlcg_reg_access_ctrl reg_access_ctrl; }; -void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev); -void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev); +void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev, int xcc_id); +void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev, int xcc_id); int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws); int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev); int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 323f5b8927ad..8bd07ff59671 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -7571,7 +7571,7 @@ static bool gfx_v10_0_is_rlc_enabled(struct amdgpu_device *adev) return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false; } -static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev) +static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) { uint32_t data; unsigned i; @@ -7612,7 +7612,7 @@ static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev) } } -static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev) +static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) { uint32_t data; @@ -7959,7 +7959,7 @@ static void gfx_v10_0_apply_medium_grain_clock_gating_workaround(struct amdgpu_d static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, bool enable) { - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); if (enable) { /* enable FGCG firstly*/ @@ -7998,7 +7998,7 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, AMD_CG_SUPPORT_GFX_3D_CGLS)) gfx_v10_0_enable_gui_idle_interrupt(adev, enable); - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return 0; } @@ -8092,11 +8092,11 @@ static void gfx_v10_cntl_power_gating(struct amdgpu_device *adev, bool enable) static void gfx_v10_cntl_pg(struct amdgpu_device *adev, bool enable) { - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); gfx_v10_cntl_power_gating(adev, enable); - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 50d0ff9ca259..d3c89e6c0c03 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -123,8 +123,8 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev); static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, uint16_t pasid, uint32_t flush_type, bool all_hub, uint8_t dst_sel); -static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev); -static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev); +static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id); +static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id); static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev, bool enable); @@ -4532,7 +4532,7 @@ static int gfx_v11_0_soft_reset(void *handle) tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0); WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); - gfx_v11_0_set_safe_mode(adev); + gfx_v11_0_set_safe_mode(adev, 0); for (i = 0; i < adev->gfx.mec.num_mec; ++i) { for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { @@ -4632,7 +4632,7 @@ static int gfx_v11_0_soft_reset(void *handle) tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); - gfx_v11_0_unset_safe_mode(adev); + gfx_v11_0_unset_safe_mode(adev, 0); return gfx_v11_0_cp_resume(adev); } @@ -4798,7 +4798,7 @@ static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev) return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false; } -static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev) +static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) { uint32_t data; unsigned i; @@ -4817,7 +4817,7 @@ static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev) } } -static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev) +static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) { WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK); } @@ -5045,7 +5045,7 @@ static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev, bool enable) { - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); gfx_v11_0_update_coarse_grain_clock_gating(adev, enable); @@ -5065,7 +5065,7 @@ static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev, AMD_CG_SUPPORT_GFX_3D_CGLS)) gfx_v11_0_enable_gui_idle_interrupt(adev, enable); - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return 0; } @@ -5133,11 +5133,11 @@ static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable) { - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); gfx_v11_cntl_power_gating(adev, enable); - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static int gfx_v11_0_set_powergating_state(void *handle, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index d055e44eee1d..d56dda5fc588 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -3362,7 +3362,7 @@ static bool gfx_v7_0_is_rlc_enabled(struct amdgpu_device *adev) return true; } -static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev) +static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) { u32 tmp, i, mask; @@ -3384,7 +3384,7 @@ static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev) } } -static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev) +static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) { u32 tmp; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index b60480876149..278416acf060 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4903,7 +4903,7 @@ static int gfx_v8_0_hw_fini(void *handle) pr_debug("For SRIOV client, shouldn't do anything.\n"); return 0; } - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); if (!gfx_v8_0_wait_for_idle(adev)) gfx_v8_0_cp_enable(adev, false); else @@ -4912,7 +4912,7 @@ static int gfx_v8_0_hw_fini(void *handle) adev->gfx.rlc.funcs->stop(adev); else pr_err("rlc is busy, skip halt rlc\n"); - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return 0; } @@ -5377,7 +5377,7 @@ static int gfx_v8_0_set_powergating_state(void *handle, AMD_PG_SUPPORT_RLC_SMU_HS | AMD_PG_SUPPORT_CP | AMD_PG_SUPPORT_GFX_DMG)) - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); switch (adev->asic_type) { case CHIP_CARRIZO: case CHIP_STONEY: @@ -5431,7 +5431,7 @@ static int gfx_v8_0_set_powergating_state(void *handle, AMD_PG_SUPPORT_RLC_SMU_HS | AMD_PG_SUPPORT_CP | AMD_PG_SUPPORT_GFX_DMG)) - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return 0; } @@ -5536,7 +5536,7 @@ static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev) return true; } -static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev) +static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) { uint32_t data; unsigned i; @@ -5563,7 +5563,7 @@ static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev) } } -static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev) +static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) { uint32_t data; unsigned i; @@ -5622,7 +5622,7 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev { uint32_t temp, data; - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); /* It is disabled by HW by default */ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { @@ -5718,7 +5718,7 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev gfx_v8_0_wait_for_rlc_serdes(adev); } - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, @@ -5728,7 +5728,7 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); @@ -5811,7 +5811,7 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev gfx_v8_0_wait_for_rlc_serdes(adev); - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, bool enable) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 2fa7adef18a9..bce6919d666a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4623,7 +4623,7 @@ static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) return true; } -static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) +static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) { uint32_t data; unsigned i; @@ -4640,7 +4640,7 @@ static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) } } -static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) +static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) { uint32_t data; @@ -4651,7 +4651,7 @@ static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, bool enable) { - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { gfx_v9_0_enable_gfx_cg_power_gating(adev, true); @@ -4663,7 +4663,7 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); } - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, @@ -4690,7 +4690,7 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev { uint32_t data, def; - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); /* It is disabled by HW by default */ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { @@ -4757,7 +4757,7 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev } } - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, @@ -4768,7 +4768,7 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, if (!adev->gfx.num_gfx_rings) return; - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); /* Enable 3D CGCG/CGLS */ if (enable) { @@ -4812,7 +4812,7 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); } - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, @@ -4820,7 +4820,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev { uint32_t def, data; - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); @@ -4864,7 +4864,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); } - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index b67be666f38a..baa10ee8ec69 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1090,14 +1090,14 @@ static bool gfx_v9_4_3_is_rlc_enabled(struct amdgpu_device *adev) return true; } -static void gfx_v9_4_3_set_safe_mode(struct amdgpu_device *adev) +static void gfx_v9_4_3_set_safe_mode(struct amdgpu_device *adev, int xcc_id) { uint32_t data; unsigned i; data = RLC_SAFE_MODE__CMD_MASK; data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); - WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data); + WREG32_SOC15(GC, xcc_id, regRLC_SAFE_MODE, data); /* wait for RLC_SAFE_MODE */ for (i = 0; i < adev->usec_timeout; i++) { @@ -1107,12 +1107,12 @@ static void gfx_v9_4_3_set_safe_mode(struct amdgpu_device *adev) } } -static void gfx_v9_4_3_unset_safe_mode(struct amdgpu_device *adev) +static void gfx_v9_4_3_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) { uint32_t data; data = RLC_SAFE_MODE__CMD_MASK; - WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data); + WREG32_SOC15(GC, xcc_id, regRLC_SAFE_MODE, data); } static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev) @@ -2125,7 +2125,7 @@ static void gfx_v9_4_3_update_medium_grain_clock_gating(struct amdgpu_device *ad { uint32_t data, def; - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id); /* It is disabled by HW by default */ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { @@ -2186,7 +2186,7 @@ static void gfx_v9_4_3_update_medium_grain_clock_gating(struct amdgpu_device *ad } } - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id); } static void gfx_v9_4_3_update_coarse_grain_clock_gating(struct amdgpu_device *adev, @@ -2194,7 +2194,7 @@ static void gfx_v9_4_3_update_coarse_grain_clock_gating(struct amdgpu_device *ad { uint32_t def, data; - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { def = data = RREG32_SOC15(GC, xcc_id, regRLC_CGTT_MGCG_OVERRIDE); @@ -2238,7 +2238,7 @@ static void gfx_v9_4_3_update_coarse_grain_clock_gating(struct amdgpu_device *ad WREG32_SOC15(GC, xcc_id, regRLC_CGCG_CGLS_CTRL, data); } - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id); } static int gfx_v9_4_3_update_gfx_clock_gating(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 148049782f50..dabeeab2f2ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -629,9 +629,9 @@ static int nv_update_umd_stable_pstate(struct amdgpu_device *adev, bool enter) { if (enter) - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); else - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); if (adev->gfx.funcs->update_perfmon_mgcg) adev->gfx.funcs->update_perfmon_mgcg(adev, !enter); diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 6ef4be9322d9..7d59303ca2f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -549,9 +549,9 @@ static int soc21_update_umd_stable_pstate(struct amdgpu_device *adev, bool enter) { if (enter) - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); else - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); if (adev->gfx.funcs->update_perfmon_mgcg) adev->gfx.funcs->update_perfmon_mgcg(adev, !enter); diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c index f5e08b60f66e..36c831b280ed 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c @@ -508,19 +508,19 @@ static int kv_enable_didt(struct amdgpu_device *adev, bool enable) pi->caps_db_ramping || pi->caps_td_ramping || pi->caps_tcp_ramping) { - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); if (enable) { ret = kv_program_pt_config_registers(adev, didt_config_kv); if (ret) { - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return ret; } } kv_do_enable_didt(adev, enable); - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } return 0; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_powertune.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_powertune.c index 32a5a00fd8ae..21be23ec3c79 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_powertune.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_powertune.c @@ -973,7 +973,7 @@ int smu7_enable_didt_config(struct pp_hwmgr *hwmgr) PP_CAP(PHM_PlatformCaps_TDRamping) || PP_CAP(PHM_PlatformCaps_TCPRamping)) { - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); mutex_lock(&adev->grbm_idx_mutex); value = 0; value2 = cgs_read_register(hwmgr->device, mmGRBM_GFX_INDEX); @@ -1048,13 +1048,13 @@ int smu7_enable_didt_config(struct pp_hwmgr *hwmgr) } mutex_unlock(&adev->grbm_idx_mutex); - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } return 0; error: mutex_unlock(&adev->grbm_idx_mutex); - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return result; } @@ -1068,7 +1068,7 @@ int smu7_disable_didt_config(struct pp_hwmgr *hwmgr) PP_CAP(PHM_PlatformCaps_TDRamping) || PP_CAP(PHM_PlatformCaps_TCPRamping)) { - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); result = smu7_enable_didt(hwmgr, false); PP_ASSERT_WITH_CODE((result == 0), @@ -1081,12 +1081,12 @@ int smu7_disable_didt_config(struct pp_hwmgr *hwmgr) PP_ASSERT_WITH_CODE((0 == result), "Failed to disable DPM DIDT.", goto error); } - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } return 0; error: - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return result; } diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_powertune.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_powertune.c index 9757d47dd6b8..309a9d3bc1b7 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_powertune.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_powertune.c @@ -915,7 +915,7 @@ static int vega10_enable_cac_driving_se_didt_config(struct pp_hwmgr *hwmgr) num_se = adev->gfx.config.max_shader_engines; - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); mutex_lock(&adev->grbm_idx_mutex); for (count = 0; count < num_se; count++) { @@ -940,7 +940,7 @@ static int vega10_enable_cac_driving_se_didt_config(struct pp_hwmgr *hwmgr) vega10_didt_set_mask(hwmgr, true); - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return 0; } @@ -949,11 +949,11 @@ static int vega10_disable_cac_driving_se_didt_config(struct pp_hwmgr *hwmgr) { struct amdgpu_device *adev = hwmgr->adev; - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); vega10_didt_set_mask(hwmgr, false); - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return 0; } @@ -966,7 +966,7 @@ static int vega10_enable_psm_gc_didt_config(struct pp_hwmgr *hwmgr) num_se = adev->gfx.config.max_shader_engines; - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); mutex_lock(&adev->grbm_idx_mutex); for (count = 0; count < num_se; count++) { @@ -985,7 +985,7 @@ static int vega10_enable_psm_gc_didt_config(struct pp_hwmgr *hwmgr) vega10_didt_set_mask(hwmgr, true); - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); vega10_program_gc_didt_config_registers(hwmgr, GCDiDtDroopCtrlConfig_vega10); if (PP_CAP(PHM_PlatformCaps_GCEDC)) @@ -1002,11 +1002,11 @@ static int vega10_disable_psm_gc_didt_config(struct pp_hwmgr *hwmgr) struct amdgpu_device *adev = hwmgr->adev; uint32_t data; - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); vega10_didt_set_mask(hwmgr, false); - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); if (PP_CAP(PHM_PlatformCaps_GCEDC)) { data = 0x00000000; @@ -1027,7 +1027,7 @@ static int vega10_enable_se_edc_config(struct pp_hwmgr *hwmgr) num_se = adev->gfx.config.max_shader_engines; - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); mutex_lock(&adev->grbm_idx_mutex); for (count = 0; count < num_se; count++) { @@ -1048,7 +1048,7 @@ static int vega10_enable_se_edc_config(struct pp_hwmgr *hwmgr) vega10_didt_set_mask(hwmgr, true); - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return 0; } @@ -1057,11 +1057,11 @@ static int vega10_disable_se_edc_config(struct pp_hwmgr *hwmgr) { struct amdgpu_device *adev = hwmgr->adev; - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); vega10_didt_set_mask(hwmgr, false); - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return 0; } @@ -1075,7 +1075,7 @@ static int vega10_enable_psm_gc_edc_config(struct pp_hwmgr *hwmgr) num_se = adev->gfx.config.max_shader_engines; - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); vega10_program_gc_didt_config_registers(hwmgr, AvfsPSMResetConfig_vega10); @@ -1096,7 +1096,7 @@ static int vega10_enable_psm_gc_edc_config(struct pp_hwmgr *hwmgr) vega10_didt_set_mask(hwmgr, true); - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); vega10_program_gc_didt_config_registers(hwmgr, PSMGCEDCDroopCtrlConfig_vega10); @@ -1116,11 +1116,11 @@ static int vega10_disable_psm_gc_edc_config(struct pp_hwmgr *hwmgr) struct amdgpu_device *adev = hwmgr->adev; uint32_t data; - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); vega10_didt_set_mask(hwmgr, false); - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); if (PP_CAP(PHM_PlatformCaps_GCEDC)) { data = 0x00000000; @@ -1138,7 +1138,7 @@ static int vega10_enable_se_edc_force_stall_config(struct pp_hwmgr *hwmgr) struct amdgpu_device *adev = hwmgr->adev; int result; - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); mutex_lock(&adev->grbm_idx_mutex); WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000); @@ -1151,7 +1151,7 @@ static int vega10_enable_se_edc_force_stall_config(struct pp_hwmgr *hwmgr) vega10_didt_set_mask(hwmgr, false); - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return 0; } -- cgit v1.2.3 From 66daccde429611530db82605c197be01adadb389 Mon Sep 17 00:00:00 2001 From: Le Ma <le.ma@amd.com> Date: Wed, 1 Dec 2021 16:44:18 +0800 Subject: drm/amdgpu: add master/slave check in init phase Skip KCQ setup on slave xcc as there's no use case. Signed-off-by: Le Ma <le.ma@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 59 ++++++++++++++++++++------------- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 5 +++ drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 10 ++++-- 4 files changed, 50 insertions(+), 26 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 7f5c60381103..c83fb4277233 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -489,16 +489,19 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) return -EINVAL; spin_lock(&kiq->ring_lock); - if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * - adev->gfx.num_compute_rings)) { - spin_unlock(&adev->gfx.kiq[0].ring_lock); - return -ENOMEM; - } + if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) { + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * + adev->gfx.num_compute_rings)) { + spin_unlock(&kiq->ring_lock); + return -ENOMEM; + } - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - j = i + xcc_id * adev->gfx.num_compute_rings; - kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i], - RESET_QUEUES, 0, 0); + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + j = i + xcc_id * adev->gfx.num_compute_rings; + kiq->pmf->kiq_unmap_queues(kiq_ring, + &adev->gfx.compute_ring[i], + RESET_QUEUES, 0, 0); + } } if (adev->gfx.kiq[0].ring.sched.ready && !adev->job_hang) @@ -549,22 +552,26 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id) DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe, kiq_ring->queue); spin_lock(&kiq->ring_lock); - r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * - adev->gfx.num_compute_rings + - kiq->pmf->set_resources_size); - if (r) { - DRM_ERROR("Failed to lock KIQ (%d).\n", r); - spin_unlock(&adev->gfx.kiq[0].ring_lock); - return r; - } + /* No need to map kcq on the slave */ + if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) { + r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * + adev->gfx.num_compute_rings + + kiq->pmf->set_resources_size); + if (r) { + DRM_ERROR("Failed to lock KIQ (%d).\n", r); + spin_unlock(&adev->gfx.kiq[0].ring_lock); + return r; + } - if (adev->enable_mes) - queue_mask = ~0ULL; + if (adev->enable_mes) + queue_mask = ~0ULL; - kiq->pmf->kiq_set_resources(kiq_ring, queue_mask); - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - j = i + xcc_id * adev->gfx.num_compute_rings; - kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]); + kiq->pmf->kiq_set_resources(kiq_ring, queue_mask); + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + j = i + xcc_id * adev->gfx.num_compute_rings; + kiq->pmf->kiq_map_queues(kiq_ring, + &adev->gfx.compute_ring[i]); + } } r = amdgpu_ring_test_helper(kiq_ring); @@ -1078,3 +1085,9 @@ void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, adev->firmware.fw_size += ALIGN(fw_size, PAGE_SIZE); } } + +bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id) +{ + return !(xcc_id % (adev->gfx.num_xcc_per_xcp ? + adev->gfx.num_xcc_per_xcp : 1)); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 77e2f714e357..a2d311a4da5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -462,4 +462,6 @@ void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id) int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev); int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry); + +bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 4ff348e10e4d..ef96ff2f4272 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -406,6 +406,11 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev) else tmo = tmo_gfx; + /* skip ib test on the slave kcq */ + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE && + !amdgpu_gfx_is_master_xcc(adev, ring->xcc_id)) + continue; + r = amdgpu_ring_test_ib(ring, tmo); if (!r) { DRM_DEV_DEBUG(adev->dev, "ib test on %s succeeded\n", diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index baa10ee8ec69..572f84f487cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1885,9 +1885,13 @@ static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) if (r) return r; - for (j = 0; j < adev->gfx.num_compute_rings; j++) { - ring = &adev->gfx.compute_ring[j + i * adev->gfx.num_compute_rings]; - amdgpu_ring_test_helper(ring); + /* skip ring test on slave kcq */ + if (amdgpu_gfx_is_master_xcc(adev, i)) { + for (j = 0; j < adev->gfx.num_compute_rings; j++) { + ring = &adev->gfx.compute_ring[j + + i * adev->gfx.num_compute_rings]; + amdgpu_ring_test_helper(ring); + } } gfx_v9_4_3_enable_gui_idle_interrupt(adev, true, i); -- cgit v1.2.3 From 47659738fbd2f06730635a487605002ea9b11f3d Mon Sep 17 00:00:00 2001 From: Le Ma <le.ma@amd.com> Date: Fri, 19 Nov 2021 15:35:30 +0800 Subject: drm/amdgpu: allocate doorbell index for multi-die case Allocate different doorbell index for kiq/kcq rings on each die Signed-off-by: Le Ma <le.ma@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 9 ++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 8 +++++++- 5 files changed, 22 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index 8fd11497faba..ffb75d23d2fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -83,6 +83,8 @@ struct amdgpu_doorbell_index { }; uint32_t first_non_cp; uint32_t last_non_cp; + uint32_t xcc1_kiq_start; + uint32_t xcc1_mec_ring0_start; uint32_t max_assignment; /* Per engine SDMA doorbell size in dword */ uint32_t sdma_doorbell_range; @@ -164,7 +166,12 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT AMDGPU_VEGA20_DOORBELL64_FIRST_NON_CP = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0, AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7, - AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT = 0x18F, + /* kiq/kcq from second XCD. Max 8 XCDs */ + AMDGPU_VEGA20_DOORBELL_XCC1_KIQ_START = 0x190, + /* 8 compute rings per GC. Max to 0x1CE */ + AMDGPU_VEGA20_DOORBELL_XCC1_MEC_RING0_START = 0x197, + + AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT = 0x1CE, AMDGPU_VEGA20_DOORBELL_INVALID = 0xFFFF } AMDGPU_VEGA20_DOORBELL_ASSIGNMENT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index c83fb4277233..465ad0b7cddb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -316,6 +316,11 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, ring->doorbell_index = adev->doorbell_index.kiq; ring->xcc_id = xcc_id; ring->vm_hub = AMDGPU_GFXHUB_0; + if (xcc_id >= 1) + ring->doorbell_index = adev->doorbell_index.xcc1_kiq_start + + xcc_id - 1; + else + ring->doorbell_index = adev->doorbell_index.kiq; r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index 1d5af50331e4..d58353c89e59 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -270,6 +270,7 @@ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev, entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32); entry->timestamp_src = dw[2] >> 31; entry->pasid = dw[3] & 0xffff; + entry->node_id = (dw[3] >> 16) & 0xff; entry->pasid_src = dw[3] >> 31; entry->src_data[0] = dw[4]; entry->src_data[1] = dw[5]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h index be243adf3e65..7a8e686bdd41 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h @@ -53,6 +53,7 @@ struct amdgpu_iv_entry { uint64_t timestamp; unsigned timestamp_src; unsigned pasid; + unsigned node_id; unsigned pasid_src; unsigned src_data[AMDGPU_IRQ_SRC_DATA_MAX_SIZE_DW]; const uint32_t *iv_entry; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 572f84f487cd..56a415e151d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -752,7 +752,13 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->ring_obj = NULL; ring->use_doorbell = true; - ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; + if (xcc_id >= 1) + ring->doorbell_index = + (adev->doorbell_index.xcc1_mec_ring0_start + + ring_id - adev->gfx.num_compute_rings) << 1; + else + ring->doorbell_index = + (adev->doorbell_index.mec_ring0 + ring_id) << 1; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX9_MEC_HPD_SIZE); ring->vm_hub = AMDGPU_GFXHUB_0; -- cgit v1.2.3 From 3c4f6507abde5c36e892d63b25296fc6d9b13285 Mon Sep 17 00:00:00 2001 From: Guchun Chen <guchun.chen@amd.com> Date: Thu, 27 Apr 2023 09:21:05 +0800 Subject: drm/amdgpu: mark gfx_v9_4_3_disable_gpa_mode() static This was left global by accident, the corresponding functions for other hardware types are already static: drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c:1072:6: error: no previous prototype for function 'gfx_v9_4_3_disable_gpa_mode' [-Werror,-Wmissing-prototypes] Fixes: 86301129698b ("drm/amdgpu: split gc v9_4_3 functionality from gc v9_0") Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: Arnd Bergmann <arnd@arndb.de> Reviewed-by: Guchun Chen <guchun.chen@amd.com> Signed-off-by: Guchun Chen <guchun.chen@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 56a415e151d4..312491455382 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1075,7 +1075,7 @@ static void gfx_v9_4_3_init_pg(struct amdgpu_device *adev, int xcc_id) } } -void gfx_v9_4_3_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id) +static void gfx_v9_4_3_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id) { uint32_t data; -- cgit v1.2.3 From 45b54a7dd3437632352ed28163e982233ef190a8 Mon Sep 17 00:00:00 2001 From: Alex Deucher <alexander.deucher@amd.com> Date: Wed, 26 Apr 2023 14:49:00 -0400 Subject: drm/amdgpu/gfx9: always restore kcq MQDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Always restore the MQD not just when we do a reset. This allows us to move the MQD to VRAM if we want. v2: always reset ring pointer as well (Christian) Reviewed-by: Christian König <christian.koenig@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 7 ++----- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 7 ++----- 2 files changed, 4 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 8fb027cf1bfb..4a7556099469 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3589,17 +3589,14 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); - } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ - /* reset MQD to a clean status */ + } else { + /* restore MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); - /* reset ring buffer */ ring->wptr = 0; atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); amdgpu_ring_clear_ring(ring); - } else { - amdgpu_ring_clear_ring(ring); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 312491455382..b5924543e363 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1799,17 +1799,14 @@ static int gfx_v9_4_3_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id) if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); - } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ - /* reset MQD to a clean status */ + } else { + /* restore MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); - /* reset ring buffer */ ring->wptr = 0; atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0); amdgpu_ring_clear_ring(ring); - } else { - amdgpu_ring_clear_ring(ring); } return 0; -- cgit v1.2.3 From f4409a2361152f3480781a1dea1a3bd0d8369c78 Mon Sep 17 00:00:00 2001 From: Dan Carpenter <dan.carpenter@linaro.org> Date: Wed, 3 May 2023 18:09:35 +0300 Subject: drm/amdgpu: unlock on error in gfx_v9_4_3_kiq_resume() Smatch complains that we need to drop this lock before returning. drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c:1838 gfx_v9_4_3_kiq_resume() warn: inconsistent returns 'ring->mqd_obj->tbo.base.resv'. Fixes: 86301129698b ("drm/amdgpu: split gc v9_4_3 functionality from gc v9_0") Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index b5924543e363..c9ae3065a13d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1824,8 +1824,10 @@ static int gfx_v9_4_3_kiq_resume(struct amdgpu_device *adev, int xcc_id) return r; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (unlikely(r != 0)) + if (unlikely(r != 0)) { + amdgpu_bo_unreserve(ring->mqd_obj); return r; + } gfx_v9_4_3_kiq_init_queue(ring, xcc_id); amdgpu_bo_kunmap(ring->mqd_obj); -- cgit v1.2.3 From 9e72813f69b178b676a54c4d6b24d3e84492b61a Mon Sep 17 00:00:00 2001 From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com> Date: Sat, 6 May 2023 16:10:43 +0800 Subject: drm/amdgpu: Remove the unused variable golden_settings_gc_9_4_3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Variable golden_settings_gc_9_4_3 is not effectively used, so delete it. drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c:48:38: warning: ‘golden_settings_gc_9_4_3’ defined but not used. Reported-by: Abaci Robot <abaci@linux.alibaba.com> Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=4877 Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index c9ae3065a13d..9d17dcfae130 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -45,10 +45,6 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin"); #define GFX9_MEC_HPD_SIZE 4096 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L -static const struct soc15_reg_golden golden_settings_gc_9_4_3[] = { - -}; - static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v9_4_3_set_irq_funcs(struct amdgpu_device *adev); static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev); -- cgit v1.2.3 From f4caf5842652f08e024741ef6d423cb0c101d863 Mon Sep 17 00:00:00 2001 From: Hawking Zhang <Hawking.Zhang@amd.com> Date: Wed, 14 Sep 2022 16:35:50 +0800 Subject: drm/amdgpu: introduce vmhub definition for multi-partition cases (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v1: Each partition has its own gfxhub or mmhub. adjust the num of MAX_VMHUBS and the GFXHUB/MMHUB layout (Le) v2: re-design the AMDGPU_GFXHUB/AMDGPU_MMHUB layout (Le) v3: apply the gfxhub/mmhub layout to new IPs (Hawking) v4: fix up gmc11 (Alex) v5: rebase (Alex) Signed-off-by: Le Ma <le.ma@amd.com> Acked-by: Christian König <christian.koenig@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 13 +++-- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 6 +-- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 8 +-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 8 +-- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 4 +- drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c | 12 ++--- drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 33 ++++++------- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 26 +++++----- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 78 +++++++++++++++--------------- drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 4 +- drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/mes_v10_1.c | 2 +- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c | 12 ++--- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 4 +- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 4 +- drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 4 +- drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 4 +- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 8 +-- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 4 +- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 2 +- 47 files changed, 204 insertions(+), 204 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index a46285841d17..f0a136d35279 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -736,7 +736,7 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev, for (i = 0; i < adev->num_vmhubs; i++) amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); } else { - amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0); + amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0), 0); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 87e1a1a9f298..488b3bb6dcb1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -315,7 +315,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, ring->use_doorbell = true; ring->doorbell_index = adev->doorbell_index.kiq; ring->xcc_id = xcc_id; - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); if (xcc_id >= 1) ring->doorbell_index = adev->doorbell_index.xcc1_kiq_start + xcc_id - 1; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 4e2531758866..0a4e5fcfec6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -670,7 +670,7 @@ void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type, for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + hub->ctx_distance * i; - tmp = (hub_type == AMDGPU_GFXHUB_0) ? + tmp = (hub_type == AMDGPU_GFXHUB(0)) ? RREG32_SOC15_IP(GC, reg) : RREG32_SOC15_IP(MMHUB, reg); @@ -679,7 +679,7 @@ void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type, else tmp &= ~hub->vm_cntx_cntl_vm_fault; - (hub_type == AMDGPU_GFXHUB_0) ? + (hub_type == AMDGPU_GFXHUB(0)) ? WREG32_SOC15_IP(GC, reg, tmp) : WREG32_SOC15_IP(MMHUB, reg, tmp); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b6bd667df676..c3964c14f215 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2374,12 +2374,12 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) case AMDGPU_VM_OP_RESERVE_VMID: /* We only have requirement to reserve vmid from gfxhub */ r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, - AMDGPU_GFXHUB_0); + AMDGPU_GFXHUB(0)); if (r) return r; break; case AMDGPU_VM_OP_UNRESERVE_VMID: - amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0); + amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB(0)); break; default: return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 6f085f0b4ef3..9f5d32b0fda1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -111,11 +111,14 @@ struct amdgpu_mem_stats; /* Reserve 4MB VRAM for page tables */ #define AMDGPU_VM_RESERVED_VRAM (8ULL << 20) -/* max number of VMHUB */ -#define AMDGPU_MAX_VMHUBS 3 -#define AMDGPU_GFXHUB_0 0 -#define AMDGPU_MMHUB_0 1 -#define AMDGPU_MMHUB_1 2 +/* + * max number of VMHUB + * layout: max 8 GFXHUB + 4 MMHUB0 + 1 MMHUB1 + */ +#define AMDGPU_MAX_VMHUBS 13 +#define AMDGPU_GFXHUB(x) (x) +#define AMDGPU_MMHUB0(x) (8 + x) +#define AMDGPU_MMHUB1(x) (8 + 4 + x) /* Reserve 2MB at top/bottom of address space for kernel use */ #define AMDGPU_VA_RESERVED_SIZE (2ULL << 20) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 8e86b2c23c0a..7b585141e10e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4461,7 +4461,7 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; else ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1; - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; @@ -4490,7 +4490,7 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX10_MEC_HPD_SIZE); - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP @@ -4978,7 +4978,7 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev) /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); - for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { + for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { nv_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index f77779c31043..790df2cc3480 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -906,7 +906,7 @@ static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; else ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1; - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; @@ -937,7 +937,7 @@ static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX11_MEC_HPD_SIZE); - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP @@ -1707,7 +1707,7 @@ static void gfx_v11_0_constants_init(struct amdgpu_device *adev) /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); - for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { + for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { soc21_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); @@ -4190,7 +4190,7 @@ static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev) false : true; adev->gfxhub.funcs->set_fault_enable_default(adev, value); - amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0); + amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 46577b59cb04..91814dc083c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2005,7 +2005,7 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX9_MEC_HPD_SIZE); - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP @@ -2105,7 +2105,7 @@ static int gfx_v9_0_sw_init(void *handle) /* disable scheduler on the real ring */ ring->no_scheduler = true; - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -2123,7 +2123,7 @@ static int gfx_v9_0_sw_init(void *handle) ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; ring->is_sw_ring = true; hw_prio = amdgpu_sw_ring_priority(i); - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio, NULL); @@ -2393,7 +2393,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); - for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { + for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { soc15_grbm_select(adev, 0, 0, 0, i, 0); /* CP and shaders */ if (i == 0) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index d648a29c33e0..ec7c049c5952 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -1935,7 +1935,7 @@ static bool gfx_v9_4_2_query_uctl2_poison_status(struct amdgpu_device *adev) u32 status = 0; struct amdgpu_vmhub *hub; - hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; status = RREG32(hub->vm_l2_pro_fault_status); /* reset page fault status */ WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 9d17dcfae130..f5104b982633 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -757,7 +757,7 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, (adev->doorbell_index.mec_ring0 + ring_id) << 1; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX9_MEC_HPD_SIZE); - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "comp_%d.%d.%d.%d", ring->xcc_id, ring->me, ring->pipe, ring->queue); @@ -996,7 +996,7 @@ static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); - for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { + for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { for (j = 0; j < adev->gfx.num_xcd; j++) { soc15_grbm_select(adev, 0, 0, 0, i, j); /* CP and shaders */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index ab2325f6c7ac..d94cc1ec7242 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -40,7 +40,7 @@ static void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -247,7 +247,7 @@ static void gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; unsigned num_level, block_size; uint32_t tmp; int i; @@ -307,7 +307,7 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; unsigned i; for (i = 0 ; i < 18; ++i) { @@ -338,7 +338,7 @@ static int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) static void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; u32 tmp; u32 i; @@ -411,7 +411,7 @@ static void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, static void gfxhub_v1_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(GC, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index 79af32bb078c..9c385ce3a8c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -42,7 +42,7 @@ static void gfxhub_v1_2_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; int i; for (i = 0; i < adev->gfx.num_xcd; i++) { @@ -291,7 +291,7 @@ static void gfxhub_v1_2_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v1_2_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; unsigned num_level, block_size; uint32_t tmp; int i, j; @@ -357,7 +357,7 @@ static void gfxhub_v1_2_setup_vmid_config(struct amdgpu_device *adev) static void gfxhub_v1_2_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; unsigned i, j; for (j = 0; j < adev->gfx.num_xcd; j++) { @@ -406,7 +406,7 @@ static int gfxhub_v1_2_gart_enable(struct amdgpu_device *adev) static void gfxhub_v1_2_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; u32 tmp; u32 i, j; @@ -483,7 +483,7 @@ static void gfxhub_v1_2_set_fault_enable_default(struct amdgpu_device *adev, static void gfxhub_v1_2_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(GC, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index 9b3a02527318..f173a61c6c15 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -120,7 +120,7 @@ static u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev) static void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -282,7 +282,7 @@ static void gfxhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; int i; uint32_t tmp; @@ -331,7 +331,7 @@ static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) static void gfxhub_v2_0_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; unsigned i; for (i = 0 ; i < 18; ++i) { @@ -360,7 +360,7 @@ static int gfxhub_v2_0_gart_enable(struct amdgpu_device *adev) static void gfxhub_v2_0_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; u32 tmp; u32 i; @@ -433,7 +433,7 @@ static const struct amdgpu_vmhub_funcs gfxhub_v2_0_vmhub_funcs = { static void gfxhub_v2_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(GC, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index 4aacbbec31e2..d8fc3e8088cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -123,7 +123,7 @@ static u64 gfxhub_v2_1_get_mc_fb_offset(struct amdgpu_device *adev) static void gfxhub_v2_1_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -291,7 +291,7 @@ static void gfxhub_v2_1_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v2_1_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; int i; uint32_t tmp; @@ -340,7 +340,7 @@ static void gfxhub_v2_1_setup_vmid_config(struct amdgpu_device *adev) static void gfxhub_v2_1_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; unsigned i; for (i = 0 ; i < 18; ++i) { @@ -381,7 +381,7 @@ static int gfxhub_v2_1_gart_enable(struct amdgpu_device *adev) static void gfxhub_v2_1_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; u32 tmp; u32 i; @@ -462,7 +462,7 @@ static const struct amdgpu_vmhub_funcs gfxhub_v2_1_vmhub_funcs = { static void gfxhub_v2_1_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(GC, 0, @@ -651,7 +651,7 @@ static void gfxhub_v2_1_restore_regs(struct amdgpu_device *adev) static void gfxhub_v2_1_halt(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; int i; uint32_t tmp; int time = 1000; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c index 13712640fa46..c53147f9c9fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c @@ -119,7 +119,7 @@ static u64 gfxhub_v3_0_get_mc_fb_offset(struct amdgpu_device *adev) static void gfxhub_v3_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -290,7 +290,7 @@ static void gfxhub_v3_0_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v3_0_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; int i; uint32_t tmp; @@ -339,7 +339,7 @@ static void gfxhub_v3_0_setup_vmid_config(struct amdgpu_device *adev) static void gfxhub_v3_0_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; unsigned i; for (i = 0 ; i < 18; ++i) { @@ -380,7 +380,7 @@ static int gfxhub_v3_0_gart_enable(struct amdgpu_device *adev) static void gfxhub_v3_0_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; u32 tmp; u32 i; @@ -463,7 +463,7 @@ static const struct amdgpu_vmhub_funcs gfxhub_v3_0_vmhub_funcs = { static void gfxhub_v3_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(GC, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c index 6e0bd628c889..ae777487d72e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c @@ -122,7 +122,7 @@ static u64 gfxhub_v3_0_3_get_mc_fb_offset(struct amdgpu_device *adev) static void gfxhub_v3_0_3_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -295,7 +295,7 @@ static void gfxhub_v3_0_3_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v3_0_3_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; int i; uint32_t tmp; @@ -344,7 +344,7 @@ static void gfxhub_v3_0_3_setup_vmid_config(struct amdgpu_device *adev) static void gfxhub_v3_0_3_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; unsigned i; for (i = 0 ; i < 18; ++i) { @@ -373,7 +373,7 @@ static int gfxhub_v3_0_3_gart_enable(struct amdgpu_device *adev) static void gfxhub_v3_0_3_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; u32 tmp; u32 i; @@ -451,7 +451,7 @@ static const struct amdgpu_vmhub_funcs gfxhub_v3_0_3_vmhub_funcs = { static void gfxhub_v3_0_3_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(GC, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 5697b66bf0de..ea2a448147e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -76,7 +76,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, switch (state) { case AMDGPU_IRQ_STATE_DISABLE: /* MM HUB */ - amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB_0, false); + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), false); /* GFX HUB */ /* This works because this interrupt is only * enabled at init/resume and disabled in @@ -84,11 +84,11 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, * change over the course of suspend/resume. */ if (!adev->in_s0ix) - amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB_0, false); + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), false); break; case AMDGPU_IRQ_STATE_ENABLE: /* MM HUB */ - amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB_0, true); + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), true); /* GFX HUB */ /* This works because this interrupt is only * enabled at init/resume and disabled in @@ -96,7 +96,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, * change over the course of suspend/resume. */ if (!adev->in_s0ix) - amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB_0, true); + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), true); break; default: break; @@ -149,7 +149,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, * be updated to avoid reading an incorrect value due to * the new fast GRBM interface. */ - if ((entry->vmid_src == AMDGPU_GFXHUB_0) && + if ((entry->vmid_src == AMDGPU_GFXHUB(0)) && (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0))) RREG32(hub->vm_l2_pro_fault_status); @@ -212,8 +212,7 @@ static void gmc_v10_0_set_irq_funcs(struct amdgpu_device *adev) static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev, uint32_t vmhub) { - return ((vmhub == AMDGPU_MMHUB_0 || - vmhub == AMDGPU_MMHUB_1) && + return ((vmhub == AMDGPU_MMHUB0(0)) && (!amdgpu_sriov_vf(adev))); } @@ -249,7 +248,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, unsigned int i; unsigned char hub_ip = 0; - hub_ip = (vmhub == AMDGPU_GFXHUB_0) ? + hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP; spin_lock(&adev->gmc.invalidate_lock); @@ -284,7 +283,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, * Issue a dummy read to wait for the ACK register to be cleared * to avoid a false ACK due to the new fast GRBM interface. */ - if ((vmhub == AMDGPU_GFXHUB_0) && + if ((vmhub == AMDGPU_GFXHUB(0)) && (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0))) RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, hub_ip); @@ -361,19 +360,19 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, mutex_lock(&adev->mman.gtt_window_lock); - if (vmhub == AMDGPU_MMHUB_0) { - gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0); + if (vmhub == AMDGPU_MMHUB0(0)) { + gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB0(0), 0); mutex_unlock(&adev->mman.gtt_window_lock); return; } - BUG_ON(vmhub != AMDGPU_GFXHUB_0); + BUG_ON(vmhub != AMDGPU_GFXHUB(0)); if (!adev->mman.buffer_funcs_enabled || !adev->ib_pool_ready || amdgpu_in_reset(adev) || ring->sched.ready == false) { - gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0); + gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB(0), 0); mutex_unlock(&adev->mman.gtt_window_lock); return; } @@ -466,7 +465,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, i, flush_type); } else { gmc_v10_0_flush_gpu_tlb(adev, vmid, - AMDGPU_GFXHUB_0, flush_type); + AMDGPU_GFXHUB(0), flush_type); } if (!adev->enable_mes) break; @@ -534,7 +533,7 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid if (ring->is_mes_queue) return; - if (ring->vm_hub == AMDGPU_GFXHUB_0) + if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; else reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid; @@ -1075,9 +1074,9 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) if (!adev->in_s0ix) adev->gfxhub.funcs->set_fault_enable_default(adev, value); adev->mmhub.funcs->set_fault_enable_default(adev, value); - gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0); + gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0); if (!adev->in_s0ix) - gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0); + gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 2f570fb5febe..fb2ac31cbba7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -64,7 +64,7 @@ gmc_v11_0_vm_fault_interrupt_state(struct amdgpu_device *adev, switch (state) { case AMDGPU_IRQ_STATE_DISABLE: /* MM HUB */ - amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB_0, false); + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), false); /* GFX HUB */ /* This works because this interrupt is only * enabled at init/resume and disabled in @@ -72,11 +72,11 @@ gmc_v11_0_vm_fault_interrupt_state(struct amdgpu_device *adev, * change over the course of suspend/resume. */ if (!adev->in_s0ix) - amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB_0, false); + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), false); break; case AMDGPU_IRQ_STATE_ENABLE: /* MM HUB */ - amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB_0, true); + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), true); /* GFX HUB */ /* This works because this interrupt is only * enabled at init/resume and disabled in @@ -84,7 +84,7 @@ gmc_v11_0_vm_fault_interrupt_state(struct amdgpu_device *adev, * change over the course of suspend/resume. */ if (!adev->in_s0ix) - amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB_0, true); + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), true); break; default: break; @@ -110,7 +110,7 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev, * be updated to avoid reading an incorrect value due to * the new fast GRBM interface. */ - if (entry->vmid_src == AMDGPU_GFXHUB_0) + if (entry->vmid_src == AMDGPU_GFXHUB(0)) RREG32(hub->vm_l2_pro_fault_status); status = RREG32(hub->vm_l2_pro_fault_status); @@ -170,7 +170,7 @@ static void gmc_v11_0_set_irq_funcs(struct amdgpu_device *adev) static bool gmc_v11_0_use_invalidate_semaphore(struct amdgpu_device *adev, uint32_t vmhub) { - return ((vmhub == AMDGPU_MMHUB_0) && + return ((vmhub == AMDGPU_MMHUB0(0)) && (!amdgpu_sriov_vf(adev))); } @@ -202,7 +202,7 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, unsigned int i; unsigned char hub_ip = 0; - hub_ip = (vmhub == AMDGPU_GFXHUB_0) ? + hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP; spin_lock(&adev->gmc.invalidate_lock); @@ -251,7 +251,7 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, hub->eng_distance * eng, 0, hub_ip); /* Issue additional private vm invalidation to MMHUB */ - if ((vmhub != AMDGPU_GFXHUB_0) && + if ((vmhub != AMDGPU_GFXHUB(0)) && (hub->vm_l2_bank_select_reserved_cid2) && !amdgpu_sriov_vf(adev)) { inv_req = RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2); @@ -284,7 +284,7 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, uint32_t vmhub, uint32_t flush_type) { - if ((vmhub == AMDGPU_GFXHUB_0) && !adev->gfx.is_poweron) + if ((vmhub == AMDGPU_GFXHUB(0)) && !adev->gfx.is_poweron) return; /* flush hdp cache */ @@ -369,7 +369,7 @@ static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, i, flush_type); } else { gmc_v11_0_flush_gpu_tlb(adev, vmid, - AMDGPU_GFXHUB_0, flush_type); + AMDGPU_GFXHUB(0), flush_type); } } } @@ -435,7 +435,7 @@ static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid if (ring->is_mes_queue) return; - if (ring->vm_hub == AMDGPU_GFXHUB_0) + if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid; else reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid; @@ -886,7 +886,7 @@ static int gmc_v11_0_sw_fini(void *handle) static void gmc_v11_0_init_golden_registers(struct amdgpu_device *adev) { if (amdgpu_sriov_vf(adev)) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32(hub->vm_contexts_disable, 0); return; @@ -921,7 +921,7 @@ static int gmc_v11_0_gart_enable(struct amdgpu_device *adev) false : true; adev->mmhub.funcs->set_fault_enable_default(adev, value); - gmc_v11_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0); + gmc_v11_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 6ae5cee9b64b..193ba4d912a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -491,20 +491,20 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, * fini/suspend, so the overall state doesn't * change over the course of suspend/resume. */ - if (adev->in_s0ix && (j == AMDGPU_GFXHUB_0)) + if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0))) continue; - if (j == AMDGPU_GFXHUB_0) - tmp = RREG32_SOC15_IP(GC, reg); - else + if (j >= AMDGPU_MMHUB0(0)) tmp = RREG32_SOC15_IP(MMHUB, reg); + else + tmp = RREG32_SOC15_IP(GC, reg); tmp &= ~bits; - if (j == AMDGPU_GFXHUB_0) - WREG32_SOC15_IP(GC, reg, tmp); - else + if (j >= AMDGPU_MMHUB0(0)) WREG32_SOC15_IP(MMHUB, reg, tmp); + else + WREG32_SOC15_IP(GC, reg, tmp); } } break; @@ -519,20 +519,20 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, * fini/suspend, so the overall state doesn't * change over the course of suspend/resume. */ - if (adev->in_s0ix && (j == AMDGPU_GFXHUB_0)) + if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0))) continue; - if (j == AMDGPU_GFXHUB_0) - tmp = RREG32_SOC15_IP(GC, reg); - else + if (j >= AMDGPU_MMHUB0(0)) tmp = RREG32_SOC15_IP(MMHUB, reg); + else + tmp = RREG32_SOC15_IP(GC, reg); tmp |= bits; - if (j == AMDGPU_GFXHUB_0) - WREG32_SOC15_IP(GC, reg, tmp); - else + if (j >= AMDGPU_MMHUB0(0)) WREG32_SOC15_IP(MMHUB, reg, tmp); + else + WREG32_SOC15_IP(GC, reg, tmp); } } break; @@ -605,13 +605,13 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, if (entry->client_id == SOC15_IH_CLIENTID_VMC) { hub_name = "mmhub0"; - hub = &adev->vmhub[AMDGPU_MMHUB_0]; + hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) { hub_name = "mmhub1"; - hub = &adev->vmhub[AMDGPU_MMHUB_1]; + hub = &adev->vmhub[AMDGPU_MMHUB1(0)]; } else { hub_name = "gfxhub0"; - hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; } memset(&task_info, 0, sizeof(struct amdgpu_task_info)); @@ -636,7 +636,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, * be updated to avoid reading an incorrect value due to * the new fast GRBM interface. */ - if ((entry->vmid_src == AMDGPU_GFXHUB_0) && + if ((entry->vmid_src == AMDGPU_GFXHUB(0)) && (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2))) RREG32(hub->vm_l2_pro_fault_status); @@ -649,7 +649,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, dev_err(adev->dev, "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status); - if (hub == &adev->vmhub[AMDGPU_GFXHUB_0]) { + if (hub == &adev->vmhub[AMDGPU_GFXHUB(0)]) { dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : gfxhub_client_ids[cid], @@ -759,8 +759,8 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev, adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) return false; - return ((vmhub == AMDGPU_MMHUB_0 || - vmhub == AMDGPU_MMHUB_1) && + return ((vmhub == AMDGPU_MMHUB0(0) || + vmhub == AMDGPU_MMHUB1(0)) && (!amdgpu_sriov_vf(adev)) && (!(!(adev->apu_flags & AMD_APU_IS_RAVEN2) && (adev->apu_flags & AMD_APU_IS_PICASSO)))); @@ -849,11 +849,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, if (use_semaphore) { for (j = 0; j < adev->usec_timeout; j++) { /* a read return value of 1 means semaphore acquire */ - if (vmhub == AMDGPU_GFXHUB_0) - tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng); - else + if (vmhub >= AMDGPU_MMHUB0(0)) tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng); - + else + tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng); if (tmp & 0x1) break; udelay(1); @@ -864,27 +863,26 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, } do { - if (vmhub == AMDGPU_GFXHUB_0) - WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); - else + if (vmhub >= AMDGPU_MMHUB0(0)) WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); + else + WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); /* * Issue a dummy read to wait for the ACK register to * be cleared to avoid a false ACK due to the new fast * GRBM interface. */ - if ((vmhub == AMDGPU_GFXHUB_0) && + if ((vmhub == AMDGPU_GFXHUB(0)) && (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2))) RREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng); for (j = 0; j < adev->usec_timeout; j++) { - if (vmhub == AMDGPU_GFXHUB_0) - tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_ack + hub->eng_distance * eng); - else + if (vmhub >= AMDGPU_MMHUB0(0)) tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_ack + hub->eng_distance * eng); - + else + tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_ack + hub->eng_distance * eng); if (tmp & (1 << vmid)) break; udelay(1); @@ -900,10 +898,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * add semaphore release after invalidation, * write with 0 means semaphore release */ - if (vmhub == AMDGPU_GFXHUB_0) - WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0); + if (vmhub >= AMDGPU_MMHUB0(0)) + WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); else - WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0); + WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); } spin_unlock(&adev->gmc.invalidate_lock); @@ -994,7 +992,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, i, flush_type); } else { gmc_v9_0_flush_gpu_tlb(adev, vmid, - AMDGPU_GFXHUB_0, flush_type); + AMDGPU_GFXHUB(0), flush_type); } break; } @@ -1060,10 +1058,10 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, uint32_t reg; /* Do nothing because there's no lut register for mmhub1. */ - if (ring->vm_hub == AMDGPU_MMHUB_1) + if (ring->vm_hub == AMDGPU_MMHUB1(0)) return; - if (ring->vm_hub == AMDGPU_GFXHUB_0) + if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; else reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid; @@ -1947,7 +1945,7 @@ static int gmc_v9_0_hw_init(void *handle) adev->mmhub.funcs->set_fault_enable_default(adev, value); } for (i = 0; i < adev->num_vmhubs; ++i) { - if (adev->in_s0ix && (i == AMDGPU_GFXHUB_0)) + if (adev->in_s0ix && (i == AMDGPU_GFXHUB(0))) continue; gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c index a3076eb8af6a..71fe7f6f9889 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c @@ -485,7 +485,7 @@ int jpeg_v1_0_sw_init(void *handle) return r; ring = &adev->jpeg.inst->ring_dec; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "jpeg_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 0eddf7c824a7..3a43e42f4834 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -86,7 +86,7 @@ static int jpeg_v2_0_sw_init(void *handle) ring = &adev->jpeg.inst->ring_dec; ring->use_doorbell = true; ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "jpeg_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index b040f51d9aa9..259b7ba6a842 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -128,9 +128,9 @@ static int jpeg_v2_5_sw_init(void *handle) ring = &adev->jpeg.inst[i].ring_dec; ring->use_doorbell = true; if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0)) - ring->vm_hub = AMDGPU_MMHUB_1; + ring->vm_hub = AMDGPU_MMHUB1(0); else - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8 * i; sprintf(ring->name, "jpeg_dec_%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst[i].irq, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index 1c2292cc5f2c..c55386c22311 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -101,7 +101,7 @@ static int jpeg_v3_0_sw_init(void *handle) ring = &adev->jpeg.inst->ring_dec; ring->use_doorbell = true; ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "jpeg_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index 77e1e64aa1d1..d7d5ffc29393 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -108,7 +108,7 @@ static int jpeg_v4_0_sw_init(void *handle) ring = &adev->jpeg.inst->ring_dec; ring->use_doorbell = true; ring->doorbell_index = amdgpu_sriov_vf(adev) ? (((adev->doorbell_index.vcn.vcn_ring0_1) << 1) + 4) : ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1); - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "jpeg_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c index 4560476c7c31..f1a6abdad21b 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c @@ -149,7 +149,7 @@ static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes, { struct amdgpu_device *adev = mes->adev; union MESAPI__ADD_QUEUE mes_add_queue_pkt; - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; uint32_t vm_cntx_cntl = hub->vm_cntx_cntl; memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 3adb450eec07..9791f3581786 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -164,7 +164,7 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, { struct amdgpu_device *adev = mes->adev; union MESAPI__ADD_QUEUE mes_add_queue_pkt; - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; uint32_t vm_cntx_cntl = hub->vm_cntx_cntl; memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 15e7cbeae75b..fb91b31056ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -54,7 +54,7 @@ static u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev) static void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -229,7 +229,7 @@ static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned num_level, block_size; uint32_t tmp; int i; @@ -285,7 +285,7 @@ static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) static void mmhub_v1_0_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned i; for (i = 0; i < 18; ++i) { @@ -338,7 +338,7 @@ static int mmhub_v1_0_gart_enable(struct amdgpu_device *adev) static void mmhub_v1_0_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; u32 tmp; u32 i; @@ -415,7 +415,7 @@ static void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool static void mmhub_v1_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c index 73afbf2facc9..9086f2fdfaf4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c @@ -54,7 +54,7 @@ static u64 mmhub_v1_7_get_fb_location(struct amdgpu_device *adev) static void mmhub_v1_7_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, lower_32_bits(page_table_base)); @@ -261,7 +261,7 @@ static void mmhub_v1_7_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v1_7_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned num_level, block_size; uint32_t tmp; int i; @@ -319,7 +319,7 @@ static void mmhub_v1_7_setup_vmid_config(struct amdgpu_device *adev) static void mmhub_v1_7_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned i; for (i = 0; i < 18; ++i) { @@ -348,7 +348,7 @@ static int mmhub_v1_7_gart_enable(struct amdgpu_device *adev) static void mmhub_v1_7_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; u32 tmp; u32 i; @@ -425,7 +425,7 @@ static void mmhub_v1_7_set_fault_enable_default(struct amdgpu_device *adev, bool static void mmhub_v1_7_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index 342d1702104c..9ec06f9db761 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -53,7 +53,7 @@ static u64 mmhub_v1_8_get_fb_location(struct amdgpu_device *adev) static void mmhub_v1_8_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, lower_32_bits(page_table_base)); @@ -253,7 +253,7 @@ static void mmhub_v1_8_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v1_8_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned num_level, block_size; uint32_t tmp; int i; @@ -311,7 +311,7 @@ static void mmhub_v1_8_setup_vmid_config(struct amdgpu_device *adev) static void mmhub_v1_8_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned i; for (i = 0; i < 18; ++i) { @@ -352,7 +352,7 @@ static int mmhub_v1_8_gart_enable(struct amdgpu_device *adev) static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; u32 tmp; u32 i; @@ -426,7 +426,7 @@ static void mmhub_v1_8_set_fault_enable_default(struct amdgpu_device *adev, bool static void mmhub_v1_8_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 278e32db878d..8f76c6ecf50a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -187,7 +187,7 @@ mmhub_v2_0_print_l2_protection_fault_status(struct amdgpu_device *adev, static void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -362,7 +362,7 @@ static void mmhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; int i; uint32_t tmp; @@ -412,7 +412,7 @@ static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) static void mmhub_v2_0_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned i; for (i = 0; i < 18; ++i) { @@ -441,7 +441,7 @@ static int mmhub_v2_0_gart_enable(struct amdgpu_device *adev) static void mmhub_v2_0_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; u32 tmp; u32 i; @@ -520,7 +520,7 @@ static const struct amdgpu_vmhub_funcs mmhub_v2_0_vmhub_funcs = { static void mmhub_v2_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index fcf2813e70db..8bd0fc8d9d25 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -121,7 +121,7 @@ static void mmhub_v2_3_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, lower_32_bits(page_table_base)); @@ -280,7 +280,7 @@ static void mmhub_v2_3_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v2_3_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; int i; uint32_t tmp; @@ -330,7 +330,7 @@ static void mmhub_v2_3_setup_vmid_config(struct amdgpu_device *adev) static void mmhub_v2_3_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned i; for (i = 0; i < 18; ++i) { @@ -373,7 +373,7 @@ static int mmhub_v2_3_gart_enable(struct amdgpu_device *adev) static void mmhub_v2_3_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; u32 tmp; u32 i; @@ -446,7 +446,7 @@ static const struct amdgpu_vmhub_funcs mmhub_v2_3_vmhub_funcs = { static void mmhub_v2_3_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c index 17a792616979..441379e91cfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c @@ -136,7 +136,7 @@ mmhub_v3_0_print_l2_protection_fault_status(struct amdgpu_device *adev, static void mmhub_v3_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -319,7 +319,7 @@ static void mmhub_v3_0_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v3_0_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; int i; uint32_t tmp; @@ -369,7 +369,7 @@ static void mmhub_v3_0_setup_vmid_config(struct amdgpu_device *adev) static void mmhub_v3_0_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned i; for (i = 0; i < 18; ++i) { @@ -398,7 +398,7 @@ static int mmhub_v3_0_gart_enable(struct amdgpu_device *adev) static void mmhub_v3_0_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; u32 tmp; u32 i; @@ -477,7 +477,7 @@ static const struct amdgpu_vmhub_funcs mmhub_v3_0_vmhub_funcs = { static void mmhub_v3_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c index 26509b6b8c24..12c7f4b46ea9 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c @@ -138,7 +138,7 @@ static void mmhub_v3_0_1_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -306,7 +306,7 @@ static void mmhub_v3_0_1_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v3_0_1_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; int i; uint32_t tmp; @@ -356,7 +356,7 @@ static void mmhub_v3_0_1_setup_vmid_config(struct amdgpu_device *adev) static void mmhub_v3_0_1_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned i; for (i = 0; i < 18; ++i) { @@ -385,7 +385,7 @@ static int mmhub_v3_0_1_gart_enable(struct amdgpu_device *adev) static void mmhub_v3_0_1_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; u32 tmp; u32 i; @@ -459,7 +459,7 @@ static const struct amdgpu_vmhub_funcs mmhub_v3_0_1_vmhub_funcs = { static void mmhub_v3_0_1_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c index 26abbc6a47ab..5dadc85abf7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c @@ -129,7 +129,7 @@ mmhub_v3_0_2_print_l2_protection_fault_status(struct amdgpu_device *adev, static void mmhub_v3_0_2_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -311,7 +311,7 @@ static void mmhub_v3_0_2_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v3_0_2_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; int i; uint32_t tmp; @@ -361,7 +361,7 @@ static void mmhub_v3_0_2_setup_vmid_config(struct amdgpu_device *adev) static void mmhub_v3_0_2_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned i; for (i = 0; i < 18; ++i) { @@ -390,7 +390,7 @@ static int mmhub_v3_0_2_gart_enable(struct amdgpu_device *adev) static void mmhub_v3_0_2_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; u32 tmp; u32 i; @@ -469,7 +469,7 @@ static const struct amdgpu_vmhub_funcs mmhub_v3_0_2_vmhub_funcs = { static void mmhub_v3_0_2_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 72083e96222f..e790f890aec6 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -57,7 +57,7 @@ static u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev) static void mmhub_v9_4_setup_hubid_vm_pt_regs(struct amdgpu_device *adev, int hubid, uint32_t vmid, uint64_t value) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, @@ -294,7 +294,7 @@ static void mmhub_v9_4_disable_identity_aperture(struct amdgpu_device *adev, static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned int num_level, block_size; uint32_t tmp; int i; @@ -363,7 +363,7 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) static void mmhub_v9_4_program_invalidation(struct amdgpu_device *adev, int hubid) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned i; for (i = 0; i < 18; ++i) { @@ -404,7 +404,7 @@ static int mmhub_v9_4_gart_enable(struct amdgpu_device *adev) static void mmhub_v9_4_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; u32 tmp; u32 i, j; @@ -507,8 +507,8 @@ static void mmhub_v9_4_set_fault_enable_default(struct amdgpu_device *adev, bool static void mmhub_v9_4_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub[MMHUB_NUM_INSTANCES] = - {&adev->vmhub[AMDGPU_MMHUB_0], &adev->vmhub[AMDGPU_MMHUB_1]}; + struct amdgpu_vmhub *hub[MMHUB_NUM_INSTANCES] = { + &adev->vmhub[AMDGPU_MMHUB0(0)], &adev->vmhub[AMDGPU_MMHUB1(0)]}; int i; for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 9295ac7edd56..50b6eb9bcfda 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1825,12 +1825,12 @@ static int sdma_v4_0_sw_init(void *handle) /* * On Arcturus, SDMA instance 5~7 has a different vmhub - * type(AMDGPU_MMHUB_1). + * type(AMDGPU_MMHUB1). */ if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) && i >= 5) - ring->vm_hub = AMDGPU_MMHUB_1; + ring->vm_hub = AMDGPU_MMHUB1(0); else - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, @@ -1851,9 +1851,9 @@ static int sdma_v4_0_sw_init(void *handle) ring->doorbell_index += 0x400; if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) && i >= 5) - ring->vm_hub = AMDGPU_MMHUB_1; + ring->vm_hub = AMDGPU_MMHUB1(0); else - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "page%d", i); r = amdgpu_ring_init(adev, ring, 1024, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 64dcaa2670dd..7efe7c43fffb 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -1309,7 +1309,7 @@ static int sdma_v4_4_2_sw_init(void *handle) /* doorbell size is 2 dwords, get DWORD offset */ ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, @@ -1328,7 +1328,7 @@ static int sdma_v4_4_2_sw_init(void *handle) */ ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1; ring->doorbell_index += 0x400; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "page%d", i); r = amdgpu_ring_init(adev, ring, 1024, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 92e1299be021..a0077cf41295 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1389,7 +1389,7 @@ static int sdma_v5_0_sw_init(void *handle) (adev->doorbell_index.sdma_engine[0] << 1) //get DWORD offset : (adev->doorbell_index.sdma_engine[1] << 1); // get DWORD offset - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, (i == 0) ? AMDGPU_SDMA_IRQ_INSTANCE0 : diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index ca7e8757d78e..efa2c84ee78e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1253,7 +1253,7 @@ static int sdma_v5_2_sw_init(void *handle) ring->doorbell_index = (adev->doorbell_index.sdma_engine[i] << 1); //get DWORD offset - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, AMDGPU_SDMA_IRQ_INSTANCE0 + i, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 3d9a80511a45..79d09792d2ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1298,7 +1298,7 @@ static int sdma_v6_0_sw_init(void *handle) ring->doorbell_index = (adev->doorbell_index.sdma_engine[i] << 1); // get DWORD offset - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index e32b656b3dab..abaa4463e906 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -444,7 +444,7 @@ static int uvd_v7_0_sw_init(void *handle) continue; if (!amdgpu_sriov_vf(adev)) { ring = &adev->uvd.inst[j].ring; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "uvd_%d", ring->me); r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0, @@ -455,7 +455,7 @@ static int uvd_v7_0_sw_init(void *handle) for (i = 0; i < adev->uvd.num_enc_rings; ++i) { ring = &adev->uvd.inst[j].ring_enc[i]; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "uvd_enc_%d.%d", ring->me, i); if (amdgpu_sriov_vf(adev)) { ring->use_doorbell = true; diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 57b85bb6a1e4..e0b70cd3b697 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -466,7 +466,7 @@ static int vce_v4_0_sw_init(void *handle) enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i); ring = &adev->vce.ring[i]; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vce%d", i); if (amdgpu_sriov_vf(adev)) { /* DOORBELL only works under SRIOV */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 761c28fa6ec1..f877c39c7cdd 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -120,7 +120,7 @@ static int vcn_v1_0_sw_init(void *handle) return r; ring = &adev->vcn.inst->ring_dec; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vcn_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -142,7 +142,7 @@ static int vcn_v1_0_sw_init(void *handle) enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(i); ring = &adev->vcn.inst->ring_enc[i]; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vcn_enc%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, hw_prio, NULL); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 7c2b3aa48083..c975aed2f6c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -129,7 +129,7 @@ static int vcn_v2_0_sw_init(void *handle) ring->use_doorbell = true; ring->doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 << 1; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vcn_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, @@ -160,7 +160,7 @@ static int vcn_v2_0_sw_init(void *handle) ring = &adev->vcn.inst->ring_enc[i]; ring->use_doorbell = true; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); if (!amdgpu_sriov_vf(adev)) ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i; else diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index ab0b45d0ead1..7044bd7c9f62 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -188,9 +188,9 @@ static int vcn_v2_5_sw_init(void *handle) (amdgpu_sriov_vf(adev) ? 2*j : 8*j); if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0)) - ring->vm_hub = AMDGPU_MMHUB_1; + ring->vm_hub = AMDGPU_MMHUB1(0); else - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vcn_dec_%d", j); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, @@ -208,9 +208,9 @@ static int vcn_v2_5_sw_init(void *handle) (amdgpu_sriov_vf(adev) ? (1 + i + 2*j) : (2 + i + 8*j)); if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0)) - ring->vm_hub = AMDGPU_MMHUB_1; + ring->vm_hub = AMDGPU_MMHUB1(0); else - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vcn_enc_%d.%d", j, i); r = amdgpu_ring_init(adev, ring, 512, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 3eab186261aa..70fefbf26c48 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -189,7 +189,7 @@ static int vcn_v3_0_sw_init(void *handle) } else { ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i; } - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vcn_dec_%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, AMDGPU_RING_PRIO_DEFAULT, @@ -213,7 +213,7 @@ static int vcn_v3_0_sw_init(void *handle) } else { ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i; } - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vcn_enc_%d.%d", i, j); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, hw_prio, &adev->vcn.inst[i].sched_score); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index bf0674039598..81446e6996df 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -149,7 +149,7 @@ static int vcn_v4_0_sw_init(void *handle) ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + i * (adev->vcn.num_enc_rings + 1) + 1; else ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vcn_unified_%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, -- cgit v1.2.3 From 3566938b3491bb3aad701b487130f3efc363e2dc Mon Sep 17 00:00:00 2001 From: Le Ma <le.ma@amd.com> Date: Mon, 20 Dec 2021 16:06:25 +0800 Subject: drm/amdgpu: assign different AMDGPU_GFXHUB for rings on each xcc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass the xcc_id to AMDGPU_GFXHUB(x) Signed-off-by: Le Ma <le.ma@amd.com> Reviewed-by: Christian König <christian.koenig@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 488b3bb6dcb1..e5ff0bf9f23b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -315,7 +315,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, ring->use_doorbell = true; ring->doorbell_index = adev->doorbell_index.kiq; ring->xcc_id = xcc_id; - ring->vm_hub = AMDGPU_GFXHUB(0); + ring->vm_hub = AMDGPU_GFXHUB(xcc_id); if (xcc_id >= 1) ring->doorbell_index = adev->doorbell_index.xcc1_kiq_start + xcc_id - 1; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index f5104b982633..064cd02451c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -757,7 +757,7 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, (adev->doorbell_index.mec_ring0 + ring_id) << 1; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX9_MEC_HPD_SIZE); - ring->vm_hub = AMDGPU_GFXHUB(0); + ring->vm_hub = AMDGPU_GFXHUB(xcc_id); sprintf(ring->name, "comp_%d.%d.%d.%d", ring->xcc_id, ring->me, ring->pipe, ring->queue); -- cgit v1.2.3 From 98a54e88e87f7291d4bbc6ec646c498f64ae042f Mon Sep 17 00:00:00 2001 From: Le Ma <le.ma@amd.com> Date: Tue, 17 May 2022 22:20:10 +0800 Subject: drm/amdgpu: add sysfs node for compute partition mode Add current/available compute partitin mode sysfs node. v2: make the sysfs node as IP independent one in amdgpu_gfx.c Signed-off-by: Le Ma <le.ma@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 132 +++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 6 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h | 3 + drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 67 +++++++++++++++ drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c | 32 +++++++ 6 files changed, 241 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e94507a10e15..f432064a0535 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3634,6 +3634,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->srbm_mutex); mutex_init(&adev->gfx.pipe_reserve_mutex); mutex_init(&adev->gfx.gfx_off_mutex); + mutex_init(&adev->gfx.partition_mutex); mutex_init(&adev->grbm_idx_mutex); mutex_init(&adev->mn_lock); mutex_init(&adev->virt.vf_errors.lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index e5ff0bf9f23b..4a4d71ff9b95 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1174,3 +1174,135 @@ bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id) return !(xcc_id % (adev->gfx.num_xcc_per_xcp ? adev->gfx.num_xcc_per_xcp : 1)); } + +static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev, + struct device_attribute *addr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + enum amdgpu_gfx_partition mode; + char *partition_mode; + + mode = adev->gfx.funcs->query_partition_mode(adev); + + switch (mode) { + case AMDGPU_SPX_PARTITION_MODE: + partition_mode = "SPX"; + break; + case AMDGPU_DPX_PARTITION_MODE: + partition_mode = "DPX"; + break; + case AMDGPU_TPX_PARTITION_MODE: + partition_mode = "TPX"; + break; + case AMDGPU_QPX_PARTITION_MODE: + partition_mode = "QPX"; + break; + case AMDGPU_CPX_PARTITION_MODE: + partition_mode = "CPX"; + break; + default: + partition_mode = "UNKNOWN"; + break; + } + + return sysfs_emit(buf, "%s\n", partition_mode); +} + +static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev, + struct device_attribute *addr, + const char *buf, size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + enum amdgpu_gfx_partition mode; + int ret; + + if (adev->gfx.num_xcd % 2 != 0) + return -EINVAL; + + if (!strncasecmp("SPX", buf, strlen("SPX"))) { + mode = AMDGPU_SPX_PARTITION_MODE; + } else if (!strncasecmp("DPX", buf, strlen("DPX"))) { + if (adev->gfx.num_xcd != 4 || adev->gfx.num_xcd != 8) + return -EINVAL; + mode = AMDGPU_DPX_PARTITION_MODE; + } else if (!strncasecmp("TPX", buf, strlen("TPX"))) { + if (adev->gfx.num_xcd != 6) + return -EINVAL; + mode = AMDGPU_TPX_PARTITION_MODE; + } else if (!strncasecmp("QPX", buf, strlen("QPX"))) { + if (adev->gfx.num_xcd != 8) + return -EINVAL; + mode = AMDGPU_QPX_PARTITION_MODE; + } else if (!strncasecmp("CPX", buf, strlen("CPX"))) { + mode = AMDGPU_CPX_PARTITION_MODE; + } else { + return -EINVAL; + } + + mutex_lock(&adev->gfx.partition_mutex); + + ret = adev->gfx.funcs->switch_partition_mode(adev, mode); + + mutex_unlock(&adev->gfx.partition_mutex); + + if (ret) + return ret; + + return count; +} + +static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev, + struct device_attribute *addr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + char *supported_partition; + + /* TBD */ + switch (adev->gfx.num_xcd) { + case 8: + supported_partition = "SPX, DPX, QPX, CPX"; + break; + case 6: + supported_partition = "SPX, TPX, CPX"; + break; + case 4: + supported_partition = "SPX, DPX, CPX"; + break; + /* this seems only existing in emulation phase */ + case 2: + supported_partition = "SPX, CPX"; + break; + default: + supported_partition = "Not supported"; + break; + } + + return sysfs_emit(buf, "%s\n", supported_partition); +} + +static DEVICE_ATTR(current_compute_partition, S_IRUGO | S_IWUSR, + amdgpu_gfx_get_current_compute_partition, + amdgpu_gfx_set_compute_partition); + +static DEVICE_ATTR(available_compute_partition, S_IRUGO, + amdgpu_gfx_get_available_compute_partition, NULL); + +int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev) +{ + int r; + + r = device_create_file(adev->dev, &dev_attr_current_compute_partition); + if (r) + return r; + + r = device_create_file(adev->dev, &dev_attr_available_compute_partition); + if (r) + return r; + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 2755f00ac19a..8df36527aee9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -259,6 +259,10 @@ struct amdgpu_gfx_funcs { void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable); int (*get_gfx_shadow_info)(struct amdgpu_device *adev, struct amdgpu_gfx_shadow_info *shadow_info); + enum amdgpu_gfx_partition + (*query_partition_mode)(struct amdgpu_device *adev); + int (*switch_partition_mode)(struct amdgpu_device *adev, + enum amdgpu_gfx_partition mode); }; struct sq_work { @@ -394,6 +398,7 @@ struct amdgpu_gfx { enum amdgpu_gfx_partition partition_mode; uint32_t num_xcd; uint32_t num_xcc_per_xcp; + struct mutex partition_mutex; }; #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev)) @@ -478,4 +483,5 @@ int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry); bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id); +int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index c686ff4bcc39..6a636c34b717 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -95,6 +95,9 @@ struct amdgpu_nbio_funcs { void (*apply_l1_link_width_reconfig_wa)(struct amdgpu_device *adev); void (*clear_doorbell_interrupt)(struct amdgpu_device *adev); u32 (*get_rom_offset)(struct amdgpu_device *adev); + u32 (*get_compute_partition_mode)(struct amdgpu_device *adev); + void (*set_compute_partition_mode)(struct amdgpu_device *adev, + enum amdgpu_gfx_partition mode); }; struct amdgpu_nbio { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 064cd02451c2..1d15db9423c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -662,6 +662,67 @@ static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev, soc15_grbm_select(adev, me, pipe, q, vm, 0); } +static enum amdgpu_gfx_partition +gfx_v9_4_3_query_compute_partition(struct amdgpu_device *adev) +{ + enum amdgpu_gfx_partition mode = AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE; + + if (adev->nbio.funcs->get_compute_partition_mode) + mode = adev->nbio.funcs->get_compute_partition_mode(adev); + + return mode; +} + +static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev, + enum amdgpu_gfx_partition mode) +{ + u32 tmp = 0; + int num_xcc_per_partition, i; + + if (mode == adev->gfx.partition_mode) + return mode; + + switch (mode) { + case AMDGPU_SPX_PARTITION_MODE: + num_xcc_per_partition = adev->gfx.num_xcd; + break; + case AMDGPU_DPX_PARTITION_MODE: + num_xcc_per_partition = adev->gfx.num_xcd / 2; + break; + case AMDGPU_TPX_PARTITION_MODE: + num_xcc_per_partition = adev->gfx.num_xcd / 3; + break; + case AMDGPU_QPX_PARTITION_MODE: + num_xcc_per_partition = adev->gfx.num_xcd / 4; + break; + case AMDGPU_CPX_PARTITION_MODE: + num_xcc_per_partition = 1; + break; + default: + return -EINVAL; + } + + /* TODO: + * Stop user queues and threads, and make sure GPU is empty of work. + */ + + for (i = 0; i < adev->gfx.num_xcd; i++) { + tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, NUM_XCC_IN_XCP, + num_xcc_per_partition); + tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, VIRTUAL_XCC_ID, + i % num_xcc_per_partition); + WREG32_SOC15(GC, i, regCP_HYP_XCP_CTL, tmp); + } + + if (adev->nbio.funcs->set_compute_partition_mode) + adev->nbio.funcs->set_compute_partition_mode(adev, mode); + + adev->gfx.num_xcc_per_xcp = num_xcc_per_partition; + adev->gfx.partition_mode = mode; + + return 0; +} + static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { .get_gpu_clock_counter = &gfx_v9_4_3_get_gpu_clock_counter, .select_se_sh = &gfx_v9_4_3_select_se_sh, @@ -669,6 +730,8 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { .read_wave_sgprs = &gfx_v9_4_3_read_wave_sgprs, .read_wave_vgprs = &gfx_v9_4_3_read_wave_vgprs, .select_me_pipe_q = &gfx_v9_4_3_select_me_pipe_q, + .query_partition_mode = &gfx_v9_4_3_query_compute_partition, + .switch_partition_mode = &gfx_v9_4_3_switch_compute_partition, }; static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev) @@ -858,6 +921,10 @@ static int gfx_v9_4_3_sw_init(void *handle) if (r) return r; + r = amdgpu_gfx_sysfs_init(adev); + if (r) + return r; + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c index a331a59c49e3..bdb84a53f0e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c @@ -347,6 +347,36 @@ static void nbio_v7_9_enable_doorbell_interrupt(struct amdgpu_device *adev, DOORBELL_INTERRUPT_DISABLE, enable ? 0 : 1); } +static enum amdgpu_gfx_partition nbio_v7_9_get_compute_partition_mode(struct amdgpu_device *adev) +{ + u32 tmp; + + tmp = RREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_COMPUTE_CAP); + + if (REG_GET_FIELD(tmp, BIF_BX_PF0_PARTITION_COMPUTE_CAP, SPX_SUPPORT)) + return AMDGPU_SPX_PARTITION_MODE; + else if (REG_GET_FIELD(tmp, BIF_BX_PF0_PARTITION_COMPUTE_CAP, DPX_SUPPORT)) + return AMDGPU_DPX_PARTITION_MODE; + else if (REG_GET_FIELD(tmp, BIF_BX_PF0_PARTITION_COMPUTE_CAP, TPX_SUPPORT)) + return AMDGPU_TPX_PARTITION_MODE; + else if (REG_GET_FIELD(tmp, BIF_BX_PF0_PARTITION_COMPUTE_CAP, CPX_SUPPORT)) + return AMDGPU_CPX_PARTITION_MODE; + else + return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE; +} + +static void nbio_v7_9_set_compute_partition_mode(struct amdgpu_device *adev, + enum amdgpu_gfx_partition mode) +{ + u32 tmp; + + tmp = RREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_COMPUTE_CAP); + tmp &= ~0x1f; + tmp |= 1 << mode; + + WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_COMPUTE_CAP, tmp); +} + const struct amdgpu_nbio_funcs nbio_v7_9_funcs = { .get_hdp_flush_req_offset = nbio_v7_9_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_9_get_hdp_flush_done_offset, @@ -366,4 +396,6 @@ const struct amdgpu_nbio_funcs nbio_v7_9_funcs = { .get_clockgating_state = nbio_v7_9_get_clockgating_state, .ih_control = nbio_v7_9_ih_control, .remap_hdp_registers = nbio_v7_9_remap_hdp_registers, + .get_compute_partition_mode = nbio_v7_9_get_compute_partition_mode, + .set_compute_partition_mode = nbio_v7_9_set_compute_partition_mode, }; -- cgit v1.2.3 From 0fa49d108386c201b5c2cce68066a9b8f66883a5 Mon Sep 17 00:00:00 2001 From: Shiwu Zhang <shiwu.zhang@amd.com> Date: Fri, 17 Dec 2021 11:27:53 +0800 Subject: drm/amdgpu: override partition mode through module parameter Add a module parameter to override the partition mode. Signed-off-by: Shiwu Zhang <shiwu.zhang@amd.com> Reviewed-by: Le Ma <Le.Ma@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 13 +++++++++++++ drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 25 +++++++++++++++++++++++-- 3 files changed, 38 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 0f163d266812..a277bdc86057 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -242,6 +242,8 @@ extern int amdgpu_num_kcq; extern int amdgpu_vcnfw_log; extern int amdgpu_sg_display; +extern uint amdgpu_user_partt_mode; + #define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD (256*1024*1024) #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 45544ebe576e..9e9da2ac5c82 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -193,6 +193,7 @@ int amdgpu_smartshift_bias; int amdgpu_use_xgmi_p2p = 1; int amdgpu_vcnfw_log; int amdgpu_sg_display = -1; /* auto */ +uint amdgpu_user_partt_mode; static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); @@ -950,6 +951,18 @@ MODULE_PARM_DESC(smu_pptable_id, "specify pptable id to be used (-1 = auto(default) value, 0 = use pptable from vbios, > 0 = soft pptable id)"); module_param_named(smu_pptable_id, amdgpu_smu_pptable_id, int, 0444); +/** + * DOC: partition_mode (int) + * Used to override the default SPX mode. + */ +MODULE_PARM_DESC(user_partt_mode, + "specify partition mode to be used (0 = AMDGPU_SPX_PARTITION_MODE(default value), \ + 1 = AMDGPU_DPX_PARTITION_MODE, \ + 2 = AMDGPU_TPX_PARTITION_MODE, \ + 3 = AMDGPU_QPX_PARTITION_MODE, \ + 4 = AMDGPU_CPX_PARTITION_MODE)"); +module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444); + /* These devices are not supported by amdgpu. * They are supported by the mach64, r128, radeon drivers */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 1d15db9423c9..2676a185c232 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -2162,8 +2162,29 @@ static int gfx_v9_4_3_early_init(void *handle) /* hardcode in emulation phase */ adev->gfx.num_xcd = 1; - adev->gfx.num_xcc_per_xcp = 1; - adev->gfx.partition_mode = AMDGPU_SPX_PARTITION_MODE; + + adev->gfx.partition_mode = amdgpu_user_partt_mode; + /* calculate the num_xcc_in_xcp for the partition mode*/ + switch (amdgpu_user_partt_mode) { + case AMDGPU_SPX_PARTITION_MODE: + adev->gfx.num_xcc_per_xcp = adev->gfx.num_xcd; + break; + case AMDGPU_DPX_PARTITION_MODE: + adev->gfx.num_xcc_per_xcp = adev->gfx.num_xcd / 2; + break; + case AMDGPU_TPX_PARTITION_MODE: + adev->gfx.num_xcc_per_xcp = adev->gfx.num_xcd / 3; + break; + case AMDGPU_QPX_PARTITION_MODE: + adev->gfx.num_xcc_per_xcp = adev->gfx.num_xcd / 4; + break; + case AMDGPU_CPX_PARTITION_MODE: + adev->gfx.num_xcc_per_xcp = 1; + break; + default: + adev->gfx.num_xcc_per_xcp = adev->gfx.num_xcd; + break; + } adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), AMDGPU_MAX_COMPUTE_RINGS); -- cgit v1.2.3 From 9b4fd27601fbe7f77e7f8a8ca226211ef748844b Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Wed, 12 Jan 2022 19:42:56 +0530 Subject: drm/amdgpu: Use the correct API to read register Use SOC15 API so that the register offset is calculated correctly. Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 2676a185c232..ad3e8cefbdb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1637,7 +1637,7 @@ static int gfx_v9_4_3_mqd_init(struct amdgpu_ring *ring) /* set static priority for a queue/ring */ gfx_v9_4_3_mqd_set_priority(ring, mqd); - mqd->cp_hqd_quantum = RREG32(regCP_HQD_QUANTUM); + mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, regCP_HQD_QUANTUM); /* map_queues packet doesn't need activate the queue, * so only kiq need set this field. -- cgit v1.2.3 From 89cf4549a949b4ba3ce771163b75285979c95602 Mon Sep 17 00:00:00 2001 From: Hawking Zhang <Hawking.Zhang@amd.com> Date: Mon, 23 May 2022 13:53:45 +0800 Subject: drm/amdgpu: support gc v9_4_3 ring_test running on all xcc Each xcc has its own sratch_reg offset Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com> Reviewed-by: Le Ma <Le.Ma@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index ad3e8cefbdb2..4ef39837e4c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -227,20 +227,23 @@ static int gfx_v9_4_3_ring_test_ring(struct amdgpu_ring *ring) uint32_t tmp = 0; unsigned i; int r; + /* scratch_reg0_offset is 32bit even with full XCD config */ + uint32_t scratch_reg0_offset; + + scratch_reg0_offset = SOC15_REG_OFFSET(GC, ring->xcc_id, regSCRATCH_REG0); + WREG32(scratch_reg0_offset, 0xCAFEDEAD); - WREG32_SOC15(GC, 0, regSCRATCH_REG0, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) return r; amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); - amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0) - - PACKET3_SET_UCONFIG_REG_START); + amdgpu_ring_write(ring, scratch_reg0_offset - PACKET3_SET_UCONFIG_REG_START); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32_SOC15(GC, 0, regSCRATCH_REG0); + tmp = RREG32(scratch_reg0_offset); if (tmp == 0xDEADBEEF) break; udelay(1); -- cgit v1.2.3 From 147862d00bcf7e23e0a125f910f5db224f7b6722 Mon Sep 17 00:00:00 2001 From: Shiwu Zhang <shiwu.zhang@amd.com> Date: Fri, 3 Jun 2022 11:08:12 +0800 Subject: drm/amdgpu: enable the ring and IB test for slave kcq With the mec FW update to utilize the mqd base set by driver for kcq mapping, slave kcq ring test and IB test can be re-enabled. Signed-off-by: Shiwu Zhang <shiwu.zhang@amd.com> Reviewed-by: Le Ma <Le.Ma@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 59 +++++++++++++++------------------ drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 5 --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 12 +++---- 3 files changed, 32 insertions(+), 44 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 4a4d71ff9b95..682c157f2d8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -449,8 +449,8 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, ring->mqd_size = mqd_size; /* prepare MQD backup */ - adev->gfx.mec.mqd_backup[j] = kmalloc(mqd_size, GFP_KERNEL); - if (!adev->gfx.mec.mqd_backup[j]) + adev->gfx.mec.mqd_backup[j + xcc_id * adev->gfx.num_compute_rings] = kmalloc(mqd_size, GFP_KERNEL); + if (!adev->gfx.mec.mqd_backup[j + xcc_id * adev->gfx.num_compute_rings]) dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); } } @@ -502,22 +502,20 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) return -EINVAL; spin_lock(&kiq->ring_lock); - if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) { - if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * - adev->gfx.num_compute_rings)) { - spin_unlock(&kiq->ring_lock); - return -ENOMEM; - } + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * + adev->gfx.num_compute_rings)) { + spin_unlock(&kiq->ring_lock); + return -ENOMEM; + } - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - j = i + xcc_id * adev->gfx.num_compute_rings; - kiq->pmf->kiq_unmap_queues(kiq_ring, - &adev->gfx.compute_ring[i], - RESET_QUEUES, 0, 0); - } + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + j = i + xcc_id * adev->gfx.num_compute_rings; + kiq->pmf->kiq_unmap_queues(kiq_ring, + &adev->gfx.compute_ring[i], + RESET_QUEUES, 0, 0); } - if (adev->gfx.kiq[0].ring.sched.ready && !adev->job_hang) + if (kiq_ring->sched.ready && !adev->job_hang) r = amdgpu_ring_test_helper(kiq_ring); spin_unlock(&kiq->ring_lock); @@ -598,26 +596,23 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id) DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe, kiq_ring->queue); spin_lock(&kiq->ring_lock); - /* No need to map kcq on the slave */ - if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) { - r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * - adev->gfx.num_compute_rings + - kiq->pmf->set_resources_size); - if (r) { - DRM_ERROR("Failed to lock KIQ (%d).\n", r); - spin_unlock(&kiq->ring_lock); - return r; - } + r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * + adev->gfx.num_compute_rings + + kiq->pmf->set_resources_size); + if (r) { + DRM_ERROR("Failed to lock KIQ (%d).\n", r); + spin_unlock(&kiq->ring_lock); + return r; + } - if (adev->enable_mes) - queue_mask = ~0ULL; + if (adev->enable_mes) + queue_mask = ~0ULL; - kiq->pmf->kiq_set_resources(kiq_ring, queue_mask); - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - j = i + xcc_id * adev->gfx.num_compute_rings; + kiq->pmf->kiq_set_resources(kiq_ring, queue_mask); + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + j = i + xcc_id * adev->gfx.num_compute_rings; kiq->pmf->kiq_map_queues(kiq_ring, - &adev->gfx.compute_ring[i]); - } + &adev->gfx.compute_ring[j]); } r = amdgpu_ring_test_helper(kiq_ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index c955c3f060cd..b27ac48ca123 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -433,11 +433,6 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev) else tmo = tmo_gfx; - /* skip ib test on the slave kcq */ - if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE && - !amdgpu_gfx_is_master_xcc(adev, ring->xcc_id)) - continue; - r = amdgpu_ring_test_ib(ring, tmo); if (!r) { DRM_DEV_DEBUG(adev->dev, "ib test on %s succeeded\n", diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 4ef39837e4c7..ec273a217666 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1956,13 +1956,11 @@ static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) if (r) return r; - /* skip ring test on slave kcq */ - if (amdgpu_gfx_is_master_xcc(adev, i)) { - for (j = 0; j < adev->gfx.num_compute_rings; j++) { - ring = &adev->gfx.compute_ring[j + - i * adev->gfx.num_compute_rings]; - amdgpu_ring_test_helper(ring); - } + for (j = 0; j < adev->gfx.num_compute_rings; j++) { + ring = &adev->gfx.compute_ring[j + i * adev->gfx.num_compute_rings]; + r = amdgpu_ring_test_helper(ring); + if (r) + return r; } gfx_v9_4_3_enable_gui_idle_interrupt(adev, true, i); -- cgit v1.2.3 From 15091a6f4380a0c1a7202d52e82cdaaf80e2bb70 Mon Sep 17 00:00:00 2001 From: Le Ma <le.ma@amd.com> Date: Fri, 19 Nov 2021 18:03:34 +0800 Subject: drm/amdgpu: add node_id to physical id conversion in EOP handler A new field nodeid in interrupt cookie indicates the node ID. Signed-off-by: Le Ma <le.ma@amd.com> Reviewed-by: Shiwu Zhang <shiwu.zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 11 +++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h | 14 ++++++++++++++ drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 6 ++++-- 3 files changed, 29 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index fafebec5b7b6..031610c1340a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -99,6 +99,17 @@ const char *soc15_ih_clientid_name[] = { "MP1" }; +const int node_id_to_phys_map[NODEID_MAX] = { + [XCD0_NODEID] = 0, + [XCD1_NODEID] = 1, + [XCD2_NODEID] = 2, + [XCD3_NODEID] = 3, + [XCD4_NODEID] = 4, + [XCD5_NODEID] = 5, + [XCD6_NODEID] = 6, + [XCD7_NODEID] = 7, +}; + /** * amdgpu_irq_disable_all - disable *all* interrupts * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h index 1c747ac4129a..efe8a278cbdf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h @@ -102,6 +102,20 @@ struct amdgpu_irq { bool retry_cam_enabled; }; +enum interrupt_node_id_per_xcp { + XCD0_NODEID = 1, + XCD1_NODEID = 2, + XCD2_NODEID = 5, + XCD3_NODEID = 6, + XCD4_NODEID = 9, + XCD5_NODEID = 10, + XCD6_NODEID = 13, + XCD7_NODEID = 14, + NODEID_MAX, +}; + +extern const int node_id_to_phys_map[NODEID_MAX]; + void amdgpu_irq_disable_all(struct amdgpu_device *adev); int amdgpu_irq_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index ec273a217666..ce64c4fc5f1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -2799,7 +2799,7 @@ static int gfx_v9_4_3_eop_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - int i; + int i, phys_id; u8 me_id, pipe_id, queue_id; struct amdgpu_ring *ring; @@ -2808,12 +2808,14 @@ static int gfx_v9_4_3_eop_irq(struct amdgpu_device *adev, pipe_id = (entry->ring_id & 0x03) >> 0; queue_id = (entry->ring_id & 0x70) >> 4; + phys_id = node_id_to_phys_map[entry->node_id]; + switch (me_id) { case 0: case 1: case 2: for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; + ring = &adev->gfx.compute_ring[i + phys_id * adev->gfx.num_compute_rings]; /* Per-queue interrupt is supported for MEC starting from VI. * The interrupt can only be enabled/disabled per pipe instead of per queue. */ -- cgit v1.2.3 From e6a02e2cc7fe3fec05eeaf08855e57d616a037e1 Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Fri, 18 Feb 2022 15:04:35 +0530 Subject: drm/amdgpu: Add some XCC programming Add additional XCC programming sequences. Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index ce64c4fc5f1a..232feb387a40 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1150,6 +1150,29 @@ static void gfx_v9_4_3_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id) WREG32_SOC15(GC, xcc_id, regCPC_PSP_DEBUG, data); } +static void gfx_v9_4_3_program_xcc_id(struct amdgpu_device *adev, int xcc_id) +{ + uint32_t tmp = 0; + + switch (adev->gfx.num_xcd) { + /* directly config VIRTUAL_XCC_ID to 0 for 1-XCC */ + case 1: + WREG32_SOC15(GC, xcc_id, regCP_HYP_XCP_CTL, 0x8); + break; + case 2: + tmp = (xcc_id % adev->gfx.num_xcc_per_xcp) << REG_FIELD_SHIFT(CP_HYP_XCP_CTL, VIRTUAL_XCC_ID); + tmp = tmp | (adev->gfx.num_xcd << REG_FIELD_SHIFT(CP_HYP_XCP_CTL, NUM_XCC_IN_XCP)); + WREG32_SOC15(GC, xcc_id, regCP_HYP_XCP_CTL, tmp); + + tmp = xcc_id << REG_FIELD_SHIFT(CP_PSP_XCP_CTL, PHYSICAL_XCC_ID); + tmp = tmp | (xcc_id << REG_FIELD_SHIFT(CP_PSP_XCP_CTL, XCC_DIE_ID)); + WREG32_SOC15(GC, xcc_id, regCP_PSP_XCP_CTL, tmp); + break; + default: + break; + } +} + static bool gfx_v9_4_3_is_rlc_enabled(struct amdgpu_device *adev) { uint32_t rlc_setting; @@ -1948,6 +1971,9 @@ static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) return r; } + /* set the virtual and physical id based on partition_mode */ + gfx_v9_4_3_program_xcc_id(adev, i); + r = gfx_v9_4_3_kiq_resume(adev, i); if (r) return r; -- cgit v1.2.3 From a8027fcd08f9127d38edeb59600ecb76c56a121a Mon Sep 17 00:00:00 2001 From: Mukul Joshi <mukul.joshi@amd.com> Date: Wed, 2 Mar 2022 21:40:38 -0500 Subject: drm/amdgpu: Fix CP_HYP_XCP_CTL register programming in CPX mode Currently, in CPX mode, the CP_HYP_XCP_CTL register is programmed incorrectly with the number of XCCs in the partition. As a result, HIQ doesn't work in CPX mode. Fix this by programming the correct number of XCCs in a partition, which is 1, in CPX mode. Signed-off-by: Mukul Joshi <mukul.joshi@amd.com> Reviewed-by: Le Ma <Le.Ma@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 232feb387a40..1dcb69b4816f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1161,7 +1161,7 @@ static void gfx_v9_4_3_program_xcc_id(struct amdgpu_device *adev, int xcc_id) break; case 2: tmp = (xcc_id % adev->gfx.num_xcc_per_xcp) << REG_FIELD_SHIFT(CP_HYP_XCP_CTL, VIRTUAL_XCC_ID); - tmp = tmp | (adev->gfx.num_xcd << REG_FIELD_SHIFT(CP_HYP_XCP_CTL, NUM_XCC_IN_XCP)); + tmp = tmp | (adev->gfx.num_xcc_per_xcp << REG_FIELD_SHIFT(CP_HYP_XCP_CTL, NUM_XCC_IN_XCP)); WREG32_SOC15(GC, xcc_id, regCP_HYP_XCP_CTL, tmp); tmp = xcc_id << REG_FIELD_SHIFT(CP_PSP_XCP_CTL, PHYSICAL_XCC_ID); -- cgit v1.2.3 From 36be0181eab50abbb043a087988e6c2bef59dd45 Mon Sep 17 00:00:00 2001 From: Le Ma <le.ma@amd.com> Date: Wed, 20 Apr 2022 23:25:48 +0800 Subject: drm/amdgpu: program GRBM_MCM_ADDR for non-AID0 GRBM Otherwise the EOP interrupt on non-AID0 cannot route to IH0. Signed-off-by: Le Ma <le.ma@amd.com> Acked-by: Felix Kuehling <Felix.Kuehling@amd.com> Reviewed-by: Lijo Lazar <lijo.lazar@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 1dcb69b4816f..a9fab8de29e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -184,7 +184,10 @@ static void gfx_v9_4_3_set_kiq_pm4_funcs(struct amdgpu_device *adev) static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev) { + int i; + for (i = 2; i < adev->gfx.num_xcd; i++) + WREG32_SOC15(GC, i, regGRBM_MCM_ADDR, 0x4); } static void gfx_v9_4_3_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, -- cgit v1.2.3 From 20bedf1379b1d4b060e3f43661f35e5447d0cfed Mon Sep 17 00:00:00 2001 From: Le Ma <le.ma@amd.com> Date: Mon, 25 Apr 2022 22:19:58 +0800 Subject: drm/amdgpu: introduce new doorbell assignment table for GC 9.4.3 Four basic reasons as below to do the change: 1. number of ring expand a lot on GC 9.4.3, and adjustment on old assignment cannot make each ring in a continuous doorbell space. 2. the SDMA doorbell index should not exceed 0x1FF on SDMA 4.2.2 due to regDOORBELLx_CTRL_ENTRY.BIF_DOORBELLx_RANGE_OFFSET_ENTRY field width. 3. re-design the doorbell assignment and unify the calculation as "start + ring/inst id" will make the code much concise. 4. only defining the START/END makes the table look simple v2: (Lijo) 1. replace name 2. use num_inst_per_aid/sdma_doorbell_range instead of hardcoding Signed-off-by: Le Ma <le.ma@amd.com> Reviewed-by: Lijo Lazar <lijo.lazar@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 32 ++++++++++++++++++++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 6 +----- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 8 +------ drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 19 +++-------------- 4 files changed, 33 insertions(+), 32 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index b036d2f01930..a29a018ec84e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -59,7 +59,7 @@ struct amdgpu_doorbell_index { uint32_t gfx_ring1; uint32_t gfx_userqueue_start; uint32_t gfx_userqueue_end; - uint32_t sdma_engine[8]; + uint32_t sdma_engine[16]; uint32_t mes_ring0; uint32_t mes_ring1; uint32_t ih; @@ -83,9 +83,6 @@ struct amdgpu_doorbell_index { }; uint32_t first_non_cp; uint32_t last_non_cp; - uint32_t xcc1_kiq_start; - uint32_t xcc1_mec_ring0_start; - uint32_t aid1_sdma_start; uint32_t max_assignment; /* Per engine SDMA doorbell size in dword */ uint32_t sdma_doorbell_range; @@ -312,6 +309,33 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT AMDGPU_DOORBELL64_INVALID = 0xFFFF } AMDGPU_DOORBELL64_ASSIGNMENT; +typedef enum _AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 +{ + /* KIQ: 0~7 for maximum 8 XCD */ + AMDGPU_DOORBELL_LAYOUT1_KIQ_START = 0x000, + AMDGPU_DOORBELL_LAYOUT1_HIQ = 0x008, + AMDGPU_DOORBELL_LAYOUT1_DIQ = 0x009, + /* Compute: 0x0A ~ 0x49 */ + AMDGPU_DOORBELL_LAYOUT1_MEC_RING_START = 0x00A, + AMDGPU_DOORBELL_LAYOUT1_MEC_RING_END = 0x049, + AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_START = 0x04A, + AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_END = 0x0C9, + /* SDMA: 0x100 ~ 0x19F */ + AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START = 0x100, + AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_END = 0x19F, + /* IH: 0x1A0 ~ 0x1AF */ + AMDGPU_DOORBELL_LAYOUT1_IH = 0x1A0, + /* VCN: 0x1B0 ~ 0x1C2 */ + AMDGPU_DOORBELL_LAYOUT1_VCN_START = 0x1B0, + AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1C2, + + AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START, + AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_VCN_END, + + AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT = 0x1C2, + AMDGPU_DOORBELL_LAYOUT1_INVALID = 0xFFFF +} AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1; + u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index); void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v); u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 49b18cf987aa..76438f197de1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -315,11 +315,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, ring->use_doorbell = true; ring->xcc_id = xcc_id; ring->vm_hub = AMDGPU_GFXHUB(xcc_id); - if (xcc_id >= 1) - ring->doorbell_index = (adev->doorbell_index.xcc1_kiq_start + - xcc_id - 1) << 1; - else - ring->doorbell_index = adev->doorbell_index.kiq << 1; + ring->doorbell_index = (adev->doorbell_index.kiq + xcc_id) << 1; r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index a9fab8de29e8..52185b1d5d31 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -817,13 +817,7 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->ring_obj = NULL; ring->use_doorbell = true; - if (xcc_id >= 1) - ring->doorbell_index = - (adev->doorbell_index.xcc1_mec_ring0_start + - ring_id - adev->gfx.num_compute_rings) << 1; - else - ring->doorbell_index = - (adev->doorbell_index.mec_ring0 + ring_id) << 1; + ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX9_MEC_HPD_SIZE); ring->vm_hub = AMDGPU_GFXHUB(xcc_id); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index d3c7f9a43ef1..9b53174925f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -1310,14 +1310,7 @@ static int sdma_v4_4_2_sw_init(void *handle) ring->use_doorbell?"true":"false"); /* doorbell size is 2 dwords, get DWORD offset */ - if (aid_id > 0) - ring->doorbell_index = - (adev->doorbell_index.aid1_sdma_start << 1) - + adev->doorbell_index.sdma_doorbell_range - * (i - adev->sdma.num_inst_per_aid); - else - ring->doorbell_index = - adev->doorbell_index.sdma_engine[i] << 1; + ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1; ring->vm_hub = AMDGPU_MMHUB0(aid_id); sprintf(ring->name, "sdma%d.%d", aid_id, @@ -1336,14 +1329,8 @@ static int sdma_v4_4_2_sw_init(void *handle) /* doorbell index of page queue is assigned right after * gfx queue on the same instance */ - if (aid_id > 0) - ring->doorbell_index = - ((adev->doorbell_index.aid1_sdma_start + 1) << 1) - + adev->doorbell_index.sdma_doorbell_range - * (i - adev->sdma.num_inst_per_aid); - else - ring->doorbell_index = - (adev->doorbell_index.sdma_engine[i] + 1) << 1; + ring->doorbell_index = + (adev->doorbell_index.sdma_engine[i] + 1) << 1; ring->vm_hub = AMDGPU_MMHUB0(aid_id); sprintf(ring->name, "page%d.%d", aid_id, -- cgit v1.2.3 From 8078f1c610fdcdd8003e2c538fb04af41fa5c269 Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Wed, 29 Jun 2022 11:41:53 +0530 Subject: drm/amdgpu: Change num_xcd to xcc_mask Instead of number of XCCs, keep a mask of XCCs for the exact XCCs available on the ASIC. XCC configuration could differ based on different ASIC configs. v2: Rename num_xcd to num_xcc (Hawking) Use smaller xcc_mask size, changed to u16 (Le) Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Reviewed-by: Le Ma <Le.Ma@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 21 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 133 ++++++++++++++++++------------- drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c | 67 ++++++++++------ drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 +- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 10 +-- 7 files changed, 141 insertions(+), 99 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 76438f197de1..069b259f384c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -209,12 +209,12 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe, adev->gfx.num_compute_rings); - int num_xcd = (adev->gfx.num_xcd > 1) ? adev->gfx.num_xcd : 1; + int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1; if (multipipe_policy) { /* policy: make queues evenly cross all pipes on MEC1 only * for multiple xcc, just use the original policy for simplicity */ - for (j = 0; j < num_xcd; j++) { + for (j = 0; j < num_xcc; j++) { for (i = 0; i < max_queues_per_mec; i++) { pipe = i % adev->gfx.mec.num_pipe_per_mec; queue = (i / adev->gfx.mec.num_pipe_per_mec) % @@ -226,13 +226,13 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) } } else { /* policy: amdgpu owns all queues in the given pipe */ - for (j = 0; j < num_xcd; j++) { + for (j = 0; j < num_xcc; j++) { for (i = 0; i < max_queues_per_mec; ++i) set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap); } } - for (j = 0; j < num_xcd; j++) { + for (j = 0; j < num_xcc; j++) { dev_dbg(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)); } @@ -1207,23 +1207,24 @@ static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); enum amdgpu_gfx_partition mode; - int ret; + int ret = 0, num_xcc; - if (adev->gfx.num_xcd % 2 != 0) + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + if (num_xcc % 2 != 0) return -EINVAL; if (!strncasecmp("SPX", buf, strlen("SPX"))) { mode = AMDGPU_SPX_PARTITION_MODE; } else if (!strncasecmp("DPX", buf, strlen("DPX"))) { - if (adev->gfx.num_xcd != 4 || adev->gfx.num_xcd != 8) + if (num_xcc != 4 || num_xcc != 8) return -EINVAL; mode = AMDGPU_DPX_PARTITION_MODE; } else if (!strncasecmp("TPX", buf, strlen("TPX"))) { - if (adev->gfx.num_xcd != 6) + if (num_xcc != 6) return -EINVAL; mode = AMDGPU_TPX_PARTITION_MODE; } else if (!strncasecmp("QPX", buf, strlen("QPX"))) { - if (adev->gfx.num_xcd != 8) + if (num_xcc != 8) return -EINVAL; mode = AMDGPU_QPX_PARTITION_MODE; } else if (!strncasecmp("CPX", buf, strlen("CPX"))) { @@ -1253,7 +1254,7 @@ static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev, char *supported_partition; /* TBD */ - switch (adev->gfx.num_xcd) { + switch (NUM_XCC(adev->gfx.xcc_mask)) { case 8: supported_partition = "SPX, DPX, QPX, CPX"; break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 8df36527aee9..93f9875154db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -64,6 +64,8 @@ enum amdgpu_gfx_partition { AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE, }; +#define NUM_XCC(x) hweight16(x) + struct amdgpu_mec { struct amdgpu_bo *hpd_eop_obj; u64 hpd_eop_gpu_addr; @@ -396,7 +398,7 @@ struct amdgpu_gfx { bool cp_gfx_shadow; /* for gfx11 */ enum amdgpu_gfx_partition partition_mode; - uint32_t num_xcd; + uint16_t xcc_mask; uint32_t num_xcc_per_xcp; struct mutex partition_mutex; }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 91814dc083c9..da69177dc76f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4536,7 +4536,7 @@ static int gfx_v9_0_early_init(void *handle) adev->gfx.num_gfx_rings = 0; else adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; - adev->gfx.num_xcd = 1; + adev->gfx.xcc_mask = 1; adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), AMDGPU_MAX_COMPUTE_RINGS); gfx_v9_0_set_kiq_pm4_funcs(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 52185b1d5d31..c776fc5884de 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -177,16 +177,19 @@ static const struct kiq_pm4_funcs gfx_v9_4_3_kiq_pm4_funcs = { static void gfx_v9_4_3_set_kiq_pm4_funcs(struct amdgpu_device *adev) { - int i; - for (i = 0; i < adev->gfx.num_xcd; i++) + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) adev->gfx.kiq[i].pmf = &gfx_v9_4_3_kiq_pm4_funcs; } static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev) { - int i; + int i, num_xcc; - for (i = 2; i < adev->gfx.num_xcd; i++) + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 2; i < num_xcc; i++) WREG32_SOC15(GC, i, regGRBM_MCM_ADDR, 0x4); } @@ -499,7 +502,7 @@ static void gfx_v9_4_3_mec_fini(struct amdgpu_device *adev) static int gfx_v9_4_3_mec_init(struct amdgpu_device *adev) { - int r, i; + int r, i, num_xcc; u32 *hpd; const __le32 *fw_data; unsigned fw_size; @@ -508,7 +511,8 @@ static int gfx_v9_4_3_mec_init(struct amdgpu_device *adev) const struct gfx_firmware_header_v1_0 *mec_hdr; - for (i = 0; i < adev->gfx.num_xcd; i++) + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) bitmap_zero(adev->gfx.mec_bitmap[i].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); @@ -683,23 +687,24 @@ static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev, enum amdgpu_gfx_partition mode) { u32 tmp = 0; - int num_xcc_per_partition, i; + int num_xcc_per_partition, i, num_xcc; if (mode == adev->gfx.partition_mode) return mode; + num_xcc = NUM_XCC(adev->gfx.xcc_mask); switch (mode) { case AMDGPU_SPX_PARTITION_MODE: - num_xcc_per_partition = adev->gfx.num_xcd; + num_xcc_per_partition = num_xcc; break; case AMDGPU_DPX_PARTITION_MODE: - num_xcc_per_partition = adev->gfx.num_xcd / 2; + num_xcc_per_partition = num_xcc / 2; break; case AMDGPU_TPX_PARTITION_MODE: - num_xcc_per_partition = adev->gfx.num_xcd / 3; + num_xcc_per_partition = num_xcc / 3; break; case AMDGPU_QPX_PARTITION_MODE: - num_xcc_per_partition = adev->gfx.num_xcd / 4; + num_xcc_per_partition = num_xcc / 4; break; case AMDGPU_CPX_PARTITION_MODE: num_xcc_per_partition = 1; @@ -712,7 +717,7 @@ static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev, * Stop user queues and threads, and make sure GPU is empty of work. */ - for (i = 0; i < adev->gfx.num_xcd; i++) { + for (i = 0; i < num_xcc; i++) { tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, NUM_XCC_IN_XCP, num_xcc_per_partition); tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, VIRTUAL_XCC_ID, @@ -836,7 +841,7 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, static int gfx_v9_4_3_sw_init(void *handle) { - int i, j, k, r, ring_id, xcc_id; + int i, j, k, r, ring_id, xcc_id, num_xcc; struct amdgpu_kiq *kiq; struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -844,6 +849,8 @@ static int gfx_v9_4_3_sw_init(void *handle) adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 8; + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + /* EOP Event */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); if (r) @@ -877,8 +884,7 @@ static int gfx_v9_4_3_sw_init(void *handle) /* set up the compute queues - allocate horizontally across pipes */ ring_id = 0; - for (xcc_id = 0; xcc_id < adev->gfx.num_xcd; xcc_id++) { - + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { for (i = 0; i < adev->gfx.mec.num_mec; ++i) { for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; @@ -930,14 +936,14 @@ static int gfx_v9_4_3_sw_init(void *handle) static int gfx_v9_4_3_sw_fini(void *handle) { - int i; + int i, num_xcc; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - for (i = 0; i < adev->gfx.num_compute_rings * - adev->gfx.num_xcd; i++) + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < adev->gfx.num_compute_rings * num_xcc; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); - for (i = 0; i < adev->gfx.num_xcd; i++) { + for (i = 0; i < num_xcc; i++) { amdgpu_gfx_mqd_sw_fini(adev, i); amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[i].ring); amdgpu_gfx_kiq_fini(adev, i); @@ -1050,9 +1056,10 @@ static void gfx_v9_4_3_init_gds_vmid(struct amdgpu_device *adev, int xcc_id) static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) { u32 tmp; - int i, j; + int i, j, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { WREG32_FIELD15_PREREG(GC, i, GRBM_CNTL, READ_TIMEOUT, 0xff); gfx_v9_4_3_setup_rb(adev, i); } @@ -1064,7 +1071,7 @@ static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { - for (j = 0; j < adev->gfx.num_xcd; j++) { + for (j = 0; j < num_xcc; j++) { soc15_grbm_select(adev, 0, 0, 0, i, j); /* CP and shaders */ if (i == 0) { @@ -1092,7 +1099,7 @@ static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); - for (i = 0; i < adev->gfx.num_xcd; i++) { + for (i = 0; i < num_xcc; i++) { gfx_v9_4_3_init_compute_vmid(adev, i); gfx_v9_4_3_init_gds_vmid(adev, i); } @@ -1150,8 +1157,10 @@ static void gfx_v9_4_3_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id) static void gfx_v9_4_3_program_xcc_id(struct amdgpu_device *adev, int xcc_id) { uint32_t tmp = 0; + int num_xcc; - switch (adev->gfx.num_xcd) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + switch (num_xcc) { /* directly config VIRTUAL_XCC_ID to 0 for 1-XCC */ case 1: WREG32_SOC15(GC, xcc_id, regCP_HYP_XCP_CTL, 0x8); @@ -1288,9 +1297,10 @@ static void gfx_v9_4_3_enable_gui_idle_interrupt(struct amdgpu_device *adev, static void gfx_v9_4_3_rlc_stop(struct amdgpu_device *adev) { - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { WREG32_FIELD15_PREREG(GC, i, RLC_CNTL, RLC_ENABLE_F32, 0); gfx_v9_4_3_enable_gui_idle_interrupt(adev, false, i); gfx_v9_4_3_wait_for_rlc_serdes(adev, i); @@ -1299,9 +1309,10 @@ static void gfx_v9_4_3_rlc_stop(struct amdgpu_device *adev) static void gfx_v9_4_3_rlc_reset(struct amdgpu_device *adev) { - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { WREG32_FIELD15_PREREG(GC, i, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); udelay(50); WREG32_FIELD15_PREREG(GC, i, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); @@ -1314,9 +1325,10 @@ static void gfx_v9_4_3_rlc_start(struct amdgpu_device *adev) #ifdef AMDGPU_RLC_DEBUG_RETRY u32 rlc_ucode_ver; #endif - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { WREG32_FIELD15_PREREG(GC, i, RLC_CNTL, RLC_ENABLE_F32, 1); udelay(50); @@ -1377,11 +1389,12 @@ static int gfx_v9_4_3_rlc_load_microcode(struct amdgpu_device *adev, int xcc_id) static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev) { - int r, i; + int r, i, num_xcc; adev->gfx.rlc.funcs->stop(adev); - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { /* disable CG */ WREG32_SOC15(GC, i, regRLC_CGCG_CGLS_CTRL, 0); @@ -1954,10 +1967,11 @@ done: static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) { - int r, i, j; + int r, i, j, num_xcc; struct amdgpu_ring *ring; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { gfx_v9_4_3_enable_gui_idle_interrupt(adev, false, i); if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { @@ -2021,12 +2035,13 @@ static int gfx_v9_4_3_hw_init(void *handle) static int gfx_v9_4_3_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i; + int i, num_xcc; amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { if (amdgpu_gfx_disable_kcq(adev, i)) DRM_ERROR("XCD %d KCQ disable failed\n", i); @@ -2069,9 +2084,10 @@ static int gfx_v9_4_3_resume(void *handle) static bool gfx_v9_4_3_is_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { if (REG_GET_FIELD(RREG32_SOC15(GC, i, regGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) return false; @@ -2183,30 +2199,30 @@ static void gfx_v9_4_3_ring_emit_gds_switch(struct amdgpu_ring *ring, static int gfx_v9_4_3_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int num_xcc; - /* hardcode in emulation phase */ - adev->gfx.num_xcd = 1; + num_xcc = NUM_XCC(adev->gfx.xcc_mask); adev->gfx.partition_mode = amdgpu_user_partt_mode; /* calculate the num_xcc_in_xcp for the partition mode*/ switch (amdgpu_user_partt_mode) { case AMDGPU_SPX_PARTITION_MODE: - adev->gfx.num_xcc_per_xcp = adev->gfx.num_xcd; + adev->gfx.num_xcc_per_xcp = num_xcc; break; case AMDGPU_DPX_PARTITION_MODE: - adev->gfx.num_xcc_per_xcp = adev->gfx.num_xcd / 2; + adev->gfx.num_xcc_per_xcp = num_xcc / 2; break; case AMDGPU_TPX_PARTITION_MODE: - adev->gfx.num_xcc_per_xcp = adev->gfx.num_xcd / 3; + adev->gfx.num_xcc_per_xcp = num_xcc / 3; break; case AMDGPU_QPX_PARTITION_MODE: - adev->gfx.num_xcc_per_xcp = adev->gfx.num_xcd / 4; + adev->gfx.num_xcc_per_xcp = num_xcc / 4; break; case AMDGPU_CPX_PARTITION_MODE: adev->gfx.num_xcc_per_xcp = 1; break; default: - adev->gfx.num_xcc_per_xcp = adev->gfx.num_xcd; + adev->gfx.num_xcc_per_xcp = num_xcc; break; } @@ -2404,14 +2420,15 @@ static int gfx_v9_4_3_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i; + int i, num_xcc; if (amdgpu_sriov_vf(adev)) return 0; + num_xcc = NUM_XCC(adev->gfx.xcc_mask); switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 4, 3): - for (i = 0; i < adev->gfx.num_xcd; i++) + for (i = 0; i < num_xcc; i++) gfx_v9_4_3_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE, i); break; @@ -2739,12 +2756,13 @@ static int gfx_v9_4_3_set_priv_reg_fault_state(struct amdgpu_device *adev, unsigned type, enum amdgpu_interrupt_state state) { - int i; + int i, num_xcc; + num_xcc = NUM_XCC(adev->gfx.xcc_mask); switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - for (i = 0; i < adev->gfx.num_xcd; i++) + for (i = 0; i < num_xcc; i++) WREG32_FIELD15_PREREG(GC, i, CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); @@ -2761,12 +2779,13 @@ static int gfx_v9_4_3_set_priv_inst_fault_state(struct amdgpu_device *adev, unsigned type, enum amdgpu_interrupt_state state) { - int i; + int i, num_xcc; + num_xcc = NUM_XCC(adev->gfx.xcc_mask); switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - for (i = 0; i < adev->gfx.num_xcd; i++) + for (i = 0; i < num_xcc; i++) WREG32_FIELD15_PREREG(GC, i, CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); @@ -2783,8 +2802,10 @@ static int gfx_v9_4_3_set_eop_interrupt_state(struct amdgpu_device *adev, unsigned type, enum amdgpu_interrupt_state state) { - int i; - for (i = 0; i < adev->gfx.num_xcd; i++) { + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { switch (type) { case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 0, state, i); @@ -2842,6 +2863,7 @@ static int gfx_v9_4_3_eop_irq(struct amdgpu_device *adev, /* Per-queue interrupt is supported for MEC starting from VI. * The interrupt can only be enabled/disabled per pipe instead of per queue. */ + if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) amdgpu_fence_process(ring); } @@ -3056,9 +3078,10 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_kiq = { static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev) { - int i, j; + int i, j, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { adev->gfx.kiq[i].ring.funcs = &gfx_v9_4_3_ring_funcs_kiq; for (j = 0; j < adev->gfx.num_compute_rings; j++) diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index 1bb17d95f720..e35365ab3f1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -43,9 +43,10 @@ static void gfxhub_v1_2_setup_vm_pt_regs(struct amdgpu_device *adev, uint64_t page_table_base) { struct amdgpu_vmhub *hub; - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { hub = &adev->vmhub[AMDGPU_GFXHUB(i)]; WREG32_SOC15_OFFSET(GC, i, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, @@ -56,13 +57,14 @@ static void gfxhub_v1_2_setup_vm_pt_regs(struct amdgpu_device *adev, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, hub->ctx_addr_distance * vmid, upper_32_bits(page_table_base)); + } } static void gfxhub_v1_2_init_gart_aperture_regs(struct amdgpu_device *adev) { uint64_t pt_base; - int i; + int i, num_xcc; if (adev->gmc.pdb0_bo) pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo); @@ -74,7 +76,8 @@ static void gfxhub_v1_2_init_gart_aperture_regs(struct amdgpu_device *adev) /* If use GART for FB translation, vmid0 page table covers both * vram and system memory (gart) */ - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { if (adev->gmc.pdb0_bo) { WREG32_SOC15(GC, i, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, @@ -111,9 +114,10 @@ static void gfxhub_v1_2_init_system_aperture_regs(struct amdgpu_device *adev) { uint64_t value; uint32_t tmp; - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { /* Program the AGP BAR */ WREG32_SOC15_RLC(GC, i, regMC_VM_AGP_BASE, 0); WREG32_SOC15_RLC(GC, i, regMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); @@ -177,9 +181,10 @@ static void gfxhub_v1_2_init_system_aperture_regs(struct amdgpu_device *adev) static void gfxhub_v1_2_init_tlb_regs(struct amdgpu_device *adev) { uint32_t tmp; - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { /* Setup TLB control */ tmp = RREG32_SOC15(GC, i, regMC_VM_MX_L1_TLB_CNTL); @@ -202,9 +207,10 @@ static void gfxhub_v1_2_init_tlb_regs(struct amdgpu_device *adev) static void gfxhub_v1_2_init_cache_regs(struct amdgpu_device *adev) { uint32_t tmp; - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { /* Setup L2 cache */ tmp = RREG32_SOC15(GC, i, regVM_L2_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); @@ -249,9 +255,10 @@ static void gfxhub_v1_2_init_cache_regs(struct amdgpu_device *adev) static void gfxhub_v1_2_enable_system_domain(struct amdgpu_device *adev) { uint32_t tmp; - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { tmp = RREG32_SOC15(GC, i, regVM_CONTEXT0_CNTL); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, @@ -266,9 +273,10 @@ static void gfxhub_v1_2_enable_system_domain(struct amdgpu_device *adev) static void gfxhub_v1_2_disable_identity_aperture(struct amdgpu_device *adev) { - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { WREG32_SOC15(GC, i, regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, 0XFFFFFFFF); @@ -295,7 +303,7 @@ static void gfxhub_v1_2_setup_vmid_config(struct amdgpu_device *adev) struct amdgpu_vmhub *hub; unsigned num_level, block_size; uint32_t tmp; - int i, j; + int i, j, num_xcc; num_level = adev->vm_manager.num_level; block_size = adev->vm_manager.block_size; @@ -304,7 +312,8 @@ static void gfxhub_v1_2_setup_vmid_config(struct amdgpu_device *adev) else block_size -= 9; - for (j = 0; j < adev->gfx.num_xcd; j++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (j = 0; j < num_xcc; j++) { hub = &adev->vmhub[AMDGPU_GFXHUB(j)]; for (i = 0; i <= 14; i++) { tmp = RREG32_SOC15_OFFSET(GC, j, regVM_CONTEXT1_CNTL, i); @@ -362,10 +371,12 @@ static void gfxhub_v1_2_setup_vmid_config(struct amdgpu_device *adev) static void gfxhub_v1_2_program_invalidation(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub; - unsigned i, j; + unsigned i, j, num_xcc; - for (j = 0; j < adev->gfx.num_xcd; j++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (j = 0; j < num_xcc; j++) { hub = &adev->vmhub[AMDGPU_GFXHUB(j)]; + for (i = 0 ; i < 18; ++i) { WREG32_SOC15_OFFSET(GC, j, regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, i * hub->eng_addr_distance, 0xffffffff); @@ -377,9 +388,10 @@ static void gfxhub_v1_2_program_invalidation(struct amdgpu_device *adev) static int gfxhub_v1_2_gart_enable(struct amdgpu_device *adev) { - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { if (amdgpu_sriov_vf(adev)) { /* * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are @@ -413,9 +425,10 @@ static void gfxhub_v1_2_gart_disable(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub; u32 tmp; - u32 i, j; + u32 i, j, num_xcc; - for (j = 0; j < adev->gfx.num_xcd; j++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (j = 0; j < num_xcc; j++) { hub = &adev->vmhub[AMDGPU_GFXHUB(j)]; /* Disable all tables */ for (i = 0; i < 16; i++) @@ -449,9 +462,10 @@ static void gfxhub_v1_2_set_fault_enable_default(struct amdgpu_device *adev, bool value) { u32 tmp; - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { tmp = RREG32_SOC15(GC, i, regVM_L2_PROTECTION_FAULT_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); @@ -490,9 +504,10 @@ static void gfxhub_v1_2_set_fault_enable_default(struct amdgpu_device *adev, static void gfxhub_v1_2_init(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub; - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { hub = &adev->vmhub[AMDGPU_GFXHUB(i)]; hub->ctx0_ptb_addr_lo32 = diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 4b2c4ecd7253..2c322a25bf1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1733,7 +1733,8 @@ static int gmc_v9_0_sw_init(void *handle) adev->gmc.translate_further = adev->vm_manager.num_level > 1; break; case IP_VERSION(9, 4, 3): - bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0), adev->gfx.num_xcd); + bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0), + NUM_XCC(adev->gfx.xcc_mask)); bitmap_set(adev->vmhubs_mask, AMDGPU_MMHUB0(0), adev->num_aid); amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 829e32433faf..df96c4c508a0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -592,6 +592,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, struct kfd_node *node; uint32_t first_vmid_kfd, last_vmid_kfd, vmid_num_kfd; unsigned int max_proc_per_quantum; + int num_xcd; kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev, KGD_ENGINE_MEC1); @@ -601,16 +602,15 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, KGD_ENGINE_SDMA1); kfd->shared_resources = *gpu_resources; - if (kfd->adev->gfx.num_xcd == 0 || kfd->adev->gfx.num_xcd == 1 || - kfd->adev->gfx.num_xcc_per_xcp == 0) + num_xcd = NUM_XCC(kfd->adev->gfx.xcc_mask); + if (num_xcd == 0 || num_xcd == 1 || kfd->adev->gfx.num_xcc_per_xcp == 0) kfd->num_nodes = 1; else - kfd->num_nodes = - kfd->adev->gfx.num_xcd/kfd->adev->gfx.num_xcc_per_xcp; + kfd->num_nodes = num_xcd / kfd->adev->gfx.num_xcc_per_xcp; if (kfd->num_nodes == 0) { dev_err(kfd_device, "KFD num nodes cannot be 0, GC inst: %d, num_xcc_in_node: %d\n", - kfd->adev->gfx.num_xcd, kfd->adev->gfx.num_xcc_per_xcp); + num_xcd, kfd->adev->gfx.num_xcc_per_xcp); goto out; } -- cgit v1.2.3 From 0c7315e7d5ef9b36ca4db32ffeb34a187cbaf231 Mon Sep 17 00:00:00 2001 From: Mukul Joshi <mukul.joshi@amd.com> Date: Fri, 10 Jun 2022 09:41:29 -0400 Subject: drm/amdkfd: Add device repartition support GFX9.4.3 will support dynamic repartitioning of the GPU through sysfs. Add device repartitioning support in KFD to repartition GPU from one mode to other. v2: squash in fix ("drm/amdkfd: Fix warning kgd2kfd_unlock_kfd defined but not used") Signed-off-by: Mukul Joshi <mukul.joshi@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 10 ++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 13 +++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 22 +++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 5 +---- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 21 +++++++++++++++++++++ 5 files changed, 66 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 9d19c7ceda3f..bbbfe9ec4adf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -773,3 +773,13 @@ bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev) else return false; } + +int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev) +{ + return kgd2kfd_check_and_lock_kfd(); +} + +void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev) +{ + kgd2kfd_unlock_kfd(); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index df07e212c21e..d1d643a050a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -151,6 +151,8 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev); +int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev); +void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev); int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, @@ -373,6 +375,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd); void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd); void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask); +int kgd2kfd_check_and_lock_kfd(void); +void kgd2kfd_unlock_kfd(void); #else static inline int kgd2kfd_init(void) { @@ -438,5 +442,14 @@ static inline void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask) { } + +static inline int kgd2kfd_check_and_lock_kfd(void) +{ + return 0; +} + +static inline void kgd2kfd_unlock_kfd(void) +{ +} #endif #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 069b259f384c..69bac5b801ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1233,10 +1233,30 @@ static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev, return -EINVAL; } + if (!adev->kfd.init_complete) + return -EPERM; + mutex_lock(&adev->gfx.partition_mutex); - ret = adev->gfx.funcs->switch_partition_mode(adev, mode); + if (mode == adev->gfx.funcs->query_partition_mode(adev)) + goto out; + + ret = amdgpu_amdkfd_check_and_lock_kfd(adev); + if (ret) + goto out; + + amdgpu_amdkfd_device_fini_sw(adev); + + adev->gfx.funcs->switch_partition_mode(adev, mode); + + amdgpu_amdkfd_device_probe(adev); + amdgpu_amdkfd_device_init(adev); + /* If KFD init failed, return failure */ + if (!adev->kfd.init_complete) + ret = -EIO; + amdgpu_amdkfd_unlock_kfd(adev); +out: mutex_unlock(&adev->gfx.partition_mutex); if (ret) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index c776fc5884de..47d8ac64e877 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -675,7 +675,7 @@ static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev, static enum amdgpu_gfx_partition gfx_v9_4_3_query_compute_partition(struct amdgpu_device *adev) { - enum amdgpu_gfx_partition mode = AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE; + enum amdgpu_gfx_partition mode = adev->gfx.partition_mode; if (adev->nbio.funcs->get_compute_partition_mode) mode = adev->nbio.funcs->get_compute_partition_mode(adev); @@ -689,9 +689,6 @@ static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev, u32 tmp = 0; int num_xcc_per_partition, i, num_xcc; - if (mode == adev->gfx.partition_mode) - return mode; - num_xcc = NUM_XCC(adev->gfx.xcc_mask); switch (mode) { case AMDGPU_SPX_PARTITION_MODE: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index eb2b44fddf74..293787290e36 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -1356,6 +1356,27 @@ unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_node *node) kfd_get_num_sdma_engines(node); } +int kgd2kfd_check_and_lock_kfd(void) +{ + mutex_lock(&kfd_processes_mutex); + if (!hash_empty(kfd_processes_table) || kfd_is_locked()) { + mutex_unlock(&kfd_processes_mutex); + return -EBUSY; + } + + ++kfd_locked; + mutex_unlock(&kfd_processes_mutex); + + return 0; +} + +void kgd2kfd_unlock_kfd(void) +{ + mutex_lock(&kfd_processes_mutex); + --kfd_locked; + mutex_unlock(&kfd_processes_mutex); +} + #if defined(CONFIG_DEBUG_FS) /* This function will send a package to HIQ to hang the HWS -- cgit v1.2.3 From 659a4ab8e27eb39cc61cb74cc714ba1a8f8c9a61 Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Tue, 5 Jul 2022 09:56:41 +0530 Subject: drm/amdgpu: Use instance lookup table for GC 9.4.3 Register accesses need to be based on physical instance on bare metal. Pass the right instance using logical to physical instance lookup table before accessing registers. Add a macro GET_INST to get the right physical instance of an IP corresponding to a logical instance. v2: fix gfx_v9_4_3_check_rlcg_range() (Alex) Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Reviewed-by: Le Ma <Le.Ma@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c | 20 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 381 +++++++++++---------- drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c | 146 ++++---- drivers/gpu/drm/amd/amdgpu/soc15_common.h | 3 + 4 files changed, 279 insertions(+), 271 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c index e81bdca53f42..5daec0b45545 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c @@ -279,8 +279,8 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd, /* HQD registers extend to CP_HQD_AQL_DISPATCH_ID_HI */ mqd_hqd = &m->cp_mqd_base_addr_lo; - hqd_base = SOC15_REG_OFFSET(GC, inst, regCP_MQD_BASE_ADDR); - hqd_end = SOC15_REG_OFFSET(GC, inst, regCP_HQD_AQL_DISPATCH_ID_HI); + hqd_base = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_MQD_BASE_ADDR); + hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_AQL_DISPATCH_ID_HI); for (reg = hqd_base; reg <= hqd_end; reg++) WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); @@ -289,7 +289,7 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd, /* Activate doorbell logic before triggering WPTR poll. */ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, inst, regCP_HQD_PQ_DOORBELL_CONTROL), + WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL), data); if (wptr) { @@ -319,27 +319,27 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd, guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; - WREG32_RLC(SOC15_REG_OFFSET(GC, inst, regCP_HQD_PQ_WPTR_LO), + WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO), lower_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, inst, regCP_HQD_PQ_WPTR_HI), + WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI), upper_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, inst, regCP_HQD_PQ_WPTR_POLL_ADDR), + WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR), lower_32_bits((uintptr_t)wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, inst, + WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR_HI), upper_32_bits((uintptr_t)wptr)); - WREG32(SOC15_REG_OFFSET(GC, inst, regCP_PQ_WPTR_POLL_CNTL1), + WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_PQ_WPTR_POLL_CNTL1), (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ - WREG32_RLC(SOC15_REG_OFFSET(GC, inst, regCP_HQD_EOP_RPTR), + WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR), REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, inst, regCP_HQD_ACTIVE), data); + WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE), data); kgd_gfx_v9_release_queue(adev, inst); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 47d8ac64e877..e9c12b4970f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -190,7 +190,7 @@ static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev) num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 2; i < num_xcc; i++) - WREG32_SOC15(GC, i, regGRBM_MCM_ADDR, 0x4); + WREG32_SOC15(GC, GET_INST(GC, i), regGRBM_MCM_ADDR, 0x4); } static void gfx_v9_4_3_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, @@ -236,7 +236,7 @@ static int gfx_v9_4_3_ring_test_ring(struct amdgpu_ring *ring) /* scratch_reg0_offset is 32bit even with full XCD config */ uint32_t scratch_reg0_offset; - scratch_reg0_offset = SOC15_REG_OFFSET(GC, ring->xcc_id, regSCRATCH_REG0); + scratch_reg0_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, ring->xcc_id), regSCRATCH_REG0); WREG32(scratch_reg0_offset, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); @@ -324,9 +324,9 @@ static uint64_t gfx_v9_4_3_get_gpu_clock_counter(struct amdgpu_device *adev) amdgpu_gfx_off_ctrl(adev, false); mutex_lock(&adev->gfx.gpu_clock_mutex); - WREG32_SOC15(GC, 0, regRLC_CAPTURE_GPU_CLOCK_COUNT, 1); - clock = (uint64_t)RREG32_SOC15(GC, 0, regRLC_GPU_CLOCK_COUNT_LSB) | - ((uint64_t)RREG32_SOC15(GC, 0, regRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CAPTURE_GPU_CLOCK_COUNT, 1); + clock = (uint64_t)RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_GPU_CLOCK_COUNT_LSB) | + ((uint64_t)RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); mutex_unlock(&adev->gfx.gpu_clock_mutex); amdgpu_gfx_off_ctrl(adev, true); @@ -598,24 +598,24 @@ static void gfx_v9_4_3_select_se_sh(struct amdgpu_device *adev, else data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); - WREG32_SOC15_RLC_SHADOW_EX(reg, GC, xcc_id, regGRBM_GFX_INDEX, data); + WREG32_SOC15_RLC_SHADOW_EX(reg, GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX, data); } static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) { - WREG32_SOC15_RLC(GC, 0, regSQ_IND_INDEX, + WREG32_SOC15_RLC(GC, GET_INST(GC, 0), regSQ_IND_INDEX, (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | (address << SQ_IND_INDEX__INDEX__SHIFT) | (SQ_IND_INDEX__FORCE_READ_MASK)); - return RREG32_SOC15(GC, 0, regSQ_IND_DATA); + return RREG32_SOC15(GC, GET_INST(GC, 0), regSQ_IND_DATA); } static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t regno, uint32_t num, uint32_t *out) { - WREG32_SOC15_RLC(GC, 0, regSQ_IND_INDEX, + WREG32_SOC15_RLC(GC, GET_INST(GC, 0), regSQ_IND_INDEX, (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | (regno << SQ_IND_INDEX__INDEX__SHIFT) | @@ -623,7 +623,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, (SQ_IND_INDEX__FORCE_READ_MASK) | (SQ_IND_INDEX__AUTO_INCR_MASK)); while (num--) - *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA); + *(out++) = RREG32_SOC15(GC, GET_INST(GC, 0), regSQ_IND_DATA); } static void gfx_v9_4_3_read_wave_data(struct amdgpu_device *adev, @@ -669,7 +669,7 @@ static void gfx_v9_4_3_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev, u32 me, u32 pipe, u32 q, u32 vm) { - soc15_grbm_select(adev, me, pipe, q, vm, 0); + soc15_grbm_select(adev, me, pipe, q, vm, GET_INST(GC, 0)); } static enum amdgpu_gfx_partition @@ -719,7 +719,7 @@ static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev, num_xcc_per_partition); tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, VIRTUAL_XCC_ID, i % num_xcc_per_partition); - WREG32_SOC15(GC, i, regCP_HYP_XCP_CTL, tmp); + WREG32_SOC15(GC, GET_INST(GC, i), regCP_HYP_XCP_CTL, tmp); } if (adev->nbio.funcs->set_compute_partition_mode) @@ -755,7 +755,7 @@ static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.sc_prim_fifo_size_backend = 0x100; adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; - gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG); + gb_addr_config = RREG32_SOC15(GC, GET_INST(GC, 0), regGB_ADDR_CONFIG); break; default: BUG(); @@ -957,8 +957,8 @@ static u32 gfx_v9_4_3_get_rb_active_bitmap(struct amdgpu_device *adev) { u32 data, mask; - data = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE); - data |= RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE); + data = RREG32_SOC15(GC, GET_INST(GC, 0), regCC_RB_BACKEND_DISABLE); + data |= RREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_RB_BACKEND_DISABLE); data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; @@ -1014,21 +1014,21 @@ static void gfx_v9_4_3_init_compute_vmid(struct amdgpu_device *adev, int xcc_id) mutex_lock(&adev->srbm_mutex); for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { - soc15_grbm_select(adev, 0, 0, 0, i, xcc_id); + soc15_grbm_select(adev, 0, 0, 0, i, GET_INST(GC, xcc_id)); /* CP and shaders */ - WREG32_SOC15_RLC(GC, xcc_id, regSH_MEM_CONFIG, sh_mem_config); - WREG32_SOC15_RLC(GC, xcc_id, regSH_MEM_BASES, sh_mem_bases); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSH_MEM_CONFIG, sh_mem_config); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSH_MEM_BASES, sh_mem_bases); } - soc15_grbm_select(adev, 0, 0, 0, 0, xcc_id); + soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); mutex_unlock(&adev->srbm_mutex); /* Initialize all compute VMIDs to have no GDS, GWS, or OA acccess. These should be enabled by FW for target VMIDs. */ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { - WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_VMID0_BASE, 2 * i, 0); - WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_VMID0_SIZE, 2 * i, 0); - WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_GWS_VMID0, i, 0); - WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_OA_VMID0, i, 0); + WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_BASE, 2 * i, 0); + WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_SIZE, 2 * i, 0); + WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_GWS_VMID0, i, 0); + WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_OA_VMID0, i, 0); } } @@ -1043,10 +1043,10 @@ static void gfx_v9_4_3_init_gds_vmid(struct amdgpu_device *adev, int xcc_id) * access so that HWS firmware can save/restore entries. */ for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { - WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_VMID0_BASE, 2 * vmid, 0); - WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_VMID0_SIZE, 2 * vmid, 0); - WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_GWS_VMID0, vmid, 0); - WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_OA_VMID0, vmid, 0); + WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_BASE, 2 * vmid, 0); + WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_SIZE, 2 * vmid, 0); + WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_GWS_VMID0, vmid, 0); + WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_OA_VMID0, vmid, 0); } } @@ -1057,42 +1057,42 @@ static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < num_xcc; i++) { - WREG32_FIELD15_PREREG(GC, i, GRBM_CNTL, READ_TIMEOUT, 0xff); + WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), GRBM_CNTL, READ_TIMEOUT, 0xff); gfx_v9_4_3_setup_rb(adev, i); } gfx_v9_4_3_get_cu_info(adev, &adev->gfx.cu_info); - adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, regDB_DEBUG2); + adev->gfx.config.db_debug2 = RREG32_SOC15(GC, GET_INST(GC, 0), regDB_DEBUG2); /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { for (j = 0; j < num_xcc; j++) { - soc15_grbm_select(adev, 0, 0, 0, i, j); + soc15_grbm_select(adev, 0, 0, 0, i, GET_INST(GC, j)); /* CP and shaders */ if (i == 0) { tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, SH_MEM_ALIGNMENT_MODE_UNALIGNED); tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, !!adev->gmc.noretry); - WREG32_SOC15_RLC(GC, j, regSH_MEM_CONFIG, tmp); - WREG32_SOC15_RLC(GC, j, regSH_MEM_BASES, 0); + WREG32_SOC15_RLC(GC, GET_INST(GC, j), regSH_MEM_CONFIG, tmp); + WREG32_SOC15_RLC(GC, GET_INST(GC, j), regSH_MEM_BASES, 0); } else { tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, SH_MEM_ALIGNMENT_MODE_UNALIGNED); tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, !!adev->gmc.noretry); - WREG32_SOC15_RLC(GC, j, regSH_MEM_CONFIG, tmp); + WREG32_SOC15_RLC(GC, GET_INST(GC, j), regSH_MEM_CONFIG, tmp); tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, (adev->gmc.private_aperture_start >> 48)); tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, (adev->gmc.shared_aperture_start >> 48)); - WREG32_SOC15_RLC(GC, j, regSH_MEM_BASES, tmp); + WREG32_SOC15_RLC(GC, GET_INST(GC, j), regSH_MEM_BASES, tmp); } } } - soc15_grbm_select(adev, 0, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, 0)); mutex_unlock(&adev->srbm_mutex); @@ -1105,18 +1105,18 @@ static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) static void gfx_v9_4_3_enable_save_restore_machine(struct amdgpu_device *adev, int xcc_id) { - WREG32_FIELD15_PREREG(GC, xcc_id, RLC_SRM_CNTL, SRM_ENABLE, 1); + WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), RLC_SRM_CNTL, SRM_ENABLE, 1); } static void gfx_v9_4_3_init_csb(struct amdgpu_device *adev, int xcc_id) { adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* csib */ - WREG32_RLC(SOC15_REG_OFFSET(GC, xcc_id, regRLC_CSIB_ADDR_HI), + WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_CSIB_ADDR_HI), adev->gfx.rlc.clear_state_gpu_addr >> 32); - WREG32_RLC(SOC15_REG_OFFSET(GC, xcc_id, regRLC_CSIB_ADDR_LO), + WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_CSIB_ADDR_LO), adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); - WREG32_RLC(SOC15_REG_OFFSET(GC, xcc_id, regRLC_CSIB_LENGTH), + WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_CSIB_LENGTH), adev->gfx.rlc.clear_state_size); } @@ -1137,7 +1137,7 @@ static void gfx_v9_4_3_init_pg(struct amdgpu_device *adev, int xcc_id) AMD_PG_SUPPORT_CP | AMD_PG_SUPPORT_GDS | AMD_PG_SUPPORT_RLC_SMU_HS)) { - WREG32_SOC15(GC, 0, regRLC_JUMP_TABLE_RESTORE, + WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8); } } @@ -1146,9 +1146,9 @@ static void gfx_v9_4_3_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id) { uint32_t data; - data = RREG32_SOC15(GC, xcc_id, regCPC_PSP_DEBUG); + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPC_PSP_DEBUG); data |= CPC_PSP_DEBUG__UTCL2IUGPAOVERRIDE_MASK; - WREG32_SOC15(GC, xcc_id, regCPC_PSP_DEBUG, data); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPC_PSP_DEBUG, data); } static void gfx_v9_4_3_program_xcc_id(struct amdgpu_device *adev, int xcc_id) @@ -1160,16 +1160,16 @@ static void gfx_v9_4_3_program_xcc_id(struct amdgpu_device *adev, int xcc_id) switch (num_xcc) { /* directly config VIRTUAL_XCC_ID to 0 for 1-XCC */ case 1: - WREG32_SOC15(GC, xcc_id, regCP_HYP_XCP_CTL, 0x8); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HYP_XCP_CTL, 0x8); break; case 2: tmp = (xcc_id % adev->gfx.num_xcc_per_xcp) << REG_FIELD_SHIFT(CP_HYP_XCP_CTL, VIRTUAL_XCC_ID); tmp = tmp | (adev->gfx.num_xcc_per_xcp << REG_FIELD_SHIFT(CP_HYP_XCP_CTL, NUM_XCC_IN_XCP)); - WREG32_SOC15(GC, xcc_id, regCP_HYP_XCP_CTL, tmp); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HYP_XCP_CTL, tmp); tmp = xcc_id << REG_FIELD_SHIFT(CP_PSP_XCP_CTL, PHYSICAL_XCC_ID); tmp = tmp | (xcc_id << REG_FIELD_SHIFT(CP_PSP_XCP_CTL, XCC_DIE_ID)); - WREG32_SOC15(GC, xcc_id, regCP_PSP_XCP_CTL, tmp); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_PSP_XCP_CTL, tmp); break; default: break; @@ -1181,7 +1181,7 @@ static bool gfx_v9_4_3_is_rlc_enabled(struct amdgpu_device *adev) uint32_t rlc_setting; /* if RLC is not enabled, do nothing */ - rlc_setting = RREG32_SOC15(GC, 0, regRLC_CNTL); + rlc_setting = RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CNTL); if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) return false; @@ -1195,11 +1195,11 @@ static void gfx_v9_4_3_set_safe_mode(struct amdgpu_device *adev, int xcc_id) data = RLC_SAFE_MODE__CMD_MASK; data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); - WREG32_SOC15(GC, xcc_id, regRLC_SAFE_MODE, data); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SAFE_MODE, data); /* wait for RLC_SAFE_MODE */ for (i = 0; i < adev->usec_timeout; i++) { - if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) + if (!REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) break; udelay(1); } @@ -1210,7 +1210,7 @@ static void gfx_v9_4_3_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) uint32_t data; data = RLC_SAFE_MODE__CMD_MASK; - WREG32_SOC15(GC, xcc_id, regRLC_SAFE_MODE, data); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SAFE_MODE, data); } static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev) @@ -1247,7 +1247,7 @@ static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev, for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff, xcc_id); for (k = 0; k < adev->usec_timeout; k++) { - if (RREG32_SOC15(GC, 0, regRLC_SERDES_CU_MASTER_BUSY) == 0) + if (RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_SERDES_CU_MASTER_BUSY) == 0) break; udelay(1); } @@ -1270,7 +1270,7 @@ static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev, RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; for (k = 0; k < adev->usec_timeout; k++) { - if ((RREG32_SOC15(GC, 0, regRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) + if ((RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) break; udelay(1); } @@ -1283,13 +1283,13 @@ static void gfx_v9_4_3_enable_gui_idle_interrupt(struct amdgpu_device *adev, /* These interrupts should be enabled to drive DS clock */ - tmp = RREG32_SOC15(GC, xcc_id, regCP_INT_CNTL_RING0); + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL_RING0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); - WREG32_SOC15(GC, xcc_id, regCP_INT_CNTL_RING0, tmp); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL_RING0, tmp); } static void gfx_v9_4_3_rlc_stop(struct amdgpu_device *adev) @@ -1298,7 +1298,7 @@ static void gfx_v9_4_3_rlc_stop(struct amdgpu_device *adev) num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < num_xcc; i++) { - WREG32_FIELD15_PREREG(GC, i, RLC_CNTL, RLC_ENABLE_F32, 0); + WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), RLC_CNTL, RLC_ENABLE_F32, 0); gfx_v9_4_3_enable_gui_idle_interrupt(adev, false, i); gfx_v9_4_3_wait_for_rlc_serdes(adev, i); } @@ -1310,9 +1310,9 @@ static void gfx_v9_4_3_rlc_reset(struct amdgpu_device *adev) num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < num_xcc; i++) { - WREG32_FIELD15_PREREG(GC, i, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); + WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); udelay(50); - WREG32_FIELD15_PREREG(GC, i, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); + WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); udelay(50); } } @@ -1326,7 +1326,7 @@ static void gfx_v9_4_3_rlc_start(struct amdgpu_device *adev) num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < num_xcc; i++) { - WREG32_FIELD15_PREREG(GC, i, RLC_CNTL, RLC_ENABLE_F32, 1); + WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), RLC_CNTL, RLC_ENABLE_F32, 1); udelay(50); /* carrizo do enable cp interrupt after cp inited */ @@ -1337,18 +1337,18 @@ static void gfx_v9_4_3_rlc_start(struct amdgpu_device *adev) #ifdef AMDGPU_RLC_DEBUG_RETRY /* RLC_GPM_GENERAL_6 : RLC Ucode version */ - rlc_ucode_ver = RREG32_SOC15(GC, i, regRLC_GPM_GENERAL_6); + rlc_ucode_ver = RREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_GENERAL_6); if (rlc_ucode_ver == 0x108) { dev_info(adev->dev, "Using rlc debug ucode. regRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", rlc_ucode_ver, adev->gfx.rlc_fw_version); /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, * default is 0x9C4 to create a 100us interval */ - WREG32_SOC15(GC, i, regRLC_GPM_TIMER_INT_3, 0x9C4); + WREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_TIMER_INT_3, 0x9C4); /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr * to disable the page fault retry interrupts, default is * 0x100 (256) */ - WREG32_SOC15(GC, i, regRLC_GPM_GENERAL_12, 0x100); + WREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_GENERAL_12, 0x100); } #endif } @@ -1370,16 +1370,16 @@ static int gfx_v9_4_3_rlc_load_microcode(struct amdgpu_device *adev, int xcc_id) le32_to_cpu(hdr->header.ucode_array_offset_bytes)); fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; - WREG32_SOC15(GC, xcc_id, regRLC_GPM_UCODE_ADDR, + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_GPM_UCODE_ADDR, RLCG_UCODE_LOADING_START_ADDRESS); for (i = 0; i < fw_size; i++) { if (amdgpu_emu_mode == 1 && i % 100 == 0) { dev_info(adev->dev, "Write RLC ucode data %u DWs\n", i); msleep(1); } - WREG32_SOC15(GC, xcc_id, regRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); } - WREG32_SOC15(GC, xcc_id, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); return 0; } @@ -1393,7 +1393,7 @@ static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev) num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < num_xcc; i++) { /* disable CG */ - WREG32_SOC15(GC, i, regRLC_CGCG_CGLS_CTRL, 0); + WREG32_SOC15(GC, GET_INST(GC, i), regRLC_CGCG_CGLS_CTRL, 0); gfx_v9_4_3_init_pg(adev, i); @@ -1415,7 +1415,7 @@ static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, { u32 reg, data; - reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); + reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL); if (amdgpu_sriov_is_pp_one_vf(adev)) data = RREG32_NO_KIQ(reg); else @@ -1425,9 +1425,9 @@ static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; if (amdgpu_sriov_is_pp_one_vf(adev)) - WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); + WREG32_SOC15_NO_KIQ(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data); else - WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data); + WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data); } static const struct soc15_reg_rlcg rlcg_access_gc_9_4_3[] = { @@ -1439,7 +1439,7 @@ static bool gfx_v9_4_3_check_rlcg_range(struct amdgpu_device *adev, uint32_t offset, struct soc15_reg_rlcg *entries, int arr_size) { - int i; + int i, inst; uint32_t reg; if (!entries) @@ -1449,7 +1449,12 @@ static bool gfx_v9_4_3_check_rlcg_range(struct amdgpu_device *adev, const struct soc15_reg_rlcg *entry; entry = &entries[i]; - reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg; + inst = adev->ip_map.logical_to_dev_inst ? + adev->ip_map.logical_to_dev_inst( + adev, entry->hwip, entry->instance) : + entry->instance; + reg = adev->reg_offset[entry->hwip][inst][entry->segment] + + entry->reg; if (offset == reg) return true; } @@ -1468,9 +1473,9 @@ static void gfx_v9_4_3_cp_compute_enable(struct amdgpu_device *adev, bool enable, int xcc_id) { if (enable) { - WREG32_SOC15_RLC(GC, xcc_id, regCP_MEC_CNTL, 0); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MEC_CNTL, 0); } else { - WREG32_SOC15_RLC(GC, xcc_id, regCP_MEC_CNTL, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); adev->gfx.kiq[xcc_id].ring.sched.ready = false; } @@ -1501,17 +1506,17 @@ static int gfx_v9_4_3_cp_compute_load_microcode(struct amdgpu_device *adev, tmp = 0; tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); - WREG32_SOC15(GC, xcc_id, regCP_CPC_IC_BASE_CNTL, tmp); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL, tmp); - WREG32_SOC15(GC, xcc_id, regCP_CPC_IC_BASE_LO, + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); - WREG32_SOC15(GC, xcc_id, regCP_CPC_IC_BASE_HI, + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_HI, upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); mec_ucode_addr_offset = - SOC15_REG_OFFSET(GC, xcc_id, regCP_MEC_ME1_UCODE_ADDR); + SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_MEC_ME1_UCODE_ADDR); mec_ucode_data_offset = - SOC15_REG_OFFSET(GC, xcc_id, regCP_MEC_ME1_UCODE_DATA); + SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_MEC_ME1_UCODE_DATA); /* MEC1 */ WREG32(mec_ucode_addr_offset, mec_hdr->jt_offset); @@ -1532,12 +1537,12 @@ static void gfx_v9_4_3_kiq_setting(struct amdgpu_ring *ring, int xcc_id) struct amdgpu_device *adev = ring->adev; /* tell RLC which is KIQ queue */ - tmp = RREG32_SOC15(GC, xcc_id, regRLC_CP_SCHEDULERS); + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15_RLC(GC, xcc_id, regRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); tmp |= 0x80; - WREG32_SOC15_RLC(GC, xcc_id, regRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); } static void gfx_v9_4_3_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd) @@ -1580,14 +1585,14 @@ static int gfx_v9_4_3_mqd_init(struct amdgpu_ring *ring) mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL); + tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regCP_HQD_EOP_CONTROL); tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); mqd->cp_hqd_eop_control = tmp; /* enable doorbell? */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); + tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regCP_HQD_PQ_DOORBELL_CONTROL); if (ring->use_doorbell) { tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, @@ -1617,7 +1622,7 @@ static int gfx_v9_4_3_mqd_init(struct amdgpu_ring *ring) mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); /* set MQD vmid to 0 */ - tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL); + tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regCP_MQD_CONTROL); tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); mqd->cp_mqd_control = tmp; @@ -1627,7 +1632,7 @@ static int gfx_v9_4_3_mqd_init(struct amdgpu_ring *ring) mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); /* set up the HQD, this is similar to CP_RB0_CNTL */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL); + tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regCP_HQD_PQ_CONTROL); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, (order_base_2(ring->ring_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, @@ -1654,23 +1659,23 @@ static int gfx_v9_4_3_mqd_init(struct amdgpu_ring *ring) /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ ring->wptr = 0; - mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR); + mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, GET_INST(GC, 0), regCP_HQD_PQ_RPTR); /* set the vmid for the queue */ mqd->cp_hqd_vmid = 0; - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE); + tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regCP_HQD_PERSISTENT_STATE); tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); mqd->cp_hqd_persistent_state = tmp; /* set MIN_IB_AVAIL_SIZE */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL); + tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regCP_HQD_IB_CONTROL); tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); mqd->cp_hqd_ib_control = tmp; /* set static priority for a queue/ring */ gfx_v9_4_3_mqd_set_priority(ring, mqd); - mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, regCP_HQD_QUANTUM); + mqd->cp_hqd_quantum = RREG32_SOC15(GC, GET_INST(GC, 0), regCP_HQD_QUANTUM); /* map_queues packet doesn't need activate the queue, * so only kiq need set this field. @@ -1690,94 +1695,94 @@ static int gfx_v9_4_3_kiq_init_register(struct amdgpu_ring *ring, int xcc_id) /* disable wptr polling */ WREG32_FIELD15_PREREG(GC, xcc_id, CP_PQ_WPTR_POLL_CNTL, EN, 0); - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_EOP_BASE_ADDR, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo); - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_EOP_BASE_ADDR_HI, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi); /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_EOP_CONTROL, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control); /* enable doorbell? */ - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_DOORBELL_CONTROL, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); /* disable the queue if it's active */ - if (RREG32_SOC15(GC, xcc_id, regCP_HQD_ACTIVE) & 1) { - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_DEQUEUE_REQUEST, 1); + if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) { + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1); for (j = 0; j < adev->usec_timeout; j++) { - if (!(RREG32_SOC15(GC, xcc_id, regCP_HQD_ACTIVE) & 1)) + if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1)) break; udelay(1); } - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_DEQUEUE_REQUEST, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_RPTR, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_LO, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_HI, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); } /* set the pointer to the MQD */ - WREG32_SOC15_RLC(GC, xcc_id, regCP_MQD_BASE_ADDR, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); - WREG32_SOC15_RLC(GC, xcc_id, regCP_MQD_BASE_ADDR_HI, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); /* set MQD vmid to 0 */ - WREG32_SOC15_RLC(GC, xcc_id, regCP_MQD_CONTROL, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL, mqd->cp_mqd_control); /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_BASE, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_BASE_HI, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); /* set up the HQD, this is similar to CP_RB0_CNTL */ - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_CONTROL, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); /* set the wb address whether it's enabled or not */ - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_RPTR_REPORT_ADDR, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR, mqd->cp_hqd_pq_rptr_report_addr_lo); - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, mqd->cp_hqd_pq_rptr_report_addr_hi); /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_POLL_ADDR, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_POLL_ADDR_HI, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi); /* enable the doorbell if requested */ if (ring->use_doorbell) { - WREG32_SOC15(GC, xcc_id, regCP_MEC_DOORBELL_RANGE_LOWER, + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DOORBELL_RANGE_LOWER, (adev->doorbell_index.kiq * 2) << 2); - WREG32_SOC15(GC, xcc_id, regCP_MEC_DOORBELL_RANGE_UPPER, + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DOORBELL_RANGE_UPPER, (adev->doorbell_index.userqueue_end * 2) << 2); } - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_DOORBELL_CONTROL, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_LO, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_HI, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); /* set the vmid for the queue */ - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_VMID, mqd->cp_hqd_vmid); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID, mqd->cp_hqd_vmid); - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PERSISTENT_STATE, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); /* activate the queue */ - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_ACTIVE, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, mqd->cp_hqd_active); if (ring->use_doorbell) @@ -1792,12 +1797,12 @@ static int gfx_v9_4_3_kiq_fini_register(struct amdgpu_ring *ring, int xcc_id) int j; /* disable the queue if it's active */ - if (RREG32_SOC15(GC, xcc_id, regCP_HQD_ACTIVE) & 1) { + if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) { - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_DEQUEUE_REQUEST, 1); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1); for (j = 0; j < adev->usec_timeout; j++) { - if (!(RREG32_SOC15(GC, xcc_id, regCP_HQD_ACTIVE) & 1)) + if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1)) break; udelay(1); } @@ -1806,21 +1811,21 @@ static int gfx_v9_4_3_kiq_fini_register(struct amdgpu_ring *ring, int xcc_id) DRM_DEBUG("KIQ dequeue request failed.\n"); /* Manual disable if dequeue request times out */ - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_ACTIVE, 0); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, 0); } - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_DEQUEUE_REQUEST, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 0); } - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_IQ_TIMER, 0); - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_IB_CONTROL, 0); - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PERSISTENT_STATE, 0); - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_DOORBELL_CONTROL, 0); - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_RPTR, 0); - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_HI, 0); - WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_LO, 0); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_IQ_TIMER, 0); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_IB_CONTROL, 0); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE, 0); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR, 0); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI, 0); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO, 0); return 0; } @@ -1848,19 +1853,19 @@ static int gfx_v9_4_3_kiq_init_queue(struct amdgpu_ring *ring, int xcc_id) ring->wptr = 0; amdgpu_ring_clear_ring(ring); mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, xcc_id); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id)); gfx_v9_4_3_kiq_init_register(ring, xcc_id); - soc15_grbm_select(adev, 0, 0, 0, 0, xcc_id); + soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); mutex_unlock(&adev->srbm_mutex); } else { memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, xcc_id); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id)); gfx_v9_4_3_mqd_init(ring); gfx_v9_4_3_kiq_init_register(ring, xcc_id); - soc15_grbm_select(adev, 0, 0, 0, 0, xcc_id); + soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); mutex_unlock(&adev->srbm_mutex); if (adev->gfx.kiq[xcc_id].mqd_backup) @@ -1888,9 +1893,9 @@ static int gfx_v9_4_3_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id) ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, xcc_id); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id)); gfx_v9_4_3_mqd_init(ring); - soc15_grbm_select(adev, 0, 0, 0, 0, xcc_id); + soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); mutex_unlock(&adev->srbm_mutex); if (adev->gfx.mec.mqd_backup[mqd_idx]) @@ -2049,9 +2054,9 @@ static int gfx_v9_4_3_hw_fini(void *handle) mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, adev->gfx.kiq[i].ring.me, adev->gfx.kiq[i].ring.pipe, - adev->gfx.kiq[i].ring.queue, 0, i); + adev->gfx.kiq[i].ring.queue, 0, GET_INST(GC, i)); gfx_v9_4_3_kiq_fini_register(&adev->gfx.kiq[i].ring, i); - soc15_grbm_select(adev, 0, 0, 0, 0, i); + soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, i)); mutex_unlock(&adev->srbm_mutex); } @@ -2085,7 +2090,7 @@ static bool gfx_v9_4_3_is_idle(void *handle) num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < num_xcc; i++) { - if (REG_GET_FIELD(RREG32_SOC15(GC, i, regGRBM_STATUS), + if (REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, i), regGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) return false; } @@ -2112,7 +2117,7 @@ static int gfx_v9_4_3_soft_reset(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* GRBM_STATUS */ - tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS); + tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_STATUS); if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | @@ -2131,7 +2136,7 @@ static int gfx_v9_4_3_soft_reset(void *handle) } /* GRBM_STATUS2 */ - tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS2); + tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_STATUS2); if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); @@ -2145,17 +2150,17 @@ static int gfx_v9_4_3_soft_reset(void *handle) gfx_v9_4_3_cp_compute_enable(adev, false, 0); if (grbm_soft_reset) { - tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); + tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET); tmp |= grbm_soft_reset; dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); + WREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET); udelay(50); tmp &= ~grbm_soft_reset; - WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); + WREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET); } /* Wait a little for things to settle down */ @@ -2174,22 +2179,22 @@ static void gfx_v9_4_3_ring_emit_gds_switch(struct amdgpu_ring *ring, /* GDS Base */ gfx_v9_4_3_write_data_to_reg(ring, 0, false, - SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid, + SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regGDS_VMID0_BASE) + 2 * vmid, gds_base); /* GDS Size */ gfx_v9_4_3_write_data_to_reg(ring, 0, false, - SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid, + SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regGDS_VMID0_SIZE) + 2 * vmid, gds_size); /* GWS */ gfx_v9_4_3_write_data_to_reg(ring, 0, false, - SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid, + SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regGDS_GWS_VMID0) + vmid, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); /* OA */ gfx_v9_4_3_write_data_to_reg(ring, 0, false, - SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid, + SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regGDS_OA_VMID0) + vmid, (1 << (oa_size + oa_base)) - (1 << oa_base)); } @@ -2260,7 +2265,7 @@ static void gfx_v9_4_3_update_medium_grain_clock_gating(struct amdgpu_device *ad /* It is disabled by HW by default */ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { /* 1 - RLC_CGTT_MGCG_OVERRIDE */ - def = data = RREG32_SOC15(GC, xcc_id, regRLC_CGTT_MGCG_OVERRIDE); + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE); data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | @@ -2270,28 +2275,28 @@ static void gfx_v9_4_3_update_medium_grain_clock_gating(struct amdgpu_device *ad data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; if (def != data) - WREG32_SOC15(GC, xcc_id, regRLC_CGTT_MGCG_OVERRIDE, data); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data); /* MGLS is a global flag to control all MGLS in GFX */ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { /* 2 - RLC memory Light sleep */ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { - def = data = RREG32_SOC15(GC, xcc_id, regRLC_MEM_SLP_CNTL); + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_MEM_SLP_CNTL); data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; if (def != data) - WREG32_SOC15(GC, xcc_id, regRLC_MEM_SLP_CNTL, data); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_MEM_SLP_CNTL, data); } /* 3 - CP memory Light sleep */ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { - def = data = RREG32_SOC15(GC, xcc_id, regCP_MEM_SLP_CNTL); + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEM_SLP_CNTL); data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; if (def != data) - WREG32_SOC15(GC, xcc_id, regCP_MEM_SLP_CNTL, data); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEM_SLP_CNTL, data); } } } else { /* 1 - MGCG_OVERRIDE */ - def = data = RREG32_SOC15(GC, xcc_id, regRLC_CGTT_MGCG_OVERRIDE); + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE); data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | @@ -2299,20 +2304,20 @@ static void gfx_v9_4_3_update_medium_grain_clock_gating(struct amdgpu_device *ad RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); if (def != data) - WREG32_SOC15(GC, xcc_id, regRLC_CGTT_MGCG_OVERRIDE, data); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data); /* 2 - disable MGLS in RLC */ - data = RREG32_SOC15(GC, xcc_id, regRLC_MEM_SLP_CNTL); + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_MEM_SLP_CNTL); if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; - WREG32_SOC15(GC, xcc_id, regRLC_MEM_SLP_CNTL, data); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_MEM_SLP_CNTL, data); } /* 3 - disable MGLS in CP */ - data = RREG32_SOC15(GC, xcc_id, regCP_MEM_SLP_CNTL); + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEM_SLP_CNTL); if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; - WREG32_SOC15(GC, xcc_id, regCP_MEM_SLP_CNTL, data); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEM_SLP_CNTL, data); } } @@ -2327,7 +2332,7 @@ static void gfx_v9_4_3_update_coarse_grain_clock_gating(struct amdgpu_device *ad amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { - def = data = RREG32_SOC15(GC, xcc_id, regRLC_CGTT_MGCG_OVERRIDE); + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE); /* unset CGCG override */ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) @@ -2336,10 +2341,10 @@ static void gfx_v9_4_3_update_coarse_grain_clock_gating(struct amdgpu_device *ad data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; /* update CGCG and CGLS override bits */ if (def != data) - WREG32_SOC15(GC, xcc_id, regRLC_CGTT_MGCG_OVERRIDE, data); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data); /* enable cgcg FSM(0x0000363F) */ - def = RREG32_SOC15(GC, xcc_id, regRLC_CGCG_CGLS_CTRL); + def = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL); if (adev->asic_type == CHIP_ARCTURUS) data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | @@ -2351,21 +2356,21 @@ static void gfx_v9_4_3_update_coarse_grain_clock_gating(struct amdgpu_device *ad data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; if (def != data) - WREG32_SOC15(GC, xcc_id, regRLC_CGCG_CGLS_CTRL, data); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, data); /* set IDLE_POLL_COUNT(0x00900100) */ - def = RREG32_SOC15(GC, xcc_id, regCP_RB_WPTR_POLL_CNTL); + def = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL); data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); if (def != data) - WREG32_SOC15(GC, xcc_id, regCP_RB_WPTR_POLL_CNTL, data); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL, data); } else { - def = data = RREG32_SOC15(GC, xcc_id, regRLC_CGCG_CGLS_CTRL); + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL); /* reset CGCG/CGLS bits */ data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); /* disable cgcg and cgls in FSM */ if (def != data) - WREG32_SOC15(GC, xcc_id, regRLC_CGCG_CGLS_CTRL, data); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, data); } amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id); @@ -2444,12 +2449,12 @@ static void gfx_v9_4_3_get_clockgating_state(void *handle, u64 *flags) *flags = 0; /* AMD_CG_SUPPORT_GFX_MGCG */ - data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, regRLC_CGTT_MGCG_OVERRIDE)); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_CGTT_MGCG_OVERRIDE)); if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) *flags |= AMD_CG_SUPPORT_GFX_MGCG; /* AMD_CG_SUPPORT_GFX_CGCG */ - data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, regRLC_CGCG_CGLS_CTRL)); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_CGCG_CGLS_CTRL)); if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_CGCG; @@ -2458,12 +2463,12 @@ static void gfx_v9_4_3_get_clockgating_state(void *handle, u64 *flags) *flags |= AMD_CG_SUPPORT_GFX_CGLS; /* AMD_CG_SUPPORT_GFX_RLC_LS */ - data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, regRLC_MEM_SLP_CNTL)); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_MEM_SLP_CNTL)); if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; /* AMD_CG_SUPPORT_GFX_CP_LS */ - data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, regCP_MEM_SLP_CNTL)); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regCP_MEM_SLP_CNTL)); if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; } @@ -2634,7 +2639,7 @@ static void gfx_v9_4_3_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); - amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS)); + amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regCPC_INT_STATUS)); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ } @@ -2710,16 +2715,16 @@ static void gfx_v9_4_3_set_compute_eop_interrupt_state(struct amdgpu_device *ade if (me == 1) { switch (pipe) { case 0: - mec_int_cntl_reg = SOC15_REG_OFFSET(GC, xcc_id, regCP_ME1_PIPE0_INT_CNTL); + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE0_INT_CNTL); break; case 1: - mec_int_cntl_reg = SOC15_REG_OFFSET(GC, xcc_id, regCP_ME1_PIPE1_INT_CNTL); + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE1_INT_CNTL); break; case 2: - mec_int_cntl_reg = SOC15_REG_OFFSET(GC, xcc_id, regCP_ME1_PIPE2_INT_CNTL); + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE2_INT_CNTL); break; case 3: - mec_int_cntl_reg = SOC15_REG_OFFSET(GC, xcc_id, regCP_ME1_PIPE3_INT_CNTL); + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE3_INT_CNTL); break; default: DRM_DEBUG("invalid pipe %d\n", pipe); @@ -2760,7 +2765,7 @@ static int gfx_v9_4_3_set_priv_reg_fault_state(struct amdgpu_device *adev, case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: for (i = 0; i < num_xcc; i++) - WREG32_FIELD15_PREREG(GC, i, CP_INT_CNTL_RING0, + WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); break; @@ -2783,7 +2788,7 @@ static int gfx_v9_4_3_set_priv_inst_fault_state(struct amdgpu_device *adev, case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: for (i = 0; i < num_xcc; i++) - WREG32_FIELD15_PREREG(GC, i, CP_INT_CNTL_RING0, + WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); break; @@ -2943,16 +2948,16 @@ static void gfx_v9_4_3_emit_wave_limit_cs(struct amdgpu_ring *ring, switch (pipe) { case 0: - wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, regSPI_WCL_PIPE_PERCENT_CS0); + wcl_cs_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_CS0); break; case 1: - wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, regSPI_WCL_PIPE_PERCENT_CS1); + wcl_cs_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_CS1); break; case 2: - wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, regSPI_WCL_PIPE_PERCENT_CS2); + wcl_cs_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_CS2); break; case 3: - wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, regSPI_WCL_PIPE_PERCENT_CS3); + wcl_cs_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_CS3); break; default: DRM_DEBUG("invalid pipe %d\n", pipe); @@ -2974,7 +2979,7 @@ static void gfx_v9_4_3_emit_wave_limit(struct amdgpu_ring *ring, bool enable) */ val = enable ? 0x1f : 0x07ffffff; amdgpu_ring_emit_wreg(ring, - SOC15_REG_OFFSET(GC, 0, regSPI_WCL_PIPE_PERCENT_GFX), + SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_GFX), val); /* Restrict waves for normal/low priority compute queues as well @@ -3161,15 +3166,15 @@ static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; - WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data); + WREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_SHADER_ARRAY_CONFIG, data); } static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev) { u32 data, mask; - data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG); - data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG); + data = RREG32_SOC15(GC, GET_INST(GC, 0), regCC_GC_SHADER_ARRAY_CONFIG); + data |= RREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_SHADER_ARRAY_CONFIG); data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index e35365ab3f1f..c26ac0662c7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -35,7 +35,7 @@ static u64 gfxhub_v1_2_get_mc_fb_offset(struct amdgpu_device *adev) { - return (u64)RREG32_SOC15(GC, 0, regMC_VM_FB_OFFSET) << 24; + return (u64)RREG32_SOC15(GC, GET_INST(GC, 0), regMC_VM_FB_OFFSET) << 24; } static void gfxhub_v1_2_setup_vm_pt_regs(struct amdgpu_device *adev, @@ -48,12 +48,12 @@ static void gfxhub_v1_2_setup_vm_pt_regs(struct amdgpu_device *adev, num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < num_xcc; i++) { hub = &adev->vmhub[AMDGPU_GFXHUB(i)]; - WREG32_SOC15_OFFSET(GC, i, + WREG32_SOC15_OFFSET(GC, GET_INST(GC, i), regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, lower_32_bits(page_table_base)); - WREG32_SOC15_OFFSET(GC, i, + WREG32_SOC15_OFFSET(GC, GET_INST(GC, i), regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, hub->ctx_addr_distance * vmid, upper_32_bits(page_table_base)); @@ -79,31 +79,31 @@ static void gfxhub_v1_2_init_gart_aperture_regs(struct amdgpu_device *adev) num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < num_xcc; i++) { if (adev->gmc.pdb0_bo) { - WREG32_SOC15(GC, i, + WREG32_SOC15(GC, GET_INST(GC, i), regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, (u32)(adev->gmc.fb_start >> 12)); - WREG32_SOC15(GC, i, + WREG32_SOC15(GC, GET_INST(GC, i), regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, (u32)(adev->gmc.fb_start >> 44)); - WREG32_SOC15(GC, i, + WREG32_SOC15(GC, GET_INST(GC, i), regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, (u32)(adev->gmc.gart_end >> 12)); - WREG32_SOC15(GC, i, + WREG32_SOC15(GC, GET_INST(GC, i), regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, (u32)(adev->gmc.gart_end >> 44)); } else { - WREG32_SOC15(GC, i, + WREG32_SOC15(GC, GET_INST(GC, i), regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, (u32)(adev->gmc.gart_start >> 12)); - WREG32_SOC15(GC, i, + WREG32_SOC15(GC, GET_INST(GC, i), regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, (u32)(adev->gmc.gart_start >> 44)); - WREG32_SOC15(GC, i, + WREG32_SOC15(GC, GET_INST(GC, i), regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, (u32)(adev->gmc.gart_end >> 12)); - WREG32_SOC15(GC, i, + WREG32_SOC15(GC, GET_INST(GC, i), regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, (u32)(adev->gmc.gart_end >> 44)); } @@ -119,13 +119,13 @@ static void gfxhub_v1_2_init_system_aperture_regs(struct amdgpu_device *adev) num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < num_xcc; i++) { /* Program the AGP BAR */ - WREG32_SOC15_RLC(GC, i, regMC_VM_AGP_BASE, 0); - WREG32_SOC15_RLC(GC, i, regMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); - WREG32_SOC15_RLC(GC, i, regMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_AGP_BASE, 0); + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); if (!amdgpu_sriov_vf(adev) || adev->asic_type <= CHIP_VEGA10) { /* Program the system aperture low logical page number. */ - WREG32_SOC15_RLC(GC, i, regMC_VM_SYSTEM_APERTURE_LOW_ADDR, + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_LOW_ADDR, min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); if (adev->apu_flags & AMD_APU_IS_RAVEN2) @@ -136,44 +136,44 @@ static void gfxhub_v1_2_init_system_aperture_regs(struct amdgpu_device *adev) * aperture high address (add 1) to get rid of the VM * fault and hardware hang. */ - WREG32_SOC15_RLC(GC, i, + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, max((adev->gmc.fb_end >> 18) + 0x1, adev->gmc.agp_end >> 18)); else - WREG32_SOC15_RLC(GC, i, + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); /* Set default page address. */ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr); - WREG32_SOC15(GC, i, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, (u32)(value >> 12)); - WREG32_SOC15(GC, i, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, (u32)(value >> 44)); /* Program "protection fault". */ - WREG32_SOC15(GC, i, regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, (u32)(adev->dummy_page_addr >> 12)); - WREG32_SOC15(GC, i, regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, (u32)((u64)adev->dummy_page_addr >> 44)); - tmp = RREG32_SOC15(GC, i, regVM_L2_PROTECTION_FAULT_CNTL2); + tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL2); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2, ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); - WREG32_SOC15(GC, i, regVM_L2_PROTECTION_FAULT_CNTL2, tmp); + WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL2, tmp); } /* In the case squeezing vram into GART aperture, we don't use * FB aperture and AGP aperture. Disable them. */ if (adev->gmc.pdb0_bo) { - WREG32_SOC15(GC, i, regMC_VM_FB_LOCATION_TOP, 0); - WREG32_SOC15(GC, i, regMC_VM_FB_LOCATION_BASE, 0x00FFFFFF); - WREG32_SOC15(GC, i, regMC_VM_AGP_TOP, 0); - WREG32_SOC15(GC, i, regMC_VM_AGP_BOT, 0xFFFFFF); - WREG32_SOC15(GC, i, regMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x3FFFFFFF); - WREG32_SOC15(GC, i, regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0); + WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_TOP, 0); + WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_BASE, 0x00FFFFFF); + WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_AGP_TOP, 0); + WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_AGP_BOT, 0xFFFFFF); + WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x3FFFFFFF); + WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0); } } } @@ -186,7 +186,7 @@ static void gfxhub_v1_2_init_tlb_regs(struct amdgpu_device *adev) num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < num_xcc; i++) { /* Setup TLB control */ - tmp = RREG32_SOC15(GC, i, regMC_VM_MX_L1_TLB_CNTL); + tmp = RREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_MX_L1_TLB_CNTL); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); @@ -200,7 +200,7 @@ static void gfxhub_v1_2_init_tlb_regs(struct amdgpu_device *adev) MTYPE, MTYPE_UC);/* XXX for emulation. */ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); - WREG32_SOC15_RLC(GC, i, regMC_VM_MX_L1_TLB_CNTL, tmp); + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_MX_L1_TLB_CNTL, tmp); } } @@ -212,7 +212,7 @@ static void gfxhub_v1_2_init_cache_regs(struct amdgpu_device *adev) num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < num_xcc; i++) { /* Setup L2 cache */ - tmp = RREG32_SOC15(GC, i, regVM_L2_CNTL); + tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 1); /* XXX for emulation, Refer to closed source code.*/ @@ -221,12 +221,12 @@ static void gfxhub_v1_2_init_cache_regs(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); - WREG32_SOC15_RLC(GC, i, regVM_L2_CNTL, tmp); + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regVM_L2_CNTL, tmp); - tmp = RREG32_SOC15(GC, i, regVM_L2_CNTL2); + tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_CNTL2); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); - WREG32_SOC15_RLC(GC, i, regVM_L2_CNTL2, tmp); + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regVM_L2_CNTL2, tmp); tmp = regVM_L2_CNTL3_DEFAULT; if (adev->gmc.translate_further) { @@ -238,7 +238,7 @@ static void gfxhub_v1_2_init_cache_regs(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6); } - WREG32_SOC15_RLC(GC, i, regVM_L2_CNTL3, tmp); + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regVM_L2_CNTL3, tmp); tmp = regVM_L2_CNTL4_DEFAULT; if (adev->gmc.xgmi.connected_to_cpu) { @@ -248,7 +248,7 @@ static void gfxhub_v1_2_init_cache_regs(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0); } - WREG32_SOC15_RLC(GC, i, regVM_L2_CNTL4, tmp); + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regVM_L2_CNTL4, tmp); } } @@ -259,7 +259,7 @@ static void gfxhub_v1_2_enable_system_domain(struct amdgpu_device *adev) num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < num_xcc; i++) { - tmp = RREG32_SOC15(GC, i, regVM_CONTEXT0_CNTL); + tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_CONTEXT0_CNTL); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, adev->gmc.vmid0_page_table_depth); @@ -267,7 +267,7 @@ static void gfxhub_v1_2_enable_system_domain(struct amdgpu_device *adev) adev->gmc.vmid0_page_table_block_size); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); - WREG32_SOC15(GC, i, regVM_CONTEXT0_CNTL, tmp); + WREG32_SOC15(GC, GET_INST(GC, i), regVM_CONTEXT0_CNTL, tmp); } } @@ -277,23 +277,23 @@ static void gfxhub_v1_2_disable_identity_aperture(struct amdgpu_device *adev) num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < num_xcc; i++) { - WREG32_SOC15(GC, i, + WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, 0XFFFFFFFF); - WREG32_SOC15(GC, i, + WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, 0x0000000F); - WREG32_SOC15(GC, i, + WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0); - WREG32_SOC15(GC, i, + WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0); - WREG32_SOC15(GC, i, + WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0); - WREG32_SOC15(GC, i, + WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0); } } @@ -316,7 +316,7 @@ static void gfxhub_v1_2_setup_vmid_config(struct amdgpu_device *adev) for (j = 0; j < num_xcc; j++) { hub = &adev->vmhub[AMDGPU_GFXHUB(j)]; for (i = 0; i <= 14; i++) { - tmp = RREG32_SOC15_OFFSET(GC, j, regVM_CONTEXT1_CNTL, i); + tmp = RREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL, i); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, num_level); @@ -348,19 +348,19 @@ static void gfxhub_v1_2_setup_vmid_config(struct amdgpu_device *adev) !adev->gmc.noretry || adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) || adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)); - WREG32_SOC15_OFFSET(GC, j, regVM_CONTEXT1_CNTL, + WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL, i * hub->ctx_distance, tmp); - WREG32_SOC15_OFFSET(GC, j, + WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i * hub->ctx_addr_distance, 0); - WREG32_SOC15_OFFSET(GC, j, + WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i * hub->ctx_addr_distance, 0); - WREG32_SOC15_OFFSET(GC, j, + WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i * hub->ctx_addr_distance, lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32_SOC15_OFFSET(GC, j, + WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i * hub->ctx_addr_distance, upper_32_bits(adev->vm_manager.max_pfn - 1)); @@ -378,9 +378,9 @@ static void gfxhub_v1_2_program_invalidation(struct amdgpu_device *adev) hub = &adev->vmhub[AMDGPU_GFXHUB(j)]; for (i = 0 ; i < 18; ++i) { - WREG32_SOC15_OFFSET(GC, j, regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, + WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, i * hub->eng_addr_distance, 0xffffffff); - WREG32_SOC15_OFFSET(GC, j, regVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, + WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, i * hub->eng_addr_distance, 0x1f); } } @@ -398,9 +398,9 @@ static int gfxhub_v1_2_gart_enable(struct amdgpu_device *adev) * VF copy registers so vbios post doesn't program them, for * SRIOV driver need to program them */ - WREG32_SOC15_RLC(GC, i, regMC_VM_FB_LOCATION_BASE, + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_BASE, adev->gmc.vram_start >> 24); - WREG32_SOC15_RLC(GC, i, regMC_VM_FB_LOCATION_TOP, + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_TOP, adev->gmc.vram_end >> 24); } } @@ -432,23 +432,23 @@ static void gfxhub_v1_2_gart_disable(struct amdgpu_device *adev) hub = &adev->vmhub[AMDGPU_GFXHUB(j)]; /* Disable all tables */ for (i = 0; i < 16; i++) - WREG32_SOC15_OFFSET(GC, j, regVM_CONTEXT0_CNTL, + WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT0_CNTL, i * hub->ctx_distance, 0); /* Setup TLB control */ - tmp = RREG32_SOC15(GC, j, regMC_VM_MX_L1_TLB_CNTL); + tmp = RREG32_SOC15(GC, GET_INST(GC, j), regMC_VM_MX_L1_TLB_CNTL); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_ADVANCED_DRIVER_MODEL, 0); - WREG32_SOC15_RLC(GC, j, regMC_VM_MX_L1_TLB_CNTL, tmp); + WREG32_SOC15_RLC(GC, GET_INST(GC, j), regMC_VM_MX_L1_TLB_CNTL, tmp); /* Setup L2 cache */ - tmp = RREG32_SOC15(GC, j, regVM_L2_CNTL); + tmp = RREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); - WREG32_SOC15(GC, j, regVM_L2_CNTL, tmp); - WREG32_SOC15(GC, j, regVM_L2_CNTL3, 0); + WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL, tmp); + WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL3, 0); } } @@ -466,7 +466,7 @@ static void gfxhub_v1_2_set_fault_enable_default(struct amdgpu_device *adev, num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < num_xcc; i++) { - tmp = RREG32_SOC15(GC, i, regVM_L2_PROTECTION_FAULT_CNTL); + tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, @@ -497,7 +497,7 @@ static void gfxhub_v1_2_set_fault_enable_default(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, CRASH_ON_RETRY_FAULT, 1); } - WREG32_SOC15(GC, i, regVM_L2_PROTECTION_FAULT_CNTL, tmp); + WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL, tmp); } } @@ -511,24 +511,24 @@ static void gfxhub_v1_2_init(struct amdgpu_device *adev) hub = &adev->vmhub[AMDGPU_GFXHUB(i)]; hub->ctx0_ptb_addr_lo32 = - SOC15_REG_OFFSET(GC, i, + SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32); hub->ctx0_ptb_addr_hi32 = - SOC15_REG_OFFSET(GC, i, + SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); hub->vm_inv_eng0_sem = - SOC15_REG_OFFSET(GC, i, regVM_INVALIDATE_ENG0_SEM); + SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = - SOC15_REG_OFFSET(GC, i, regVM_INVALIDATE_ENG0_REQ); + SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = - SOC15_REG_OFFSET(GC, i, regVM_INVALIDATE_ENG0_ACK); + SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_INVALIDATE_ENG0_ACK); hub->vm_context0_cntl = - SOC15_REG_OFFSET(GC, i, regVM_CONTEXT0_CNTL); + SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_CONTEXT0_CNTL); hub->vm_l2_pro_fault_status = - SOC15_REG_OFFSET(GC, i, + SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_STATUS); hub->vm_l2_pro_fault_cntl = - SOC15_REG_OFFSET(GC, i, regVM_L2_PROTECTION_FAULT_CNTL); + SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL); hub->ctx_distance = regVM_CONTEXT1_CNTL - regVM_CONTEXT0_CNTL; @@ -551,9 +551,9 @@ static int gfxhub_v1_2_get_xgmi_info(struct amdgpu_device *adev) u32 max_region; u64 seg_size; - xgmi_lfb_cntl = RREG32_SOC15(GC, 0, regMC_VM_XGMI_LFB_CNTL); + xgmi_lfb_cntl = RREG32_SOC15(GC, GET_INST(GC, 0), regMC_VM_XGMI_LFB_CNTL); seg_size = REG_GET_FIELD( - RREG32_SOC15(GC, 0, regMC_VM_XGMI_LFB_SIZE), + RREG32_SOC15(GC, GET_INST(GC, 0), regMC_VM_XGMI_LFB_SIZE), MC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24; max_region = REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_MAX_REGION); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index eb35096756b8..39e4406da4ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -24,6 +24,9 @@ #ifndef __SOC15_COMMON_H__ #define __SOC15_COMMON_H__ +/* GET_INST returns the physical instance corresponding to a logical instance */ +#define GET_INST(ip, inst) (adev->ip_map.logical_to_dev_inst? adev->ip_map.logical_to_dev_inst(adev, ip##_HWIP, inst): inst) + /* Register Access Macros */ #define SOC15_REG_OFFSET(ip, inst, reg) (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) #define SOC15_REG_OFFSET1(ip, inst, reg, offset) \ -- cgit v1.2.3 From 7aa8a266aaa25e9e2f85d9d2d594cdff6b5635f2 Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Wed, 6 Jul 2022 13:50:45 +0530 Subject: drm/amdgpu: Fix GRBM programming sequence It needs to be done only for XCC instances in non-AID0. Use the physical instance to determine non-AID0 XCC instances. Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Le Ma <Le.Ma@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index e9c12b4970f9..43126f7b70ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -186,11 +186,14 @@ static void gfx_v9_4_3_set_kiq_pm4_funcs(struct amdgpu_device *adev) static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev) { - int i, num_xcc; + int i, num_xcc, dev_inst; num_xcc = NUM_XCC(adev->gfx.xcc_mask); - for (i = 2; i < num_xcc; i++) - WREG32_SOC15(GC, GET_INST(GC, i), regGRBM_MCM_ADDR, 0x4); + for (i = 0; i < num_xcc; i++) { + dev_inst = GET_INST(GC, i); + if (dev_inst >= 2) + WREG32_SOC15(GC, dev_inst, regGRBM_MCM_ADDR, 0x4); + } } static void gfx_v9_4_3_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, -- cgit v1.2.3 From 0b02364e03caecbe30bdd9db0b3e6ba0196bb0ef Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Fri, 29 Jul 2022 14:56:59 +0530 Subject: drm/amdgpu: Conform to SET_UCONFIG_REG spec The packet expects only 16 bits register offset. Hence pass register offset which is local to each XCC. Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 43126f7b70ea..14b96b43f02c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -232,13 +232,14 @@ static void gfx_v9_4_3_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, static int gfx_v9_4_3_ring_test_ring(struct amdgpu_ring *ring) { + uint32_t scratch_reg0_offset, xcc_offset; struct amdgpu_device *adev = ring->adev; uint32_t tmp = 0; unsigned i; int r; - /* scratch_reg0_offset is 32bit even with full XCD config */ - uint32_t scratch_reg0_offset; + /* Use register offset which is local to XCC in the packet */ + xcc_offset = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0); scratch_reg0_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, ring->xcc_id), regSCRATCH_REG0); WREG32(scratch_reg0_offset, 0xCAFEDEAD); @@ -247,7 +248,7 @@ static int gfx_v9_4_3_ring_test_ring(struct amdgpu_ring *ring) return r; amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); - amdgpu_ring_write(ring, scratch_reg0_offset - PACKET3_SET_UCONFIG_REG_START); + amdgpu_ring_write(ring, xcc_offset - PACKET3_SET_UCONFIG_REG_START); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); -- cgit v1.2.3 From 9cd51d53695e1df134301c1bdb8a8f965506e35b Mon Sep 17 00:00:00 2001 From: Le Ma <le.ma@amd.com> Date: Thu, 25 Aug 2022 15:51:43 +0800 Subject: drm/amdgpu: drop redundant csb init for gfx943 It's not required for compute pipeline and will cause soft lockup on emulation due to long-time writing. Signed-off-by: Le Ma <le.ma@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 99 --------------------------------- 1 file changed, 99 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 14b96b43f02c..73f652ad5b00 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -429,75 +429,6 @@ static int gfx_v9_4_3_init_microcode(struct amdgpu_device *adev) return r; } -static u32 gfx_v9_4_3_get_csb_size(struct amdgpu_device *adev) -{ - u32 count = 0; - const struct cs_section_def *sect = NULL; - const struct cs_extent_def *ext = NULL; - - /* begin clear state */ - count += 2; - /* context control state */ - count += 3; - - for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { - for (ext = sect->section; ext->extent != NULL; ++ext) { - if (sect->id == SECT_CONTEXT) - count += 2 + ext->reg_count; - else - return 0; - } - } - - /* end clear state */ - count += 2; - /* clear state */ - count += 2; - - return count; -} - -static void gfx_v9_4_3_get_csb_buffer(struct amdgpu_device *adev, - volatile u32 *buffer) -{ - u32 count = 0, i; - const struct cs_section_def *sect = NULL; - const struct cs_extent_def *ext = NULL; - - if (adev->gfx.rlc.cs_data == NULL) - return; - if (buffer == NULL) - return; - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); - buffer[count++] = cpu_to_le32(0x80000000); - buffer[count++] = cpu_to_le32(0x80000000); - - for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { - for (ext = sect->section; ext->extent != NULL; ++ext) { - if (sect->id == SECT_CONTEXT) { - buffer[count++] = - cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); - buffer[count++] = cpu_to_le32(ext->reg_index - - PACKET3_SET_CONTEXT_REG_START); - for (i = 0; i < ext->reg_count; i++) - buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; - } - } - } - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); - buffer[count++] = cpu_to_le32(0); -} - static void gfx_v9_4_3_mec_fini(struct amdgpu_device *adev) { amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); @@ -1112,22 +1043,8 @@ static void gfx_v9_4_3_enable_save_restore_machine(struct amdgpu_device *adev, WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), RLC_SRM_CNTL, SRM_ENABLE, 1); } -static void gfx_v9_4_3_init_csb(struct amdgpu_device *adev, int xcc_id) -{ - adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); - /* csib */ - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_CSIB_ADDR_HI), - adev->gfx.rlc.clear_state_gpu_addr >> 32); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_CSIB_ADDR_LO), - adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_CSIB_LENGTH), - adev->gfx.rlc.clear_state_size); -} - static void gfx_v9_4_3_init_pg(struct amdgpu_device *adev, int xcc_id) { - gfx_v9_4_3_init_csb(adev, xcc_id); - /* * Rlc save restore list is workable since v2_1. * And it's needed by gfxoff feature. @@ -1219,20 +1136,6 @@ static void gfx_v9_4_3_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev) { - const struct cs_section_def *cs_data; - int r; - - adev->gfx.rlc.cs_data = gfx9_cs_data; - - cs_data = adev->gfx.rlc.cs_data; - - if (cs_data) { - /* init clear state block */ - r = amdgpu_gfx_rlc_init_csb(adev); - if (r) - return r; - } - /* init spm vmid with 0xf */ if (adev->gfx.rlc.funcs->update_spm_vmid) adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); @@ -2406,8 +2309,6 @@ static const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs = { .set_safe_mode = gfx_v9_4_3_set_safe_mode, .unset_safe_mode = gfx_v9_4_3_unset_safe_mode, .init = gfx_v9_4_3_rlc_init, - .get_csb_size = gfx_v9_4_3_get_csb_size, - .get_csb_buffer = gfx_v9_4_3_get_csb_buffer, .resume = gfx_v9_4_3_rlc_resume, .stop = gfx_v9_4_3_rlc_stop, .reset = gfx_v9_4_3_rlc_reset, -- cgit v1.2.3 From 955220b04d42c41050158fec0f53957f320b96f9 Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Thu, 6 Oct 2022 15:25:08 +0530 Subject: drm/amdgpu: Fix programming of initial XCP mode On initialization set the partition mode correctly to SPX (default) or any other user specified partition mode. Use switch_compute_partition API so that all settings are initialized correctly. Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 73f652ad5b00..b6b7dbb62448 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1891,6 +1891,11 @@ static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) return r; } + if (adev->gfx.partition_mode == + AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) + gfx_v9_4_3_switch_compute_partition( + adev, amdgpu_user_partt_mode); + /* set the virtual and physical id based on partition_mode */ gfx_v9_4_3_program_xcc_id(adev, i); @@ -2112,28 +2117,7 @@ static int gfx_v9_4_3_early_init(void *handle) num_xcc = NUM_XCC(adev->gfx.xcc_mask); - adev->gfx.partition_mode = amdgpu_user_partt_mode; - /* calculate the num_xcc_in_xcp for the partition mode*/ - switch (amdgpu_user_partt_mode) { - case AMDGPU_SPX_PARTITION_MODE: - adev->gfx.num_xcc_per_xcp = num_xcc; - break; - case AMDGPU_DPX_PARTITION_MODE: - adev->gfx.num_xcc_per_xcp = num_xcc / 2; - break; - case AMDGPU_TPX_PARTITION_MODE: - adev->gfx.num_xcc_per_xcp = num_xcc / 3; - break; - case AMDGPU_QPX_PARTITION_MODE: - adev->gfx.num_xcc_per_xcp = num_xcc / 4; - break; - case AMDGPU_CPX_PARTITION_MODE: - adev->gfx.num_xcc_per_xcp = 1; - break; - default: - adev->gfx.num_xcc_per_xcp = num_xcc; - break; - } + adev->gfx.partition_mode = AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE; adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), AMDGPU_MAX_COMPUTE_RINGS); -- cgit v1.2.3 From 5a8b26a88639d69453d592ee11c03a24e0b62b9a Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Mon, 19 Sep 2022 18:08:25 +0530 Subject: drm/amdgpu: Fix register access on GC v9.4.3 In GC v9.4.3 there are multiple XCCs. It's required to use physical instance number to get the right register offset. Use GET_INST API for that. Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index b6b7dbb62448..f1e6da4d62a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1600,7 +1600,7 @@ static int gfx_v9_4_3_kiq_init_register(struct amdgpu_ring *ring, int xcc_id) int j; /* disable wptr polling */ - WREG32_FIELD15_PREREG(GC, xcc_id, CP_PQ_WPTR_POLL_CNTL, EN, 0); + WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), CP_PQ_WPTR_POLL_CNTL, EN, 0); WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo); @@ -1693,7 +1693,7 @@ static int gfx_v9_4_3_kiq_init_register(struct amdgpu_ring *ring, int xcc_id) mqd->cp_hqd_active); if (ring->use_doorbell) - WREG32_FIELD15_PREREG(GC, xcc_id, CP_PQ_STATUS, DOORBELL_ENABLE, 1); + WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), CP_PQ_STATUS, DOORBELL_ENABLE, 1); return 0; } -- cgit v1.2.3 From 870d1e5afca58261a147e9080abb8cc75fccb849 Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Fri, 14 Oct 2022 13:06:18 +0530 Subject: drm/amdgpu: Fix interrupt handling in GFX v9.4.3 IH follows a different identification scheme for its clients. Get the right mapping of xcc instance from IH node id. Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Le Ma <le.ma@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 35 ++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index f1e6da4d62a3..c5ffcd70ec7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -2730,11 +2730,24 @@ static int gfx_v9_4_3_set_eop_interrupt_state(struct amdgpu_device *adev, return 0; } +static int gfx_v9_4_3_ih_to_xcc_inst(struct amdgpu_device *adev, int ih_node) +{ + int xcc; + + xcc = hweight8(adev->gfx.xcc_mask & GENMASK(ih_node / 2, 0)); + if (!xcc) { + dev_err(adev->dev, "Couldn't find xcc mapping from IH node"); + return -EINVAL; + } + + return xcc - 1; +} + static int gfx_v9_4_3_eop_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - int i, phys_id; + int i, xcc_id; u8 me_id, pipe_id, queue_id; struct amdgpu_ring *ring; @@ -2743,14 +2756,19 @@ static int gfx_v9_4_3_eop_irq(struct amdgpu_device *adev, pipe_id = (entry->ring_id & 0x03) >> 0; queue_id = (entry->ring_id & 0x70) >> 4; - phys_id = node_id_to_phys_map[entry->node_id]; + xcc_id = gfx_v9_4_3_ih_to_xcc_inst(adev, entry->node_id); + + if (xcc_id == -EINVAL) + return -EINVAL; switch (me_id) { case 0: case 1: case 2: for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i + phys_id * adev->gfx.num_compute_rings]; + ring = &adev->gfx.compute_ring + [i + + xcc_id * adev->gfx.num_compute_rings]; /* Per-queue interrupt is supported for MEC starting from VI. * The interrupt can only be enabled/disabled per pipe instead of per queue. */ @@ -2768,18 +2786,25 @@ static void gfx_v9_4_3_fault(struct amdgpu_device *adev, { u8 me_id, pipe_id, queue_id; struct amdgpu_ring *ring; - int i; + int i, xcc_id; me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; queue_id = (entry->ring_id & 0x70) >> 4; + xcc_id = gfx_v9_4_3_ih_to_xcc_inst(adev, entry->node_id); + + if (xcc_id == -EINVAL) + return; + switch (me_id) { case 0: case 1: case 2: for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; + ring = &adev->gfx.compute_ring + [i + + xcc_id * adev->gfx.num_compute_rings]; if (ring->me == me_id && ring->pipe == pipe_id && ring->queue == queue_id) drm_sched_fault(&ring->sched); -- cgit v1.2.3 From 553f973a0d7bbe95ea5da46979d926a9c0ada109 Mon Sep 17 00:00:00 2001 From: Tom St Denis <tom.stdenis@amd.com> Date: Tue, 11 Oct 2022 09:52:58 -0400 Subject: drm/amd/amdgpu: Update debugfs for XCC support (v3) This patch updates the 'regs2' interface for MMIO registers to add a new IOCTL command for a 'v2' state data that includes the XCC ID. This patch then updates amdgpu_gfx_select_se_sh() and amdgpu_gfx_select_me_pipe_q() (and the implementations in the gfx drivers) to support an additional parameter. This patch then creates a new debugfs interface "gprwave" which is a merge of shader GPR and wave status access. This new inteface uses an IOCTL to select banks as well as XCC identity. (v2) Fix missing xcc_id in wave_ind function (v3) Fix pm runtime calls and mutex locking (v4) Fix bad label Signed-off-by: Tom St Denis <tom.stdenis@amd.com> Reviewed-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 202 +++++++++++++++++++++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 10 +- drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h | 36 ++++- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 8 +- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 8 +- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 8 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 60 ++++----- 10 files changed, 275 insertions(+), 75 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index df94cd2c4b39..e94d0cf3f793 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -139,7 +139,7 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, sh_bank, instance_bank, 0); } else if (use_ring) { mutex_lock(&adev->srbm_mutex); - amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue, vmid); + amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue, vmid, 0); } if (pm_pg_lock) @@ -172,7 +172,7 @@ end: amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } else if (use_ring) { - amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0); + amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } @@ -263,14 +263,14 @@ static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 off } mutex_lock(&adev->grbm_idx_mutex); amdgpu_gfx_select_se_sh(adev, rd->id.grbm.se, - rd->id.grbm.sh, - rd->id.grbm.instance, 0); + rd->id.grbm.sh, + rd->id.grbm.instance, rd->id.xcc_id); } if (rd->id.use_srbm) { mutex_lock(&adev->srbm_mutex); amdgpu_gfx_select_me_pipe_q(adev, rd->id.srbm.me, rd->id.srbm.pipe, - rd->id.srbm.queue, rd->id.srbm.vmid); + rd->id.srbm.queue, rd->id.srbm.vmid, rd->id.xcc_id); } if (rd->id.pg_lock) @@ -296,12 +296,12 @@ static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 off } end: if (rd->id.use_grbm) { - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, rd->id.xcc_id); mutex_unlock(&adev->grbm_idx_mutex); } if (rd->id.use_srbm) { - amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0); + amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, rd->id.xcc_id); mutex_unlock(&adev->srbm_mutex); } @@ -320,19 +320,45 @@ end: static long amdgpu_debugfs_regs2_ioctl(struct file *f, unsigned int cmd, unsigned long data) { struct amdgpu_debugfs_regs2_data *rd = f->private_data; + struct amdgpu_debugfs_regs2_iocdata v1_data; int r; + mutex_lock(&rd->lock); + switch (cmd) { - case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE: - mutex_lock(&rd->lock); - r = copy_from_user(&rd->id, (struct amdgpu_debugfs_regs2_iocdata *)data, + case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE_V2: + r = copy_from_user(&rd->id, (struct amdgpu_debugfs_regs2_iocdata_v2 *)data, sizeof(rd->id)); - mutex_unlock(&rd->lock); - return r ? -EINVAL : 0; + if (r) + r = -EINVAL; + goto done; + case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE: + r = copy_from_user(&v1_data, (struct amdgpu_debugfs_regs2_iocdata *)data, + sizeof(v1_data)); + if (r) { + r = -EINVAL; + goto done; + } + goto v1_copy; default: - return -EINVAL; - } - return 0; + r = -EINVAL; + goto done; + } + +v1_copy: + rd->id.use_srbm = v1_data.use_srbm; + rd->id.use_grbm = v1_data.use_grbm; + rd->id.pg_lock = v1_data.pg_lock; + rd->id.grbm.se = v1_data.grbm.se; + rd->id.grbm.sh = v1_data.grbm.sh; + rd->id.grbm.instance = v1_data.grbm.instance; + rd->id.srbm.me = v1_data.srbm.me; + rd->id.srbm.pipe = v1_data.srbm.pipe; + rd->id.srbm.queue = v1_data.srbm.queue; + rd->id.xcc_id = 0; +done: + mutex_unlock(&rd->lock); + return r; } static ssize_t amdgpu_debugfs_regs2_read(struct file *f, char __user *buf, size_t size, loff_t *pos) @@ -345,6 +371,135 @@ static ssize_t amdgpu_debugfs_regs2_write(struct file *f, const char __user *buf return amdgpu_debugfs_regs2_op(f, (char __user *)buf, *pos, size, 1); } +static int amdgpu_debugfs_gprwave_open(struct inode *inode, struct file *file) +{ + struct amdgpu_debugfs_gprwave_data *rd; + + rd = kzalloc(sizeof *rd, GFP_KERNEL); + if (!rd) + return -ENOMEM; + rd->adev = file_inode(file)->i_private; + file->private_data = rd; + mutex_init(&rd->lock); + + return 0; +} + +static int amdgpu_debugfs_gprwave_release(struct inode *inode, struct file *file) +{ + struct amdgpu_debugfs_gprwave_data *rd = file->private_data; + mutex_destroy(&rd->lock); + kfree(file->private_data); + return 0; +} + +static ssize_t amdgpu_debugfs_gprwave_read(struct file *f, char __user *buf, size_t size, loff_t *pos) +{ + struct amdgpu_debugfs_gprwave_data *rd = f->private_data; + struct amdgpu_device *adev = rd->adev; + ssize_t result = 0; + int r; + uint32_t *data, x; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + r = amdgpu_virt_enable_access_debugfs(adev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + data = kcalloc(1024, sizeof(*data), GFP_KERNEL); + if (!data) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + amdgpu_virt_disable_access_debugfs(adev); + return -ENOMEM; + } + + /* switch to the specific se/sh/cu */ + mutex_lock(&adev->grbm_idx_mutex); + amdgpu_gfx_select_se_sh(adev, rd->id.se, rd->id.sh, rd->id.cu, rd->id.xcc_id); + + if (!rd->id.gpr_or_wave) { + x = 0; + if (adev->gfx.funcs->read_wave_data) + adev->gfx.funcs->read_wave_data(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, data, &x); + } else { + x = size >> 2; + if (rd->id.gpr.vpgr_or_sgpr) { + if (adev->gfx.funcs->read_wave_vgprs) + adev->gfx.funcs->read_wave_vgprs(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, rd->id.gpr.thread, *pos, size>>2, data); + } else { + if (adev->gfx.funcs->read_wave_sgprs) + adev->gfx.funcs->read_wave_sgprs(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, *pos, size>>2, data); + } + } + + amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, rd->id.xcc_id); + mutex_unlock(&adev->grbm_idx_mutex); + + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + + if (!x) { + result = -EINVAL; + goto done; + } + + while (size && (*pos < x * 4)) { + uint32_t value; + + value = data[*pos >> 2]; + r = put_user(value, (uint32_t *)buf); + if (r) { + result = r; + goto done; + } + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + +done: + amdgpu_virt_disable_access_debugfs(adev); + kfree(data); + return result; +} + +static long amdgpu_debugfs_gprwave_ioctl(struct file *f, unsigned int cmd, unsigned long data) +{ + struct amdgpu_debugfs_gprwave_data *rd = f->private_data; + int r; + + mutex_lock(&rd->lock); + + switch (cmd) { + case AMDGPU_DEBUGFS_GPRWAVE_IOC_SET_STATE: + r = copy_from_user(&rd->id, (struct amdgpu_debugfs_gprwave_iocdata *)data, sizeof rd->id); + if (r) + return r ? -EINVAL : 0; + goto done; + default: + r = -EINVAL; + goto done; + } + +done: + mutex_unlock(&rd->lock); + return r; +} + + + /** * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register @@ -913,7 +1068,7 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, x = 0; if (adev->gfx.funcs->read_wave_data) - adev->gfx.funcs->read_wave_data(adev, simd, wave, data, &x); + adev->gfx.funcs->read_wave_data(adev, 0, simd, wave, data, &x); amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0); mutex_unlock(&adev->grbm_idx_mutex); @@ -1007,10 +1162,10 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, if (bank == 0) { if (adev->gfx.funcs->read_wave_vgprs) - adev->gfx.funcs->read_wave_vgprs(adev, simd, wave, thread, offset, size>>2, data); + adev->gfx.funcs->read_wave_vgprs(adev, 0, simd, wave, thread, offset, size>>2, data); } else { if (adev->gfx.funcs->read_wave_sgprs) - adev->gfx.funcs->read_wave_sgprs(adev, simd, wave, offset, size>>2, data); + adev->gfx.funcs->read_wave_sgprs(adev, 0, simd, wave, offset, size>>2, data); } amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0); @@ -1341,6 +1496,15 @@ static const struct file_operations amdgpu_debugfs_regs2_fops = { .llseek = default_llseek }; +static const struct file_operations amdgpu_debugfs_gprwave_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = amdgpu_debugfs_gprwave_ioctl, + .read = amdgpu_debugfs_gprwave_read, + .open = amdgpu_debugfs_gprwave_open, + .release = amdgpu_debugfs_gprwave_release, + .llseek = default_llseek +}; + static const struct file_operations amdgpu_debugfs_regs_fops = { .owner = THIS_MODULE, .read = amdgpu_debugfs_regs_read, @@ -1418,6 +1582,7 @@ static const struct file_operations amdgpu_debugfs_gfxoff_residency_fops = { static const struct file_operations *debugfs_regs[] = { &amdgpu_debugfs_regs_fops, &amdgpu_debugfs_regs2_fops, + &amdgpu_debugfs_gprwave_fops, &amdgpu_debugfs_regs_didt_fops, &amdgpu_debugfs_regs_pcie_fops, &amdgpu_debugfs_regs_smc_fops, @@ -1434,6 +1599,7 @@ static const struct file_operations *debugfs_regs[] = { static const char * const debugfs_regs_names[] = { "amdgpu_regs", "amdgpu_regs2", + "amdgpu_gprwave", "amdgpu_regs_didt", "amdgpu_regs_pcie", "amdgpu_regs_smc", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 93f9875154db..2b2d75763875 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -247,16 +247,16 @@ struct amdgpu_gfx_funcs { uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev); void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance, int xcc_id); - void (*read_wave_data)(struct amdgpu_device *adev, uint32_t simd, + void (*read_wave_data)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields); - void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd, + void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst); - void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd, + void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst); void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe, - u32 queue, u32 vmid); + u32 queue, u32 vmid, u32 xcc_id); void (*init_spm_golden)(struct amdgpu_device *adev); void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable); int (*get_gfx_shadow_info)(struct amdgpu_device *adev, @@ -405,7 +405,7 @@ struct amdgpu_gfx { #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev)) #define amdgpu_gfx_select_se_sh(adev, se, sh, instance, xcc_id) ((adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance), (xcc_id))) -#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid)) +#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid, xcc_id) ((adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid), (xcc_id))) #define amdgpu_gfx_init_spm_golden(adev) (adev)->gfx.funcs->init_spm_golden((adev)) #define amdgpu_gfx_get_gfx_shadow_info(adev, si) ((adev)->gfx.funcs->get_gfx_shadow_info((adev), (si))) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h index 919d9d401750..107f9bb0e24f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h @@ -35,17 +35,51 @@ struct amdgpu_debugfs_regs2_iocdata { } srbm; }; +struct amdgpu_debugfs_regs2_iocdata_v2 { + __u32 use_srbm, use_grbm, pg_lock; + struct { + __u32 se, sh, instance; + } grbm; + struct { + __u32 me, pipe, queue, vmid; + } srbm; + u32 xcc_id; +}; + +struct amdgpu_debugfs_gprwave_iocdata { + u32 gpr_or_wave, se, sh, cu, wave, simd, xcc_id; + struct { + u32 thread, vpgr_or_sgpr; + } gpr; +}; + /* * MMIO debugfs state data (per file* handle) */ struct amdgpu_debugfs_regs2_data { struct amdgpu_device *adev; struct mutex lock; - struct amdgpu_debugfs_regs2_iocdata id; + struct amdgpu_debugfs_regs2_iocdata_v2 id; +}; + +struct amdgpu_debugfs_gprwave_data { + struct amdgpu_device *adev; + struct mutex lock; + struct amdgpu_debugfs_gprwave_iocdata id; }; enum AMDGPU_DEBUGFS_REGS2_CMDS { AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE=0, + AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE_V2, +}; + +enum AMDGPU_DEBUGFS_GPRWAVE_CMDS { + AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE=0, }; +//reg2 interface #define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE, struct amdgpu_debugfs_regs2_iocdata) +#define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE_V2 _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE_V2, struct amdgpu_debugfs_regs2_iocdata_v2) + +//gprwave interface +#define AMDGPU_DEBUGFS_GPRWAVE_IOC_SET_STATE _IOWR(0x20, AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE, struct amdgpu_debugfs_gprwave_iocdata) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 7b585141e10e..89158c72753e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4291,7 +4291,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave, *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); } -static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) +static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) { /* in gfx10 the SIMD_ID is specified as part of the INSTANCE * field when performing a select_se_sh so it should be @@ -4318,7 +4318,7 @@ static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE); } -static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst) { @@ -4329,7 +4329,7 @@ static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, dst); } -static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst) @@ -4340,7 +4340,7 @@ static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, } static void gfx_v10_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q, u32 vm) + u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) { nv_grbm_select(adev, me, pipe, q, vm); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 790df2cc3480..4b7224de879e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -765,7 +765,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave, *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA); } -static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) +static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) { /* in gfx11 the SIMD_ID is specified as part of the INSTANCE * field when performing a select_se_sh so it should be @@ -791,7 +791,7 @@ static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE); } -static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst) { @@ -802,7 +802,7 @@ static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, dst); } -static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst) @@ -813,7 +813,7 @@ static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, } static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q, u32 vm) + u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) { soc21_grbm_select(adev, me, pipe, q, vm); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 7cb72bf1acdd..809558c718e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -2968,7 +2968,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, *(out++) = RREG32(mmSQ_IND_DATA); } -static void gfx_v6_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) +static void gfx_v6_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) { /* type 0 wave data */ dst[(*no_fields)++] = 0; @@ -2993,7 +2993,7 @@ static void gfx_v6_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); } -static void gfx_v6_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v6_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst) { @@ -3003,7 +3003,7 @@ static void gfx_v6_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, } static void gfx_v6_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q, u32 vm) + u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) { DRM_INFO("Not implemented\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index d56dda5fc588..0f0c12bbe228 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -4112,7 +4112,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, *(out++) = RREG32(mmSQ_IND_DATA); } -static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) +static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) { /* type 0 wave data */ dst[(*no_fields)++] = 0; @@ -4137,7 +4137,7 @@ static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); } -static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst) { @@ -4147,7 +4147,7 @@ static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, } static void gfx_v7_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q, u32 vm) + u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) { cik_srbm_select(adev, me, pipe, q, vm); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 2ae7f167985f..6d0589dc1d6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -3419,7 +3419,7 @@ static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, } static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q, u32 vm) + u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) { vi_srbm_select(adev, me, pipe, q, vm); } @@ -5217,7 +5217,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, *(out++) = RREG32(mmSQ_IND_DATA); } -static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) +static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) { /* type 0 wave data */ dst[(*no_fields)++] = 0; @@ -5242,7 +5242,7 @@ static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); } -static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index da69177dc76f..cc005e3bcd40 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1788,7 +1788,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); } -static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) +static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) { /* type 1 wave data */ dst[(*no_fields)++] = 1; @@ -1809,7 +1809,7 @@ static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); } -static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst) { @@ -1818,7 +1818,7 @@ static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, start + SQIND_WAVE_SGPRS_OFFSET, size, dst); } -static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst) @@ -1829,7 +1829,7 @@ static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, } static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q, u32 vm) + u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) { soc15_grbm_select(adev, me, pipe, q, vm, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index c5ffcd70ec7b..76e3571ec5c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -536,21 +536,21 @@ static void gfx_v9_4_3_select_se_sh(struct amdgpu_device *adev, WREG32_SOC15_RLC_SHADOW_EX(reg, GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX, data); } -static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) +static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t address) { - WREG32_SOC15_RLC(GC, GET_INST(GC, 0), regSQ_IND_INDEX, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSQ_IND_INDEX, (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | (address << SQ_IND_INDEX__INDEX__SHIFT) | (SQ_IND_INDEX__FORCE_READ_MASK)); - return RREG32_SOC15(GC, GET_INST(GC, 0), regSQ_IND_DATA); + return RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_DATA); } -static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, +static void wave_read_regs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t regno, uint32_t num, uint32_t *out) { - WREG32_SOC15_RLC(GC, GET_INST(GC, 0), regSQ_IND_INDEX, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSQ_IND_INDEX, (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | (regno << SQ_IND_INDEX__INDEX__SHIFT) | @@ -558,53 +558,53 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, (SQ_IND_INDEX__FORCE_READ_MASK) | (SQ_IND_INDEX__AUTO_INCR_MASK)); while (num--) - *(out++) = RREG32_SOC15(GC, GET_INST(GC, 0), regSQ_IND_DATA); + *(out++) = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_DATA); } static void gfx_v9_4_3_read_wave_data(struct amdgpu_device *adev, - uint32_t simd, uint32_t wave, + uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) { /* type 1 wave data */ dst[(*no_fields)++] = 1; - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); -} - -static void gfx_v9_4_3_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_STATUS); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_LO); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_HI); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_LO); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_HI); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_HW_ID); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW0); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW1); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_GPR_ALLOC); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_LDS_ALLOC); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_TRAPSTS); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_IB_STS); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_IB_DBG0); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_M0); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_MODE); +} + +static void gfx_v9_4_3_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst) { - wave_read_regs(adev, simd, wave, 0, + wave_read_regs(adev, xcc_id, simd, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size, dst); } -static void gfx_v9_4_3_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v9_4_3_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst) { - wave_read_regs(adev, simd, wave, thread, + wave_read_regs(adev, xcc_id, simd, wave, thread, start + SQIND_WAVE_VGPRS_OFFSET, size, dst); } static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q, u32 vm) + u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) { - soc15_grbm_select(adev, me, pipe, q, vm, GET_INST(GC, 0)); + soc15_grbm_select(adev, me, pipe, q, vm, GET_INST(GC, xcc_id)); } static enum amdgpu_gfx_partition -- cgit v1.2.3 From ebadc1061e045e961339e0df7b8a07f3e589579c Mon Sep 17 00:00:00 2001 From: Hawking Zhang <Hawking.Zhang@amd.com> Date: Mon, 7 Nov 2022 11:11:29 +0800 Subject: drm/amdgpu: retire render backend setup from gfx_v9_4_3 gfx v9_4_3 only support compute. render backend doesn't need to be involved in any compute shader execution. Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com> Reviewed-by: Le Ma <le.ma@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 44 --------------------------------- 1 file changed, 44 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 76e3571ec5c8..6ed97371ff1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -888,46 +888,6 @@ static int gfx_v9_4_3_sw_fini(void *handle) return 0; } -static u32 gfx_v9_4_3_get_rb_active_bitmap(struct amdgpu_device *adev) -{ - u32 data, mask; - - data = RREG32_SOC15(GC, GET_INST(GC, 0), regCC_RB_BACKEND_DISABLE); - data |= RREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_RB_BACKEND_DISABLE); - - data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; - data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; - - mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / - adev->gfx.config.max_sh_per_se); - - return (~data) & mask; -} - -static void gfx_v9_4_3_setup_rb(struct amdgpu_device *adev, int xcc_id) -{ - int i, j; - u32 data; - u32 active_rbs = 0; - u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / - adev->gfx.config.max_sh_per_se; - - mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { - for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff, xcc_id); - data = gfx_v9_4_3_get_rb_active_bitmap(adev); - active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * - rb_bitmap_width_per_sh); - } - } - gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, xcc_id); - mutex_unlock(&adev->grbm_idx_mutex); - - adev->gfx.config.backend_enable_mask = active_rbs; - adev->gfx.config.num_rbs = hweight32(active_rbs); -} - #define DEFAULT_SH_MEM_BASES (0x6000) static void gfx_v9_4_3_init_compute_vmid(struct amdgpu_device *adev, int xcc_id) { @@ -991,10 +951,6 @@ static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) int i, j, num_xcc; num_xcc = NUM_XCC(adev->gfx.xcc_mask); - for (i = 0; i < num_xcc; i++) { - WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), GRBM_CNTL, READ_TIMEOUT, 0xff); - gfx_v9_4_3_setup_rb(adev, i); - } gfx_v9_4_3_get_cu_info(adev, &adev->gfx.cu_info); adev->gfx.config.db_debug2 = RREG32_SOC15(GC, GET_INST(GC, 0), regDB_DEBUG2); -- cgit v1.2.3 From ea2d2f8ececdd4c778b66e19b82ce278dfc5e1c4 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com> Date: Tue, 5 Apr 2022 13:00:13 -0400 Subject: drm/amdgpu: detect current GPU memory partition mode - Add helpers to detect the current GPU memory partition. - Add current memory partition mode sysfs node. Tested-by: Ori Messinger <Ori.Messinger@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 25 +++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 11 +++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 11 +++++++++++ drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c | 12 ++++++++++++ 5 files changed, 60 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 2f6d85090b55..c2e92664031b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1200,6 +1200,24 @@ static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev, return sysfs_emit(buf, "%s\n", partition_mode); } +static ssize_t amdgpu_gfx_get_current_memory_partition(struct device *dev, + struct device_attribute *addr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + enum amdgpu_memory_partition mode; + static const char *partition_modes[] = { + "UNKNOWN", "NPS1", "NPS2", "NPS4", "NPS8" + }; + BUILD_BUG_ON(ARRAY_SIZE(partition_modes) <= AMDGPU_NPS8_PARTITION_MODE); + + mode = min((int)adev->gfx.funcs->query_mem_partition_mode(adev), + AMDGPU_NPS8_PARTITION_MODE); + + return sysfs_emit(buf, "%s\n", partition_modes[mode]); +} + static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev, struct device_attribute *addr, const char *buf, size_t count) @@ -1307,6 +1325,9 @@ static DEVICE_ATTR(current_compute_partition, S_IRUGO | S_IWUSR, static DEVICE_ATTR(available_compute_partition, S_IRUGO, amdgpu_gfx_get_available_compute_partition, NULL); +static DEVICE_ATTR(current_memory_partition, S_IRUGO, + amdgpu_gfx_get_current_memory_partition, NULL); + int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev) { int r; @@ -1319,5 +1340,9 @@ int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev) if (r) return r; + r = device_create_file(adev->dev, &dev_attr_current_memory_partition); + if (r) + return r; + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 789115f5505f..e37501685fa9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -71,6 +71,14 @@ enum amdgpu_pkg_type { AMDGPU_PKG_TYPE_UNKNOWN, }; +enum amdgpu_memory_partition { + UNKNOWN_MEMORY_PARTITION_MODE = 0, + AMDGPU_NPS1_PARTITION_MODE = 1, + AMDGPU_NPS2_PARTITION_MODE = 2, + AMDGPU_NPS4_PARTITION_MODE = 3, + AMDGPU_NPS8_PARTITION_MODE = 4, +}; + struct amdgpu_mec { struct amdgpu_bo *hpd_eop_obj; u64 hpd_eop_gpu_addr; @@ -268,6 +276,8 @@ struct amdgpu_gfx_funcs { struct amdgpu_gfx_shadow_info *shadow_info); enum amdgpu_gfx_partition (*query_partition_mode)(struct amdgpu_device *adev); + enum amdgpu_memory_partition + (*query_mem_partition_mode)(struct amdgpu_device *adev); int (*switch_partition_mode)(struct amdgpu_device *adev, enum amdgpu_gfx_partition mode); }; @@ -404,6 +414,7 @@ struct amdgpu_gfx { enum amdgpu_gfx_partition partition_mode; uint16_t xcc_mask; + enum amdgpu_memory_partition mem_partition_mode; uint32_t num_xcc_per_xcp; struct mutex partition_mutex; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index 8fa3a1f3b181..639b86c4d664 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -97,6 +97,7 @@ struct amdgpu_nbio_funcs { void (*clear_doorbell_interrupt)(struct amdgpu_device *adev); u32 (*get_rom_offset)(struct amdgpu_device *adev); u32 (*get_compute_partition_mode)(struct amdgpu_device *adev); + u32 (*get_memory_partition_mode)(struct amdgpu_device *adev); void (*set_compute_partition_mode)(struct amdgpu_device *adev, enum amdgpu_gfx_partition mode); }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 6ed97371ff1e..3c19c5a0069d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -606,6 +606,16 @@ static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev, { soc15_grbm_select(adev, me, pipe, q, vm, GET_INST(GC, xcc_id)); } +static enum amdgpu_memory_partition +gfx_v9_4_3_query_memory_partition(struct amdgpu_device *adev) +{ + enum amdgpu_memory_partition mode = UNKNOWN_MEMORY_PARTITION_MODE; + + if (adev->nbio.funcs->get_memory_partition_mode) + mode = adev->nbio.funcs->get_memory_partition_mode(adev); + + return mode; +} static enum amdgpu_gfx_partition gfx_v9_4_3_query_compute_partition(struct amdgpu_device *adev) @@ -675,6 +685,7 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { .select_me_pipe_q = &gfx_v9_4_3_select_me_pipe_q, .query_partition_mode = &gfx_v9_4_3_query_compute_partition, .switch_partition_mode = &gfx_v9_4_3_switch_compute_partition, + .query_mem_partition_mode = &gfx_v9_4_3_query_memory_partition, }; static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c index 78eab4d48e38..fa4b423c399b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c @@ -30,6 +30,8 @@ #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" #include <uapi/linux/kfd_ioctl.h> +#define NPS_MODE_MASK 0x000000FFL + static void nbio_v7_9_remap_hdp_registers(struct amdgpu_device *adev) { WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL, @@ -406,6 +408,15 @@ static void nbio_v7_9_set_compute_partition_mode(struct amdgpu_device *adev, WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_COMPUTE_STATUS, tmp); } +static enum amdgpu_memory_partition nbio_v7_9_get_memory_partition_mode(struct amdgpu_device *adev) +{ + u32 tmp; + tmp = RREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_MEM_STATUS); + tmp = REG_GET_FIELD(tmp, BIF_BX_PF0_PARTITION_MEM_STATUS, NPS_MODE); + + return ffs(tmp); +} + const struct amdgpu_nbio_funcs nbio_v7_9_funcs = { .get_hdp_flush_req_offset = nbio_v7_9_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_9_get_hdp_flush_done_offset, @@ -428,4 +439,5 @@ const struct amdgpu_nbio_funcs nbio_v7_9_funcs = { .remap_hdp_registers = nbio_v7_9_remap_hdp_registers, .get_compute_partition_mode = nbio_v7_9_get_compute_partition_mode, .set_compute_partition_mode = nbio_v7_9_set_compute_partition_mode, + .get_memory_partition_mode = nbio_v7_9_get_memory_partition_mode, }; -- cgit v1.2.3 From d55391c2138c1a5bcd1316ccedf1835dd067d568 Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Wed, 9 Nov 2022 19:47:38 +0530 Subject: drm/amdgpu: Revert programming of CP_PSP_XCP_CTL Programming of this register is taken care by PSP. Incorrect programming causes CP not to detect its XCC. Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reported-by: Alexander Turek <Alexander.Turek@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 3c19c5a0069d..9d7852ffd708 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1055,9 +1055,6 @@ static void gfx_v9_4_3_program_xcc_id(struct amdgpu_device *adev, int xcc_id) tmp = tmp | (adev->gfx.num_xcc_per_xcp << REG_FIELD_SHIFT(CP_HYP_XCP_CTL, NUM_XCC_IN_XCP)); WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HYP_XCP_CTL, tmp); - tmp = xcc_id << REG_FIELD_SHIFT(CP_PSP_XCP_CTL, PHYSICAL_XCC_ID); - tmp = tmp | (xcc_id << REG_FIELD_SHIFT(CP_PSP_XCP_CTL, XCC_DIE_ID)); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_PSP_XCP_CTL, tmp); break; default: break; -- cgit v1.2.3 From 993d218f82211b1e17fcea7a5f727fa16efba353 Mon Sep 17 00:00:00 2001 From: Shiwu Zhang <shiwu.zhang@amd.com> Date: Mon, 14 Nov 2022 15:52:19 +0800 Subject: drm/amdgpu: remove partition attributes sys file for gfx_v9_4_3 For driver de-init like rmmod operations those partition specific attributes need to be removed accordingly. Signed-off-by: Shiwu Zhang <shiwu.zhang@amd.com> Reviewed-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 7 +++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 1 + 3 files changed, 9 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 8aea9e023275..5ff49737d7c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1345,3 +1345,10 @@ int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev) return 0; } + +void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev) +{ + device_remove_file(adev->dev, &dev_attr_current_compute_partition); + device_remove_file(adev->dev, &dev_attr_available_compute_partition); + device_remove_file(adev->dev, &dev_attr_current_memory_partition); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index e37501685fa9..023c5b08929f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -502,4 +502,5 @@ int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev, bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id); int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev); +void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 9d7852ffd708..75ad5176e99e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -895,6 +895,7 @@ static int gfx_v9_4_3_sw_fini(void *handle) gfx_v9_4_3_mec_fini(adev); amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); gfx_v9_4_3_free_microcode(adev); + amdgpu_gfx_sysfs_fini(adev); return 0; } -- cgit v1.2.3 From 880f8b3f8032a022c85351857ba7020fd3345592 Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Wed, 20 Jul 2022 13:45:30 +0530 Subject: drm/amdgpu: Rename xcc specific functions Add 'xcc' prefix to xcc specific functions to distinguish from IP block functions. Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 205 ++++++++++++++++++-------------- 1 file changed, 113 insertions(+), 92 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 75ad5176e99e..93420c7db93c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -506,11 +506,8 @@ static int gfx_v9_4_3_mec_init(struct amdgpu_device *adev) return 0; } -static void gfx_v9_4_3_select_se_sh(struct amdgpu_device *adev, - u32 se_num, - u32 sh_num, - u32 instance, - int xcc_id) +static void gfx_v9_4_3_xcc_select_se_sh(struct amdgpu_device *adev, u32 se_num, + u32 sh_num, u32 instance, int xcc_id) { u32 data; @@ -678,7 +675,7 @@ static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev, static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { .get_gpu_clock_counter = &gfx_v9_4_3_get_gpu_clock_counter, - .select_se_sh = &gfx_v9_4_3_select_se_sh, + .select_se_sh = &gfx_v9_4_3_xcc_select_se_sh, .read_wave_data = &gfx_v9_4_3_read_wave_data, .read_wave_sgprs = &gfx_v9_4_3_read_wave_sgprs, .read_wave_vgprs = &gfx_v9_4_3_read_wave_vgprs, @@ -901,7 +898,8 @@ static int gfx_v9_4_3_sw_fini(void *handle) } #define DEFAULT_SH_MEM_BASES (0x6000) -static void gfx_v9_4_3_init_compute_vmid(struct amdgpu_device *adev, int xcc_id) +static void gfx_v9_4_3_xcc_init_compute_vmid(struct amdgpu_device *adev, + int xcc_id) { int i; uint32_t sh_mem_config; @@ -939,7 +937,7 @@ static void gfx_v9_4_3_init_compute_vmid(struct amdgpu_device *adev, int xcc_id) } } -static void gfx_v9_4_3_init_gds_vmid(struct amdgpu_device *adev, int xcc_id) +static void gfx_v9_4_3_xcc_init_gds_vmid(struct amdgpu_device *adev, int xcc_id) { int vmid; @@ -1000,25 +998,26 @@ static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); for (i = 0; i < num_xcc; i++) { - gfx_v9_4_3_init_compute_vmid(adev, i); - gfx_v9_4_3_init_gds_vmid(adev, i); + gfx_v9_4_3_xcc_init_compute_vmid(adev, i); + gfx_v9_4_3_xcc_init_gds_vmid(adev, i); } } -static void gfx_v9_4_3_enable_save_restore_machine(struct amdgpu_device *adev, - int xcc_id) +static void +gfx_v9_4_3_xcc_enable_save_restore_machine(struct amdgpu_device *adev, + int xcc_id) { WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), RLC_SRM_CNTL, SRM_ENABLE, 1); } -static void gfx_v9_4_3_init_pg(struct amdgpu_device *adev, int xcc_id) +static void gfx_v9_4_3_xcc_init_pg(struct amdgpu_device *adev, int xcc_id) { /* * Rlc save restore list is workable since v2_1. * And it's needed by gfxoff feature. */ if (adev->gfx.rlc.is_rlc_v2_1) - gfx_v9_4_3_enable_save_restore_machine(adev, xcc_id); + gfx_v9_4_3_xcc_enable_save_restore_machine(adev, xcc_id); if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG | @@ -1031,7 +1030,7 @@ static void gfx_v9_4_3_init_pg(struct amdgpu_device *adev, int xcc_id) } } -static void gfx_v9_4_3_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id) +static void gfx_v9_4_3_xcc_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id) { uint32_t data; @@ -1040,7 +1039,8 @@ static void gfx_v9_4_3_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id) WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPC_PSP_DEBUG, data); } -static void gfx_v9_4_3_program_xcc_id(struct amdgpu_device *adev, int xcc_id) +static void gfx_v9_4_3_xcc_program_xcc_id(struct amdgpu_device *adev, + int xcc_id) { uint32_t tmp = 0; int num_xcc; @@ -1074,7 +1074,7 @@ static bool gfx_v9_4_3_is_rlc_enabled(struct amdgpu_device *adev) return true; } -static void gfx_v9_4_3_set_safe_mode(struct amdgpu_device *adev, int xcc_id) +static void gfx_v9_4_3_xcc_set_safe_mode(struct amdgpu_device *adev, int xcc_id) { uint32_t data; unsigned i; @@ -1091,7 +1091,8 @@ static void gfx_v9_4_3_set_safe_mode(struct amdgpu_device *adev, int xcc_id) } } -static void gfx_v9_4_3_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) +static void gfx_v9_4_3_xcc_unset_safe_mode(struct amdgpu_device *adev, + int xcc_id) { uint32_t data; @@ -1108,8 +1109,8 @@ static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev) return 0; } -static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev, - int xcc_id) +static void gfx_v9_4_3_xcc_wait_for_rlc_serdes(struct amdgpu_device *adev, + int xcc_id) { u32 i, j, k; u32 mask; @@ -1117,16 +1118,17 @@ static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev, mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff, xcc_id); + gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, + xcc_id); for (k = 0; k < adev->usec_timeout; k++) { if (RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_SERDES_CU_MASTER_BUSY) == 0) break; udelay(1); } if (k == adev->usec_timeout) { - gfx_v9_4_3_select_se_sh(adev, 0xffffffff, - 0xffffffff, 0xffffffff, - xcc_id); + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, + 0xffffffff, + 0xffffffff, xcc_id); mutex_unlock(&adev->grbm_idx_mutex); DRM_INFO("Timeout wait for RLC serdes %u,%u\n", i, j); @@ -1134,7 +1136,8 @@ static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev, } } } - gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, xcc_id); + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, + xcc_id); mutex_unlock(&adev->grbm_idx_mutex); mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | @@ -1148,8 +1151,8 @@ static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev, } } -static void gfx_v9_4_3_enable_gui_idle_interrupt(struct amdgpu_device *adev, - bool enable, int xcc_id) +static void gfx_v9_4_3_xcc_enable_gui_idle_interrupt(struct amdgpu_device *adev, + bool enable, int xcc_id) { u32 tmp; @@ -1171,8 +1174,8 @@ static void gfx_v9_4_3_rlc_stop(struct amdgpu_device *adev) num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < num_xcc; i++) { WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), RLC_CNTL, RLC_ENABLE_F32, 0); - gfx_v9_4_3_enable_gui_idle_interrupt(adev, false, i); - gfx_v9_4_3_wait_for_rlc_serdes(adev, i); + gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, false, i); + gfx_v9_4_3_xcc_wait_for_rlc_serdes(adev, i); } } @@ -1203,7 +1206,7 @@ static void gfx_v9_4_3_rlc_start(struct amdgpu_device *adev) /* carrizo do enable cp interrupt after cp inited */ if (!(adev->flags & AMD_IS_APU)) { - gfx_v9_4_3_enable_gui_idle_interrupt(adev, true, i); + gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, true, i); udelay(50); } @@ -1226,7 +1229,8 @@ static void gfx_v9_4_3_rlc_start(struct amdgpu_device *adev) } } -static int gfx_v9_4_3_rlc_load_microcode(struct amdgpu_device *adev, int xcc_id) +static int gfx_v9_4_3_xcc_rlc_load_microcode(struct amdgpu_device *adev, + int xcc_id) { const struct rlc_firmware_header_v2_0 *hdr; const __le32 *fw_data; @@ -1267,11 +1271,11 @@ static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev) /* disable CG */ WREG32_SOC15(GC, GET_INST(GC, i), regRLC_CGCG_CGLS_CTRL, 0); - gfx_v9_4_3_init_pg(adev, i); + gfx_v9_4_3_xcc_init_pg(adev, i); if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { /* legacy rlc firmware loading */ - r = gfx_v9_4_3_rlc_load_microcode(adev, i); + r = gfx_v9_4_3_xcc_rlc_load_microcode(adev, i); if (r) return r; } @@ -1341,8 +1345,8 @@ static bool gfx_v9_4_3_is_rlcg_access_range(struct amdgpu_device *adev, u32 offs ARRAY_SIZE(rlcg_access_gc_9_4_3)); } -static void gfx_v9_4_3_cp_compute_enable(struct amdgpu_device *adev, - bool enable, int xcc_id) +static void gfx_v9_4_3_xcc_cp_compute_enable(struct amdgpu_device *adev, + bool enable, int xcc_id) { if (enable) { WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MEC_CNTL, 0); @@ -1354,8 +1358,8 @@ static void gfx_v9_4_3_cp_compute_enable(struct amdgpu_device *adev, udelay(50); } -static int gfx_v9_4_3_cp_compute_load_microcode(struct amdgpu_device *adev, - int xcc_id) +static int gfx_v9_4_3_xcc_cp_compute_load_microcode(struct amdgpu_device *adev, + int xcc_id) { const struct gfx_firmware_header_v1_0 *mec_hdr; const __le32 *fw_data; @@ -1367,7 +1371,7 @@ static int gfx_v9_4_3_cp_compute_load_microcode(struct amdgpu_device *adev, if (!adev->gfx.mec_fw) return -EINVAL; - gfx_v9_4_3_cp_compute_enable(adev, false, xcc_id); + gfx_v9_4_3_xcc_cp_compute_enable(adev, false, xcc_id); mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); @@ -1403,7 +1407,7 @@ static int gfx_v9_4_3_cp_compute_load_microcode(struct amdgpu_device *adev, } /* KIQ functions */ -static void gfx_v9_4_3_kiq_setting(struct amdgpu_ring *ring, int xcc_id) +static void gfx_v9_4_3_xcc_kiq_setting(struct amdgpu_ring *ring, int xcc_id) { uint32_t tmp; struct amdgpu_device *adev = ring->adev; @@ -1558,7 +1562,8 @@ static int gfx_v9_4_3_mqd_init(struct amdgpu_ring *ring) return 0; } -static int gfx_v9_4_3_kiq_init_register(struct amdgpu_ring *ring, int xcc_id) +static int gfx_v9_4_3_xcc_kiq_init_register(struct amdgpu_ring *ring, + int xcc_id) { struct amdgpu_device *adev = ring->adev; struct v9_mqd *mqd = ring->mqd_ptr; @@ -1663,7 +1668,8 @@ static int gfx_v9_4_3_kiq_init_register(struct amdgpu_ring *ring, int xcc_id) return 0; } -static int gfx_v9_4_3_kiq_fini_register(struct amdgpu_ring *ring, int xcc_id) +static int gfx_v9_4_3_xcc_kiq_fini_register(struct amdgpu_ring *ring, + int xcc_id) { struct amdgpu_device *adev = ring->adev; int j; @@ -1702,13 +1708,13 @@ static int gfx_v9_4_3_kiq_fini_register(struct amdgpu_ring *ring, int xcc_id) return 0; } -static int gfx_v9_4_3_kiq_init_queue(struct amdgpu_ring *ring, int xcc_id) +static int gfx_v9_4_3_xcc_kiq_init_queue(struct amdgpu_ring *ring, int xcc_id) { struct amdgpu_device *adev = ring->adev; struct v9_mqd *mqd = ring->mqd_ptr; struct v9_mqd *tmp_mqd; - gfx_v9_4_3_kiq_setting(ring, xcc_id); + gfx_v9_4_3_xcc_kiq_setting(ring, xcc_id); /* GPU could be in bad state during probe, driver trigger the reset * after load the SMU, in this case , the mqd is not be initialized. @@ -1726,7 +1732,7 @@ static int gfx_v9_4_3_kiq_init_queue(struct amdgpu_ring *ring, int xcc_id) amdgpu_ring_clear_ring(ring); mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id)); - gfx_v9_4_3_kiq_init_register(ring, xcc_id); + gfx_v9_4_3_xcc_kiq_init_register(ring, xcc_id); soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); mutex_unlock(&adev->srbm_mutex); } else { @@ -1736,7 +1742,7 @@ static int gfx_v9_4_3_kiq_init_queue(struct amdgpu_ring *ring, int xcc_id) mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id)); gfx_v9_4_3_mqd_init(ring); - gfx_v9_4_3_kiq_init_register(ring, xcc_id); + gfx_v9_4_3_xcc_kiq_init_register(ring, xcc_id); soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); mutex_unlock(&adev->srbm_mutex); @@ -1747,7 +1753,7 @@ static int gfx_v9_4_3_kiq_init_queue(struct amdgpu_ring *ring, int xcc_id) return 0; } -static int gfx_v9_4_3_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id) +static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id) { struct amdgpu_device *adev = ring->adev; struct v9_mqd *mqd = ring->mqd_ptr; @@ -1785,7 +1791,7 @@ static int gfx_v9_4_3_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id) return 0; } -static int gfx_v9_4_3_kiq_resume(struct amdgpu_device *adev, int xcc_id) +static int gfx_v9_4_3_xcc_kiq_resume(struct amdgpu_device *adev, int xcc_id) { struct amdgpu_ring *ring; int r; @@ -1802,7 +1808,7 @@ static int gfx_v9_4_3_kiq_resume(struct amdgpu_device *adev, int xcc_id) return r; } - gfx_v9_4_3_kiq_init_queue(ring, xcc_id); + gfx_v9_4_3_xcc_kiq_init_queue(ring, xcc_id); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; amdgpu_bo_unreserve(ring->mqd_obj); @@ -1810,12 +1816,12 @@ static int gfx_v9_4_3_kiq_resume(struct amdgpu_device *adev, int xcc_id) return 0; } -static int gfx_v9_4_3_kcq_resume(struct amdgpu_device *adev, int xcc_id) +static int gfx_v9_4_3_xcc_kcq_resume(struct amdgpu_device *adev, int xcc_id) { struct amdgpu_ring *ring = NULL; int r = 0, i; - gfx_v9_4_3_cp_compute_enable(adev, true, xcc_id); + gfx_v9_4_3_xcc_cp_compute_enable(adev, true, xcc_id); for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings]; @@ -1825,7 +1831,7 @@ static int gfx_v9_4_3_kcq_resume(struct amdgpu_device *adev, int xcc_id) goto done; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { - r = gfx_v9_4_3_kcq_init_queue(ring, xcc_id); + r = gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } @@ -1846,12 +1852,12 @@ static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < num_xcc; i++) { - gfx_v9_4_3_enable_gui_idle_interrupt(adev, false, i); + gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, false, i); if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - gfx_v9_4_3_disable_gpa_mode(adev, i); + gfx_v9_4_3_xcc_disable_gpa_mode(adev, i); - r = gfx_v9_4_3_cp_compute_load_microcode(adev, i); + r = gfx_v9_4_3_xcc_cp_compute_load_microcode(adev, i); if (r) return r; } @@ -1862,13 +1868,13 @@ static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) adev, amdgpu_user_partt_mode); /* set the virtual and physical id based on partition_mode */ - gfx_v9_4_3_program_xcc_id(adev, i); + gfx_v9_4_3_xcc_program_xcc_id(adev, i); - r = gfx_v9_4_3_kiq_resume(adev, i); + r = gfx_v9_4_3_xcc_kiq_resume(adev, i); if (r) return r; - r = gfx_v9_4_3_kcq_resume(adev, i); + r = gfx_v9_4_3_xcc_kcq_resume(adev, i); if (r) return r; @@ -1879,16 +1885,16 @@ static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) return r; } - gfx_v9_4_3_enable_gui_idle_interrupt(adev, true, i); + gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, true, i); } return 0; } -static void gfx_v9_4_3_cp_enable(struct amdgpu_device *adev, bool enable, - int xcc_id) +static void gfx_v9_4_3_xcc_cp_enable(struct amdgpu_device *adev, bool enable, + int xcc_id) { - gfx_v9_4_3_cp_compute_enable(adev, enable, xcc_id); + gfx_v9_4_3_xcc_cp_compute_enable(adev, enable, xcc_id); } static int gfx_v9_4_3_hw_init(void *handle) @@ -1932,12 +1938,13 @@ static int gfx_v9_4_3_hw_fini(void *handle) soc15_grbm_select(adev, adev->gfx.kiq[i].ring.me, adev->gfx.kiq[i].ring.pipe, adev->gfx.kiq[i].ring.queue, 0, GET_INST(GC, i)); - gfx_v9_4_3_kiq_fini_register(&adev->gfx.kiq[i].ring, i); + gfx_v9_4_3_xcc_kiq_fini_register(&adev->gfx.kiq[i].ring, + i); soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, i)); mutex_unlock(&adev->srbm_mutex); } - gfx_v9_4_3_cp_enable(adev, false, i); + gfx_v9_4_3_xcc_cp_enable(adev, false, i); } /* Skip suspend with A+A reset */ @@ -2024,7 +2031,7 @@ static int gfx_v9_4_3_soft_reset(void *handle) adev->gfx.rlc.funcs->stop(adev); /* Disable MEC parsing/prefetching */ - gfx_v9_4_3_cp_compute_enable(adev, false, 0); + gfx_v9_4_3_xcc_cp_compute_enable(adev, false, 0); if (grbm_soft_reset) { tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET); @@ -2111,8 +2118,9 @@ static int gfx_v9_4_3_late_init(void *handle) return 0; } -static void gfx_v9_4_3_update_medium_grain_clock_gating(struct amdgpu_device *adev, - bool enable, int xcc_id) +static void +gfx_v9_4_3_xcc_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable, int xcc_id) { uint32_t data, def; @@ -2180,8 +2188,9 @@ static void gfx_v9_4_3_update_medium_grain_clock_gating(struct amdgpu_device *ad amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id); } -static void gfx_v9_4_3_update_coarse_grain_clock_gating(struct amdgpu_device *adev, - bool enable, int xcc_id) +static void +gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(struct amdgpu_device *adev, + bool enable, int xcc_id) { uint32_t def, data; @@ -2232,31 +2241,35 @@ static void gfx_v9_4_3_update_coarse_grain_clock_gating(struct amdgpu_device *ad amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id); } -static int gfx_v9_4_3_update_gfx_clock_gating(struct amdgpu_device *adev, - bool enable, int xcc_id) +static int gfx_v9_4_3_xcc_update_gfx_clock_gating(struct amdgpu_device *adev, + bool enable, int xcc_id) { if (enable) { /* CGCG/CGLS should be enabled after MGCG/MGLS * === MGCG + MGLS === */ - gfx_v9_4_3_update_medium_grain_clock_gating(adev, enable, xcc_id); + gfx_v9_4_3_xcc_update_medium_grain_clock_gating(adev, enable, + xcc_id); /* === CGCG + CGLS === */ - gfx_v9_4_3_update_coarse_grain_clock_gating(adev, enable, xcc_id); + gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(adev, enable, + xcc_id); } else { /* CGCG/CGLS should be disabled before MGCG/MGLS * === CGCG + CGLS === */ - gfx_v9_4_3_update_coarse_grain_clock_gating(adev, enable, xcc_id); + gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(adev, enable, + xcc_id); /* === MGCG + MGLS === */ - gfx_v9_4_3_update_medium_grain_clock_gating(adev, enable, xcc_id); + gfx_v9_4_3_xcc_update_medium_grain_clock_gating(adev, enable, + xcc_id); } return 0; } static const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs = { .is_rlc_enabled = gfx_v9_4_3_is_rlc_enabled, - .set_safe_mode = gfx_v9_4_3_set_safe_mode, - .unset_safe_mode = gfx_v9_4_3_unset_safe_mode, + .set_safe_mode = gfx_v9_4_3_xcc_set_safe_mode, + .unset_safe_mode = gfx_v9_4_3_xcc_unset_safe_mode, .init = gfx_v9_4_3_rlc_init, .resume = gfx_v9_4_3_rlc_resume, .stop = gfx_v9_4_3_rlc_stop, @@ -2285,8 +2298,8 @@ static int gfx_v9_4_3_set_clockgating_state(void *handle, switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 4, 3): for (i = 0; i < num_xcc; i++) - gfx_v9_4_3_update_gfx_clock_gating(adev, - state == AMD_CG_STATE_GATE, i); + gfx_v9_4_3_xcc_update_gfx_clock_gating( + adev, state == AMD_CG_STATE_GATE, i); break; default: break; @@ -2553,10 +2566,9 @@ static void gfx_v9_4_3_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, ref, mask); } -static void gfx_v9_4_3_set_compute_eop_interrupt_state(struct amdgpu_device *adev, - int me, int pipe, - enum amdgpu_interrupt_state state, - int xcc_id) +static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( + struct amdgpu_device *adev, int me, int pipe, + enum amdgpu_interrupt_state state, int xcc_id) { u32 mec_int_cntl, mec_int_cntl_reg; @@ -2664,28 +2676,36 @@ static int gfx_v9_4_3_set_eop_interrupt_state(struct amdgpu_device *adev, for (i = 0; i < num_xcc; i++) { switch (type) { case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: - gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 0, state, i); + gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( + adev, 1, 0, state, i); break; case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: - gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 1, state, i); + gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( + adev, 1, 1, state, i); break; case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: - gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 2, state, i); + gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( + adev, 1, 2, state, i); break; case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: - gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 3, state, i); + gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( + adev, 1, 3, state, i); break; case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: - gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 0, state, i); + gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( + adev, 2, 0, state, i); break; case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: - gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 1, state, i); + gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( + adev, 2, 1, state, i); break; case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: - gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 2, state, i); + gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( + adev, 2, 2, state, i); break; case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: - gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 3, state, i); + gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( + adev, 2, 3, state, i); break; default: break; @@ -3090,7 +3110,7 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, mask = 1; ao_bitmap = 0; counter = 0; - gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff, 0); + gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, 0); gfx_v9_4_3_set_user_cu_inactive_bitmap( adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev); @@ -3123,7 +3143,8 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; } } - gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, + 0); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; -- cgit v1.2.3 From 44b5cf2e0f7952856f48b9be56b9eb2f688d70f0 Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Wed, 16 Nov 2022 10:47:18 +0530 Subject: drm/amdgpu: Add xcc specific functions Add more XCC specific functions and use them from IP block functions. RLC, CP functions are further split to have xcc specific versions. Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 295 +++++++++++++++++++------------- 1 file changed, 176 insertions(+), 119 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 93420c7db93c..93a0baa4515c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -955,52 +955,64 @@ static void gfx_v9_4_3_xcc_init_gds_vmid(struct amdgpu_device *adev, int xcc_id) } } -static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) +static void gfx_v9_4_3_xcc_constants_init(struct amdgpu_device *adev, + int xcc_id) { u32 tmp; - int i, j, num_xcc; - - num_xcc = NUM_XCC(adev->gfx.xcc_mask); - - gfx_v9_4_3_get_cu_info(adev, &adev->gfx.cu_info); - adev->gfx.config.db_debug2 = RREG32_SOC15(GC, GET_INST(GC, 0), regDB_DEBUG2); + int i; /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { - for (j = 0; j < num_xcc; j++) { - soc15_grbm_select(adev, 0, 0, 0, i, GET_INST(GC, j)); - /* CP and shaders */ - if (i == 0) { - tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, - SH_MEM_ALIGNMENT_MODE_UNALIGNED); - tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, - !!adev->gmc.noretry); - WREG32_SOC15_RLC(GC, GET_INST(GC, j), regSH_MEM_CONFIG, tmp); - WREG32_SOC15_RLC(GC, GET_INST(GC, j), regSH_MEM_BASES, 0); - } else { - tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, - SH_MEM_ALIGNMENT_MODE_UNALIGNED); - tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, - !!adev->gmc.noretry); - WREG32_SOC15_RLC(GC, GET_INST(GC, j), regSH_MEM_CONFIG, tmp); - tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, - (adev->gmc.private_aperture_start >> 48)); - tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, - (adev->gmc.shared_aperture_start >> 48)); - WREG32_SOC15_RLC(GC, GET_INST(GC, j), regSH_MEM_BASES, tmp); - } + soc15_grbm_select(adev, 0, 0, 0, i, GET_INST(GC, xcc_id)); + /* CP and shaders */ + if (i == 0) { + tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, + SH_MEM_ALIGNMENT_MODE_UNALIGNED); + tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, + !!adev->gmc.noretry); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), + regSH_MEM_CONFIG, tmp); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), + regSH_MEM_BASES, 0); + } else { + tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, + SH_MEM_ALIGNMENT_MODE_UNALIGNED); + tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, + !!adev->gmc.noretry); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), + regSH_MEM_CONFIG, tmp); + tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, + (adev->gmc.private_aperture_start >> + 48)); + tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, + (adev->gmc.shared_aperture_start >> + 48)); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), + regSH_MEM_BASES, tmp); } } soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, 0)); mutex_unlock(&adev->srbm_mutex); - for (i = 0; i < num_xcc; i++) { - gfx_v9_4_3_xcc_init_compute_vmid(adev, i); - gfx_v9_4_3_xcc_init_gds_vmid(adev, i); - } + gfx_v9_4_3_xcc_init_compute_vmid(adev, xcc_id); + gfx_v9_4_3_xcc_init_gds_vmid(adev, xcc_id); +} + +static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) +{ + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + + gfx_v9_4_3_get_cu_info(adev, &adev->gfx.cu_info); + adev->gfx.config.db_debug2 = + RREG32_SOC15(GC, GET_INST(GC, 0), regDB_DEBUG2); + + for (i = 0; i < num_xcc; i++) + gfx_v9_4_3_xcc_constants_init(adev, i); } static void @@ -1167,16 +1179,31 @@ static void gfx_v9_4_3_xcc_enable_gui_idle_interrupt(struct amdgpu_device *adev, WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL_RING0, tmp); } +static void gfx_v9_4_3_xcc_rlc_stop(struct amdgpu_device *adev, int xcc_id) +{ + WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), RLC_CNTL, + RLC_ENABLE_F32, 0); + gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, false, xcc_id); + gfx_v9_4_3_xcc_wait_for_rlc_serdes(adev, xcc_id); +} + static void gfx_v9_4_3_rlc_stop(struct amdgpu_device *adev) { int i, num_xcc; num_xcc = NUM_XCC(adev->gfx.xcc_mask); - for (i = 0; i < num_xcc; i++) { - WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), RLC_CNTL, RLC_ENABLE_F32, 0); - gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, false, i); - gfx_v9_4_3_xcc_wait_for_rlc_serdes(adev, i); - } + for (i = 0; i < num_xcc; i++) + gfx_v9_4_3_xcc_rlc_stop(adev, i); +} + +static void gfx_v9_4_3_xcc_rlc_reset(struct amdgpu_device *adev, int xcc_id) +{ + WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), GRBM_SOFT_RESET, + SOFT_RESET_RLC, 1); + udelay(50); + WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), GRBM_SOFT_RESET, + SOFT_RESET_RLC, 0); + udelay(50); } static void gfx_v9_4_3_rlc_reset(struct amdgpu_device *adev) @@ -1184,10 +1211,19 @@ static void gfx_v9_4_3_rlc_reset(struct amdgpu_device *adev) int i, num_xcc; num_xcc = NUM_XCC(adev->gfx.xcc_mask); - for (i = 0; i < num_xcc; i++) { - WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); - udelay(50); - WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); + for (i = 0; i < num_xcc; i++) + gfx_v9_4_3_xcc_rlc_reset(adev, i); +} + +static void gfx_v9_4_3_xcc_rlc_start(struct amdgpu_device *adev, int xcc_id) +{ + WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), RLC_CNTL, + RLC_ENABLE_F32, 1); + udelay(50); + + /* carrizo do enable cp interrupt after cp inited */ + if (!(adev->flags & AMD_IS_APU)) { + gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, true, xcc_id); udelay(50); } } @@ -1201,15 +1237,7 @@ static void gfx_v9_4_3_rlc_start(struct amdgpu_device *adev) num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < num_xcc; i++) { - WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), RLC_CNTL, RLC_ENABLE_F32, 1); - udelay(50); - - /* carrizo do enable cp interrupt after cp inited */ - if (!(adev->flags & AMD_IS_APU)) { - gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, true, i); - udelay(50); - } - + gfx_v9_4_3_xcc_rlc_start(adev, i); #ifdef AMDGPU_RLC_DEBUG_RETRY /* RLC_GPM_GENERAL_6 : RLC Ucode version */ rlc_ucode_ver = RREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_GENERAL_6); @@ -1260,28 +1288,39 @@ static int gfx_v9_4_3_xcc_rlc_load_microcode(struct amdgpu_device *adev, return 0; } -static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev) +static int gfx_v9_4_3_xcc_rlc_resume(struct amdgpu_device *adev, int xcc_id) { - int r, i, num_xcc; + int r; - adev->gfx.rlc.funcs->stop(adev); + gfx_v9_4_3_xcc_rlc_stop(adev, xcc_id); - num_xcc = NUM_XCC(adev->gfx.xcc_mask); - for (i = 0; i < num_xcc; i++) { - /* disable CG */ - WREG32_SOC15(GC, GET_INST(GC, i), regRLC_CGCG_CGLS_CTRL, 0); + /* disable CG */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, 0); - gfx_v9_4_3_xcc_init_pg(adev, i); + gfx_v9_4_3_xcc_init_pg(adev, xcc_id); - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - /* legacy rlc firmware loading */ - r = gfx_v9_4_3_xcc_rlc_load_microcode(adev, i); - if (r) - return r; - } + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + /* legacy rlc firmware loading */ + r = gfx_v9_4_3_xcc_rlc_load_microcode(adev, xcc_id); + if (r) + return r; } - adev->gfx.rlc.funcs->start(adev); + gfx_v9_4_3_xcc_rlc_start(adev, xcc_id); + + return 0; +} + +static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev) +{ + int r, i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { + r = gfx_v9_4_3_xcc_rlc_resume(adev, i); + if (r) + return r; + } return 0; } @@ -1845,47 +1884,58 @@ done: return r; } -static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) +static int gfx_v9_4_3_xcc_cp_resume(struct amdgpu_device *adev, int xcc_id) { - int r, i, j, num_xcc; struct amdgpu_ring *ring; + int r, j; - num_xcc = NUM_XCC(adev->gfx.xcc_mask); - for (i = 0; i < num_xcc; i++) { - gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, false, i); + gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, false, xcc_id); - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - gfx_v9_4_3_xcc_disable_gpa_mode(adev, i); + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + gfx_v9_4_3_xcc_disable_gpa_mode(adev, xcc_id); - r = gfx_v9_4_3_xcc_cp_compute_load_microcode(adev, i); - if (r) - return r; - } + r = gfx_v9_4_3_xcc_cp_compute_load_microcode(adev, xcc_id); + if (r) + return r; + } - if (adev->gfx.partition_mode == - AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) - gfx_v9_4_3_switch_compute_partition( - adev, amdgpu_user_partt_mode); + if (adev->gfx.partition_mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) + gfx_v9_4_3_switch_compute_partition(adev, + amdgpu_user_partt_mode); - /* set the virtual and physical id based on partition_mode */ - gfx_v9_4_3_xcc_program_xcc_id(adev, i); + /* set the virtual and physical id based on partition_mode */ + gfx_v9_4_3_xcc_program_xcc_id(adev, xcc_id); - r = gfx_v9_4_3_xcc_kiq_resume(adev, i); - if (r) - return r; + r = gfx_v9_4_3_xcc_kiq_resume(adev, xcc_id); + if (r) + return r; + + r = gfx_v9_4_3_xcc_kcq_resume(adev, xcc_id); + if (r) + return r; - r = gfx_v9_4_3_xcc_kcq_resume(adev, i); + for (j = 0; j < adev->gfx.num_compute_rings; j++) { + ring = &adev->gfx.compute_ring + [j + xcc_id * adev->gfx.num_compute_rings]; + r = amdgpu_ring_test_helper(ring); if (r) return r; + } - for (j = 0; j < adev->gfx.num_compute_rings; j++) { - ring = &adev->gfx.compute_ring[j + i * adev->gfx.num_compute_rings]; - r = amdgpu_ring_test_helper(ring); - if (r) - return r; - } + gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, true, xcc_id); + + return 0; +} + +static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) +{ + int r, i, num_xcc; - gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, true, i); + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { + r = gfx_v9_4_3_xcc_cp_resume(adev, i); + if (r) + return r; } return 0; @@ -1897,6 +1947,37 @@ static void gfx_v9_4_3_xcc_cp_enable(struct amdgpu_device *adev, bool enable, gfx_v9_4_3_xcc_cp_compute_enable(adev, enable, xcc_id); } +static void gfx_v9_4_3_xcc_fini(struct amdgpu_device *adev, int xcc_id) +{ + if (amdgpu_gfx_disable_kcq(adev, xcc_id)) + DRM_ERROR("XCD %d KCQ disable failed\n", xcc_id); + + /* Use deinitialize sequence from CAIL when unbinding device + * from driver, otherwise KIQ is hanging when binding back + */ + if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, adev->gfx.kiq[xcc_id].ring.me, + adev->gfx.kiq[xcc_id].ring.pipe, + adev->gfx.kiq[xcc_id].ring.queue, 0, + GET_INST(GC, xcc_id)); + gfx_v9_4_3_xcc_kiq_fini_register(&adev->gfx.kiq[xcc_id].ring, + xcc_id); + soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); + mutex_unlock(&adev->srbm_mutex); + } + + gfx_v9_4_3_xcc_cp_enable(adev, false, xcc_id); + + /* Skip suspend with A+A reset */ + if (adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) { + dev_dbg(adev->dev, "Device in reset. Skipping RLC halt\n"); + return; + } + + gfx_v9_4_3_xcc_rlc_stop(adev, xcc_id); +} + static int gfx_v9_4_3_hw_init(void *handle) { int r; @@ -1927,33 +2008,9 @@ static int gfx_v9_4_3_hw_fini(void *handle) num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < num_xcc; i++) { - if (amdgpu_gfx_disable_kcq(adev, i)) - DRM_ERROR("XCD %d KCQ disable failed\n", i); - - /* Use deinitialize sequence from CAIL when unbinding device - * from driver, otherwise KIQ is hanging when binding back - */ - if (!amdgpu_in_reset(adev) && !adev->in_suspend) { - mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, adev->gfx.kiq[i].ring.me, - adev->gfx.kiq[i].ring.pipe, - adev->gfx.kiq[i].ring.queue, 0, GET_INST(GC, i)); - gfx_v9_4_3_xcc_kiq_fini_register(&adev->gfx.kiq[i].ring, - i); - soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, i)); - mutex_unlock(&adev->srbm_mutex); - } - - gfx_v9_4_3_xcc_cp_enable(adev, false, i); - } - - /* Skip suspend with A+A reset */ - if (adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) { - dev_dbg(adev->dev, "Device in reset. Skipping RLC halt\n"); - return 0; + gfx_v9_4_3_xcc_fini(adev, i); } - adev->gfx.rlc.funcs->stop(adev); return 0; } -- cgit v1.2.3 From 8e7fd19380f9187dae3ad18a61793b1752dfa097 Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Wed, 16 Nov 2022 17:15:47 +0530 Subject: drm/amdgpu: Switch to SOC partition funcs For GFXv9.4.3, use SOC level partition switch implementation rather than keeping them at GFX IP level. Change the exisiting implementation in GFX IP for keeping partition mode and restrict it to only GFX related switch. Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 31 ++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 5 -- .../gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c | 4 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 59 ++++------------------ drivers/gpu/drm/amd/amdkfd/kfd_device.c | 8 +-- 5 files changed, 20 insertions(+), 87 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 5ff49737d7c6..f895a4b8ca0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -28,6 +28,7 @@ #include "amdgpu_gfx.h" #include "amdgpu_rlc.h" #include "amdgpu_ras.h" +#include "amdgpu_xcp.h" /* delay 0.1 second to enable gfx off feature */ #define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100) @@ -1170,10 +1171,10 @@ static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); - enum amdgpu_gfx_partition mode; + int mode; char *partition_mode; - mode = adev->gfx.funcs->query_partition_mode(adev); + mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr); switch (mode) { case AMDGPU_SPX_PARTITION_MODE: @@ -1254,31 +1255,7 @@ static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev, return -EINVAL; } - if (!adev->kfd.init_complete) - return -EPERM; - - mutex_lock(&adev->gfx.partition_mutex); - - if (mode == adev->gfx.funcs->query_partition_mode(adev)) - goto out; - - ret = amdgpu_amdkfd_check_and_lock_kfd(adev); - if (ret) - goto out; - - amdgpu_amdkfd_device_fini_sw(adev); - - adev->gfx.funcs->switch_partition_mode(adev, mode); - - amdgpu_amdkfd_device_probe(adev); - amdgpu_amdkfd_device_init(adev); - /* If KFD init failed, return failure */ - if (!adev->kfd.init_complete) - ret = -EIO; - - amdgpu_amdkfd_unlock_kfd(adev); -out: - mutex_unlock(&adev->gfx.partition_mutex); + ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 8be4ab50b171..2287768ed141 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -278,11 +278,7 @@ struct amdgpu_gfx_funcs { (*query_partition_mode)(struct amdgpu_device *adev); enum amdgpu_memory_partition (*query_mem_partition_mode)(struct amdgpu_device *adev); - int (*switch_partition_mode)(struct amdgpu_device *adev, - enum amdgpu_gfx_partition mode); - - int (*switch_gfx_partition_mode)(struct amdgpu_device *adev, int num_xccs_per_xcp); }; @@ -416,7 +412,6 @@ struct amdgpu_gfx { bool cp_gfx_shadow; /* for gfx11 */ - enum amdgpu_gfx_partition partition_mode; uint16_t xcc_mask; enum amdgpu_memory_partition mem_partition_mode; uint32_t num_xcc_per_xcp; diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c index a9de229a2828..bbcdececfd2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c @@ -307,8 +307,8 @@ static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, goto unlock; num_xcc_per_xcp = __aqua_vanjaram_get_xcc_per_xcp(xcp_mgr, mode); - if (adev->gfx.funcs->switch_gfx_partition_mode) - adev->gfx.funcs->switch_gfx_partition_mode(xcp_mgr->adev, + if (adev->gfx.funcs->switch_partition_mode) + adev->gfx.funcs->switch_partition_mode(xcp_mgr->adev, num_xcc_per_xcp); if (adev->nbio.funcs->set_compute_partition_mode) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 93a0baa4515c..d684037a7a5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -38,6 +38,7 @@ #include "gc/gc_9_4_3_sh_mask.h" #include "gfx_v9_4_3.h" +#include "amdgpu_xcp.h" MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin"); @@ -614,61 +615,23 @@ gfx_v9_4_3_query_memory_partition(struct amdgpu_device *adev) return mode; } -static enum amdgpu_gfx_partition -gfx_v9_4_3_query_compute_partition(struct amdgpu_device *adev) -{ - enum amdgpu_gfx_partition mode = adev->gfx.partition_mode; - - if (adev->nbio.funcs->get_compute_partition_mode) - mode = adev->nbio.funcs->get_compute_partition_mode(adev); - - return mode; -} - static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev, - enum amdgpu_gfx_partition mode) + int num_xccs_per_xcp) { + int i, num_xcc; u32 tmp = 0; - int num_xcc_per_partition, i, num_xcc; num_xcc = NUM_XCC(adev->gfx.xcc_mask); - switch (mode) { - case AMDGPU_SPX_PARTITION_MODE: - num_xcc_per_partition = num_xcc; - break; - case AMDGPU_DPX_PARTITION_MODE: - num_xcc_per_partition = num_xcc / 2; - break; - case AMDGPU_TPX_PARTITION_MODE: - num_xcc_per_partition = num_xcc / 3; - break; - case AMDGPU_QPX_PARTITION_MODE: - num_xcc_per_partition = num_xcc / 4; - break; - case AMDGPU_CPX_PARTITION_MODE: - num_xcc_per_partition = 1; - break; - default: - return -EINVAL; - } - - /* TODO: - * Stop user queues and threads, and make sure GPU is empty of work. - */ for (i = 0; i < num_xcc; i++) { tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, NUM_XCC_IN_XCP, - num_xcc_per_partition); + num_xccs_per_xcp); tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, VIRTUAL_XCC_ID, - i % num_xcc_per_partition); + i % num_xccs_per_xcp); WREG32_SOC15(GC, GET_INST(GC, i), regCP_HYP_XCP_CTL, tmp); } - if (adev->nbio.funcs->set_compute_partition_mode) - adev->nbio.funcs->set_compute_partition_mode(adev, mode); - - adev->gfx.num_xcc_per_xcp = num_xcc_per_partition; - adev->gfx.partition_mode = mode; + adev->gfx.num_xcc_per_xcp = num_xccs_per_xcp; return 0; } @@ -680,7 +643,6 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { .read_wave_sgprs = &gfx_v9_4_3_read_wave_sgprs, .read_wave_vgprs = &gfx_v9_4_3_read_wave_vgprs, .select_me_pipe_q = &gfx_v9_4_3_select_me_pipe_q, - .query_partition_mode = &gfx_v9_4_3_query_compute_partition, .switch_partition_mode = &gfx_v9_4_3_switch_compute_partition, .query_mem_partition_mode = &gfx_v9_4_3_query_memory_partition, }; @@ -1899,10 +1861,6 @@ static int gfx_v9_4_3_xcc_cp_resume(struct amdgpu_device *adev, int xcc_id) return r; } - if (adev->gfx.partition_mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) - gfx_v9_4_3_switch_compute_partition(adev, - amdgpu_user_partt_mode); - /* set the virtual and physical id based on partition_mode */ gfx_v9_4_3_xcc_program_xcc_id(adev, xcc_id); @@ -1931,6 +1889,9 @@ static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) { int r, i, num_xcc; + if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr) == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) + amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, amdgpu_user_partt_mode); + num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < num_xcc; i++) { r = gfx_v9_4_3_xcc_cp_resume(adev, i); @@ -2146,8 +2107,6 @@ static int gfx_v9_4_3_early_init(void *handle) num_xcc = NUM_XCC(adev->gfx.xcc_mask); - adev->gfx.partition_mode = AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE; - adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), AMDGPU_MAX_COMPUTE_RINGS); gfx_v9_4_3_set_kiq_pm4_funcs(adev); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 293787290e36..7a963d0a34e2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -34,6 +34,7 @@ #include "kfd_smi_events.h" #include "kfd_migrate.h" #include "amdgpu.h" +#include "amdgpu_xcp.h" #define MQD_SIZE_ALIGNED 768 @@ -592,7 +593,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, struct kfd_node *node; uint32_t first_vmid_kfd, last_vmid_kfd, vmid_num_kfd; unsigned int max_proc_per_quantum; - int num_xcd; + int num_xcd, partition_mode; kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev, KGD_ENGINE_MEC1); @@ -644,8 +645,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, * If the VMID range changes for GFX9.4.3, then this code MUST be * revisited. */ + partition_mode = amdgpu_xcp_query_partition_mode(kfd->adev->xcp_mgr); if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) && - kfd->adev->gfx.partition_mode == AMDGPU_CPX_PARTITION_MODE && + partition_mode == AMDGPU_CPX_PARTITION_MODE && kfd->num_nodes != 1) { vmid_num_kfd /= 2; first_vmid_kfd = last_vmid_kfd + 1 - vmid_num_kfd*2; @@ -761,7 +763,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, node->start_xcc_id = node->num_xcc_per_node * i; if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) && - kfd->adev->gfx.partition_mode == AMDGPU_CPX_PARTITION_MODE && + partition_mode == AMDGPU_CPX_PARTITION_MODE && kfd->num_nodes != 1) { /* For GFX9.4.3 and CPX mode, first XCD gets VMID range * 4-9 and second XCD gets VMID range 10-15. -- cgit v1.2.3 From 73c84f7c478aeb98bce494cac73f2d20f4a81c6e Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Fri, 23 Sep 2022 16:48:43 +0530 Subject: drm/amdgpu: Add XCP functions for GFX v9.4.3 Add functions to suspend/resume GFX instances belonging to an XCP. Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 47 +++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index d684037a7a5d..aaa67592bbb5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -23,6 +23,7 @@ #include <linux/firmware.h> #include "amdgpu.h" +#include "amdgpu_xcp.h" #include "amdgpu_gfx.h" #include "soc15.h" #include "soc15d.h" @@ -3177,3 +3178,49 @@ const struct amdgpu_ip_block_version gfx_v9_4_3_ip_block = { .rev = 0, .funcs = &gfx_v9_4_3_ip_funcs, }; + +static int gfx_v9_4_3_xcp_resume(void *handle, uint32_t inst_mask) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t tmp_mask; + int i, r; + + /* TODO : Initialize golden regs */ + /* gfx_v9_4_3_init_golden_registers(adev); */ + + tmp_mask = inst_mask; + for_each_inst(i, tmp_mask) + gfx_v9_4_3_xcc_constants_init(adev, i); + + tmp_mask = inst_mask; + for_each_inst(i, tmp_mask) { + r = gfx_v9_4_3_xcc_rlc_resume(adev, i); + if (r) + return r; + } + + tmp_mask = inst_mask; + for_each_inst(i, tmp_mask) { + r = gfx_v9_4_3_xcc_cp_resume(adev, i); + if (r) + return r; + } + + return 0; +} + +static int gfx_v9_4_3_xcp_suspend(void *handle, uint32_t inst_mask) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i; + + for_each_inst(i, inst_mask) + gfx_v9_4_3_xcc_fini(adev, i); + + return 0; +} + +struct amdgpu_xcp_ip_funcs gfx_v9_4_3_xcp_funcs = { + .suspend = &gfx_v9_4_3_xcp_suspend, + .resume = &gfx_v9_4_3_xcp_resume +}; -- cgit v1.2.3 From fee500fa7cb7e11a4d2d66e75e65e67c156e27c6 Mon Sep 17 00:00:00 2001 From: Shiwu Zhang <shiwu.zhang@amd.com> Date: Fri, 18 Nov 2022 14:21:15 +0800 Subject: drm/amdgpu: Fix the KCQ hang when binding back Just like the KIQ, KCQ need to clear the doorbell related regs as well to avoid hangs when to load driver again after unloading. Signed-off-by: Shiwu Zhang <shiwu.zhang@amd.com> Reviewed-by: Le Ma <le.ma@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index aaa67592bbb5..ef552c9b19b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1670,7 +1670,7 @@ static int gfx_v9_4_3_xcc_kiq_init_register(struct amdgpu_ring *ring, return 0; } -static int gfx_v9_4_3_xcc_kiq_fini_register(struct amdgpu_ring *ring, +static int gfx_v9_4_3_xcc_q_fini_register(struct amdgpu_ring *ring, int xcc_id) { struct amdgpu_device *adev = ring->adev; @@ -1688,7 +1688,7 @@ static int gfx_v9_4_3_xcc_kiq_fini_register(struct amdgpu_ring *ring, } if (j == AMDGPU_MAX_USEC_TIMEOUT) { - DRM_DEBUG("KIQ dequeue request failed.\n"); + DRM_DEBUG("%s dequeue request failed.\n", ring->name); /* Manual disable if dequeue request times out */ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, 0); @@ -1793,6 +1793,27 @@ static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id) return 0; } +static int gfx_v9_4_3_xcc_kcq_fini_register(struct amdgpu_device *adev, int xcc_id) +{ + struct amdgpu_ring *ring; + int j; + + for (j = 0; j < adev->gfx.num_compute_rings; j++) { + ring = &adev->gfx.compute_ring[j + xcc_id * adev->gfx.num_compute_rings]; + if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, ring->me, + ring->pipe, + ring->queue, 0, GET_INST(GC, xcc_id)); + gfx_v9_4_3_xcc_q_fini_register(ring, xcc_id); + soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); + mutex_unlock(&adev->srbm_mutex); + } + } + + return 0; +} + static int gfx_v9_4_3_xcc_kiq_resume(struct amdgpu_device *adev, int xcc_id) { struct amdgpu_ring *ring; @@ -1923,12 +1944,13 @@ static void gfx_v9_4_3_xcc_fini(struct amdgpu_device *adev, int xcc_id) adev->gfx.kiq[xcc_id].ring.pipe, adev->gfx.kiq[xcc_id].ring.queue, 0, GET_INST(GC, xcc_id)); - gfx_v9_4_3_xcc_kiq_fini_register(&adev->gfx.kiq[xcc_id].ring, + gfx_v9_4_3_xcc_q_fini_register(&adev->gfx.kiq[xcc_id].ring, xcc_id); soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); mutex_unlock(&adev->srbm_mutex); } + gfx_v9_4_3_xcc_kcq_fini_register(adev, xcc_id); gfx_v9_4_3_xcc_cp_enable(adev, false, xcc_id); /* Skip suspend with A+A reset */ -- cgit v1.2.3 From 98b2e9cad2279132e3aa4b9caf9164b2e35c1a52 Mon Sep 17 00:00:00 2001 From: Le Ma <le.ma@amd.com> Date: Fri, 9 Dec 2022 19:44:05 +0800 Subject: drm/amdgpu: correct the vmhub index when page fault occurs The AMDGPU_GFXHUB was bind to each xcc in the logical order. Thus convert the node_id to logical xcc_id to index the correct AMDGPU_GFXHUB. And "node_id / 4" can get the correct AMDGPU_MMHUB0 index. Signed-off-by: Le Ma <le.ma@amd.com> Tested-by: Asad kamal <asad.kamal@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 27 ++++++++++++++------------- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 14 ++++++++++---- 3 files changed, 25 insertions(+), 17 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 2287768ed141..81b4c7e684af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -280,6 +280,7 @@ struct amdgpu_gfx_funcs { (*query_mem_partition_mode)(struct amdgpu_device *adev); int (*switch_partition_mode)(struct amdgpu_device *adev, int num_xccs_per_xcp); + int (*ih_node_to_logical_xcc)(struct amdgpu_device *adev, int ih_node); }; struct sq_work { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index ef552c9b19b5..6aaa810ea044 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -637,6 +637,19 @@ static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev, return 0; } +static int gfx_v9_4_3_ih_to_xcc_inst(struct amdgpu_device *adev, int ih_node) +{ + int xcc; + + xcc = hweight8(adev->gfx.xcc_mask & GENMASK(ih_node / 2, 0)); + if (!xcc) { + dev_err(adev->dev, "Couldn't find xcc mapping from IH node"); + return -EINVAL; + } + + return xcc - 1; +} + static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { .get_gpu_clock_counter = &gfx_v9_4_3_get_gpu_clock_counter, .select_se_sh = &gfx_v9_4_3_xcc_select_se_sh, @@ -646,6 +659,7 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { .select_me_pipe_q = &gfx_v9_4_3_select_me_pipe_q, .switch_partition_mode = &gfx_v9_4_3_switch_compute_partition, .query_mem_partition_mode = &gfx_v9_4_3_query_memory_partition, + .ih_node_to_logical_xcc = &gfx_v9_4_3_ih_to_xcc_inst, }; static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev) @@ -2754,19 +2768,6 @@ static int gfx_v9_4_3_set_eop_interrupt_state(struct amdgpu_device *adev, return 0; } -static int gfx_v9_4_3_ih_to_xcc_inst(struct amdgpu_device *adev, int ih_node) -{ - int xcc; - - xcc = hweight8(adev->gfx.xcc_mask & GENMASK(ih_node / 2, 0)); - if (!xcc) { - dev_err(adev->dev, "Couldn't find xcc mapping from IH node"); - return -EINVAL; - } - - return xcc - 1; -} - static int gfx_v9_4_3_eop_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 3765178e6fc5..841333148610 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -557,22 +557,28 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, u64 addr; uint32_t cam_index = 0; int ret; - uint32_t node_id; + uint32_t node_id, xcc_id = 0; - node_id = (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) ? entry->node_id : 0; + node_id = entry->node_id; addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; if (entry->client_id == SOC15_IH_CLIENTID_VMC) { hub_name = "mmhub0"; - hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; + hub = &adev->vmhub[AMDGPU_MMHUB0(node_id / 4)]; } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) { hub_name = "mmhub1"; hub = &adev->vmhub[AMDGPU_MMHUB1(0)]; } else { hub_name = "gfxhub0"; - hub = &adev->vmhub[node_id/2]; + if (adev->gfx.funcs->ih_node_to_logical_xcc) { + xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev, + node_id); + if (xcc_id < 0) + xcc_id = 0; + } + hub = &adev->vmhub[xcc_id]; } if (retry_fault) { -- cgit v1.2.3 From d524180b88009d9158bff7fd20f3916455e0c32c Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Mon, 19 Dec 2022 17:39:42 +0530 Subject: drm/amdgpu: Fix GFX v9.4.3 EOP buffer allocation Each compute cluster gets 8 compute queues in GFX v9.4.3. Fix the EOP buffer allocation so that compute queue on every XCC gets a unique address. Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Tested-and-Reviewed-by: Asad Kamal <asad.kamal@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 6aaa810ea044..b56fa2945464 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -455,7 +455,8 @@ static int gfx_v9_4_3_mec_init(struct amdgpu_device *adev) /* take ownership of the relevant compute queues */ amdgpu_gfx_compute_queue_acquire(adev); - mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; + mec_hpd_size = + adev->gfx.num_compute_rings * num_xcc * GFX9_MEC_HPD_SIZE; if (mec_hpd_size) { r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, -- cgit v1.2.3 From 34fd9d686772f6725242e900913ca2be987c12dd Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Tue, 20 Dec 2022 14:21:57 +0530 Subject: drm/amdgpu: Add FGCG logic for GFX v9.4.3 Add logic for fine grain clock gating logic for GFX v9.4.3. The feature will be controlled using CG flags. Also, make a change so that RLC safe mode entry/exit is done only once during CG update sequence. Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 71 ++++++++++++++++++++++++++++++--- 1 file changed, 65 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index b56fa2945464..55d99c4ea48c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -2172,14 +2172,64 @@ static int gfx_v9_4_3_late_init(void *handle) return 0; } +static void gfx_v9_4_3_xcc_update_sram_fgcg(struct amdgpu_device *adev, + bool enable, int xcc_id) +{ + uint32_t def, data; + + if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) + return; + + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), + regRLC_CGTT_MGCG_OVERRIDE); + + if (enable) + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; + else + data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; + + if (def != data) + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regRLC_CGTT_MGCG_OVERRIDE, data); + + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CLK_CNTL); + + if (enable) + data &= ~RLC_CLK_CNTL__RLC_SRAM_CLK_GATER_OVERRIDE_MASK; + else + data |= RLC_CLK_CNTL__RLC_SRAM_CLK_GATER_OVERRIDE_MASK; + + if (def != data) + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CLK_CNTL, data); +} + +static void gfx_v9_4_3_xcc_update_repeater_fgcg(struct amdgpu_device *adev, + bool enable, int xcc_id) +{ + uint32_t def, data; + + if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG)) + return; + + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), + regRLC_CGTT_MGCG_OVERRIDE); + + if (enable) + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REP_FGCG_OVERRIDE_MASK; + else + data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REP_FGCG_OVERRIDE_MASK; + + if (def != data) + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regRLC_CGTT_MGCG_OVERRIDE, data); +} + static void gfx_v9_4_3_xcc_update_medium_grain_clock_gating(struct amdgpu_device *adev, bool enable, int xcc_id) { uint32_t data, def; - amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id); - /* It is disabled by HW by default */ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { /* 1 - RLC_CGTT_MGCG_OVERRIDE */ @@ -2239,7 +2289,6 @@ gfx_v9_4_3_xcc_update_medium_grain_clock_gating(struct amdgpu_device *adev, } } - amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id); } static void @@ -2248,8 +2297,6 @@ gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(struct amdgpu_device *adev, { uint32_t def, data; - amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE); /* unset CGCG override */ @@ -2292,13 +2339,18 @@ gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(struct amdgpu_device *adev, WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, data); } - amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id); } static int gfx_v9_4_3_xcc_update_gfx_clock_gating(struct amdgpu_device *adev, bool enable, int xcc_id) { + amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id); + if (enable) { + /* FGCG */ + gfx_v9_4_3_xcc_update_sram_fgcg(adev, enable, xcc_id); + gfx_v9_4_3_xcc_update_repeater_fgcg(adev, enable, xcc_id); + /* CGCG/CGLS should be enabled after MGCG/MGLS * === MGCG + MGLS === */ @@ -2316,7 +2368,14 @@ static int gfx_v9_4_3_xcc_update_gfx_clock_gating(struct amdgpu_device *adev, /* === MGCG + MGLS === */ gfx_v9_4_3_xcc_update_medium_grain_clock_gating(adev, enable, xcc_id); + + /* FGCG */ + gfx_v9_4_3_xcc_update_sram_fgcg(adev, enable, xcc_id); + gfx_v9_4_3_xcc_update_repeater_fgcg(adev, enable, xcc_id); } + + amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id); + return 0; } -- cgit v1.2.3 From 233bb3733bd43966696f4a5e95129476e86bf4e3 Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Thu, 19 Jan 2023 14:47:22 +0530 Subject: drm/amdgpu: Use unique doorbell range per xcc Program different ranges in each XCC with MEC_DOORBELL_RANGE_LOWER/HIGHER. Keeping the same range causes CPF in other XCCs also to be busy when an IB packet is submitted to KCQ. Only the XCC which processes the packet comes back to idle afterwards and this causes other CPs not be idle. This in turn affects clockgating behavior as RLC doesn't get idle interrupt. LOWER/HIGHER covers only KIQ/KCQs which are per XCC queues. Assigning different ranges doesn't seem to have any side effect as user queue ranges are outside of this range. User queue tests - PM4 through KFD and AQL through rocr - have the same results after this change. Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 35 ++++++++++++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 5 +++- .../gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 31 +++++++++++++------ 4 files changed, 47 insertions(+), 25 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index 3c3ae2b4dbc8..f637574644c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -86,6 +86,8 @@ struct amdgpu_doorbell_index { uint32_t max_assignment; /* Per engine SDMA doorbell size in dword */ uint32_t sdma_doorbell_range; + /* Per xcc doorbell size for KIQ/KCQ */ + uint32_t xcc_doorbell_range; }; typedef enum _AMDGPU_DOORBELL_ASSIGNMENT @@ -309,28 +311,31 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT AMDGPU_DOORBELL64_INVALID = 0xFFFF } AMDGPU_DOORBELL64_ASSIGNMENT; -typedef enum _AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 -{ - /* KIQ: 0~7 for maximum 8 XCD */ - AMDGPU_DOORBELL_LAYOUT1_KIQ_START = 0x000, - AMDGPU_DOORBELL_LAYOUT1_HIQ = 0x008, - AMDGPU_DOORBELL_LAYOUT1_DIQ = 0x009, - /* Compute: 0x0A ~ 0x49 */ - AMDGPU_DOORBELL_LAYOUT1_MEC_RING_START = 0x00A, - AMDGPU_DOORBELL_LAYOUT1_MEC_RING_END = 0x049, - AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_START = 0x04A, - AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_END = 0x0C9, +typedef enum _AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 { + /* XCC0: 0x00 ~20, XCC1: 20 ~ 2F ... */ + + /* KIQ/HIQ/DIQ */ + AMDGPU_DOORBELL_LAYOUT1_KIQ_START = 0x000, + AMDGPU_DOORBELL_LAYOUT1_HIQ = 0x001, + AMDGPU_DOORBELL_LAYOUT1_DIQ = 0x002, + /* Compute: 0x08 ~ 0x20 */ + AMDGPU_DOORBELL_LAYOUT1_MEC_RING_START = 0x008, + AMDGPU_DOORBELL_LAYOUT1_MEC_RING_END = 0x00F, + AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_START = 0x010, + AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_END = 0x01F, + AMDGPU_DOORBELL_LAYOUT1_XCC_RANGE = 0x020, + /* SDMA: 0x100 ~ 0x19F */ - AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START = 0x100, - AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_END = 0x19F, + AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START = 0x100, + AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_END = 0x19F, /* IH: 0x1A0 ~ 0x1AF */ AMDGPU_DOORBELL_LAYOUT1_IH = 0x1A0, /* VCN: 0x1B0 ~ 0x1D4 */ AMDGPU_DOORBELL_LAYOUT1_VCN_START = 0x1B0, AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1D4, - AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START, - AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_VCN_END, + AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START, + AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_VCN_END, AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT = 0x1D4, AMDGPU_DOORBELL_LAYOUT1_INVALID = 0xFFFF diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index f895a4b8ca0d..70c6099353b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -316,7 +316,10 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, ring->use_doorbell = true; ring->xcc_id = xcc_id; ring->vm_hub = AMDGPU_GFXHUB(xcc_id); - ring->doorbell_index = (adev->doorbell_index.kiq + xcc_id) << 1; + ring->doorbell_index = + (adev->doorbell_index.kiq + + xcc_id * adev->doorbell_index.xcc_doorbell_range) + << 1; r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c index 6591d39c6518..55a6ebb940ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c @@ -44,6 +44,7 @@ void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.userqueue_start = AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_START; adev->doorbell_index.userqueue_end = AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_END; + adev->doorbell_index.xcc_doorbell_range = AMDGPU_DOORBELL_LAYOUT1_XCC_RANGE; adev->doorbell_index.sdma_doorbell_range = 20; for (i = 0; i < adev->sdma.num_instances; i++) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 55d99c4ea48c..557a2458ef5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -729,8 +729,10 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, unsigned irq_type; struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; unsigned int hw_prio; + uint32_t xcc_doorbell_start; - ring = &adev->gfx.compute_ring[ring_id]; + ring = &adev->gfx.compute_ring[xcc_id * adev->gfx.num_compute_rings + + ring_id]; /* mec0 is me1 */ ring->xcc_id = xcc_id; @@ -740,9 +742,12 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->ring_obj = NULL; ring->use_doorbell = true; - ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; - ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr - + (ring_id * GFX9_MEC_HPD_SIZE); + xcc_doorbell_start = adev->doorbell_index.mec_ring0 + + xcc_id * adev->doorbell_index.xcc_doorbell_range; + ring->doorbell_index = (xcc_doorbell_start + ring_id) << 1; + ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + + (ring_id + xcc_id * adev->gfx.num_compute_rings) * + GFX9_MEC_HPD_SIZE; ring->vm_hub = AMDGPU_GFXHUB(xcc_id); sprintf(ring->name, "comp_%d.%d.%d.%d", ring->xcc_id, ring->me, ring->pipe, ring->queue); @@ -801,8 +806,8 @@ static int gfx_v9_4_3_sw_init(void *handle) } /* set up the compute queues - allocate horizontally across pipes */ - ring_id = 0; for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + ring_id = 0; for (i = 0; i < adev->gfx.mec.num_mec; ++i) { for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; @@ -1654,10 +1659,18 @@ static int gfx_v9_4_3_xcc_kiq_init_register(struct amdgpu_ring *ring, /* enable the doorbell if requested */ if (ring->use_doorbell) { - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DOORBELL_RANGE_LOWER, - (adev->doorbell_index.kiq * 2) << 2); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DOORBELL_RANGE_UPPER, - (adev->doorbell_index.userqueue_end * 2) << 2); + WREG32_SOC15( + GC, GET_INST(GC, xcc_id), + regCP_MEC_DOORBELL_RANGE_LOWER, + ((adev->doorbell_index.kiq + + xcc_id * adev->doorbell_index.xcc_doorbell_range) * + 2) << 2); + WREG32_SOC15( + GC, GET_INST(GC, xcc_id), + regCP_MEC_DOORBELL_RANGE_UPPER, + ((adev->doorbell_index.userqueue_end + + xcc_id * adev->doorbell_index.xcc_doorbell_range) * + 2) << 2); } WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, -- cgit v1.2.3 From b7c7011e67b09efc486b1de38f6bfbed75139989 Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Thu, 19 Jan 2023 15:00:45 +0530 Subject: drm/amdgpu: Enable CGCG/LS for GC 9.4.3 Enable coarse grain clockgating/light sleep for GC v9.4.3. Remove programming that is not meant for GC 9.4.3. Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 14 +++++--------- drivers/gpu/drm/amd/amdgpu/soc15.c | 3 ++- 2 files changed, 7 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 557a2458ef5e..52041d4cb14a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -2250,11 +2250,9 @@ gfx_v9_4_3_xcc_update_medium_grain_clock_gating(struct amdgpu_device *adev, data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); - /* only for Vega10 & Raven1 */ - data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; - if (def != data) WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data); @@ -2311,6 +2309,7 @@ gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(struct amdgpu_device *adev, uint32_t def, data; if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE); /* unset CGCG override */ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; @@ -2325,12 +2324,9 @@ gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(struct amdgpu_device *adev, /* enable cgcg FSM(0x0000363F) */ def = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL); - if (adev->asic_type == CHIP_ARCTURUS) - data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | - RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; - else - data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | - RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + data = (0x36 + << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 06a18b2f6e04..4138d9634266 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1121,7 +1121,8 @@ static int soc15_common_early_init(void *handle) case IP_VERSION(9, 4, 3): adev->asic_funcs = &aqua_vanjaram_asic_funcs; adev->cg_flags = - AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG; adev->pg_flags = AMD_PG_SUPPORT_VCN | -- cgit v1.2.3 From c1d3f627ff33bf1ae145209030a114c4985beddf Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Fri, 20 Jan 2023 12:42:00 +0530 Subject: drm/amdgpu: Fix mqd init on GFX v9.4.3 For MQD init, an XCC's queue is selected with GRBM select. However, for initialization of MQD, values read from logical XCC0 registers are used. This results in garbage values being read from XCC0 whose queue is not selected. Change to read from the right XCC for MQD initialization. Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 52041d4cb14a..1f1268cd5e09 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1456,7 +1456,7 @@ static void gfx_v9_4_3_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd } } -static int gfx_v9_4_3_mqd_init(struct amdgpu_ring *ring) +static int gfx_v9_4_3_xcc_mqd_init(struct amdgpu_ring *ring, int xcc_id) { struct amdgpu_device *adev = ring->adev; struct v9_mqd *mqd = ring->mqd_ptr; @@ -1483,14 +1483,14 @@ static int gfx_v9_4_3_mqd_init(struct amdgpu_ring *ring) mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regCP_HQD_EOP_CONTROL); + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_CONTROL); tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); mqd->cp_hqd_eop_control = tmp; /* enable doorbell? */ - tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regCP_HQD_PQ_DOORBELL_CONTROL); + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL); if (ring->use_doorbell) { tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, @@ -1520,7 +1520,7 @@ static int gfx_v9_4_3_mqd_init(struct amdgpu_ring *ring) mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); /* set MQD vmid to 0 */ - tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regCP_MQD_CONTROL); + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL); tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); mqd->cp_mqd_control = tmp; @@ -1530,7 +1530,7 @@ static int gfx_v9_4_3_mqd_init(struct amdgpu_ring *ring) mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); /* set up the HQD, this is similar to CP_RB0_CNTL */ - tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regCP_HQD_PQ_CONTROL); + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, (order_base_2(ring->ring_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, @@ -1557,23 +1557,23 @@ static int gfx_v9_4_3_mqd_init(struct amdgpu_ring *ring) /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ ring->wptr = 0; - mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, GET_INST(GC, 0), regCP_HQD_PQ_RPTR); + mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR); /* set the vmid for the queue */ mqd->cp_hqd_vmid = 0; - tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regCP_HQD_PERSISTENT_STATE); + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE); tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); mqd->cp_hqd_persistent_state = tmp; /* set MIN_IB_AVAIL_SIZE */ - tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regCP_HQD_IB_CONTROL); + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_IB_CONTROL); tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); mqd->cp_hqd_ib_control = tmp; /* set static priority for a queue/ring */ gfx_v9_4_3_mqd_set_priority(ring, mqd); - mqd->cp_hqd_quantum = RREG32_SOC15(GC, GET_INST(GC, 0), regCP_HQD_QUANTUM); + mqd->cp_hqd_quantum = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_QUANTUM); /* map_queues packet doesn't need activate the queue, * so only kiq need set this field. @@ -1771,7 +1771,7 @@ static int gfx_v9_4_3_xcc_kiq_init_queue(struct amdgpu_ring *ring, int xcc_id) ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id)); - gfx_v9_4_3_mqd_init(ring); + gfx_v9_4_3_xcc_mqd_init(ring, xcc_id); gfx_v9_4_3_xcc_kiq_init_register(ring, xcc_id); soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); mutex_unlock(&adev->srbm_mutex); @@ -1802,7 +1802,7 @@ static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id) ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id)); - gfx_v9_4_3_mqd_init(ring); + gfx_v9_4_3_xcc_mqd_init(ring, xcc_id); soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); mutex_unlock(&adev->srbm_mutex); -- cgit v1.2.3 From 228ce176434b0f61451019065393040d58e1668d Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com> Date: Fri, 27 Jan 2023 21:57:00 -0500 Subject: drm/amdgpu: Handle VRAM dependencies on GFXIP9.4.3 [For 1P NPS1 mode driver bringup] Changes required to initialize the amdgpu driver with frontdoor firmware loading and discovery=2 with the native mode SBIOS that enables CPU GPU unified interleaved memory. sudo modprobe amdgpu discovery=2 Once PSP TMR region is reported via the ACPI interface, the dependency on the ip_discovery.bin will be removed. Choice of where to allocate driver table is given to each IP version. In general, both GTT and VRAM domains will be considered. If one of the tables has a strict restriction for VRAM domain, then only VRAM domain is considered. Reviewed-by: Felix Kuehling <felix.kuehling@amd.com> (lijo: Modified the handling for SMU Tables) Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 89 +++++++++++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 7 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 3 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 9 ++- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 6 ++ drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 5 ++ drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 10 ++- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 6 +- 11 files changed, 99 insertions(+), 47 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index af37f2ef4438..4e179e50de25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2292,8 +2292,9 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev, (*mem)->dmabuf = dma_buf; (*mem)->bo = bo; (*mem)->va = va; - (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? + (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) && !adev->gmc.is_app_apu ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; + (*mem)->mapped_to_gpu_memory = 0; (*mem)->process_info = avm->process_info; add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 9f0d5f02119e..f431205e1077 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1044,7 +1044,7 @@ static const char * const amdgpu_vram_names[] = { int amdgpu_bo_init(struct amdgpu_device *adev) { /* On A+A platform, VRAM can be mapped as WB */ - if (!adev->gmc.xgmi.connected_to_cpu) { + if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) { /* reserve PAT memory space to WC for VRAM */ int r = arch_io_reserve_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 863fa331e6ff..4395c53d09d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -476,7 +476,8 @@ static int psp_sw_init(void *handle) return ret; ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT, &psp->fence_buf_bo, &psp->fence_buf_mc_addr, &psp->fence_buf); @@ -484,7 +485,8 @@ static int psp_sw_init(void *handle) goto failed1; ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT, &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, (void **)&psp->cmd_buf_mem); if (ret) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 6bbe3b89aef5..bc11ae56bba5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1708,15 +1708,20 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS; } - ret = amdgpu_bo_create_kernel_at(adev, - adev->gmc.real_vram_size - adev->mman.discovery_tmr_size, - adev->mman.discovery_tmr_size, - &adev->mman.discovery_memory, - NULL); - if (ret) { - DRM_ERROR("alloc tmr failed(%d)!\n", ret); - amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); - return ret; + if (!adev->gmc.is_app_apu) { + ret = amdgpu_bo_create_kernel_at(adev, + adev->gmc.real_vram_size - + adev->mman.discovery_tmr_size, + adev->mman.discovery_tmr_size, + &adev->mman.discovery_memory, + NULL); + if (ret) { + DRM_ERROR("alloc tmr failed(%d)!\n", ret); + amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); + return ret; + } + } else { + DRM_DEBUG_DRIVER("backdoor fw loading path for PSP TMR, no reservation needed\n"); } return 0; @@ -1765,10 +1770,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base, adev->gmc.visible_vram_size); - else + else if (!adev->gmc.is_app_apu) #endif adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base, adev->gmc.visible_vram_size); + else + DRM_DEBUG_DRIVER("No need to ioremap when real vram size is 0\n"); #endif /* @@ -1803,23 +1810,32 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) * This is used for VGA emulation and pre-OS scanout buffers to * avoid display artifacts while transitioning between pre-OS * and driver. */ - r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size, - &adev->mman.stolen_vga_memory, - NULL); - if (r) - return r; - r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size, - adev->mman.stolen_extended_size, - &adev->mman.stolen_extended_memory, - NULL); - if (r) - return r; - r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_reserved_offset, - adev->mman.stolen_reserved_size, - &adev->mman.stolen_reserved_memory, - NULL); - if (r) - return r; + if (!adev->gmc.is_app_apu) { + r = amdgpu_bo_create_kernel_at(adev, 0, + adev->mman.stolen_vga_size, + &adev->mman.stolen_vga_memory, + NULL); + if (r) + return r; + + r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size, + adev->mman.stolen_extended_size, + &adev->mman.stolen_extended_memory, + NULL); + + if (r) + return r; + + r = amdgpu_bo_create_kernel_at(adev, + adev->mman.stolen_reserved_offset, + adev->mman.stolen_reserved_size, + &adev->mman.stolen_reserved_memory, + NULL); + if (r) + return r; + } else { + DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n"); + } DRM_INFO("amdgpu: %uM of VRAM memory ready\n", (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); @@ -1866,7 +1882,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_ERROR("Failed initializing oa heap.\n"); return r; } - if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->mman.sdma_access_bo, NULL, @@ -1887,13 +1902,15 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) amdgpu_ttm_training_reserve_vram_fini(adev); /* return the stolen vga memory back to VRAM */ - amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL); - amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL); - /* return the IP Discovery TMR memory back to VRAM */ - amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); - if (adev->mman.stolen_reserved_size) - amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory, - NULL, NULL); + if (!adev->gmc.is_app_apu) { + amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL); + amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL); + /* return the IP Discovery TMR memory back to VRAM */ + amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); + if (adev->mman.stolen_reserved_size) + amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory, + NULL, NULL); + } amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL, &adev->mman.sdma_access_ptr); amdgpu_ttm_fw_reserve_vram_fini(adev); @@ -1935,7 +1952,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) int r; if (!adev->mman.initialized || amdgpu_in_reset(adev) || - adev->mman.buffer_funcs_enabled == enable) + adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu) return; if (enable) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index df63dc3bca18..bc5d126b600b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -512,7 +512,12 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, bp.size = amdgpu_vm_pt_size(adev, level); bp.byte_align = AMDGPU_GPU_PAGE_SIZE; - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + + if (!adev->gmc.is_app_apu) + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + else + bp.domain = AMDGPU_GEM_DOMAIN_GTT; + bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain); bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | AMDGPU_GEM_CREATE_CPU_GTT_USWC; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 1f1268cd5e09..42877c4505f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -459,7 +459,8 @@ static int gfx_v9_4_3_mec_init(struct amdgpu_device *adev) adev->gfx.num_compute_rings * num_xcc * GFX9_MEC_HPD_SIZE; if (mec_hpd_size) { r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT, &adev->gfx.mec.hpd_eop_obj, &adev->gfx.mec.hpd_eop_gpu_addr, (void **)&hpd); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 16634a791e10..245de27c7540 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1593,8 +1593,13 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) int r; /* size in MB on si */ - adev->gmc.mc_vram_size = - adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL; + if (!adev->gmc.is_app_apu) { + adev->gmc.mc_vram_size = + adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL; + } else { + DRM_DEBUG("Set mc_vram_size = 0 for APP APU\n"); + adev->gmc.mc_vram_size = 0; + } adev->gmc.real_vram_size = adev->gmc.mc_vram_size; if (!(adev->flags & AMD_IS_APU) && diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 8b9accecf49b..f85ac4dbc673 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1026,6 +1026,12 @@ bool kfd_dev_is_large_bar(struct kfd_node *dev) if (dev->kfd->local_mem_info.local_mem_size_private == 0 && dev->kfd->local_mem_info.local_mem_size_public > 0) return true; + + if (dev->kfd->local_mem_info.local_mem_size_public == 0 && dev->kfd->adev->gmc.is_app_apu) { + pr_debug("APP APU, Consider like a large bar system\n"); + return true; + } + return false; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 16475921587b..1aaf933f9f48 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -30,6 +30,9 @@ #include "amdgpu.h" #include "amdgpu_amdkfd.h" +/* Fixme: Fake 32GB for 1PNPS1 mode bringup */ +#define DUMMY_VRAM_SIZE 31138512896 + /* GPU Processor ID base for dGPUs for which VCRAT needs to be created. * GPU processor ID are expressed with Bit[31]=1. * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs @@ -1053,6 +1056,8 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem, props->heap_type = heap_type; props->flags = flags; + if (size_in_bytes == 0) + size_in_bytes = DUMMY_VRAM_SIZE; /* Fixme: TBD */ props->size_in_bytes = size_in_bytes; props->width = width; diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 2ddf5198e5c4..4dea79a0c5b5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -822,11 +822,20 @@ static int smu_init_fb_allocations(struct smu_context *smu) } } + driver_table->domain = AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT; /* VRAM allocation for driver table */ for (i = 0; i < SMU_TABLE_COUNT; i++) { if (tables[i].size == 0) continue; + /* If one of the tables has VRAM domain restriction, keep it in + * VRAM + */ + if ((tables[i].domain & + (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) == + AMDGPU_GEM_DOMAIN_VRAM) + driver_table->domain = AMDGPU_GEM_DOMAIN_VRAM; + if (i == SMU_TABLE_PMSTATUSLOG) continue; @@ -836,7 +845,6 @@ static int smu_init_fb_allocations(struct smu_context *smu) driver_table->size = max_table_size; driver_table->align = PAGE_SIZE; - driver_table->domain = AMDGPU_GEM_DOMAIN_VRAM; ret = amdgpu_bo_create_kernel(adev, driver_table->size, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index ea8f3d6fb98b..8969b3ff5c8f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -220,10 +220,12 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(MetricsTable_t), - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); SMU_TABLE_INIT(tables, SMU_TABLE_I2C_COMMANDS, sizeof(SwI2cRequest_t), - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); smu_table->metrics_table = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL); if (!smu_table->metrics_table) -- cgit v1.2.3 From ded7d99eb5b78931cec30dd49cd4097d0ac770e1 Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Mon, 16 Jan 2023 10:55:38 +0530 Subject: drm/amdgpu: Add flags for partition mode query It's not required to take lock on all cases while querying partition mode. Querying partition mode during KFD init process doesn't need to take a lock. Init process after a switch will already be happening under lock. Control the behaviour by adding flags to xcp_query_partition_mode. Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c | 8 +++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h | 5 ++++- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 4 +++- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 2 +- 5 files changed, 15 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 70c6099353b8..1487ecac2705 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1177,7 +1177,8 @@ static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev, int mode; char *partition_mode; - mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr); + mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr, + AMDGPU_XCP_FL_NONE); switch (mode) { case AMDGPU_SPX_PARTITION_MODE: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c index f59bc450cabe..5b999e5334bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c @@ -170,7 +170,7 @@ out: return ret; } -int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr) +int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags) { int mode; @@ -180,7 +180,8 @@ int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr) if (!xcp_mgr->funcs || !xcp_mgr->funcs->query_partition_mode) return xcp_mgr->mode; - mutex_lock(&xcp_mgr->xcp_lock); + if (!(flags & AMDGPU_XCP_FL_LOCKED)) + mutex_lock(&xcp_mgr->xcp_lock); mode = xcp_mgr->funcs->query_partition_mode(xcp_mgr); if (mode != xcp_mgr->mode) dev_WARN( @@ -188,7 +189,8 @@ int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr) "Cached partition mode %d not matching with device mode %d", xcp_mgr->mode, mode); - mutex_unlock(&xcp_mgr->xcp_lock); + if (!(flags & AMDGPU_XCP_FL_LOCKED)) + mutex_unlock(&xcp_mgr->xcp_lock); return mode; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h index f0b973c6092f..9fa6f0ea2061 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h @@ -30,6 +30,9 @@ #define AMDGPU_XCP_MODE_NONE -1 +#define AMDGPU_XCP_FL_NONE 0 +#define AMDGPU_XCP_FL_LOCKED (1 << 0) + enum AMDGPU_XCP_IP_BLOCK { AMDGPU_XCP_GFXHUB, AMDGPU_XCP_GFX, @@ -99,7 +102,7 @@ int amdgpu_xcp_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id); int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode, int init_xcps, struct amdgpu_xcp_mgr_funcs *xcp_funcs); -int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr); +int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags); int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode); int amdgpu_xcp_get_partition(struct amdgpu_xcp_mgr *xcp_mgr, enum AMDGPU_XCP_IP_BLOCK ip, int instance); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 42877c4505f1..69867294117e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1940,7 +1940,9 @@ static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) { int r, i, num_xcc; - if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr) == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) + if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr, + AMDGPU_XCP_FL_NONE) == + AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, amdgpu_user_partt_mode); num_xcc = NUM_XCC(adev->gfx.xcc_mask); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index d7cffd91f1d7..4293cbf9ceb0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -645,7 +645,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, * If the VMID range changes for GFX9.4.3, then this code MUST be * revisited. */ - partition_mode = amdgpu_xcp_query_partition_mode(kfd->adev->xcp_mgr); + partition_mode = amdgpu_xcp_query_partition_mode(kfd->adev->xcp_mgr, AMDGPU_XCP_FL_LOCKED); if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) && partition_mode == AMDGPU_CPX_PARTITION_MODE && kfd->num_nodes != 1) { -- cgit v1.2.3 From b6f90baafe267a0705c5d9b1429c875d3c39fbc7 Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Tue, 31 Jan 2023 12:39:49 +0530 Subject: drm/amdgpu: Move memory partition query to gmc GMC block handles memory related information, it makes more sense to keep memory partition functions in gmc block. Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Le Ma <le.ma@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 30 +--------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 11 --------- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 44 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 16 ++++++++++++ drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 10 -------- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 18 ++++++++++++++ 6 files changed, 79 insertions(+), 50 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 1487ecac2705..2f7a101593e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1204,24 +1204,6 @@ static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev, return sysfs_emit(buf, "%s\n", partition_mode); } -static ssize_t amdgpu_gfx_get_current_memory_partition(struct device *dev, - struct device_attribute *addr, - char *buf) -{ - struct drm_device *ddev = dev_get_drvdata(dev); - struct amdgpu_device *adev = drm_to_adev(ddev); - enum amdgpu_memory_partition mode; - static const char *partition_modes[] = { - "UNKNOWN", "NPS1", "NPS2", "NPS4", "NPS8" - }; - BUILD_BUG_ON(ARRAY_SIZE(partition_modes) <= AMDGPU_NPS8_PARTITION_MODE); - - mode = min((int)adev->gfx.funcs->query_mem_partition_mode(adev), - AMDGPU_NPS8_PARTITION_MODE); - - return sysfs_emit(buf, "%s\n", partition_modes[mode]); -} - static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev, struct device_attribute *addr, const char *buf, size_t count) @@ -1305,9 +1287,6 @@ static DEVICE_ATTR(current_compute_partition, S_IRUGO | S_IWUSR, static DEVICE_ATTR(available_compute_partition, S_IRUGO, amdgpu_gfx_get_available_compute_partition, NULL); -static DEVICE_ATTR(current_memory_partition, S_IRUGO, - amdgpu_gfx_get_current_memory_partition, NULL); - int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev) { int r; @@ -1317,19 +1296,12 @@ int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev) return r; r = device_create_file(adev->dev, &dev_attr_available_compute_partition); - if (r) - return r; - r = device_create_file(adev->dev, &dev_attr_current_memory_partition); - if (r) - return r; - - return 0; + return r; } void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev) { device_remove_file(adev->dev, &dev_attr_current_compute_partition); device_remove_file(adev->dev, &dev_attr_available_compute_partition); - device_remove_file(adev->dev, &dev_attr_current_memory_partition); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 81b4c7e684af..728977f8afe7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -71,14 +71,6 @@ enum amdgpu_pkg_type { AMDGPU_PKG_TYPE_UNKNOWN, }; -enum amdgpu_memory_partition { - UNKNOWN_MEMORY_PARTITION_MODE = 0, - AMDGPU_NPS1_PARTITION_MODE = 1, - AMDGPU_NPS2_PARTITION_MODE = 2, - AMDGPU_NPS4_PARTITION_MODE = 3, - AMDGPU_NPS8_PARTITION_MODE = 4, -}; - struct amdgpu_mec { struct amdgpu_bo *hpd_eop_obj; u64 hpd_eop_gpu_addr; @@ -276,8 +268,6 @@ struct amdgpu_gfx_funcs { struct amdgpu_gfx_shadow_info *shadow_info); enum amdgpu_gfx_partition (*query_partition_mode)(struct amdgpu_device *adev); - enum amdgpu_memory_partition - (*query_mem_partition_mode)(struct amdgpu_device *adev); int (*switch_partition_mode)(struct amdgpu_device *adev, int num_xccs_per_xcp); int (*ih_node_to_logical_xcc)(struct amdgpu_device *adev, int ih_node); @@ -414,7 +404,6 @@ struct amdgpu_gfx { bool cp_gfx_shadow; /* for gfx11 */ uint16_t xcc_mask; - enum amdgpu_memory_partition mem_partition_mode; uint32_t num_xcc_per_xcp; struct mutex partition_mutex; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index b8825a0670a4..d12625f1de5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -891,3 +891,47 @@ int amdgpu_gmc_vram_checking(struct amdgpu_device *adev) return 0; } + +static ssize_t current_memory_partition_show( + struct device *dev, struct device_attribute *addr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + enum amdgpu_memory_partition mode; + + mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); + switch (mode) { + case AMDGPU_NPS1_PARTITION_MODE: + return sysfs_emit(buf, "NPS1\n"); + case AMDGPU_NPS2_PARTITION_MODE: + return sysfs_emit(buf, "NPS2\n"); + case AMDGPU_NPS3_PARTITION_MODE: + return sysfs_emit(buf, "NPS3\n"); + case AMDGPU_NPS4_PARTITION_MODE: + return sysfs_emit(buf, "NPS4\n"); + case AMDGPU_NPS6_PARTITION_MODE: + return sysfs_emit(buf, "NPS6\n"); + case AMDGPU_NPS8_PARTITION_MODE: + return sysfs_emit(buf, "NPS8\n"); + default: + return sysfs_emit(buf, "UNKNOWN\n"); + } + + return sysfs_emit(buf, "UNKNOWN\n"); +} + +static DEVICE_ATTR_RO(current_memory_partition); + +int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev) +{ + if (!adev->gmc.gmc_funcs->query_mem_partition_mode) + return 0; + + return device_create_file(adev->dev, + &dev_attr_current_memory_partition); +} + +void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev) +{ + device_remove_file(adev->dev, &dev_attr_current_memory_partition); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index e408abfc2daf..2bd3b9665ebf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -63,6 +63,16 @@ struct firmware; +enum amdgpu_memory_partition { + UNKNOWN_MEMORY_PARTITION_MODE = 0, + AMDGPU_NPS1_PARTITION_MODE = 1, + AMDGPU_NPS2_PARTITION_MODE = 2, + AMDGPU_NPS3_PARTITION_MODE = 3, + AMDGPU_NPS4_PARTITION_MODE = 4, + AMDGPU_NPS6_PARTITION_MODE = 6, + AMDGPU_NPS8_PARTITION_MODE = 8, +}; + /* * GMC page fault information */ @@ -140,6 +150,9 @@ struct amdgpu_gmc_funcs { uint64_t *flags); /* get the amount of memory used by the vbios for pre-OS console */ unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev); + + enum amdgpu_memory_partition (*query_mem_partition_mode)( + struct amdgpu_device *adev); }; struct amdgpu_xgmi_ras { @@ -375,4 +388,7 @@ uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr); uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo); uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo); int amdgpu_gmc_vram_checking(struct amdgpu_device *adev); +int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev); +void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 69867294117e..81ab3cd2f229 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -607,16 +607,7 @@ static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev, { soc15_grbm_select(adev, me, pipe, q, vm, GET_INST(GC, xcc_id)); } -static enum amdgpu_memory_partition -gfx_v9_4_3_query_memory_partition(struct amdgpu_device *adev) -{ - enum amdgpu_memory_partition mode = UNKNOWN_MEMORY_PARTITION_MODE; - - if (adev->nbio.funcs->get_memory_partition_mode) - mode = adev->nbio.funcs->get_memory_partition_mode(adev); - return mode; -} static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev, int num_xccs_per_xcp) @@ -660,7 +651,6 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { .read_wave_vgprs = &gfx_v9_4_3_read_wave_vgprs, .select_me_pipe_q = &gfx_v9_4_3_select_me_pipe_q, .switch_partition_mode = &gfx_v9_4_3_switch_compute_partition, - .query_mem_partition_mode = &gfx_v9_4_3_query_memory_partition, .ih_node_to_logical_xcc = &gfx_v9_4_3_ih_to_xcc_inst, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 245de27c7540..db157a31a780 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1330,6 +1330,17 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) return size; } +static enum amdgpu_memory_partition +gmc_v9_0_query_memory_partition(struct amdgpu_device *adev) +{ + enum amdgpu_memory_partition mode = UNKNOWN_MEMORY_PARTITION_MODE; + + if (adev->nbio.funcs->get_memory_partition_mode) + mode = adev->nbio.funcs->get_memory_partition_mode(adev); + + return mode; +} + static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb, .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid, @@ -1339,6 +1350,7 @@ static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { .get_vm_pde = gmc_v9_0_get_vm_pde, .get_vm_pte = gmc_v9_0_get_vm_pte, .get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size, + .query_mem_partition_mode = &gmc_v9_0_query_memory_partition, }; static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev) @@ -1901,6 +1913,9 @@ static int gmc_v9_0_sw_init(void *handle) if (r) return r; + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) + amdgpu_gmc_sysfs_init(adev); + return 0; } @@ -1908,6 +1923,9 @@ static int gmc_v9_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) + amdgpu_gmc_sysfs_fini(adev); + amdgpu_gmc_ras_fini(adev); amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); -- cgit v1.2.3 From b6b85c8b43a85988ecd06f039f8f90c041842812 Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Tue, 7 Mar 2023 10:36:08 +0530 Subject: drm/amdgpu: Return error on invalid compute mode Return error if an invalid compute partition mode is requested. Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Le Ma <le.ma@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c | 8 ++++++-- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 6 +++++- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c index a165b51e9e58..848049db00ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c @@ -328,10 +328,14 @@ static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, adev = xcp_mgr->adev; num_xcc = NUM_XCC(adev->gfx.xcc_mask); - if (mode == AMDGPU_AUTO_COMPUTE_PARTITION_MODE) + if (mode == AMDGPU_AUTO_COMPUTE_PARTITION_MODE) { mode = __aqua_vanjaram_get_auto_mode(xcp_mgr); - else if (!__aqua_vanjaram_is_valid_mode(xcp_mgr, mode)) + } else if (!__aqua_vanjaram_is_valid_mode(xcp_mgr, mode)) { + dev_err(adev->dev, + "Invalid compute partition mode requested, requested: %s, available memory partitions: %d", + amdgpu_gfx_compute_mode_desc(mode), adev->gmc.num_mem_partitions); return -EINVAL; + } if (adev->kfd.init_complete) flags |= AMDGPU_XCP_OPS_KFD; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 81ab3cd2f229..d0ddcd751432 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1933,7 +1933,11 @@ static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr, AMDGPU_XCP_FL_NONE) == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) - amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, amdgpu_user_partt_mode); + r = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, + amdgpu_user_partt_mode); + + if (r) + return r; num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < num_xcc; i++) { -- cgit v1.2.3 From 1e91a5f79110b96baf7ad21d3c7b5c3e18cdf2a5 Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Thu, 16 Mar 2023 14:59:27 +0530 Subject: drm/amdgpu: Fix register accesses in GFX v9.4.3 Access registers with the right xcc id. Also, remove the unused logic as PG is not used in GFX v9.4.3 Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index d0ddcd751432..7ef2c9b515ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1004,16 +1004,6 @@ static void gfx_v9_4_3_xcc_init_pg(struct amdgpu_device *adev, int xcc_id) */ if (adev->gfx.rlc.is_rlc_v2_1) gfx_v9_4_3_xcc_enable_save_restore_machine(adev, xcc_id); - - if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | - AMD_PG_SUPPORT_GFX_SMG | - AMD_PG_SUPPORT_GFX_DMG | - AMD_PG_SUPPORT_CP | - AMD_PG_SUPPORT_GDS | - AMD_PG_SUPPORT_RLC_SMU_HS)) { - WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_JUMP_TABLE_RESTORE, - adev->gfx.rlc.cp_table_gpu_addr >> 8); - } } static void gfx_v9_4_3_xcc_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id) @@ -1071,7 +1061,7 @@ static void gfx_v9_4_3_xcc_set_safe_mode(struct amdgpu_device *adev, int xcc_id) /* wait for RLC_SAFE_MODE */ for (i = 0; i < adev->usec_timeout; i++) { - if (!REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) + if (!REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) break; udelay(1); } @@ -1107,7 +1097,7 @@ static void gfx_v9_4_3_xcc_wait_for_rlc_serdes(struct amdgpu_device *adev, gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, xcc_id); for (k = 0; k < adev->usec_timeout; k++) { - if (RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_SERDES_CU_MASTER_BUSY) == 0) + if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SERDES_CU_MASTER_BUSY) == 0) break; udelay(1); } @@ -1131,7 +1121,7 @@ static void gfx_v9_4_3_xcc_wait_for_rlc_serdes(struct amdgpu_device *adev, RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; for (k = 0; k < adev->usec_timeout; k++) { - if ((RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) + if ((RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) break; udelay(1); } -- cgit v1.2.3 From 00e1ab02c2ba31b2bd446979949193eb3ca2561c Mon Sep 17 00:00:00 2001 From: Lijo Lazar <lijo.lazar@amd.com> Date: Fri, 24 Mar 2023 15:21:30 +0530 Subject: drm/amdgpu: Skip halting RLC on GFX v9.4.3 RLC-PMFW handshake happens periodically when GFXCLK DPM is enabled and halting RLC may cause unexpected results. Avoid halting RLC from driver side. Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Le Ma <le.ma@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 7ef2c9b515ef..6cde05421a10 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1256,21 +1256,20 @@ static int gfx_v9_4_3_xcc_rlc_resume(struct amdgpu_device *adev, int xcc_id) { int r; - gfx_v9_4_3_xcc_rlc_stop(adev, xcc_id); - - /* disable CG */ - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, 0); - - gfx_v9_4_3_xcc_init_pg(adev, xcc_id); - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + gfx_v9_4_3_xcc_rlc_stop(adev, xcc_id); /* legacy rlc firmware loading */ r = gfx_v9_4_3_xcc_rlc_load_microcode(adev, xcc_id); if (r) return r; + gfx_v9_4_3_xcc_rlc_start(adev, xcc_id); } - gfx_v9_4_3_xcc_rlc_start(adev, xcc_id); + amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id); + /* disable CG */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, 0); + gfx_v9_4_3_xcc_init_pg(adev, xcc_id); + amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id); return 0; } @@ -1967,14 +1966,6 @@ static void gfx_v9_4_3_xcc_fini(struct amdgpu_device *adev, int xcc_id) gfx_v9_4_3_xcc_kcq_fini_register(adev, xcc_id); gfx_v9_4_3_xcc_cp_enable(adev, false, xcc_id); - - /* Skip suspend with A+A reset */ - if (adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) { - dev_dbg(adev->dev, "Device in reset. Skipping RLC halt\n"); - return; - } - - gfx_v9_4_3_xcc_rlc_stop(adev, xcc_id); } static int gfx_v9_4_3_hw_init(void *handle) -- cgit v1.2.3 From bf16235b39d4ca9c8dd47ec1b2faded6ea58f7a2 Mon Sep 17 00:00:00 2001 From: Tao Zhou <tao.zhou1@amd.com> Date: Thu, 2 Feb 2023 17:20:23 +0800 Subject: drm/amdgpu: add RAS status query for gfx_v9_4_3 Query GFX RAS status. v2: remove xcp operation. v3: change instance from 0 to xcc_id for register access. Signed-off-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 75 +++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 6cde05421a10..f178e3f565e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -2980,6 +2980,81 @@ static void gfx_v9_4_3_emit_wave_limit(struct amdgpu_ring *ring, bool enable) } } +static const struct soc15_reg_entry gfx_v9_4_3_ea_err_status_regs = { + SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 +}; + +static void gfx_v9_4_3_inst_query_ea_err_status(struct amdgpu_device *adev, + int xcc_id) +{ + uint32_t i, j; + uint32_t reg_value; + + mutex_lock(&adev->grbm_idx_mutex); + + for (i = 0; i < gfx_v9_4_3_ea_err_status_regs.se_num; i++) { + for (j = 0; j < gfx_v9_4_3_ea_err_status_regs.instance; j++) { + gfx_v9_4_3_xcc_select_se_sh(adev, i, 0, j, xcc_id); + reg_value = RREG32_SOC15(GC, GET_INST(GC, xcc_id), + regGCEA_ERR_STATUS); + if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_STATUS) || + REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_WRRSP_STATUS) || + REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) { + dev_warn(adev->dev, + "GCEA err detected at instance: %d, status: 0x%x!\n", + j, reg_value); + } + /* clear after read */ + reg_value = REG_SET_FIELD(reg_value, GCEA_ERR_STATUS, + CLEAR_ERROR_STATUS, 0x1); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS, + reg_value); + } + } + + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, + xcc_id); + mutex_unlock(&adev->grbm_idx_mutex); +} + +static void gfx_v9_4_3_inst_query_utc_err_status(struct amdgpu_device *adev, + int xcc_id) +{ + uint32_t data; + + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS); + if (data) { + dev_warn(adev->dev, "GFX UTCL2 Mem Ecc Status: 0x%x!\n", data); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS, 0x3); + } + + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS); + if (data) { + dev_warn(adev->dev, "GFX VML2 Mem Ecc Status: 0x%x!\n", data); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS, 0x3); + } + + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), + regVML2_WALKER_MEM_ECC_STATUS); + if (data) { + dev_warn(adev->dev, "GFX VML2 Walker Mem Ecc Status: 0x%x!\n", data); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS, + 0x3); + } +} + +static void gfx_v9_4_3_inst_query_ras_err_status(struct amdgpu_device *adev, + void *ras_error_status, int xcc_id) +{ + gfx_v9_4_3_inst_query_ea_err_status(adev, xcc_id); + gfx_v9_4_3_inst_query_utc_err_status(adev, xcc_id); +} + +static void gfx_v9_4_3_query_ras_error_status(struct amdgpu_device *adev) +{ + amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_query_ras_err_status); +} + static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { .name = "gfx_v9_4_3", .early_init = gfx_v9_4_3_early_init, -- cgit v1.2.3 From 47e7f527c8256a2fe3e61fcd5f59c18bc3fb53fc Mon Sep 17 00:00:00 2001 From: Tao Zhou <tao.zhou1@amd.com> Date: Fri, 3 Feb 2023 10:41:26 +0800 Subject: drm/amdgpu: add RAS status reset for gfx_v9_4_3 Reset GFX RAS status registers. v2: fix typo in title. remove xcp operation. v3: change instance from 0 to xcc_id for register access. Signed-off-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 41 +++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index f178e3f565e9..e6069d081f71 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -3050,11 +3050,52 @@ static void gfx_v9_4_3_inst_query_ras_err_status(struct amdgpu_device *adev, gfx_v9_4_3_inst_query_utc_err_status(adev, xcc_id); } +static void gfx_v9_4_3_inst_reset_utc_err_status(struct amdgpu_device *adev, + int xcc_id) +{ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS, 0x3); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS, 0x3); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS, 0x3); +} + +static void gfx_v9_4_3_inst_reset_ea_err_status(struct amdgpu_device *adev, + int xcc_id) +{ + uint32_t i, j; + uint32_t value; + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < gfx_v9_4_3_ea_err_status_regs.se_num; i++) { + for (j = 0; j < gfx_v9_4_3_ea_err_status_regs.instance; j++) { + gfx_v9_4_3_xcc_select_se_sh(adev, i, 0, j, xcc_id); + value = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS); + value = REG_SET_FIELD(value, GCEA_ERR_STATUS, + CLEAR_ERROR_STATUS, 0x1); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS, value); + } + } + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, + xcc_id); + mutex_unlock(&adev->grbm_idx_mutex); +} + +static void gfx_v9_4_3_inst_reset_ras_err_status(struct amdgpu_device *adev, + void *ras_error_status, int xcc_id) +{ + gfx_v9_4_3_inst_reset_utc_err_status(adev, xcc_id); + gfx_v9_4_3_inst_reset_ea_err_status(adev, xcc_id); +} + static void gfx_v9_4_3_query_ras_error_status(struct amdgpu_device *adev) { amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_query_ras_err_status); } +static void gfx_v9_4_3_reset_ras_error_status(struct amdgpu_device *adev) +{ + amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_status); +} + static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { .name = "gfx_v9_4_3", .early_init = gfx_v9_4_3_early_init, -- cgit v1.2.3 From 5c1c09a71634423604c47502d8059a5c098c6f40 Mon Sep 17 00:00:00 2001 From: Tao Zhou <tao.zhou1@amd.com> Date: Mon, 6 Feb 2023 11:38:19 +0800 Subject: drm/amdgpu: add RAS error count definitions for gfx_v9_4_3 Prepare for the query of GFX RAS ce/ue count. v2: remove xcp operation. only select_se_sh when instance number is more than 1. v3: add more CE/UE registsers to query list. add check for se_num before select_se_sh. change instance from 0 to xcc_id for register access. v4: move gfx memory id definitions to gfx_v9_4_3. v5: create a dedicated patch for adding error count query function. Signed-off-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 740 ++++++++++++++++++++++++++++++++ 1 file changed, 740 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index e6069d081f71..188b4d9a2cbb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -2980,6 +2980,746 @@ static void gfx_v9_4_3_emit_wave_limit(struct amdgpu_ring *ring, bool enable) } } +enum amdgpu_gfx_cp_ras_mem_id { + AMDGPU_GFX_CP_MEM1 = 1, + AMDGPU_GFX_CP_MEM2, + AMDGPU_GFX_CP_MEM3, + AMDGPU_GFX_CP_MEM4, + AMDGPU_GFX_CP_MEM5, +}; + +enum amdgpu_gfx_gcea_ras_mem_id { + AMDGPU_GFX_GCEA_IOWR_CMDMEM = 4, + AMDGPU_GFX_GCEA_IORD_CMDMEM, + AMDGPU_GFX_GCEA_GMIWR_CMDMEM, + AMDGPU_GFX_GCEA_GMIRD_CMDMEM, + AMDGPU_GFX_GCEA_DRAMWR_CMDMEM, + AMDGPU_GFX_GCEA_DRAMRD_CMDMEM, + AMDGPU_GFX_GCEA_MAM_DMEM0, + AMDGPU_GFX_GCEA_MAM_DMEM1, + AMDGPU_GFX_GCEA_MAM_DMEM2, + AMDGPU_GFX_GCEA_MAM_DMEM3, + AMDGPU_GFX_GCEA_MAM_AMEM0, + AMDGPU_GFX_GCEA_MAM_AMEM1, + AMDGPU_GFX_GCEA_MAM_AMEM2, + AMDGPU_GFX_GCEA_MAM_AMEM3, + AMDGPU_GFX_GCEA_MAM_AFLUSH_BUFFER, + AMDGPU_GFX_GCEA_WRET_TAGMEM, + AMDGPU_GFX_GCEA_RRET_TAGMEM, + AMDGPU_GFX_GCEA_IOWR_DATAMEM, + AMDGPU_GFX_GCEA_GMIWR_DATAMEM, + AMDGPU_GFX_GCEA_DRAM_DATAMEM, +}; + +enum amdgpu_gfx_gc_cane_ras_mem_id { + AMDGPU_GFX_GC_CANE_MEM0 = 0, +}; + +enum amdgpu_gfx_gcutcl2_ras_mem_id { + AMDGPU_GFX_GCUTCL2_MEM2P512X95 = 160, +}; + +enum amdgpu_gfx_gds_ras_mem_id { + AMDGPU_GFX_GDS_MEM0 = 0, +}; + +enum amdgpu_gfx_lds_ras_mem_id { + AMDGPU_GFX_LDS_BANK0 = 0, + AMDGPU_GFX_LDS_BANK1, + AMDGPU_GFX_LDS_BANK2, + AMDGPU_GFX_LDS_BANK3, + AMDGPU_GFX_LDS_BANK4, + AMDGPU_GFX_LDS_BANK5, + AMDGPU_GFX_LDS_BANK6, + AMDGPU_GFX_LDS_BANK7, + AMDGPU_GFX_LDS_BANK8, + AMDGPU_GFX_LDS_BANK9, + AMDGPU_GFX_LDS_BANK10, + AMDGPU_GFX_LDS_BANK11, + AMDGPU_GFX_LDS_BANK12, + AMDGPU_GFX_LDS_BANK13, + AMDGPU_GFX_LDS_BANK14, + AMDGPU_GFX_LDS_BANK15, + AMDGPU_GFX_LDS_BANK16, + AMDGPU_GFX_LDS_BANK17, + AMDGPU_GFX_LDS_BANK18, + AMDGPU_GFX_LDS_BANK19, + AMDGPU_GFX_LDS_BANK20, + AMDGPU_GFX_LDS_BANK21, + AMDGPU_GFX_LDS_BANK22, + AMDGPU_GFX_LDS_BANK23, + AMDGPU_GFX_LDS_BANK24, + AMDGPU_GFX_LDS_BANK25, + AMDGPU_GFX_LDS_BANK26, + AMDGPU_GFX_LDS_BANK27, + AMDGPU_GFX_LDS_BANK28, + AMDGPU_GFX_LDS_BANK29, + AMDGPU_GFX_LDS_BANK30, + AMDGPU_GFX_LDS_BANK31, + AMDGPU_GFX_LDS_SP_BUFFER_A, + AMDGPU_GFX_LDS_SP_BUFFER_B, +}; + +enum amdgpu_gfx_rlc_ras_mem_id { + AMDGPU_GFX_RLC_GPMF32 = 1, + AMDGPU_GFX_RLC_RLCVF32, + AMDGPU_GFX_RLC_SCRATCH, + AMDGPU_GFX_RLC_SRM_ARAM, + AMDGPU_GFX_RLC_SRM_DRAM, + AMDGPU_GFX_RLC_TCTAG, + AMDGPU_GFX_RLC_SPM_SE, + AMDGPU_GFX_RLC_SPM_GRBMT, +}; + +enum amdgpu_gfx_sp_ras_mem_id { + AMDGPU_GFX_SP_SIMDID0 = 0, +}; + +enum amdgpu_gfx_spi_ras_mem_id { + AMDGPU_GFX_SPI_MEM0 = 0, + AMDGPU_GFX_SPI_MEM1, + AMDGPU_GFX_SPI_MEM2, + AMDGPU_GFX_SPI_MEM3, +}; + +enum amdgpu_gfx_sqc_ras_mem_id { + AMDGPU_GFX_SQC_INST_CACHE_A = 100, + AMDGPU_GFX_SQC_INST_CACHE_B = 101, + AMDGPU_GFX_SQC_INST_CACHE_TAG_A = 102, + AMDGPU_GFX_SQC_INST_CACHE_TAG_B = 103, + AMDGPU_GFX_SQC_INST_CACHE_MISS_FIFO_A = 104, + AMDGPU_GFX_SQC_INST_CACHE_MISS_FIFO_B = 105, + AMDGPU_GFX_SQC_INST_CACHE_GATCL1_MISS_FIFO_A = 106, + AMDGPU_GFX_SQC_INST_CACHE_GATCL1_MISS_FIFO_B = 107, + AMDGPU_GFX_SQC_DATA_CACHE_A = 200, + AMDGPU_GFX_SQC_DATA_CACHE_B = 201, + AMDGPU_GFX_SQC_DATA_CACHE_TAG_A = 202, + AMDGPU_GFX_SQC_DATA_CACHE_TAG_B = 203, + AMDGPU_GFX_SQC_DATA_CACHE_MISS_FIFO_A = 204, + AMDGPU_GFX_SQC_DATA_CACHE_MISS_FIFO_B = 205, + AMDGPU_GFX_SQC_DATA_CACHE_HIT_FIFO_A = 206, + AMDGPU_GFX_SQC_DATA_CACHE_HIT_FIFO_B = 207, + AMDGPU_GFX_SQC_DIRTY_BIT_A = 208, + AMDGPU_GFX_SQC_DIRTY_BIT_B = 209, + AMDGPU_GFX_SQC_WRITE_DATA_BUFFER_CU0 = 210, + AMDGPU_GFX_SQC_WRITE_DATA_BUFFER_CU1 = 211, + AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_DATA_CACHE_A = 212, + AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_DATA_CACHE_B = 213, + AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_INST_CACHE = 108, +}; + +enum amdgpu_gfx_sq_ras_mem_id { + AMDGPU_GFX_SQ_SGPR_MEM0 = 0, + AMDGPU_GFX_SQ_SGPR_MEM1, + AMDGPU_GFX_SQ_SGPR_MEM2, + AMDGPU_GFX_SQ_SGPR_MEM3, +}; + +enum amdgpu_gfx_ta_ras_mem_id { + AMDGPU_GFX_TA_FS_AFIFO_RAM_LO = 1, + AMDGPU_GFX_TA_FS_AFIFO_RAM_HI, + AMDGPU_GFX_TA_FS_CFIFO_RAM, + AMDGPU_GFX_TA_FSX_LFIFO, + AMDGPU_GFX_TA_FS_DFIFO_RAM, +}; + +enum amdgpu_gfx_tcc_ras_mem_id { + AMDGPU_GFX_TCC_MEM1 = 1, +}; + +enum amdgpu_gfx_tca_ras_mem_id { + AMDGPU_GFX_TCA_MEM1 = 1, +}; + +enum amdgpu_gfx_tci_ras_mem_id { + AMDGPU_GFX_TCIW_MEM = 1, +}; + +enum amdgpu_gfx_tcp_ras_mem_id { + AMDGPU_GFX_TCP_LFIFO0 = 1, + AMDGPU_GFX_TCP_SET0BANK0_RAM, + AMDGPU_GFX_TCP_SET0BANK1_RAM, + AMDGPU_GFX_TCP_SET0BANK2_RAM, + AMDGPU_GFX_TCP_SET0BANK3_RAM, + AMDGPU_GFX_TCP_SET1BANK0_RAM, + AMDGPU_GFX_TCP_SET1BANK1_RAM, + AMDGPU_GFX_TCP_SET1BANK2_RAM, + AMDGPU_GFX_TCP_SET1BANK3_RAM, + AMDGPU_GFX_TCP_SET2BANK0_RAM, + AMDGPU_GFX_TCP_SET2BANK1_RAM, + AMDGPU_GFX_TCP_SET2BANK2_RAM, + AMDGPU_GFX_TCP_SET2BANK3_RAM, + AMDGPU_GFX_TCP_SET3BANK0_RAM, + AMDGPU_GFX_TCP_SET3BANK1_RAM, + AMDGPU_GFX_TCP_SET3BANK2_RAM, + AMDGPU_GFX_TCP_SET3BANK3_RAM, + AMDGPU_GFX_TCP_VM_FIFO, + AMDGPU_GFX_TCP_DB_TAGRAM0, + AMDGPU_GFX_TCP_DB_TAGRAM1, + AMDGPU_GFX_TCP_DB_TAGRAM2, + AMDGPU_GFX_TCP_DB_TAGRAM3, + AMDGPU_GFX_TCP_UTCL1_LFIFO_PROBE0, + AMDGPU_GFX_TCP_UTCL1_LFIFO_PROBE1, + AMDGPU_GFX_TCP_CMD_FIFO, +}; + +enum amdgpu_gfx_td_ras_mem_id { + AMDGPU_GFX_TD_UTD_CS_FIFO_MEM = 1, + AMDGPU_GFX_TD_UTD_SS_FIFO_LO_MEM, + AMDGPU_GFX_TD_UTD_SS_FIFO_HI_MEM, +}; + +enum amdgpu_gfx_tcx_ras_mem_id { + AMDGPU_GFX_TCX_FIFOD0 = 0, + AMDGPU_GFX_TCX_FIFOD1, + AMDGPU_GFX_TCX_FIFOD2, + AMDGPU_GFX_TCX_FIFOD3, + AMDGPU_GFX_TCX_FIFOD4, + AMDGPU_GFX_TCX_FIFOD5, + AMDGPU_GFX_TCX_FIFOD6, + AMDGPU_GFX_TCX_FIFOD7, + AMDGPU_GFX_TCX_FIFOB0, + AMDGPU_GFX_TCX_FIFOB1, + AMDGPU_GFX_TCX_FIFOB2, + AMDGPU_GFX_TCX_FIFOB3, + AMDGPU_GFX_TCX_FIFOB4, + AMDGPU_GFX_TCX_FIFOB5, + AMDGPU_GFX_TCX_FIFOB6, + AMDGPU_GFX_TCX_FIFOB7, + AMDGPU_GFX_TCX_FIFOA0, + AMDGPU_GFX_TCX_FIFOA1, + AMDGPU_GFX_TCX_FIFOA2, + AMDGPU_GFX_TCX_FIFOA3, + AMDGPU_GFX_TCX_FIFOA4, + AMDGPU_GFX_TCX_FIFOA5, + AMDGPU_GFX_TCX_FIFOA6, + AMDGPU_GFX_TCX_FIFOA7, + AMDGPU_GFX_TCX_CFIFO0, + AMDGPU_GFX_TCX_CFIFO1, + AMDGPU_GFX_TCX_CFIFO2, + AMDGPU_GFX_TCX_CFIFO3, + AMDGPU_GFX_TCX_CFIFO4, + AMDGPU_GFX_TCX_CFIFO5, + AMDGPU_GFX_TCX_CFIFO6, + AMDGPU_GFX_TCX_CFIFO7, + AMDGPU_GFX_TCX_FIFO_ACKB0, + AMDGPU_GFX_TCX_FIFO_ACKB1, + AMDGPU_GFX_TCX_FIFO_ACKB2, + AMDGPU_GFX_TCX_FIFO_ACKB3, + AMDGPU_GFX_TCX_FIFO_ACKB4, + AMDGPU_GFX_TCX_FIFO_ACKB5, + AMDGPU_GFX_TCX_FIFO_ACKB6, + AMDGPU_GFX_TCX_FIFO_ACKB7, + AMDGPU_GFX_TCX_FIFO_ACKD0, + AMDGPU_GFX_TCX_FIFO_ACKD1, + AMDGPU_GFX_TCX_FIFO_ACKD2, + AMDGPU_GFX_TCX_FIFO_ACKD3, + AMDGPU_GFX_TCX_FIFO_ACKD4, + AMDGPU_GFX_TCX_FIFO_ACKD5, + AMDGPU_GFX_TCX_FIFO_ACKD6, + AMDGPU_GFX_TCX_FIFO_ACKD7, + AMDGPU_GFX_TCX_DST_FIFOA0, + AMDGPU_GFX_TCX_DST_FIFOA1, + AMDGPU_GFX_TCX_DST_FIFOA2, + AMDGPU_GFX_TCX_DST_FIFOA3, + AMDGPU_GFX_TCX_DST_FIFOA4, + AMDGPU_GFX_TCX_DST_FIFOA5, + AMDGPU_GFX_TCX_DST_FIFOA6, + AMDGPU_GFX_TCX_DST_FIFOA7, + AMDGPU_GFX_TCX_DST_FIFOB0, + AMDGPU_GFX_TCX_DST_FIFOB1, + AMDGPU_GFX_TCX_DST_FIFOB2, + AMDGPU_GFX_TCX_DST_FIFOB3, + AMDGPU_GFX_TCX_DST_FIFOB4, + AMDGPU_GFX_TCX_DST_FIFOB5, + AMDGPU_GFX_TCX_DST_FIFOB6, + AMDGPU_GFX_TCX_DST_FIFOB7, + AMDGPU_GFX_TCX_DST_FIFOD0, + AMDGPU_GFX_TCX_DST_FIFOD1, + AMDGPU_GFX_TCX_DST_FIFOD2, + AMDGPU_GFX_TCX_DST_FIFOD3, + AMDGPU_GFX_TCX_DST_FIFOD4, + AMDGPU_GFX_TCX_DST_FIFOD5, + AMDGPU_GFX_TCX_DST_FIFOD6, + AMDGPU_GFX_TCX_DST_FIFOD7, + AMDGPU_GFX_TCX_DST_FIFO_ACKB0, + AMDGPU_GFX_TCX_DST_FIFO_ACKB1, + AMDGPU_GFX_TCX_DST_FIFO_ACKB2, + AMDGPU_GFX_TCX_DST_FIFO_ACKB3, + AMDGPU_GFX_TCX_DST_FIFO_ACKB4, + AMDGPU_GFX_TCX_DST_FIFO_ACKB5, + AMDGPU_GFX_TCX_DST_FIFO_ACKB6, + AMDGPU_GFX_TCX_DST_FIFO_ACKB7, + AMDGPU_GFX_TCX_DST_FIFO_ACKD0, + AMDGPU_GFX_TCX_DST_FIFO_ACKD1, + AMDGPU_GFX_TCX_DST_FIFO_ACKD2, + AMDGPU_GFX_TCX_DST_FIFO_ACKD3, + AMDGPU_GFX_TCX_DST_FIFO_ACKD4, + AMDGPU_GFX_TCX_DST_FIFO_ACKD5, + AMDGPU_GFX_TCX_DST_FIFO_ACKD6, + AMDGPU_GFX_TCX_DST_FIFO_ACKD7, +}; + +enum amdgpu_gfx_atc_l2_ras_mem_id { + AMDGPU_GFX_ATC_L2_MEM0 = 0, +}; + +enum amdgpu_gfx_utcl2_ras_mem_id { + AMDGPU_GFX_UTCL2_MEM0 = 0, +}; + +enum amdgpu_gfx_vml2_ras_mem_id { + AMDGPU_GFX_VML2_MEM0 = 0, +}; + +enum amdgpu_gfx_vml2_walker_ras_mem_id { + AMDGPU_GFX_VML2_WALKER_MEM0 = 0, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_cp_mem_list[] = { + {AMDGPU_GFX_CP_MEM1, "CP_MEM1"}, + {AMDGPU_GFX_CP_MEM2, "CP_MEM2"}, + {AMDGPU_GFX_CP_MEM3, "CP_MEM3"}, + {AMDGPU_GFX_CP_MEM4, "CP_MEM4"}, + {AMDGPU_GFX_CP_MEM5, "CP_MEM5"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_gcea_mem_list[] = { + {AMDGPU_GFX_GCEA_IOWR_CMDMEM, "GCEA_IOWR_CMDMEM"}, + {AMDGPU_GFX_GCEA_IORD_CMDMEM, "GCEA_IORD_CMDMEM"}, + {AMDGPU_GFX_GCEA_GMIWR_CMDMEM, "GCEA_GMIWR_CMDMEM"}, + {AMDGPU_GFX_GCEA_GMIRD_CMDMEM, "GCEA_GMIRD_CMDMEM"}, + {AMDGPU_GFX_GCEA_DRAMWR_CMDMEM, "GCEA_DRAMWR_CMDMEM"}, + {AMDGPU_GFX_GCEA_DRAMRD_CMDMEM, "GCEA_DRAMRD_CMDMEM"}, + {AMDGPU_GFX_GCEA_MAM_DMEM0, "GCEA_MAM_DMEM0"}, + {AMDGPU_GFX_GCEA_MAM_DMEM1, "GCEA_MAM_DMEM1"}, + {AMDGPU_GFX_GCEA_MAM_DMEM2, "GCEA_MAM_DMEM2"}, + {AMDGPU_GFX_GCEA_MAM_DMEM3, "GCEA_MAM_DMEM3"}, + {AMDGPU_GFX_GCEA_MAM_AMEM0, "GCEA_MAM_AMEM0"}, + {AMDGPU_GFX_GCEA_MAM_AMEM1, "GCEA_MAM_AMEM1"}, + {AMDGPU_GFX_GCEA_MAM_AMEM2, "GCEA_MAM_AMEM2"}, + {AMDGPU_GFX_GCEA_MAM_AMEM3, "GCEA_MAM_AMEM3"}, + {AMDGPU_GFX_GCEA_MAM_AFLUSH_BUFFER, "GCEA_MAM_AFLUSH_BUFFER"}, + {AMDGPU_GFX_GCEA_WRET_TAGMEM, "GCEA_WRET_TAGMEM"}, + {AMDGPU_GFX_GCEA_RRET_TAGMEM, "GCEA_RRET_TAGMEM"}, + {AMDGPU_GFX_GCEA_IOWR_DATAMEM, "GCEA_IOWR_DATAMEM"}, + {AMDGPU_GFX_GCEA_GMIWR_DATAMEM, "GCEA_GMIWR_DATAMEM"}, + {AMDGPU_GFX_GCEA_DRAM_DATAMEM, "GCEA_DRAM_DATAMEM"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_gc_cane_mem_list[] = { + {AMDGPU_GFX_GC_CANE_MEM0, "GC_CANE_MEM0"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_gcutcl2_mem_list[] = { + {AMDGPU_GFX_GCUTCL2_MEM2P512X95, "GCUTCL2_MEM2P512X95"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_gds_mem_list[] = { + {AMDGPU_GFX_GDS_MEM0, "GDS_MEM"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_lds_mem_list[] = { + {AMDGPU_GFX_LDS_BANK0, "LDS_BANK0"}, + {AMDGPU_GFX_LDS_BANK1, "LDS_BANK1"}, + {AMDGPU_GFX_LDS_BANK2, "LDS_BANK2"}, + {AMDGPU_GFX_LDS_BANK3, "LDS_BANK3"}, + {AMDGPU_GFX_LDS_BANK4, "LDS_BANK4"}, + {AMDGPU_GFX_LDS_BANK5, "LDS_BANK5"}, + {AMDGPU_GFX_LDS_BANK6, "LDS_BANK6"}, + {AMDGPU_GFX_LDS_BANK7, "LDS_BANK7"}, + {AMDGPU_GFX_LDS_BANK8, "LDS_BANK8"}, + {AMDGPU_GFX_LDS_BANK9, "LDS_BANK9"}, + {AMDGPU_GFX_LDS_BANK10, "LDS_BANK10"}, + {AMDGPU_GFX_LDS_BANK11, "LDS_BANK11"}, + {AMDGPU_GFX_LDS_BANK12, "LDS_BANK12"}, + {AMDGPU_GFX_LDS_BANK13, "LDS_BANK13"}, + {AMDGPU_GFX_LDS_BANK14, "LDS_BANK14"}, + {AMDGPU_GFX_LDS_BANK15, "LDS_BANK15"}, + {AMDGPU_GFX_LDS_BANK16, "LDS_BANK16"}, + {AMDGPU_GFX_LDS_BANK17, "LDS_BANK17"}, + {AMDGPU_GFX_LDS_BANK18, "LDS_BANK18"}, + {AMDGPU_GFX_LDS_BANK19, "LDS_BANK19"}, + {AMDGPU_GFX_LDS_BANK20, "LDS_BANK20"}, + {AMDGPU_GFX_LDS_BANK21, "LDS_BANK21"}, + {AMDGPU_GFX_LDS_BANK22, "LDS_BANK22"}, + {AMDGPU_GFX_LDS_BANK23, "LDS_BANK23"}, + {AMDGPU_GFX_LDS_BANK24, "LDS_BANK24"}, + {AMDGPU_GFX_LDS_BANK25, "LDS_BANK25"}, + {AMDGPU_GFX_LDS_BANK26, "LDS_BANK26"}, + {AMDGPU_GFX_LDS_BANK27, "LDS_BANK27"}, + {AMDGPU_GFX_LDS_BANK28, "LDS_BANK28"}, + {AMDGPU_GFX_LDS_BANK29, "LDS_BANK29"}, + {AMDGPU_GFX_LDS_BANK30, "LDS_BANK30"}, + {AMDGPU_GFX_LDS_BANK31, "LDS_BANK31"}, + {AMDGPU_GFX_LDS_SP_BUFFER_A, "LDS_SP_BUFFER_A"}, + {AMDGPU_GFX_LDS_SP_BUFFER_B, "LDS_SP_BUFFER_B"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_rlc_mem_list[] = { + {AMDGPU_GFX_RLC_GPMF32, "RLC_GPMF32"}, + {AMDGPU_GFX_RLC_RLCVF32, "RLC_RLCVF32"}, + {AMDGPU_GFX_RLC_SCRATCH, "RLC_SCRATCH"}, + {AMDGPU_GFX_RLC_SRM_ARAM, "RLC_SRM_ARAM"}, + {AMDGPU_GFX_RLC_SRM_DRAM, "RLC_SRM_DRAM"}, + {AMDGPU_GFX_RLC_TCTAG, "RLC_TCTAG"}, + {AMDGPU_GFX_RLC_SPM_SE, "RLC_SPM_SE"}, + {AMDGPU_GFX_RLC_SPM_GRBMT, "RLC_SPM_GRBMT"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_sp_mem_list[] = { + {AMDGPU_GFX_SP_SIMDID0, "SP_SIMDID0"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_spi_mem_list[] = { + {AMDGPU_GFX_SPI_MEM0, "SPI_MEM0"}, + {AMDGPU_GFX_SPI_MEM1, "SPI_MEM1"}, + {AMDGPU_GFX_SPI_MEM2, "SPI_MEM2"}, + {AMDGPU_GFX_SPI_MEM3, "SPI_MEM3"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_sqc_mem_list[] = { + {AMDGPU_GFX_SQC_INST_CACHE_A, "SQC_INST_CACHE_A"}, + {AMDGPU_GFX_SQC_INST_CACHE_B, "SQC_INST_CACHE_B"}, + {AMDGPU_GFX_SQC_INST_CACHE_TAG_A, "SQC_INST_CACHE_TAG_A"}, + {AMDGPU_GFX_SQC_INST_CACHE_TAG_B, "SQC_INST_CACHE_TAG_B"}, + {AMDGPU_GFX_SQC_INST_CACHE_MISS_FIFO_A, "SQC_INST_CACHE_MISS_FIFO_A"}, + {AMDGPU_GFX_SQC_INST_CACHE_MISS_FIFO_B, "SQC_INST_CACHE_MISS_FIFO_B"}, + {AMDGPU_GFX_SQC_INST_CACHE_GATCL1_MISS_FIFO_A, "SQC_INST_CACHE_GATCL1_MISS_FIFO_A"}, + {AMDGPU_GFX_SQC_INST_CACHE_GATCL1_MISS_FIFO_B, "SQC_INST_CACHE_GATCL1_MISS_FIFO_B"}, + {AMDGPU_GFX_SQC_DATA_CACHE_A, "SQC_DATA_CACHE_A"}, + {AMDGPU_GFX_SQC_DATA_CACHE_B, "SQC_DATA_CACHE_B"}, + {AMDGPU_GFX_SQC_DATA_CACHE_TAG_A, "SQC_DATA_CACHE_TAG_A"}, + {AMDGPU_GFX_SQC_DATA_CACHE_TAG_B, "SQC_DATA_CACHE_TAG_B"}, + {AMDGPU_GFX_SQC_DATA_CACHE_MISS_FIFO_A, "SQC_DATA_CACHE_MISS_FIFO_A"}, + {AMDGPU_GFX_SQC_DATA_CACHE_MISS_FIFO_B, "SQC_DATA_CACHE_MISS_FIFO_B"}, + {AMDGPU_GFX_SQC_DATA_CACHE_HIT_FIFO_A, "SQC_DATA_CACHE_HIT_FIFO_A"}, + {AMDGPU_GFX_SQC_DATA_CACHE_HIT_FIFO_B, "SQC_DATA_CACHE_HIT_FIFO_B"}, + {AMDGPU_GFX_SQC_DIRTY_BIT_A, "SQC_DIRTY_BIT_A"}, + {AMDGPU_GFX_SQC_DIRTY_BIT_B, "SQC_DIRTY_BIT_B"}, + {AMDGPU_GFX_SQC_WRITE_DATA_BUFFER_CU0, "SQC_WRITE_DATA_BUFFER_CU0"}, + {AMDGPU_GFX_SQC_WRITE_DATA_BUFFER_CU1, "SQC_WRITE_DATA_BUFFER_CU1"}, + {AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_DATA_CACHE_A, "SQC_UTCL1_MISS_LFIFO_DATA_CACHE_A"}, + {AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_DATA_CACHE_B, "SQC_UTCL1_MISS_LFIFO_DATA_CACHE_B"}, + {AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_INST_CACHE, "SQC_UTCL1_MISS_LFIFO_INST_CACHE"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_sq_mem_list[] = { + {AMDGPU_GFX_SQ_SGPR_MEM0, "SQ_SGPR_MEM0"}, + {AMDGPU_GFX_SQ_SGPR_MEM1, "SQ_SGPR_MEM1"}, + {AMDGPU_GFX_SQ_SGPR_MEM2, "SQ_SGPR_MEM2"}, + {AMDGPU_GFX_SQ_SGPR_MEM3, "SQ_SGPR_MEM3"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_ta_mem_list[] = { + {AMDGPU_GFX_TA_FS_AFIFO_RAM_LO, "TA_FS_AFIFO_RAM_LO"}, + {AMDGPU_GFX_TA_FS_AFIFO_RAM_HI, "TA_FS_AFIFO_RAM_HI"}, + {AMDGPU_GFX_TA_FS_CFIFO_RAM, "TA_FS_CFIFO_RAM"}, + {AMDGPU_GFX_TA_FSX_LFIFO, "TA_FSX_LFIFO"}, + {AMDGPU_GFX_TA_FS_DFIFO_RAM, "TA_FS_DFIFO_RAM"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tcc_mem_list[] = { + {AMDGPU_GFX_TCC_MEM1, "TCC_MEM1"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tca_mem_list[] = { + {AMDGPU_GFX_TCA_MEM1, "TCA_MEM1"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tci_mem_list[] = { + {AMDGPU_GFX_TCIW_MEM, "TCIW_MEM"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tcp_mem_list[] = { + {AMDGPU_GFX_TCP_LFIFO0, "TCP_LFIFO0"}, + {AMDGPU_GFX_TCP_SET0BANK0_RAM, "TCP_SET0BANK0_RAM"}, + {AMDGPU_GFX_TCP_SET0BANK1_RAM, "TCP_SET0BANK1_RAM"}, + {AMDGPU_GFX_TCP_SET0BANK2_RAM, "TCP_SET0BANK2_RAM"}, + {AMDGPU_GFX_TCP_SET0BANK3_RAM, "TCP_SET0BANK3_RAM"}, + {AMDGPU_GFX_TCP_SET1BANK0_RAM, "TCP_SET1BANK0_RAM"}, + {AMDGPU_GFX_TCP_SET1BANK1_RAM, "TCP_SET1BANK1_RAM"}, + {AMDGPU_GFX_TCP_SET1BANK2_RAM, "TCP_SET1BANK2_RAM"}, + {AMDGPU_GFX_TCP_SET1BANK3_RAM, "TCP_SET1BANK3_RAM"}, + {AMDGPU_GFX_TCP_SET2BANK0_RAM, "TCP_SET2BANK0_RAM"}, + {AMDGPU_GFX_TCP_SET2BANK1_RAM, "TCP_SET2BANK1_RAM"}, + {AMDGPU_GFX_TCP_SET2BANK2_RAM, "TCP_SET2BANK2_RAM"}, + {AMDGPU_GFX_TCP_SET2BANK3_RAM, "TCP_SET2BANK3_RAM"}, + {AMDGPU_GFX_TCP_SET3BANK0_RAM, "TCP_SET3BANK0_RAM"}, + {AMDGPU_GFX_TCP_SET3BANK1_RAM, "TCP_SET3BANK1_RAM"}, + {AMDGPU_GFX_TCP_SET3BANK2_RAM, "TCP_SET3BANK2_RAM"}, + {AMDGPU_GFX_TCP_SET3BANK3_RAM, "TCP_SET3BANK3_RAM"}, + {AMDGPU_GFX_TCP_VM_FIFO, "TCP_VM_FIFO"}, + {AMDGPU_GFX_TCP_DB_TAGRAM0, "TCP_DB_TAGRAM0"}, + {AMDGPU_GFX_TCP_DB_TAGRAM1, "TCP_DB_TAGRAM1"}, + {AMDGPU_GFX_TCP_DB_TAGRAM2, "TCP_DB_TAGRAM2"}, + {AMDGPU_GFX_TCP_DB_TAGRAM3, "TCP_DB_TAGRAM3"}, + {AMDGPU_GFX_TCP_UTCL1_LFIFO_PROBE0, "TCP_UTCL1_LFIFO_PROBE0"}, + {AMDGPU_GFX_TCP_UTCL1_LFIFO_PROBE1, "TCP_UTCL1_LFIFO_PROBE1"}, + {AMDGPU_GFX_TCP_CMD_FIFO, "TCP_CMD_FIFO"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_td_mem_list[] = { + {AMDGPU_GFX_TD_UTD_CS_FIFO_MEM, "TD_UTD_CS_FIFO_MEM"}, + {AMDGPU_GFX_TD_UTD_SS_FIFO_LO_MEM, "TD_UTD_SS_FIFO_LO_MEM"}, + {AMDGPU_GFX_TD_UTD_SS_FIFO_HI_MEM, "TD_UTD_SS_FIFO_HI_MEM"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tcx_mem_list[] = { + {AMDGPU_GFX_TCX_FIFOD0, "TCX_FIFOD0"}, + {AMDGPU_GFX_TCX_FIFOD1, "TCX_FIFOD1"}, + {AMDGPU_GFX_TCX_FIFOD2, "TCX_FIFOD2"}, + {AMDGPU_GFX_TCX_FIFOD3, "TCX_FIFOD3"}, + {AMDGPU_GFX_TCX_FIFOD4, "TCX_FIFOD4"}, + {AMDGPU_GFX_TCX_FIFOD5, "TCX_FIFOD5"}, + {AMDGPU_GFX_TCX_FIFOD6, "TCX_FIFOD6"}, + {AMDGPU_GFX_TCX_FIFOD7, "TCX_FIFOD7"}, + {AMDGPU_GFX_TCX_FIFOB0, "TCX_FIFOB0"}, + {AMDGPU_GFX_TCX_FIFOB1, "TCX_FIFOB1"}, + {AMDGPU_GFX_TCX_FIFOB2, "TCX_FIFOB2"}, + {AMDGPU_GFX_TCX_FIFOB3, "TCX_FIFOB3"}, + {AMDGPU_GFX_TCX_FIFOB4, "TCX_FIFOB4"}, + {AMDGPU_GFX_TCX_FIFOB5, "TCX_FIFOB5"}, + {AMDGPU_GFX_TCX_FIFOB6, "TCX_FIFOB6"}, + {AMDGPU_GFX_TCX_FIFOB7, "TCX_FIFOB7"}, + {AMDGPU_GFX_TCX_FIFOA0, "TCX_FIFOA0"}, + {AMDGPU_GFX_TCX_FIFOA1, "TCX_FIFOA1"}, + {AMDGPU_GFX_TCX_FIFOA2, "TCX_FIFOA2"}, + {AMDGPU_GFX_TCX_FIFOA3, "TCX_FIFOA3"}, + {AMDGPU_GFX_TCX_FIFOA4, "TCX_FIFOA4"}, + {AMDGPU_GFX_TCX_FIFOA5, "TCX_FIFOA5"}, + {AMDGPU_GFX_TCX_FIFOA6, "TCX_FIFOA6"}, + {AMDGPU_GFX_TCX_FIFOA7, "TCX_FIFOA7"}, + {AMDGPU_GFX_TCX_CFIFO0, "TCX_CFIFO0"}, + {AMDGPU_GFX_TCX_CFIFO1, "TCX_CFIFO1"}, + {AMDGPU_GFX_TCX_CFIFO2, "TCX_CFIFO2"}, + {AMDGPU_GFX_TCX_CFIFO3, "TCX_CFIFO3"}, + {AMDGPU_GFX_TCX_CFIFO4, "TCX_CFIFO4"}, + {AMDGPU_GFX_TCX_CFIFO5, "TCX_CFIFO5"}, + {AMDGPU_GFX_TCX_CFIFO6, "TCX_CFIFO6"}, + {AMDGPU_GFX_TCX_CFIFO7, "TCX_CFIFO7"}, + {AMDGPU_GFX_TCX_FIFO_ACKB0, "TCX_FIFO_ACKB0"}, + {AMDGPU_GFX_TCX_FIFO_ACKB1, "TCX_FIFO_ACKB1"}, + {AMDGPU_GFX_TCX_FIFO_ACKB2, "TCX_FIFO_ACKB2"}, + {AMDGPU_GFX_TCX_FIFO_ACKB3, "TCX_FIFO_ACKB3"}, + {AMDGPU_GFX_TCX_FIFO_ACKB4, "TCX_FIFO_ACKB4"}, + {AMDGPU_GFX_TCX_FIFO_ACKB5, "TCX_FIFO_ACKB5"}, + {AMDGPU_GFX_TCX_FIFO_ACKB6, "TCX_FIFO_ACKB6"}, + {AMDGPU_GFX_TCX_FIFO_ACKB7, "TCX_FIFO_ACKB7"}, + {AMDGPU_GFX_TCX_FIFO_ACKD0, "TCX_FIFO_ACKD0"}, + {AMDGPU_GFX_TCX_FIFO_ACKD1, "TCX_FIFO_ACKD1"}, + {AMDGPU_GFX_TCX_FIFO_ACKD2, "TCX_FIFO_ACKD2"}, + {AMDGPU_GFX_TCX_FIFO_ACKD3, "TCX_FIFO_ACKD3"}, + {AMDGPU_GFX_TCX_FIFO_ACKD4, "TCX_FIFO_ACKD4"}, + {AMDGPU_GFX_TCX_FIFO_ACKD5, "TCX_FIFO_ACKD5"}, + {AMDGPU_GFX_TCX_FIFO_ACKD6, "TCX_FIFO_ACKD6"}, + {AMDGPU_GFX_TCX_FIFO_ACKD7, "TCX_FIFO_ACKD7"}, + {AMDGPU_GFX_TCX_DST_FIFOA0, "TCX_DST_FIFOA0"}, + {AMDGPU_GFX_TCX_DST_FIFOA1, "TCX_DST_FIFOA1"}, + {AMDGPU_GFX_TCX_DST_FIFOA2, "TCX_DST_FIFOA2"}, + {AMDGPU_GFX_TCX_DST_FIFOA3, "TCX_DST_FIFOA3"}, + {AMDGPU_GFX_TCX_DST_FIFOA4, "TCX_DST_FIFOA4"}, + {AMDGPU_GFX_TCX_DST_FIFOA5, "TCX_DST_FIFOA5"}, + {AMDGPU_GFX_TCX_DST_FIFOA6, "TCX_DST_FIFOA6"}, + {AMDGPU_GFX_TCX_DST_FIFOA7, "TCX_DST_FIFOA7"}, + {AMDGPU_GFX_TCX_DST_FIFOB0, "TCX_DST_FIFOB0"}, + {AMDGPU_GFX_TCX_DST_FIFOB1, "TCX_DST_FIFOB1"}, + {AMDGPU_GFX_TCX_DST_FIFOB2, "TCX_DST_FIFOB2"}, + {AMDGPU_GFX_TCX_DST_FIFOB3, "TCX_DST_FIFOB3"}, + {AMDGPU_GFX_TCX_DST_FIFOB4, "TCX_DST_FIFOB4"}, + {AMDGPU_GFX_TCX_DST_FIFOB5, "TCX_DST_FIFOB5"}, + {AMDGPU_GFX_TCX_DST_FIFOB6, "TCX_DST_FIFOB6"}, + {AMDGPU_GFX_TCX_DST_FIFOB7, "TCX_DST_FIFOB7"}, + {AMDGPU_GFX_TCX_DST_FIFOD0, "TCX_DST_FIFOD0"}, + {AMDGPU_GFX_TCX_DST_FIFOD1, "TCX_DST_FIFOD1"}, + {AMDGPU_GFX_TCX_DST_FIFOD2, "TCX_DST_FIFOD2"}, + {AMDGPU_GFX_TCX_DST_FIFOD3, "TCX_DST_FIFOD3"}, + {AMDGPU_GFX_TCX_DST_FIFOD4, "TCX_DST_FIFOD4"}, + {AMDGPU_GFX_TCX_DST_FIFOD5, "TCX_DST_FIFOD5"}, + {AMDGPU_GFX_TCX_DST_FIFOD6, "TCX_DST_FIFOD6"}, + {AMDGPU_GFX_TCX_DST_FIFOD7, "TCX_DST_FIFOD7"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKB0, "TCX_DST_FIFO_ACKB0"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKB1, "TCX_DST_FIFO_ACKB1"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKB2, "TCX_DST_FIFO_ACKB2"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKB3, "TCX_DST_FIFO_ACKB3"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKB4, "TCX_DST_FIFO_ACKB4"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKB5, "TCX_DST_FIFO_ACKB5"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKB6, "TCX_DST_FIFO_ACKB6"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKB7, "TCX_DST_FIFO_ACKB7"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKD0, "TCX_DST_FIFO_ACKD0"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKD1, "TCX_DST_FIFO_ACKD1"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKD2, "TCX_DST_FIFO_ACKD2"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKD3, "TCX_DST_FIFO_ACKD3"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKD4, "TCX_DST_FIFO_ACKD4"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKD5, "TCX_DST_FIFO_ACKD5"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKD6, "TCX_DST_FIFO_ACKD6"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKD7, "TCX_DST_FIFO_ACKD7"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_atc_l2_mem_list[] = { + {AMDGPU_GFX_ATC_L2_MEM, "ATC_L2_MEM"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_utcl2_mem_list[] = { + {AMDGPU_GFX_UTCL2_MEM, "UTCL2_MEM"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_vml2_mem_list[] = { + {AMDGPU_GFX_VML2_MEM, "VML2_MEM"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_vml2_walker_mem_list[] = { + {AMDGPU_GFX_VML2_WALKER_MEM, "VML2_WALKER_MEM"}, +}; + +static const struct amdgpu_gfx_ras_mem_id_entry gfx_v9_4_3_ras_mem_list_array[AMDGPU_GFX_MEM_TYPE_NUM] = { + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_cp_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_gcea_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_gc_cane_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_gcutcl2_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_gds_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_lds_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_rlc_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_sp_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_spi_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_sqc_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_sq_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_ta_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tcc_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tca_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tci_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tcp_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_td_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tcx_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_atc_l2_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_utcl2_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_vml2_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_vml2_walker_mem_list) +}; + +static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ce_reg_list[] = { + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regRLC_CE_ERR_STATUS_LOW, regRLC_CE_ERR_STATUS_HIGH), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "RLC"}, + AMDGPU_GFX_RLC_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPC_CE_ERR_STATUS_LO, regCPC_CE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPC"}, + AMDGPU_GFX_CP_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPF_CE_ERR_STATUS_LO, regCPF_CE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPF"}, + AMDGPU_GFX_CP_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPG_CE_ERR_STATUS_LO, regCPG_CE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPG"}, + AMDGPU_GFX_CP_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGDS_CE_ERR_STATUS_LO, regGDS_CE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "GDS"}, + AMDGPU_GFX_GDS_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGC_CANE_CE_ERR_STATUS_LO, regGC_CANE_CE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CANE"}, + AMDGPU_GFX_GC_CANE_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSPI_CE_ERR_STATUS_LO, regSPI_CE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SPI"}, + AMDGPU_GFX_SPI_MEM, 8}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP0_CE_ERR_STATUS_LO, regSP0_CE_ERR_STATUS_HI), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP0"}, + AMDGPU_GFX_SP_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP1_CE_ERR_STATUS_LO, regSP1_CE_ERR_STATUS_HI), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP1"}, + AMDGPU_GFX_SP_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQ_CE_ERR_STATUS_LO, regSQ_CE_ERR_STATUS_HI), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQ"}, + AMDGPU_GFX_SQ_MEM, 8}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQC_CE_EDC_LO, regSQC_CE_EDC_HI), + 5, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQC"}, + AMDGPU_GFX_SQC_MEM, 8}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCX_CE_ERR_STATUS_LO, regTCX_CE_ERR_STATUS_HI), + 2, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCX"}, + AMDGPU_GFX_TCX_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCC_CE_ERR_STATUS_LO, regTCC_CE_ERR_STATUS_HI), + 16, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCC"}, + AMDGPU_GFX_TCC_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTA_CE_EDC_LO, regTA_CE_EDC_HI), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TA"}, + AMDGPU_GFX_TA_MEM, 8}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCI_CE_EDC_LO_REG, regTCI_CE_EDC_HI_REG), + 31, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCI"}, + AMDGPU_GFX_TCI_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCP_CE_EDC_LO_REG, regTCP_CE_EDC_HI_REG), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCP"}, + AMDGPU_GFX_TCP_MEM, 8}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTD_CE_EDC_LO, regTD_CE_EDC_HI), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TD"}, + AMDGPU_GFX_TD_MEM, 8}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGCEA_CE_ERR_STATUS_LO, regGCEA_CE_ERR_STATUS_HI), + 16, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "GCEA"}, + AMDGPU_GFX_GCEA_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regLDS_CE_ERR_STATUS_LO, regLDS_CE_ERR_STATUS_HI), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "LDS"}, + AMDGPU_GFX_LDS_MEM, 1}, +}; + +static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ue_reg_list[] = { + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regRLC_UE_ERR_STATUS_LOW, regRLC_UE_ERR_STATUS_HIGH), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "RLC"}, + AMDGPU_GFX_RLC_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPC_UE_ERR_STATUS_LO, regCPC_UE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPC"}, + AMDGPU_GFX_CP_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPF_UE_ERR_STATUS_LO, regCPF_UE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPF"}, + AMDGPU_GFX_CP_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPG_UE_ERR_STATUS_LO, regCPG_UE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPG"}, + AMDGPU_GFX_CP_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGDS_UE_ERR_STATUS_LO, regGDS_UE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "GDS"}, + AMDGPU_GFX_GDS_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGC_CANE_UE_ERR_STATUS_LO, regGC_CANE_UE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CANE"}, + AMDGPU_GFX_GC_CANE_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSPI_UE_ERR_STATUS_LO, regSPI_UE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SPI"}, + AMDGPU_GFX_SPI_MEM, 8}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP0_UE_ERR_STATUS_LO, regSP0_UE_ERR_STATUS_HI), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP0"}, + AMDGPU_GFX_SP_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP1_UE_ERR_STATUS_LO, regSP1_UE_ERR_STATUS_HI), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP1"}, + AMDGPU_GFX_SP_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQ_UE_ERR_STATUS_LO, regSQ_UE_ERR_STATUS_HI), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQ"}, + AMDGPU_GFX_SQ_MEM, 8}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQC_UE_EDC_LO, regSQC_UE_EDC_HI), + 5, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQC"}, + AMDGPU_GFX_SQC_MEM, 8}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCX_UE_ERR_STATUS_LO, regTCX_UE_ERR_STATUS_HI), + 2, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCX"}, + AMDGPU_GFX_TCX_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCC_UE_ERR_STATUS_LO, regTCC_UE_ERR_STATUS_HI), + 16, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCC"}, + AMDGPU_GFX_TCC_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTA_UE_EDC_LO, regTA_UE_EDC_HI), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TA"}, + AMDGPU_GFX_TA_MEM, 8}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCI_UE_EDC_LO_REG, regTCI_UE_EDC_HI_REG), + 31, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCI"}, + AMDGPU_GFX_TCI_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCP_UE_EDC_LO_REG, regTCP_UE_EDC_HI_REG), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCP"}, + AMDGPU_GFX_TCP_MEM, 8}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTD_UE_EDC_LO, regTD_UE_EDC_HI), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TD"}, + AMDGPU_GFX_TD_MEM, 8}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCA_UE_ERR_STATUS_LO, regTCA_UE_ERR_STATUS_HI), + 2, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCA"}, + AMDGPU_GFX_TCA_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGCEA_UE_ERR_STATUS_LO, regGCEA_UE_ERR_STATUS_HI), + 16, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "GCEA"}, + AMDGPU_GFX_GCEA_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regLDS_UE_ERR_STATUS_LO, regLDS_UE_ERR_STATUS_HI), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "LDS"}, + AMDGPU_GFX_LDS_MEM, 1}, +}; + static const struct soc15_reg_entry gfx_v9_4_3_ea_err_status_regs = { SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 }; -- cgit v1.2.3 From bfa84da6185cb1897fcee0ac3815625d162d39f0 Mon Sep 17 00:00:00 2001 From: Tao Zhou <tao.zhou1@amd.com> Date: Fri, 17 Mar 2023 17:13:46 +0800 Subject: drm/amdgpu: add RAS error count query for gfx_v9_4_3 Query GFX RAS ce/ue count. Signed-off-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 56 +++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 188b4d9a2cbb..bfd041ba51d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -3724,6 +3724,55 @@ static const struct soc15_reg_entry gfx_v9_4_3_ea_err_status_regs = { SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 }; +static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev, + void *ras_error_status, int xcc_id) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + unsigned long ce_count = 0, ue_count = 0; + uint32_t i, j, k; + + mutex_lock(&adev->grbm_idx_mutex); + + for (i = 0; i < ARRAY_SIZE(gfx_v9_4_3_ce_reg_list); i++) { + for (j = 0; j < gfx_v9_4_3_ce_reg_list[i].se_num; j++) { + for (k = 0; k < gfx_v9_4_3_ce_reg_list[i].reg_entry.reg_inst; k++) { + /* no need to select if instance number is 1 */ + if (gfx_v9_4_3_ce_reg_list[i].se_num > 1 || + gfx_v9_4_3_ce_reg_list[i].reg_entry.reg_inst > 1) + gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id); + + amdgpu_ras_inst_query_ras_error_count(adev, + &(gfx_v9_4_3_ce_reg_list[i].reg_entry), + 1, + gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ce_reg_list[i].mem_id_type].mem_id_ent, + gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ce_reg_list[i].mem_id_type].size, + GET_INST(GC, xcc_id), + AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, + &ce_count); + + amdgpu_ras_inst_query_ras_error_count(adev, + &(gfx_v9_4_3_ue_reg_list[i].reg_entry), + 1, + gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].mem_id_ent, + gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].size, + GET_INST(GC, xcc_id), + AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + &ue_count); + } + } + } + + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, + xcc_id); + mutex_unlock(&adev->grbm_idx_mutex); + + /* the caller should make sure initialize value of + * err_data->ue_count and err_data->ce_count + */ + err_data->ce_count += ce_count; + err_data->ue_count += ue_count; +} + static void gfx_v9_4_3_inst_query_ea_err_status(struct amdgpu_device *adev, int xcc_id) { @@ -3826,6 +3875,13 @@ static void gfx_v9_4_3_inst_reset_ras_err_status(struct amdgpu_device *adev, gfx_v9_4_3_inst_reset_ea_err_status(adev, xcc_id); } +static void gfx_v9_4_3_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + amdgpu_gfx_ras_error_func(adev, ras_error_status, + gfx_v9_4_3_inst_query_ras_err_count); +} + static void gfx_v9_4_3_query_ras_error_status(struct amdgpu_device *adev) { amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_query_ras_err_status); -- cgit v1.2.3 From 30feef0676092bdb4b8697e68b8d5864d54f096f Mon Sep 17 00:00:00 2001 From: Tao Zhou <tao.zhou1@amd.com> Date: Wed, 8 Feb 2023 14:54:01 +0800 Subject: drm/amdgpu: add RAS error count reset for gfx_v9_4_3 Add GFX RAS error count reset function. v2: remove xcp operation. only select_se_sh when instance number is more than 1. v3: add check for se_num before select_se_sh. change instance from 0 to xcc_id for register access. Signed-off-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 38 +++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index bfd041ba51d6..ac5270d5eff4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -3773,6 +3773,39 @@ static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev, err_data->ue_count += ue_count; } +static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev, + void *ras_error_status, int xcc_id) +{ + uint32_t i, j, k; + + mutex_lock(&adev->grbm_idx_mutex); + + for (i = 0; i < ARRAY_SIZE(gfx_v9_4_3_ce_reg_list); i++) { + for (j = 0; j < gfx_v9_4_3_ce_reg_list[i].se_num; j++) { + for (k = 0; k < gfx_v9_4_3_ce_reg_list[i].reg_entry.reg_inst; k++) { + /* no need to select if instance number is 1 */ + if (gfx_v9_4_3_ce_reg_list[i].se_num > 1 || + gfx_v9_4_3_ce_reg_list[i].reg_entry.reg_inst > 1) + gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id); + + amdgpu_ras_inst_reset_ras_error_count(adev, + &(gfx_v9_4_3_ce_reg_list[i].reg_entry), + 1, + GET_INST(GC, xcc_id)); + + amdgpu_ras_inst_reset_ras_error_count(adev, + &(gfx_v9_4_3_ue_reg_list[i].reg_entry), + 1, + GET_INST(GC, xcc_id)); + } + } + } + + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, + xcc_id); + mutex_unlock(&adev->grbm_idx_mutex); +} + static void gfx_v9_4_3_inst_query_ea_err_status(struct amdgpu_device *adev, int xcc_id) { @@ -3882,6 +3915,11 @@ static void gfx_v9_4_3_query_ras_error_count(struct amdgpu_device *adev, gfx_v9_4_3_inst_query_ras_err_count); } +static void gfx_v9_4_3_reset_ras_error_count(struct amdgpu_device *adev) +{ + amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_count); +} + static void gfx_v9_4_3_query_ras_error_status(struct amdgpu_device *adev) { amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_query_ras_err_status); -- cgit v1.2.3 From 0386d52d1516d80b81a25552df74b8a82dfb77f3 Mon Sep 17 00:00:00 2001 From: Tao Zhou <tao.zhou1@amd.com> Date: Fri, 10 Feb 2023 18:41:28 +0800 Subject: drm/amdgpu: add sq timeout status functions for gfx_v9_4_3 Query and reset sq timeout status. v2: change instance from 0 to xcc_id for register access. Signed-off-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 98 +++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index ac5270d5eff4..5bd2f40a817e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -3865,11 +3865,87 @@ static void gfx_v9_4_3_inst_query_utc_err_status(struct amdgpu_device *adev, } } +static void gfx_v9_4_3_log_cu_timeout_status(struct amdgpu_device *adev, + uint32_t status, int xcc_id) +{ + struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; + uint32_t i, simd, wave; + uint32_t wave_status; + uint32_t wave_pc_lo, wave_pc_hi; + uint32_t wave_exec_lo, wave_exec_hi; + uint32_t wave_inst_dw0, wave_inst_dw1; + uint32_t wave_ib_sts; + + for (i = 0; i < 32; i++) { + if (!((i << 1) & status)) + continue; + + simd = i / cu_info->max_waves_per_simd; + wave = i % cu_info->max_waves_per_simd; + + wave_status = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_STATUS); + wave_pc_lo = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_LO); + wave_pc_hi = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_HI); + wave_exec_lo = + wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_LO); + wave_exec_hi = + wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_HI); + wave_inst_dw0 = + wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW0); + wave_inst_dw1 = + wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW1); + wave_ib_sts = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_IB_STS); + + dev_info( + adev->dev, + "\t SIMD %d, Wave %d: status 0x%x, pc 0x%llx, exec 0x%llx, inst 0x%llx, ib_sts 0x%x\n", + simd, wave, wave_status, + ((uint64_t)wave_pc_hi << 32 | wave_pc_lo), + ((uint64_t)wave_exec_hi << 32 | wave_exec_lo), + ((uint64_t)wave_inst_dw1 << 32 | wave_inst_dw0), + wave_ib_sts); + } +} + +static void gfx_v9_4_3_inst_query_sq_timeout_status(struct amdgpu_device *adev, + int xcc_id) +{ + uint32_t se_idx, sh_idx, cu_idx; + uint32_t status; + + mutex_lock(&adev->grbm_idx_mutex); + for (se_idx = 0; se_idx < adev->gfx.config.max_shader_engines; se_idx++) { + for (sh_idx = 0; sh_idx < adev->gfx.config.max_sh_per_se; sh_idx++) { + for (cu_idx = 0; cu_idx < adev->gfx.config.max_cu_per_sh; cu_idx++) { + gfx_v9_4_3_xcc_select_se_sh(adev, se_idx, sh_idx, + cu_idx, xcc_id); + status = RREG32_SOC15(GC, GET_INST(GC, xcc_id), + regSQ_TIMEOUT_STATUS); + if (status != 0) { + dev_info( + adev->dev, + "GFX Watchdog Timeout: SE %d, SH %d, CU %d\n", + se_idx, sh_idx, cu_idx); + gfx_v9_4_3_log_cu_timeout_status( + adev, status, xcc_id); + } + /* clear old status */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regSQ_TIMEOUT_STATUS, 0); + } + } + } + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, + xcc_id); + mutex_unlock(&adev->grbm_idx_mutex); +} + static void gfx_v9_4_3_inst_query_ras_err_status(struct amdgpu_device *adev, void *ras_error_status, int xcc_id) { gfx_v9_4_3_inst_query_ea_err_status(adev, xcc_id); gfx_v9_4_3_inst_query_utc_err_status(adev, xcc_id); + gfx_v9_4_3_inst_query_sq_timeout_status(adev, xcc_id); } static void gfx_v9_4_3_inst_reset_utc_err_status(struct amdgpu_device *adev, @@ -3901,11 +3977,33 @@ static void gfx_v9_4_3_inst_reset_ea_err_status(struct amdgpu_device *adev, mutex_unlock(&adev->grbm_idx_mutex); } +static void gfx_v9_4_3_inst_reset_sq_timeout_status(struct amdgpu_device *adev, + int xcc_id) +{ + uint32_t se_idx, sh_idx, cu_idx; + + mutex_lock(&adev->grbm_idx_mutex); + for (se_idx = 0; se_idx < adev->gfx.config.max_shader_engines; se_idx++) { + for (sh_idx = 0; sh_idx < adev->gfx.config.max_sh_per_se; sh_idx++) { + for (cu_idx = 0; cu_idx < adev->gfx.config.max_cu_per_sh; cu_idx++) { + gfx_v9_4_3_xcc_select_se_sh(adev, se_idx, sh_idx, + cu_idx, xcc_id); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regSQ_TIMEOUT_STATUS, 0); + } + } + } + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, + xcc_id); + mutex_unlock(&adev->grbm_idx_mutex); +} + static void gfx_v9_4_3_inst_reset_ras_err_status(struct amdgpu_device *adev, void *ras_error_status, int xcc_id) { gfx_v9_4_3_inst_reset_utc_err_status(adev, xcc_id); gfx_v9_4_3_inst_reset_ea_err_status(adev, xcc_id); + gfx_v9_4_3_inst_reset_sq_timeout_status(adev, xcc_id); } static void gfx_v9_4_3_query_ras_error_count(struct amdgpu_device *adev, -- cgit v1.2.3 From 92ecb92ccc839c4c4b51ab1025cde5dd82c2fb4b Mon Sep 17 00:00:00 2001 From: Tao Zhou <tao.zhou1@amd.com> Date: Wed, 8 Feb 2023 17:05:08 +0800 Subject: drm/amdgpu: initialize RAS for gfx_v9_4_3 Register GFX RAS functions and initialize GFX RAS. v2: remove xcp operations. v3: reuse the return value of gfx_ras_sw_init. Signed-off-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 5bd2f40a817e..e5cfb3adb3b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -47,6 +47,8 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin"); #define GFX9_MEC_HPD_SIZE 4096 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L +struct amdgpu_gfx_ras gfx_v9_4_3_ras; + static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v9_4_3_set_irq_funcs(struct amdgpu_device *adev); static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev); @@ -659,6 +661,7 @@ static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev) u32 gb_addr_config; adev->gfx.funcs = &gfx_v9_4_3_gfx_funcs; + adev->gfx.ras = &gfx_v9_4_3_ras; switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 4, 3): @@ -845,7 +848,7 @@ static int gfx_v9_4_3_sw_init(void *handle) if (r) return r; - return 0; + return amdgpu_gfx_ras_sw_init(adev); } static int gfx_v9_4_3_sw_fini(void *handle) @@ -4342,3 +4345,16 @@ struct amdgpu_xcp_ip_funcs gfx_v9_4_3_xcp_funcs = { .suspend = &gfx_v9_4_3_xcp_suspend, .resume = &gfx_v9_4_3_xcp_resume }; + +struct amdgpu_ras_block_hw_ops gfx_v9_4_3_ras_ops = { + .query_ras_error_count = &gfx_v9_4_3_query_ras_error_count, + .reset_ras_error_count = &gfx_v9_4_3_reset_ras_error_count, + .query_ras_error_status = &gfx_v9_4_3_query_ras_error_status, + .reset_ras_error_status = &gfx_v9_4_3_reset_ras_error_status, +}; + +struct amdgpu_gfx_ras gfx_v9_4_3_ras = { + .ras_block = { + .hw_ops = &gfx_v9_4_3_ras_ops, + }, +}; -- cgit v1.2.3 From d7b8e68dc04ad89809832bebe9ab5d7965a6eef5 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com> Date: Fri, 19 May 2023 11:36:57 +0530 Subject: drm/amdgpu: Fix uninitialized variable in gfx_v9_4_3_cp_resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c:1925:6: error: variable 'r' is used uninitialized whenever 'if' condition is false [-Werror,-Wsometimes-uninitialized] if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr, ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c:1931:6: note: uninitialized use occurs here if (r) ^ drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c:1925:2: note: remove the 'if' if its condition is always true if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr, ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c:1923:7: note: initialize the variable 'r' to silence this warning int r, i, num_xcc; ^ = 0 1 error generated. Suggested-by: Lijo Lazar <lijo.lazar@amd.com> Cc: Luben Tuikov <luben.tuikov@amd.com> Cc: Alex Deucher <alexander.deucher@amd.com> Cc: Christian König <christian.koenig@amd.com> Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com> Reviewed-by: Lijo Lazar <lijo.lazar@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index e5cfb3adb3b3..6a1a28df606c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1920,7 +1920,7 @@ static int gfx_v9_4_3_xcc_cp_resume(struct amdgpu_device *adev, int xcc_id) static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) { - int r, i, num_xcc; + int r = 0, i, num_xcc; if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr, AMDGPU_XCP_FL_NONE) == -- cgit v1.2.3 From 803e4c9efc79c96796efbecab9ed53267d051256 Mon Sep 17 00:00:00 2001 From: Tom Rix <trix@redhat.com> Date: Mon, 22 May 2023 08:30:13 -0400 Subject: drm/amdgpu: remove unused variable num_xcc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc with W=1 reports drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c:2138:13: error: variable ‘num_xcc’ set but not used [-Werror=unused-but-set-variable] 2138 | int num_xcc; | ^~~~~~~ This variable is not used so remove it. Signed-off-by: Tom Rix <trix@redhat.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 6a1a28df606c..cb34da896737 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -2135,9 +2135,6 @@ static void gfx_v9_4_3_ring_emit_gds_switch(struct amdgpu_ring *ring, static int gfx_v9_4_3_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int num_xcc; - - num_xcc = NUM_XCC(adev->gfx.xcc_mask); adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), AMDGPU_MAX_COMPUTE_RINGS); -- cgit v1.2.3 From e7665d0ca7938a8f921760a780bdc55c5eda6df0 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com> Date: Tue, 23 May 2023 10:18:11 +0800 Subject: drm/amdgpu: Remove duplicate include ./drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c: amdgpu_xcp.h is included more than once. Reported-by: Abaci Robot <abaci@linux.alibaba.com> Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=5281 Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index cb34da896737..c54bf0b38fe1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -23,7 +23,6 @@ #include <linux/firmware.h> #include "amdgpu.h" -#include "amdgpu_xcp.h" #include "amdgpu_gfx.h" #include "soc15.h" #include "soc15d.h" -- cgit v1.2.3 From 232f2431899cbe6c00c1350e35cfba91ea0c1c0b Mon Sep 17 00:00:00 2001 From: Guchun Chen <guchun.chen@amd.com> Date: Fri, 12 May 2023 16:10:40 +0800 Subject: drm/amdgpu/gfx: set sched.ready status after ring/IB test in gfx sched.ready is nothing with ring initialization, it needs to set to be true after ring/IB test in amdgpu_ring_test_helper to tell the ring is ready for submission. Signed-off-by: Guchun Chen <guchun.chen@amd.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 24 ++++-------------------- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 25 ++++--------------------- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 -- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 -- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 1 - 5 files changed, 8 insertions(+), 46 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 0d15002eac69..f7ad883a70fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -6073,7 +6073,6 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev) u32 tmp; u32 rb_bufsz; u64 rb_addr, rptr_addr, wptr_gpu_addr; - u32 i; /* Set the write pointer delay */ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); @@ -6168,11 +6167,6 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev) /* start the ring */ gfx_v10_0_cp_gfx_start(adev); - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - ring->sched.ready = true; - } - return 0; } @@ -6470,7 +6464,7 @@ static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)) - goto done; + return r; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { @@ -6480,23 +6474,14 @@ static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) } amdgpu_bo_unreserve(ring->mqd_obj); if (r) - goto done; + return r; } r = amdgpu_gfx_enable_kgq(adev, 0); if (r) - goto done; - - r = gfx_v10_0_cp_gfx_start(adev); - if (r) - goto done; + return r; - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - ring->sched.ready = true; - } -done: - return r; + return gfx_v10_0_cp_gfx_start(adev); } static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m, @@ -6812,7 +6797,6 @@ static int gfx_v10_0_kiq_resume(struct amdgpu_device *adev) amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; amdgpu_bo_unreserve(ring->mqd_obj); - ring->sched.ready = true; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 4b7224de879e..da21bf868080 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -3228,7 +3228,6 @@ static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev) u32 tmp; u32 rb_bufsz; u64 rb_addr, rptr_addr, wptr_gpu_addr; - u32 i; /* Set the write pointer delay */ WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0); @@ -3320,11 +3319,6 @@ static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev) /* start the ring */ gfx_v11_0_cp_gfx_start(adev); - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - ring->sched.ready = true; - } - return 0; } @@ -3370,8 +3364,6 @@ static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data); } - adev->gfx.kiq[0].ring.sched.ready = enable; - udelay(50); } @@ -3711,7 +3703,7 @@ static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)) - goto done; + return r; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { @@ -3721,23 +3713,14 @@ static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) } amdgpu_bo_unreserve(ring->mqd_obj); if (r) - goto done; + return r; } r = amdgpu_gfx_enable_kgq(adev, 0); if (r) - goto done; - - r = gfx_v11_0_cp_gfx_start(adev); - if (r) - goto done; + return r; - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - ring->sched.ready = true; - } -done: - return r; + return gfx_v11_0_cp_gfx_start(adev); } static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 6d0589dc1d6c..2f1ef75e126c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4283,7 +4283,6 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) /* start the ring */ amdgpu_ring_clear_ring(ring); gfx_v8_0_cp_gfx_start(adev); - ring->sched.ready = true; return 0; } @@ -4693,7 +4692,6 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; amdgpu_bo_unreserve(ring->mqd_obj); - ring->sched.ready = true; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 8bf95a6b0767..fe090eafa366 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3144,7 +3144,6 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) /* start the ring */ gfx_v9_0_cp_gfx_start(adev); - ring->sched.ready = true; return 0; } @@ -3623,7 +3622,6 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; amdgpu_bo_unreserve(ring->mqd_obj); - ring->sched.ready = true; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index c54bf0b38fe1..ed41a7862d9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1845,7 +1845,6 @@ static int gfx_v9_4_3_xcc_kiq_resume(struct amdgpu_device *adev, int xcc_id) amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; amdgpu_bo_unreserve(ring->mqd_obj); - ring->sched.ready = true; return 0; } -- cgit v1.2.3 From a09e2065101a343ac3a709aa6236cdac874627eb Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com> Date: Wed, 24 May 2023 20:37:26 +0530 Subject: drm/amdgpu: Fix defined but not used gfx9_cs_data in gfx_v9_4_3.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc with W=1 In file included from drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c:33: drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h:939:36: warning: ‘gfx9_cs_data’ defined but not used [-Wunused-const-variable=] 939 | static const struct cs_section_def gfx9_cs_data[] = { | gfx9_cs_data is not used in gfx_v9_4_3.c, hence remove its include in gfx_v9_4_3.c Cc: Christian König <christian.koenig@amd.com> Cc: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index ed41a7862d9f..bdaea50cafe7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -29,7 +29,6 @@ #include "soc15_common.h" #include "vega10_enum.h" -#include "clearstate_gfx9.h" #include "v9_structs.h" #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h" -- cgit v1.2.3 From 89f85765555caccec0a31b604639cea53942e522 Mon Sep 17 00:00:00 2001 From: Shiwu Zhang <shiwu.zhang@amd.com> Date: Tue, 16 May 2023 10:31:49 +0800 Subject: drm/amdgpu: golden settings for ASIC rev_id 0 Suggested by FW team that GB_ADDR_CONFIG is handled by golden settings in driver to get the expected value Signed-off-by: Shiwu Zhang <shiwu.zhang@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index bdaea50cafe7..35d359de0aad 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -45,6 +45,8 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin"); #define GFX9_MEC_HPD_SIZE 4096 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L +#define GOLDEN_GB_ADDR_CONFIG 0x2a114042 + struct amdgpu_gfx_ras gfx_v9_4_3_ras; static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev); @@ -195,6 +197,15 @@ static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev) dev_inst = GET_INST(GC, i); if (dev_inst >= 2) WREG32_SOC15(GC, dev_inst, regGRBM_MCM_ADDR, 0x4); + + /* Golden settings applied by driver for ASIC with rev_id 0 */ + if (adev->rev_id == 0) { + WREG32_SOC15(GC, dev_inst, regGB_ADDR_CONFIG, + GOLDEN_GB_ADDR_CONFIG); + + WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL1, + REDUCE_FIFO_DEPTH_BY_2, 2); + } } } -- cgit v1.2.3 From 491ae27829cda38df3ab6d2fe5d94a80ec1bbe22 Mon Sep 17 00:00:00 2001 From: Shiwu Zhang <shiwu.zhang@amd.com> Date: Wed, 17 May 2023 13:40:04 +0800 Subject: drm/amdgpu: complement the 4, 6 and 8 XCC cases Signed-off-by: Shiwu Zhang <shiwu.zhang@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 35d359de0aad..ebdd7fa985d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1040,6 +1040,9 @@ static void gfx_v9_4_3_xcc_program_xcc_id(struct amdgpu_device *adev, WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HYP_XCP_CTL, 0x8); break; case 2: + case 4: + case 6: + case 8: tmp = (xcc_id % adev->gfx.num_xcc_per_xcp) << REG_FIELD_SHIFT(CP_HYP_XCP_CTL, VIRTUAL_XCC_ID); tmp = tmp | (adev->gfx.num_xcc_per_xcp << REG_FIELD_SHIFT(CP_HYP_XCP_CTL, NUM_XCC_IN_XCP)); WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HYP_XCP_CTL, tmp); -- cgit v1.2.3 From cab69d36ccdbfa3fa0b5627a032150369c20b4f3 Mon Sep 17 00:00:00 2001 From: Yang Wang <kevinyang.wang@amd.com> Date: Tue, 6 Jun 2023 14:42:39 +0800 Subject: drm/amdgpu: skip to resume rlcg for gc 9.4.3 in vf side skip to resume rlcg, because rlcg is already enabled in pf side. Signed-off-by: Yang Wang <kevinyang.wang@amd.com> Reviewed-by: Lijo Lazar <lijo.lazar@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index ebdd7fa985d6..f5b8d3f388ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1293,6 +1293,9 @@ static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev) { int r, i, num_xcc; + if (amdgpu_sriov_vf(adev)) + return 0; + num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < num_xcc; i++) { r = gfx_v9_4_3_xcc_rlc_resume(adev, i); @@ -4321,11 +4324,13 @@ static int gfx_v9_4_3_xcp_resume(void *handle, uint32_t inst_mask) for_each_inst(i, tmp_mask) gfx_v9_4_3_xcc_constants_init(adev, i); - tmp_mask = inst_mask; - for_each_inst(i, tmp_mask) { - r = gfx_v9_4_3_xcc_rlc_resume(adev, i); - if (r) - return r; + if (!amdgpu_sriov_vf(adev)) { + tmp_mask = inst_mask; + for_each_inst(i, tmp_mask) { + r = gfx_v9_4_3_xcc_rlc_resume(adev, i); + if (r) + return r; + } } tmp_mask = inst_mask; -- cgit v1.2.3