From 980d5baeb25cd65b7a791d7499daa07b34346def Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Thu, 30 Mar 2023 10:33:02 +0800 Subject: drm/amdgpu: allow more APUs to do mode2 reset when go to S4 Skip mode2 reset only for IMU enabled APUs when going to S4. This patch fixes the regression reported in https://gitlab.freedesktop.org/drm/amd/-/issues/2483 which was introduced by commit b589626674de ("drm/amdgpu: skip ASIC reset for APUs when go to S4"). Fixes: b589626674de ("drm/amdgpu: skip ASIC reset for APUs when go to S4") Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2483 Tested-by: Yuan Perry Signed-off-by: Tim Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 60b1857f469e..aeeec211861c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -981,7 +981,12 @@ static bool amdgpu_atcs_pci_probe_handle(struct pci_dev *pdev) */ bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { - if (adev->flags & AMD_IS_APU) + if ((adev->flags & AMD_IS_APU) && + adev->gfx.imu.funcs) /* Not need to do mode2 reset for IMU enabled APUs */ + return false; + + if ((adev->flags & AMD_IS_APU) && + amdgpu_acpi_is_s3_active(adev)) return false; if (amdgpu_sriov_vf(adev)) -- cgit v1.2.3 From 97998b893c3000b27a780a4982e16cfc8f4ea555 Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Fri, 24 Mar 2023 16:55:15 +0800 Subject: drm/amd/amdgpu: introduce gc_*_mes_2.bin v2 To avoid new mes fw running with old driver, rename mes schq fw to gc_*_mes_2.bin. 
v2: add MODULE_FIRMWARE declaration v3: squash in fixup patch Signed-off-by: Jack Xiao Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 26 ++++++++++++++++++++++---- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 10 +++++----- 2 files changed, 27 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 0e55823ef6ca..f0f00466b59f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1434,13 +1434,31 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) struct amdgpu_firmware_info *info; char ucode_prefix[30]; char fw_name[40]; + bool need_retry = false; int r; - amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin", - ucode_prefix, - pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1"); + amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, + sizeof(ucode_prefix)); + if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) { + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin", + ucode_prefix, + pipe == AMDGPU_MES_SCHED_PIPE ? "_2" : "1"); + need_retry = true; + } else { + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin", + ucode_prefix, + pipe == AMDGPU_MES_SCHED_PIPE ? 
"" : "1"); + } + r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], fw_name); + if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) { + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin", + ucode_prefix); + DRM_INFO("try to fall back to %s\n", fw_name); + r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], + fw_name); + } + if (r) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 5826eac270d7..fad652c85017 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -32,15 +32,15 @@ #include "v11_structs.h" #include "mes_v11_api_def.h" -MODULE_FIRMWARE("amdgpu/gc_11_0_0_mes.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_0_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_mes1.bin"); -MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes1.bin"); -MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes1.bin"); -MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes1.bin"); -MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes1.bin"); static int mes_v11_0_hw_fini(void *handle); -- cgit v1.2.3 From 63a4d258ae1b975cd0dd1f0623f50c119953abda Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Fri, 31 Mar 2023 18:52:56 +0530 Subject: drm/amdgpu: add new parameters in v11_struct Added some new parameters defined for the gfx usermode queues use cases in the v11_mqd_struct. 
Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/v11_structs.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/include/v11_structs.h b/drivers/gpu/drm/amd/include/v11_structs.h index b8ff7456ae0b..f8008270f813 100644 --- a/drivers/gpu/drm/amd/include/v11_structs.h +++ b/drivers/gpu/drm/amd/include/v11_structs.h @@ -25,14 +25,14 @@ #define V11_STRUCTS_H_ struct v11_gfx_mqd { - uint32_t reserved_0; // offset: 0 (0x0) - uint32_t reserved_1; // offset: 1 (0x1) - uint32_t reserved_2; // offset: 2 (0x2) - uint32_t reserved_3; // offset: 3 (0x3) - uint32_t reserved_4; // offset: 4 (0x4) - uint32_t reserved_5; // offset: 5 (0x5) - uint32_t reserved_6; // offset: 6 (0x6) - uint32_t reserved_7; // offset: 7 (0x7) + uint32_t shadow_base_lo; // offset: 0 (0x0) + uint32_t shadow_base_hi; // offset: 1 (0x1) + uint32_t gds_bkup_base_lo; // offset: 2 (0x2) + uint32_t gds_bkup_base_hi; // offset: 3 (0x3) + uint32_t fw_work_area_base_lo; // offset: 4 (0x4) + uint32_t fw_work_area_base_hi; // offset: 5 (0x5) + uint32_t shadow_initialized; // offset: 6 (0x6) + uint32_t ib_vmid; // offset: 7 (0x7) uint32_t reserved_8; // offset: 8 (0x8) uint32_t reserved_9; // offset: 9 (0x9) uint32_t reserved_10; // offset: 10 (0xA) -- cgit v1.2.3 From 583da1b82ac51f0631b6ab699153f16b241dd40e Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Thu, 30 Mar 2023 20:10:32 -0400 Subject: drm/amd/display: remove unused average_render_time_in_us and i variables clang with W=1 reports drivers/gpu/drm/amd/amdgpu/../display/modules/freesync/freesync.c:1132:15: error: variable 'average_render_time_in_us' set but not used [-Werror,-Wunused-but-set-variable] unsigned int average_render_time_in_us = 0; ^ This variable is not used so remove it, which caused i to be unused so remove that as well. 
Signed-off-by: Tom Rix Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/freesync/freesync.c | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index 315da61ee897..5c41a4751db4 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -1129,7 +1129,6 @@ void mod_freesync_handle_preflip(struct mod_freesync *mod_freesync, { struct core_freesync *core_freesync = NULL; unsigned int last_render_time_in_us = 0; - unsigned int average_render_time_in_us = 0; if (mod_freesync == NULL) return; @@ -1138,7 +1137,6 @@ void mod_freesync_handle_preflip(struct mod_freesync *mod_freesync, if (in_out_vrr->supported && in_out_vrr->state == VRR_STATE_ACTIVE_VARIABLE) { - unsigned int i = 0; unsigned int oldest_index = plane->time.index + 1; if (oldest_index >= DC_PLANE_UPDATE_TIMES_MAX) @@ -1147,18 +1145,6 @@ void mod_freesync_handle_preflip(struct mod_freesync *mod_freesync, last_render_time_in_us = curr_time_stamp_in_us - plane->time.prev_update_time_in_us; - /* Sum off all entries except oldest one */ - for (i = 0; i < DC_PLANE_UPDATE_TIMES_MAX; i++) { - average_render_time_in_us += - plane->time.time_elapsed_in_us[i]; - } - average_render_time_in_us -= - plane->time.time_elapsed_in_us[oldest_index]; - - /* Add render time for current flip */ - average_render_time_in_us += last_render_time_in_us; - average_render_time_in_us /= DC_PLANE_UPDATE_TIMES_MAX; - if (in_out_vrr->btr.btr_enabled) { apply_below_the_range(core_freesync, stream, -- cgit v1.2.3 From 9eb28ac1a25a2117ea5544ffcce59fcc1f128e1f Mon Sep 17 00:00:00 2001 From: Yifan Zha Date: Wed, 29 Mar 2023 16:18:01 +0800 Subject: drm/amdgpu: Add MES KIQ dequeue in MES hw fini [Why] Need dequeue MES KIQ under SRIOV when unloading driver [How] Modify 
mes_v11_0_kiq_dequeue_sched which was used to dequeue MES SCHED to support variable pipe. Add MES KIQ dequeue in hw fini Signed-off-by: Yifan Zha Reviewed-by: Horace Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index fad652c85017..b48666da2c87 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -1089,13 +1089,14 @@ static int mes_v11_0_sw_fini(void *handle) return 0; } -static void mes_v11_0_kiq_dequeue_sched(struct amdgpu_device *adev) +static void mes_v11_0_kiq_dequeue(struct amdgpu_ring *ring) { uint32_t data; int i; + struct amdgpu_device *adev = ring->adev; mutex_lock(&adev->srbm_mutex); - soc21_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0); + soc21_grbm_select(adev, 3, ring->pipe, 0, 0); /* disable the queue if it's active */ if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { @@ -1121,8 +1122,6 @@ static void mes_v11_0_kiq_dequeue_sched(struct amdgpu_device *adev) soc21_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - - adev->mes.ring.sched.ready = false; } static void mes_v11_0_kiq_setting(struct amdgpu_ring *ring) @@ -1176,8 +1175,14 @@ failure: static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev) { - if (adev->mes.ring.sched.ready) - mes_v11_0_kiq_dequeue_sched(adev); + if (adev->mes.ring.sched.ready) { + mes_v11_0_kiq_dequeue(&adev->mes.ring); + adev->mes.ring.sched.ready = false; + } + + if (amdgpu_sriov_vf(adev)) { + mes_v11_0_kiq_dequeue(&adev->gfx.kiq.ring); + } if (!amdgpu_sriov_vf(adev)) mes_v11_0_enable(adev, false); -- cgit v1.2.3 From abaeafb1b1fbeeb9e18638c6edbe9db31750c163 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Tue, 21 Mar 2023 13:35:44 -0400 Subject: drm/amd/display: Clear FAMS flag if FAMS doesn't reduce vlevel [Description] - If we find that 
applying FAMS doesn't reduce the voltage level, we will not use it - Ensure to clear the stream flags indicating FAMS if we hit this case Reviewed-by: Jun Lei Acked-by: Qingqing Zhuo Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 1e26adf987cc..c252fdf2c0e8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -2002,6 +2002,10 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, * voltage level) */ context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = false; + for (i = 0; i < context->stream_count; i++) { + if (context->streams[i]) + context->streams[i]->fpo_in_use = false; + } context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us; dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false); } -- cgit v1.2.3 From 0efa70356882ec2a843122f02892391ae61fc4d3 Mon Sep 17 00:00:00 2001 From: Zhikai Zhai Date: Wed, 15 Mar 2023 11:16:12 +0800 Subject: drm/amd/display: add scaler control for dcn32 [WHY] It will introduce the extra warning log on some asic that doesn't register [HOW] Add the register on dcn32 Reviewed-by: Dmytro Laktyushkin Acked-by: Qingqing Zhuo Signed-off-by: Zhikai Zhai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index 10a3350376e4..3f5e92ef0c99 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -472,6 +472,7 @@ double dcn32_determine_max_vratio_prefetch(struct dc *dc, struct dc_state *conte SRI_ARR(OTG_H_BLANK, DSCL, id), SRI_ARR(OTG_V_BLANK, DSCL, id), \ SRI_ARR(SCL_MODE, DSCL, id), SRI_ARR(LB_DATA_FORMAT, DSCL, id), \ SRI_ARR(LB_MEMORY_CTRL, DSCL, id), SRI_ARR(DSCL_AUTOCAL, DSCL, id), \ + SRI_ARR(DSCL_CONTROL, DSCL, id), \ SRI_ARR(SCL_TAP_CONTROL, DSCL, id), \ SRI_ARR(SCL_COEF_RAM_TAP_SELECT, DSCL, id), \ SRI_ARR(SCL_COEF_RAM_TAP_DATA, DSCL, id), \ -- cgit v1.2.3 From 6f6869dcf415f7c222057a3f07c23667e1758585 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Tue, 21 Mar 2023 16:35:28 -0400 Subject: drm/amd/display: prep work for root clock optimization enablement for DCN314 To enable root clock optimizations, we need a number of register writes and need to account for the difference in DPSTREAMCLK between DCN31 and DCN314. To prevent issues, add a number of register writes to DCCG_MASK_SH_LIST_DCN314_COMMON(), and define dccg314_init() which is mostly in alignment with dccg31_init() but accounts for the new DPSTREAMCLK sequence. 
Reviewed-by: Nicholas Kazlauskas Acked-by: Qingqing Zhuo Signed-off-by: Hamza Mahfooz Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c | 28 +++++++++++++++++++++- .../gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h | 6 +++++ 2 files changed, 33 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c index 081ce168f621..6f879265ad9c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c @@ -274,6 +274,32 @@ static void dccg314_set_dpstreamclk( } } +void dccg314_init(struct dccg *dccg) +{ + int otg_inst; + + /* Set HPO stream encoder to use refclk to avoid case where PHY is + * disabled and SYMCLK32 for HPO SE is sourced from PHYD32CLK which + * will cause DCN to hang. + */ + for (otg_inst = 0; otg_inst < 4; otg_inst++) + dccg31_disable_symclk32_se(dccg, otg_inst); + + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) + for (otg_inst = 0; otg_inst < 2; otg_inst++) + dccg31_disable_symclk32_le(dccg, otg_inst); + + if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) + for (otg_inst = 0; otg_inst < 4; otg_inst++) + dccg314_set_dpstreamclk(dccg, REFCLK, otg_inst, + otg_inst); + + if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + for (otg_inst = 0; otg_inst < 5; otg_inst++) + dccg31_set_physymclk(dccg, otg_inst, + PHYSYMCLK_FORCE_SRC_SYMCLK, false); +} + static void dccg314_set_valid_pixel_rate( struct dccg *dccg, int ref_dtbclk_khz, @@ -315,7 +341,7 @@ static const struct dccg_funcs dccg314_funcs = { .update_dpp_dto = dccg31_update_dpp_dto, .dpp_root_clock_control = dccg314_dpp_root_clock_control, .get_dccg_ref_freq = dccg31_get_dccg_ref_freq, - .dccg_init = dccg31_init, + .dccg_init = dccg314_init, .set_dpstreamclk = dccg314_set_dpstreamclk, .enable_symclk32_se = dccg31_enable_symclk32_se, 
.disable_symclk32_se = dccg31_disable_symclk32_se, diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h index 6a35986307af..f62631ab53a2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h @@ -155,6 +155,12 @@ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE3_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE0_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE1_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE1_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE2_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE3_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE1_GATE_DISABLE, mask_sh),\ DCCG_SF(HDMISTREAMCLK0_DTO_PARAM, HDMISTREAMCLK0_DTO_PHASE, mask_sh),\ DCCG_SF(HDMISTREAMCLK0_DTO_PARAM, HDMISTREAMCLK0_DTO_MODULO, mask_sh) -- cgit v1.2.3 From 385c3e4c29e1d4ce8f68687a8c84621e4c0e0416 Mon Sep 17 00:00:00 2001 From: Paul Hsieh Date: Wed, 22 Mar 2023 17:46:31 +0800 Subject: drm/amd/display: Correct DML calculation to follow HW SPEC [Why] On a 2560x1600@240p eDP panel, the driver uses the lowest voltage level to play 1080p video, which causes underflow. According to the HW SPEC, this scenario should use a high voltage level. [How] The ChromaPre value is zero during bandwidth validation. Correct the ChromaPre calculation. 
Reviewed-by: Nicholas Kazlauskas Reviewed-by: Jun Lei Acked-by: Qingqing Zhuo Signed-off-by: Paul Hsieh Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c | 2 +- drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c | 2 +- drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c | 2 +- drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index d0303173ce80..7d0626e42ea6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -4864,7 +4864,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->DETBufferSizeCThisState[k], &v->UrgentBurstFactorCursorPre[k], &v->UrgentBurstFactorLumaPre[k], - &v->UrgentBurstFactorChroma[k], + &v->UrgentBurstFactorChromaPre[k], &v->NoUrgentLatencyHidingPre[k]); } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c index 536a63624595..bd674dc30df3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c @@ -5191,7 +5191,7 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->DETBufferSizeCThisState[k], &v->UrgentBurstFactorCursorPre[k], &v->UrgentBurstFactorLumaPre[k], - &v->UrgentBurstFactorChroma[k], + &v->UrgentBurstFactorChromaPre[k], &v->NotUrgentLatencyHidingPre[k]); } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c index daf319370190..7eb2173b7691 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c @@ -5288,7 +5288,7 @@ void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_ v->DETBufferSizeCThisState[k], &v->UrgentBurstFactorCursorPre[k], &v->UrgentBurstFactorLumaPre[k], - &v->UrgentBurstFactorChroma[k], + &v->UrgentBurstFactorChromaPre[k], &v->NotUrgentLatencyHidingPre[k]); } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index f74730c2abbd..13c7e7394b1c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -3354,7 +3354,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l /* Output */ &mode_lib->vba.UrgentBurstFactorCursorPre[k], &mode_lib->vba.UrgentBurstFactorLumaPre[k], - &mode_lib->vba.UrgentBurstFactorChroma[k], + &mode_lib->vba.UrgentBurstFactorChromaPre[k], &mode_lib->vba.NotUrgentLatencyHidingPre[k]); } -- cgit v1.2.3 From 0289e0ed1b9ae20e7b682fc7ca30d2d324a47618 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Thu, 23 Mar 2023 15:48:41 -0400 Subject: drm/amd/display: Add FPO + VActive support [Description] - When determining FPO support, include FPO + VActive support - Support FPO + VActive if one display meets regular requirements for FPO and the second display is able to switch in VACTIVE with a given amount of margin Reviewed-by: Jun Lei Acked-by: Qingqing Zhuo Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 + drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 20 +++ .../gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 2 + .../gpu/drm/amd/display/dc/dcn32/dcn32_resource.h | 3 + .../amd/display/dc/dcn32/dcn32_resource_helpers.c | 156 +++++++++++++++++++++ .../drm/amd/display/dc/dcn321/dcn321_resource.c 
| 2 + .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 81 ++++++++++- .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 4 + 8 files changed, 267 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index e363a3c88250..719727a2f939 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -875,6 +875,8 @@ struct dc_debug_options { bool override_dispclk_programming; bool disable_fpo_optimizations; bool support_eDP1_5; + uint32_t fpo_vactive_margin_us; + bool disable_fpo_vactive; }; struct gpu_info_soc_bounding_box_v1_0; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index dd6f643254fe..a9b9490a532c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -327,6 +327,7 @@ bool dc_dmub_srv_p_state_delegate(struct dc *dc, bool should_manage_pstate, stru int i = 0, k = 0; int ramp_up_num_steps = 1; // TODO: Ramp is currently disabled. Reenable it. uint8_t visual_confirm_enabled; + int pipe_idx = 0; if (dc == NULL) return false; @@ -339,6 +340,25 @@ bool dc_dmub_srv_p_state_delegate(struct dc *dc, bool should_manage_pstate, stru cmd.fw_assisted_mclk_switch.config_data.fams_enabled = should_manage_pstate; cmd.fw_assisted_mclk_switch.config_data.visual_confirm_enabled = visual_confirm_enabled; + if (should_manage_pstate) { + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->stream) + continue; + + /* If FAMS is being used to support P-State and there is a stream + * that does not use FAMS, we are in an FPO + VActive scenario. + * Assign vactive stretch margin in this case. 
+ */ + if (!pipe->stream->fpo_in_use) { + cmd.fw_assisted_mclk_switch.config_data.vactive_stretch_margin_us = dc->debug.fpo_vactive_margin_us; + break; + } + pipe_idx++; + } + } + for (i = 0, k = 0; context && i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 502f990346b1..99d417fa6458 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -726,6 +726,8 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_unbounded_requesting = false, .override_dispclk_programming = true, .disable_fpo_optimizations = false, + .fpo_vactive_margin_us = 2000, // 2000us + .disable_fpo_vactive = true, }; static const struct dc_debug_options debug_defaults_diags = { diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index 3f5e92ef0c99..3937dbc1e552 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -39,6 +39,7 @@ #define DCN3_2_MBLK_HEIGHT_8BPE 64 #define DCN3_2_VMIN_DISPCLK_HZ 717000000 #define DCN3_2_DCFCLK_DS_INIT_KHZ 10000 // Choose 10Mhz for init DCFCLK DS freq +#define DCN3_2_MIN_ACTIVE_SWITCH_MARGIN_FPO_US 100 // Only allow FPO + Vactive if active margin >= 100 #define TO_DCN32_RES_POOL(pool)\ container_of(pool, struct dcn32_resource_pool, base) @@ -146,6 +147,8 @@ void dcn32_restore_mall_state(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config); +struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, const struct dc_state *context); + bool dcn32_allow_subvp_with_active_margin(struct pipe_ctx *pipe); unsigned int dcn32_calc_num_avail_chans_for_mall(struct dc *dc, int num_chans); diff --git 
a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index 47fa51c1d3f4..eeca16faf31a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -27,6 +27,7 @@ #include "dcn32_resource.h" #include "dcn20/dcn20_resource.h" #include "dml/dcn32/display_mode_vba_util_32.h" +#include "dml/dcn32/dcn32_fpu.h" static bool is_dual_plane(enum surface_pixel_format format) { @@ -500,3 +501,158 @@ void dcn32_restore_mall_state(struct dc *dc, pipe->plane_state->is_phantom = temp_config->is_phantom_plane[i]; } } + +#define MAX_STRETCHED_V_BLANK 1000 // in micro-seconds (must ensure to match value in FW) +/* + * Scaling factor for v_blank stretch calculations considering timing in + * micro-seconds and pixel clock in 100hz. + * Note: the parenthesis are necessary to ensure the correct order of + * operation where V_SCALE is used. + */ +#define V_SCALE (10000 / MAX_STRETCHED_V_BLANK) + +static int get_frame_rate_at_max_stretch_100hz( + struct dc_stream_state *fpo_candidate_stream, + uint32_t fpo_vactive_margin_us) +{ + struct dc_crtc_timing *timing = NULL; + uint32_t sec_per_100_lines; + uint32_t max_v_blank; + uint32_t curr_v_blank; + uint32_t v_stretch_max; + uint32_t stretched_frame_pix_cnt; + uint32_t scaled_stretched_frame_pix_cnt; + uint32_t scaled_refresh_rate; + uint32_t v_scale; + + if (fpo_candidate_stream == NULL) + return 0; + + /* check if refresh rate at least 120hz */ + timing = &fpo_candidate_stream->timing; + if (timing == NULL) + return 0; + + v_scale = 10000 / (MAX_STRETCHED_V_BLANK + fpo_vactive_margin_us); + + sec_per_100_lines = timing->pix_clk_100hz / timing->h_total + 1; + max_v_blank = sec_per_100_lines / v_scale + 1; + curr_v_blank = timing->v_total - timing->v_addressable; + v_stretch_max = (max_v_blank > curr_v_blank) ? 
(max_v_blank - curr_v_blank) : (0); + stretched_frame_pix_cnt = (v_stretch_max + timing->v_total) * timing->h_total; + scaled_stretched_frame_pix_cnt = stretched_frame_pix_cnt / 10000; + scaled_refresh_rate = (timing->pix_clk_100hz) / scaled_stretched_frame_pix_cnt + 1; + + return scaled_refresh_rate; + +} + +static bool is_refresh_rate_support_mclk_switch_using_fw_based_vblank_stretch( + struct dc_stream_state *fpo_candidate_stream, uint32_t fpo_vactive_margin_us) +{ + int refresh_rate_max_stretch_100hz; + int min_refresh_100hz; + + if (fpo_candidate_stream == NULL) + return false; + + refresh_rate_max_stretch_100hz = get_frame_rate_at_max_stretch_100hz(fpo_candidate_stream, fpo_vactive_margin_us); + min_refresh_100hz = fpo_candidate_stream->timing.min_refresh_in_uhz / 10000; + + if (refresh_rate_max_stretch_100hz < min_refresh_100hz) + return false; + + return true; +} + +static int get_refresh_rate(struct dc_stream_state *fpo_candidate_stream) +{ + int refresh_rate = 0; + int h_v_total = 0; + struct dc_crtc_timing *timing = NULL; + + if (fpo_candidate_stream == NULL) + return 0; + + /* check if refresh rate at least 120hz */ + timing = &fpo_candidate_stream->timing; + if (timing == NULL) + return 0; + + h_v_total = timing->h_total * timing->v_total; + if (h_v_total == 0) + return 0; + + refresh_rate = ((timing->pix_clk_100hz * 100) / (h_v_total)) + 1; + return refresh_rate; +} + +/** + * dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch - Determines if config can support FPO + * + * @param [in]: dc - current dc state + * @param [in]: context - new dc state + * + * Return: Pointer to FPO stream candidate if config can support FPO, otherwise NULL + */ +struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, const struct dc_state *context) +{ + int refresh_rate = 0; + const int minimum_refreshrate_supported = 120; + struct dc_stream_state *fpo_candidate_stream = NULL; + bool is_fpo_vactive = false; + uint32_t 
fpo_vactive_margin_us = 0; + + if (context == NULL) + return NULL; + + if (dc->debug.disable_fams) + return NULL; + + if (!dc->caps.dmub_caps.mclk_sw) + return NULL; + + if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching_shut_down) + return NULL; + + /* For FPO we can support up to 2 display configs if: + * - first display uses FPO + * - Second display switches in VACTIVE */ + if (context->stream_count > 2) + return NULL; + else if (context->stream_count == 2) { + DC_FP_START(); + dcn32_assign_fpo_vactive_candidate(dc, context, &fpo_candidate_stream); + DC_FP_END(); + + DC_FP_START(); + is_fpo_vactive = dcn32_find_vactive_pipe(dc, context, DCN3_2_MIN_ACTIVE_SWITCH_MARGIN_FPO_US); + DC_FP_END(); + if (!is_fpo_vactive || dc->debug.disable_fpo_vactive) + return NULL; + } else + fpo_candidate_stream = context->streams[0]; + + if (!fpo_candidate_stream) + return NULL; + + if (fpo_candidate_stream->sink->edid_caps.panel_patch.disable_fams) + return NULL; + + refresh_rate = get_refresh_rate(fpo_candidate_stream); + if (refresh_rate < minimum_refreshrate_supported) + return NULL; + + fpo_vactive_margin_us = is_fpo_vactive ? 
dc->debug.fpo_vactive_margin_us : 0; // For now hardcode the FPO + Vactive stretch margin to be 2000us + if (!is_refresh_rate_support_mclk_switch_using_fw_based_vblank_stretch(fpo_candidate_stream, fpo_vactive_margin_us)) + return NULL; + + // check if freesync enabled + if (!fpo_candidate_stream->allow_freesync) + return NULL; + + if (fpo_candidate_stream->vrr_active_variable) + return NULL; + + return fpo_candidate_stream; +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index 79664ba7e7af..7ebd14fb9698 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -724,6 +724,8 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_unbounded_requesting = false, .override_dispclk_programming = true, .disable_fpo_optimizations = false, + .fpo_vactive_margin_us = 2000, // 2000us + .disable_fpo_vactive = true, }; static const struct dc_debug_options debug_defaults_diags = { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index c252fdf2c0e8..4548320217fc 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -1927,6 +1927,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, unsigned int min_dram_speed_mts_margin; bool need_fclk_lat_as_dummy = false; bool is_subvp_p_drr = false; + struct dc_stream_state *fpo_candidate_stream = NULL; dc_assert_fp_enabled(); @@ -1968,8 +1969,11 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, if (!pstate_en || (!dc->debug.disable_fpo_optimizations && pstate_en && vlevel != 0)) { /* only when the mclk switch can not be natural, is the fw based vblank stretch attempted */ - context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = - 
dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch(dc, context); + fpo_candidate_stream = dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(dc, context); + if (fpo_candidate_stream) { + fpo_candidate_stream->fpo_in_use = true; + context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = true; + } if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) { dummy_latency_index = dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(dc, @@ -2161,7 +2165,13 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, * DCFCLK: Min, as reported by PM FW, when available * UCLK: Min, as reported by PM FW, when available */ - dc->res_pool->funcs->update_soc_for_wm_a(dc, context); + + /* For set A set the correct latency values (i.e. non-dummy values) unconditionally + */ + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us; + context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; @@ -2796,3 +2806,68 @@ double dcn32_determine_max_vratio_prefetch(struct dc *dc, struct dc_state *conte } return max_vratio_pre; } + +/** + * dcn32_assign_fpo_vactive_candidate - Assign the FPO stream candidate for FPO + VActive case + * + * This function chooses the FPO candidate stream for FPO + VActive cases (2 stream config). 
+ * For FPO + VActive cases, the assumption is that one display has ActiveMargin > 0, and the + * other display has ActiveMargin <= 0. This function will choose the pipe/stream that has + * ActiveMargin <= 0 to be the FPO stream candidate if found. + * + * + * @param [in]: dc - current dc state + * @param [in]: context - new dc state + * @param [out]: fpo_candidate_stream - pointer to FPO stream candidate if one is found + * + * Return: void + */ +void dcn32_assign_fpo_vactive_candidate(struct dc *dc, const struct dc_state *context, struct dc_stream_state **fpo_candidate_stream) +{ + unsigned int i, pipe_idx; + const struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->stream) + continue; + + if (vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0) { + *fpo_candidate_stream = pipe->stream; + break; + } + pipe_idx++; + } +} + +/** + * dcn32_find_vactive_pipe - Determines if the config has a pipe that can switch in VACTIVE + * + * @param [in]: dc - current dc state + * @param [in]: context - new dc state + * @param [in]: vactive_margin_req_us - The vactive margin required for a vactive pipe to be + * considered "found" + * + * Return: True if VACTIVE display is found, false otherwise + */ +bool dcn32_find_vactive_pipe(struct dc *dc, const struct dc_state *context, uint32_t vactive_margin_req_us) +{ + unsigned int i, pipe_idx; + const struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + bool vactive_found = false; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->stream) + continue; + + if (vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] >= vactive_margin_req_us) { + vactive_found = true; + break; + } +
pipe_idx++; + } + return vactive_found; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h index ab010e7e840b..9a0806a0e2ef 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -76,4 +76,8 @@ void dcn32_patch_dpm_table(struct clk_bw_params *bw_params); void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes, int pipe_cnt); +void dcn32_assign_fpo_vactive_candidate(struct dc *dc, const struct dc_state *context, struct dc_stream_state **fpo_candidate_stream); + +bool dcn32_find_vactive_pipe(struct dc *dc, const struct dc_state *context, uint32_t vactive_margin_req); + #endif -- cgit v1.2.3 From d170e938f01fc8c5c41f8a12f0c12491580829ef Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Fri, 24 Mar 2023 12:01:09 -0400 Subject: drm/amd/display: On clock init, maintain DISPCLK freq [Description] - On init if a display is connected, we need to maintain the DISPCLK frequency - Even though DPG_EN=1, the display still requires the correct timing or it could cause audio corruption (if DISPCLK freq is reduced) - Read the current DISPCLK freq and request the same value to ensure the timing is valid and unchanged - However, add option to do a full pipe power down (including link) which will also avoid audio related issues - Disabled for the time being on dcn32 Reviewed-by: Jun Lei Acked-by: Qingqing Zhuo Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c | 18 +++++++++++++++ drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c | 26 +++++++++++++++++----- .../gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 1 + .../drm/amd/display/dc/dcn321/dcn321_resource.c | 1 + drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h | 3 +++ 6 files changed, 45 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git 
a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c index af108f88b112..ea753f8fa175 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c @@ -399,6 +399,23 @@ static void dcn32_update_clocks_update_dentist( } +static int dcn32_get_dispclk_from_dentist(struct clk_mgr *clk_mgr_base) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + uint32_t dispclk_wdivider; + int disp_divider; + + REG_GET(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, &dispclk_wdivider); + disp_divider = dentist_get_divider_from_did(dispclk_wdivider); + + /* Return DISPCLK freq in Khz */ + if (disp_divider) + return (DENTIST_DIVIDER_RANGE_SCALE_FACTOR * clk_mgr->base.dentist_vco_freq_khz) / disp_divider; + + return 0; +} + + static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool safe_to_lower) @@ -852,6 +869,7 @@ static struct clk_mgr_funcs dcn32_funcs = { .are_clock_states_equal = dcn32_are_clock_states_equal, .enable_pme_wa = dcn32_enable_pme_wa, .is_smu_present = dcn32_is_smu_present, + .get_dispclk_from_dentist = dcn32_get_dispclk_from_dentist, }; void dcn32_clk_mgr_construct( diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 719727a2f939..f51b9c265b51 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -877,6 +877,7 @@ struct dc_debug_options { bool support_eDP1_5; uint32_t fpo_vactive_margin_us; bool disable_fpo_vactive; + bool disable_boot_optimizations; }; struct gpu_info_soc_bounding_box_v1_0; diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index b3824287c224..db0974fe58ab 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -721,10 +721,19 @@ 
static void dcn32_initialize_min_clocks(struct dc *dc) clocks->socclk_khz = dc->clk_mgr->bw_params->clk_table.entries[0].socclk_mhz * 1000; clocks->dramclk_khz = dc->clk_mgr->bw_params->clk_table.entries[0].memclk_mhz * 1000; clocks->dppclk_khz = dc->clk_mgr->bw_params->clk_table.entries[0].dppclk_mhz * 1000; - clocks->dispclk_khz = dc->clk_mgr->bw_params->clk_table.entries[0].dispclk_mhz * 1000; - clocks->ref_dtbclk_khz = dc->clk_mgr->bw_params->clk_table.entries[0].dtbclk_mhz * 1000; - clocks->fclk_p_state_change_support = true; - clocks->p_state_change_support = true; + if (dc->debug.disable_boot_optimizations) { + clocks->dispclk_khz = dc->clk_mgr->bw_params->clk_table.entries[0].dispclk_mhz * 1000; + } else { + /* Even though DPG_EN = 1 for the connected display, it still requires the + * correct timing so we cannot set DISPCLK to min freq or it could cause + * audio corruption. Read current DISPCLK from DENTIST and request the same + * freq to ensure that the timing is valid and unchanged. + */ + clocks->dispclk_khz = dc->clk_mgr->funcs->get_dispclk_from_dentist(dc->clk_mgr); + clocks->ref_dtbclk_khz = dc->clk_mgr->bw_params->clk_table.entries[0].dtbclk_mhz * 1000; + clocks->fclk_p_state_change_support = true; + clocks->p_state_change_support = true; + } dc->clk_mgr->funcs->update_clocks( dc->clk_mgr, @@ -823,7 +832,14 @@ void dcn32_init_hw(struct dc *dc) * everything down. */ if (dcb->funcs->is_accelerated_mode(dcb) || !dc->config.seamless_boot_edp_requested) { - hws->funcs.init_pipes(dc, dc->current_state); + /* Disable boot optimizations means power down everything including PHY, DIG, + * and OTG (i.e. the boot is not optimized because we do a full power down). 
+ */ + if (dc->hwss.enable_accelerated_mode && dc->debug.disable_boot_optimizations) + dc->hwss.enable_accelerated_mode(dc, dc->current_state); + else + hws->funcs.init_pipes(dc, dc->current_state); + if (dc->res_pool->hubbub->funcs->allow_self_refresh_control) dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub, !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 99d417fa6458..e30d1f60695d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -728,6 +728,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_fpo_optimizations = false, .fpo_vactive_margin_us = 2000, // 2000us .disable_fpo_vactive = true, + .disable_boot_optimizations = false, }; static const struct dc_debug_options debug_defaults_diags = { diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index 7ebd14fb9698..e5ab7f3077c4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -726,6 +726,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_fpo_optimizations = false, .fpo_vactive_margin_us = 2000, // 2000us .disable_fpo_vactive = true, + .disable_boot_optimizations = false, }; static const struct dc_debug_options debug_defaults_diags = { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index 591ab1389e3b..bef843cc32a1 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -293,6 +293,9 @@ struct clk_mgr_funcs { /* Get SMU present */ bool (*is_smu_present)(struct clk_mgr *clk_mgr); + + int (*get_dispclk_from_dentist)(struct clk_mgr *clk_mgr_base); + }; 
struct clk_mgr { -- cgit v1.2.3 From a2a0bdf1989c38ca2fc356edd23a114172ee09a2 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Fri, 24 Mar 2023 12:31:07 -0400 Subject: drm/amd/display: add dscclk instance offset check [why] based on dscclk instance offset check conditionally program dscclk Reviewed-by: Nicholas Kazlauskas Acked-by: Qingqing Zhuo Signed-off-by: Charlene Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h | 8 ++++++++ drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c | 18 ++++++++++++++++++ drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h | 4 ++++ 3 files changed, 30 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h index 893c0809cd4e..7bdc146f7cb5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h @@ -205,6 +205,11 @@ type PHYDSYMCLK_GATE_DISABLE; \ type PHYESYMCLK_GATE_DISABLE; +#define DCCG314_REG_FIELD_LIST(type) \ + type DSCCLK3_DTO_PHASE;\ + type DSCCLK3_DTO_MODULO;\ + type DSCCLK3_DTO_ENABLE; + #define DCCG32_REG_FIELD_LIST(type) \ type DPSTREAMCLK0_EN;\ type DPSTREAMCLK1_EN;\ @@ -237,6 +242,7 @@ struct dccg_shift { DCCG_REG_FIELD_LIST(uint8_t) DCCG3_REG_FIELD_LIST(uint8_t) DCCG31_REG_FIELD_LIST(uint8_t) + DCCG314_REG_FIELD_LIST(uint8_t) DCCG32_REG_FIELD_LIST(uint8_t) }; @@ -244,6 +250,7 @@ struct dccg_mask { DCCG_REG_FIELD_LIST(uint32_t) DCCG3_REG_FIELD_LIST(uint32_t) DCCG31_REG_FIELD_LIST(uint32_t) + DCCG314_REG_FIELD_LIST(uint32_t) DCCG32_REG_FIELD_LIST(uint32_t) }; @@ -273,6 +280,7 @@ struct dccg_registers { uint32_t DSCCLK0_DTO_PARAM; uint32_t DSCCLK1_DTO_PARAM; uint32_t DSCCLK2_DTO_PARAM; + uint32_t DSCCLK3_DTO_PARAM; uint32_t DPSTREAMCLK_ROOT_GATE_DISABLE; uint32_t DPSTREAMCLK_GATE_DISABLE; uint32_t DCCG_GATE_DISABLE_CNTL; diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c
b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c index 7d2b982506fd..4c2fdfea162f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c @@ -360,6 +360,15 @@ void dccg31_disable_dscclk(struct dccg *dccg, int inst) DSCCLK2_DTO_PHASE, 0, DSCCLK2_DTO_MODULO, 1); break; + case 3: + if (REG(DSCCLK3_DTO_PARAM)) { + REG_UPDATE(DSCCLK_DTO_CTRL, + DSCCLK3_DTO_ENABLE, 1); + REG_UPDATE_2(DSCCLK3_DTO_PARAM, + DSCCLK3_DTO_PHASE, 0, + DSCCLK3_DTO_MODULO, 1); + } + break; default: BREAK_TO_DEBUGGER(); return; @@ -395,6 +404,15 @@ void dccg31_enable_dscclk(struct dccg *dccg, int inst) REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_DTO_ENABLE, 0); break; + case 3: + if (REG(DSCCLK3_DTO_PARAM)) { + REG_UPDATE(DSCCLK_DTO_CTRL, + DSCCLK3_DTO_ENABLE, 0); + REG_UPDATE_2(DSCCLK3_DTO_PARAM, + DSCCLK3_DTO_PHASE, 0, + DSCCLK3_DTO_MODULO, 0); + } + break; default: BREAK_TO_DEBUGGER(); return; diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h index f62631ab53a2..90687a9e8fdd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h @@ -68,6 +68,7 @@ SR(DSCCLK0_DTO_PARAM),\ SR(DSCCLK1_DTO_PARAM),\ SR(DSCCLK2_DTO_PARAM),\ + SR(DSCCLK3_DTO_PARAM),\ SR(DSCCLK_DTO_CTRL),\ SR(DCCG_GATE_DISABLE_CNTL2),\ SR(DCCG_GATE_DISABLE_CNTL3),\ @@ -149,6 +150,8 @@ DCCG_SF(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_MODULO, mask_sh),\ DCCG_SF(DSCCLK2_DTO_PARAM, DSCCLK2_DTO_PHASE, mask_sh),\ DCCG_SF(DSCCLK2_DTO_PARAM, DSCCLK2_DTO_MODULO, mask_sh),\ + DCCG_SF(DSCCLK3_DTO_PARAM, DSCCLK3_DTO_PHASE, mask_sh),\ + DCCG_SF(DSCCLK3_DTO_PARAM, DSCCLK3_DTO_MODULO, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE0_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE1_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE2_GATE_DISABLE, mask_sh),\ @@ -184,6 +187,7 @@ DCCG_SF(DSCCLK_DTO_CTRL, 
DSCCLK0_DTO_ENABLE, mask_sh),\ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK1_DTO_ENABLE, mask_sh),\ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK2_DTO_ENABLE, mask_sh),\ + DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK3_DTO_ENABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYBSYMCLK_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYCSYMCLK_GATE_DISABLE, mask_sh),\ -- cgit v1.2.3 From 554836cc24411e4d3645db5392655f8d28d1d47a Mon Sep 17 00:00:00 2001 From: Yifan Zha Date: Wed, 29 Mar 2023 17:18:47 +0800 Subject: drm/amdgpu: Add MES KIQ clear to tell RLC that KIQ is dequeued [Why] As MES KIQ is dequeued, tell RLC that KIQ is inactive [How] Clear the RLC_CP_SCHEDULERS Active bit which RLC checks KIQ status In addition, driver can halt MES under SRIOV when unloading driver v2: Use scheduler0 mask to clear KIQ portion of RLC_CP_SCHEDULERS Signed-off-by: Yifan Zha Reviewed-by: Horace Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index b48666da2c87..7e8c15cd8dc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -1138,6 +1138,16 @@ static void mes_v11_0_kiq_setting(struct amdgpu_ring *ring) WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); } +static void mes_v11_0_kiq_clear(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* tell RLC which is KIQ dequeue */ + tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); + tmp &= ~RLC_CP_SCHEDULERS__scheduler0_MASK; + WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); +} + static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev) { int r = 0; @@ -1182,10 +1192,10 @@ static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) { mes_v11_0_kiq_dequeue(&adev->gfx.kiq.ring); + mes_v11_0_kiq_clear(adev); } - if 
(!amdgpu_sriov_vf(adev)) - mes_v11_0_enable(adev, false); + mes_v11_0_enable(adev, false); return 0; } -- cgit v1.2.3 From 7727e7b60f82e8265a1061b81379f5a7bce0dba6 Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Fri, 24 Mar 2023 10:48:37 -0400 Subject: drm/amd/display: Improve robustness of FIXED_VS link training at DP1 rates [WHY] New sequence for transparent mode DP1.x link training was provided by LTTPR vendor [HOW] Implement new FIXED_VS sequence, increase LT retry count to minimize any potential intermittent lightup failures Reviewed-by: Jun Lei Acked-by: Qingqing Zhuo Signed-off-by: Michael Strauss Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + .../gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 2 + drivers/gpu/drm/amd/display/dc/link/link_dpms.c | 8 +- .../display/dc/link/protocols/link_dp_training.c | 5 +- .../link_dp_training_fixed_vs_pe_retimer.c | 378 ++++++++++++++++++++- .../link_dp_training_fixed_vs_pe_retimer.h | 5 + 6 files changed, 396 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index f51b9c265b51..190e6a2e1334 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -405,6 +405,7 @@ struct dc_config { bool force_bios_enable_lttpr; uint8_t force_bios_fixed_vs; int sdpif_request_limit_words_per_umc; + bool use_old_fixed_vs_sequence; bool disable_subvp_drr; }; diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index eaaa2e01f6d0..ff8cd5076434 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -1965,6 +1965,8 @@ static bool dcn31_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->config.use_old_fixed_vs_sequence = true; + /* Use pipe context based otg sync logic */ 
dc->config.use_pipe_ctx_sync_logic = true; diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index f6c5ee2d639b..027ad1f0144d 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -2035,6 +2035,12 @@ static enum dc_status enable_link_dp(struct dc_state *state, uint32_t post_oui_delay = 30; // 30ms /* Reduce link bandwidth between failed link training attempts. */ bool do_fallback = false; + int lt_attempts = LINK_TRAINING_ATTEMPTS; + + // Increase retry count if attempting DP1.x on FIXED_VS link + if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && + link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING) + lt_attempts = 10; // check for seamless boot for (i = 0; i < state->stream_count; i++) { @@ -2099,7 +2105,7 @@ static enum dc_status enable_link_dp(struct dc_state *state, if (perform_link_training_with_retries(link_settings, skip_video_pattern, - LINK_TRAINING_ATTEMPTS, + lt_attempts, pipe_ctx, pipe_ctx->stream->signal, do_fallback)) { diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c index f301c9eaf2f9..70fc0ddf2d7e 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c @@ -1496,7 +1496,10 @@ enum link_training_result dp_perform_link_training( * Non-LT AUX transactions inside training mode. 
*/ if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && encoding == DP_8b_10b_ENCODING) - status = dp_perform_fixed_vs_pe_training_sequence(link, link_res, &lt_settings); + if (link->dc->config.use_old_fixed_vs_sequence) + status = dp_perform_fixed_vs_pe_training_sequence_legacy(link, link_res, &lt_settings); + else + status = dp_perform_fixed_vs_pe_training_sequence(link, link_res, &lt_settings); else if (encoding == DP_8b_10b_ENCODING) status = dp_perform_8b_10b_link_training(link, link_res, &lt_settings); else if (encoding == DP_128b_132b_ENCODING) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c index a4071d2959a0..5731c4b61f9f 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c @@ -223,7 +223,7 @@ static enum link_training_result perform_fixed_vs_pe_nontransparent_training_seq } -enum link_training_result dp_perform_fixed_vs_pe_training_sequence( +enum link_training_result dp_perform_fixed_vs_pe_training_sequence_legacy( struct dc_link *link, const struct link_resource *link_res, struct link_training_settings *lt_settings) @@ -577,3 +577,379 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence( return status; } + +enum link_training_result dp_perform_fixed_vs_pe_training_sequence( + struct dc_link *link, + const struct link_resource *link_res, + struct link_training_settings *lt_settings) +{ + const uint8_t vendor_lttpr_write_data_reset[4] = {0x1, 0x50, 0x63, 0xFF}; + const uint8_t offset = dp_parse_lttpr_repeater_count( + link->dpcd_caps.lttpr_caps.phy_repeater_cnt); + const uint8_t vendor_lttpr_write_data_intercept_en[4] = {0x1, 0x55, 0x63, 0x0}; + const uint8_t vendor_lttpr_write_data_intercept_dis[4] = {0x1, 0x55, 0x63, 0x6E}; + const uint8_t
vendor_lttpr_write_data_adicora_eq1[4] = {0x1, 0x55, 0x63, 0x2E}; + const uint8_t vendor_lttpr_write_data_adicora_eq2[4] = {0x1, 0x55, 0x63, 0x01}; + const uint8_t vendor_lttpr_write_data_adicora_eq3[4] = {0x1, 0x55, 0x63, 0x68}; + uint32_t pre_disable_intercept_delay_ms = link->dc->debug.fixed_vs_aux_delay_config_wa; + uint8_t vendor_lttpr_write_data_vs[4] = {0x1, 0x51, 0x63, 0x0}; + uint8_t vendor_lttpr_write_data_pe[4] = {0x1, 0x52, 0x63, 0x0}; + + uint32_t vendor_lttpr_write_address = 0xF004F; + enum link_training_result status = LINK_TRAINING_SUCCESS; + uint8_t lane = 0; + union down_spread_ctrl downspread = {0}; + union lane_count_set lane_count_set = {0}; + uint8_t toggle_rate; + uint8_t rate; + + /* Only 8b/10b is supported */ + ASSERT(link_dp_get_encoding_format(&lt_settings->link_settings) == + DP_8b_10b_ENCODING); + + if (lt_settings->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) { + status = perform_fixed_vs_pe_nontransparent_training_sequence(link, link_res, lt_settings); + return status; + } + + if (offset != 0xFF) { + vendor_lttpr_write_address += + ((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (offset - 1)); + + /* Certain display and cable configuration require extra delay */ + if (offset > 2) + pre_disable_intercept_delay_ms = link->dc->debug.fixed_vs_aux_delay_config_wa * 2; + } + + /* Vendor specific: Reset lane settings */ + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_reset[0], + sizeof(vendor_lttpr_write_data_reset)); + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_vs[0], + sizeof(vendor_lttpr_write_data_vs)); + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_pe[0], + sizeof(vendor_lttpr_write_data_pe)); + + /* Vendor specific: Enable intercept */ + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_intercept_en[0], + sizeof(vendor_lttpr_write_data_intercept_en)); + + /* 1.
set link rate, lane count and spread. */ + + downspread.raw = (uint8_t)(lt_settings->link_settings.link_spread); + + lane_count_set.bits.LANE_COUNT_SET = + lt_settings->link_settings.lane_count; + + lane_count_set.bits.ENHANCED_FRAMING = lt_settings->enhanced_framing; + lane_count_set.bits.POST_LT_ADJ_REQ_GRANTED = 0; + + + if (lt_settings->pattern_for_eq < DP_TRAINING_PATTERN_SEQUENCE_4) { + lane_count_set.bits.POST_LT_ADJ_REQ_GRANTED = + link->dpcd_caps.max_ln_count.bits.POST_LT_ADJ_REQ_SUPPORTED; + } + + core_link_write_dpcd(link, DP_DOWNSPREAD_CTRL, + &downspread.raw, sizeof(downspread)); + + core_link_write_dpcd(link, DP_LANE_COUNT_SET, + &lane_count_set.raw, 1); + + rate = get_dpcd_link_rate(&lt_settings->link_settings); + + /* Vendor specific: Toggle link rate */ + toggle_rate = (rate == 0x6) ? 0xA : 0x6; + + if (link->vendor_specific_lttpr_link_rate_wa == rate) { + core_link_write_dpcd( + link, + DP_LINK_BW_SET, + &toggle_rate, + 1); + } + + link->vendor_specific_lttpr_link_rate_wa = rate; + + core_link_write_dpcd(link, DP_LINK_BW_SET, &rate, 1); + + DC_LOG_HW_LINK_TRAINING("%s\n %x rate = %x\n %x lane = %x framing = %x\n %x spread = %x\n", + __func__, + DP_LINK_BW_SET, + lt_settings->link_settings.link_rate, + DP_LANE_COUNT_SET, + lt_settings->link_settings.lane_count, + lt_settings->enhanced_framing, + DP_DOWNSPREAD_CTRL, + lt_settings->link_settings.link_spread); + + /* 2.
Perform link training */ + + /* Perform Clock Recovery Sequence */ + if (status == LINK_TRAINING_SUCCESS) { + const uint8_t max_vendor_dpcd_retries = 10; + uint32_t retries_cr; + uint32_t retry_count; + uint32_t wait_time_microsec; + enum dc_lane_count lane_count = lt_settings->link_settings.lane_count; + union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX]; + union lane_align_status_updated dpcd_lane_status_updated; + union lane_adjust dpcd_lane_adjust[LANE_COUNT_DP_MAX] = {0}; + enum dc_status dpcd_status = DC_OK; + uint8_t i = 0; + + retries_cr = 0; + retry_count = 0; + + memset(&dpcd_lane_status, '\0', sizeof(dpcd_lane_status)); + memset(&dpcd_lane_status_updated, '\0', + sizeof(dpcd_lane_status_updated)); + + while ((retries_cr < LINK_TRAINING_MAX_RETRY_COUNT) && + (retry_count < LINK_TRAINING_MAX_CR_RETRY)) { + + + /* 1. call HWSS to set lane settings */ + dp_set_hw_lane_settings( + link, + link_res, + lt_settings, + 0); + + /* 2. update DPCD of the receiver */ + if (!retry_count) { + /* EPR #361076 - write as a 5-byte burst, + * but only for the 1-st iteration. 
+ */ + dpcd_set_lt_pattern_and_lane_settings( + link, + lt_settings, + lt_settings->pattern_for_cr, + 0); + /* Vendor specific: Disable intercept */ + for (i = 0; i < max_vendor_dpcd_retries; i++) { + msleep(pre_disable_intercept_delay_ms); + dpcd_status = core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_intercept_dis[0], + sizeof(vendor_lttpr_write_data_intercept_dis)); + + if (dpcd_status == DC_OK) + break; + + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_intercept_en[0], + sizeof(vendor_lttpr_write_data_intercept_en)); + } + } else { + vendor_lttpr_write_data_vs[3] = 0; + vendor_lttpr_write_data_pe[3] = 0; + + for (lane = 0; lane < lane_count; lane++) { + vendor_lttpr_write_data_vs[3] |= + lt_settings->dpcd_lane_settings[lane].bits.VOLTAGE_SWING_SET << (2 * lane); + vendor_lttpr_write_data_pe[3] |= + lt_settings->dpcd_lane_settings[lane].bits.PRE_EMPHASIS_SET << (2 * lane); + } + + /* Vendor specific: Update VS and PE to DPRX requested value */ + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_vs[0], + sizeof(vendor_lttpr_write_data_vs)); + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_pe[0], + sizeof(vendor_lttpr_write_data_pe)); + + dpcd_set_lane_settings( + link, + lt_settings, + 0); + } + + /* 3. wait receiver to lock-on*/ + wait_time_microsec = lt_settings->cr_pattern_time; + + dp_wait_for_training_aux_rd_interval( + link, + wait_time_microsec); + + /* 4. Read lane status and requested drive + * settings as set by the sink + */ + dp_get_lane_status_and_lane_adjust( + link, + lt_settings, + dpcd_lane_status, + &dpcd_lane_status_updated, + dpcd_lane_adjust, + 0); + + /* 5. check CR done*/ + if (dp_is_cr_done(lane_count, dpcd_lane_status)) { + status = LINK_TRAINING_SUCCESS; + break; + } + + /* 6. max VS reached*/ + if (dp_is_max_vs_reached(lt_settings)) + break; + + /* 7. 
same lane settings */ + /* Note: settings are the same for all lanes, + * so comparing first lane is sufficient + */ + if (lt_settings->dpcd_lane_settings[0].bits.VOLTAGE_SWING_SET == + dpcd_lane_adjust[0].bits.VOLTAGE_SWING_LANE) + retries_cr++; + else + retries_cr = 0; + + /* 8. update VS/PE/PC2 in lt_settings*/ + dp_decide_lane_settings(lt_settings, dpcd_lane_adjust, + lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings); + retry_count++; + } + + if (retry_count >= LINK_TRAINING_MAX_CR_RETRY) { + ASSERT(0); + DC_LOG_ERROR("%s: Link Training Error, could not get CR after %d tries. Possibly voltage swing issue", + __func__, + LINK_TRAINING_MAX_CR_RETRY); + + } + + status = dp_get_cr_failure(lane_count, dpcd_lane_status); + } + + /* Perform Channel EQ Sequence */ + if (status == LINK_TRAINING_SUCCESS) { + enum dc_dp_training_pattern tr_pattern; + uint32_t retries_ch_eq; + uint32_t wait_time_microsec; + enum dc_lane_count lane_count = lt_settings->link_settings.lane_count; + union lane_align_status_updated dpcd_lane_status_updated = {0}; + union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = {0}; + union lane_adjust dpcd_lane_adjust[LANE_COUNT_DP_MAX] = {0}; + + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_adicora_eq1[0], + sizeof(vendor_lttpr_write_data_adicora_eq1)); + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_adicora_eq2[0], + sizeof(vendor_lttpr_write_data_adicora_eq2)); + + /* Note: also check that TPS4 is a supported feature*/ + tr_pattern = lt_settings->pattern_for_eq; + + dp_set_hw_training_pattern(link, link_res, tr_pattern, 0); + + status = LINK_TRAINING_EQ_FAIL_EQ; + + for (retries_ch_eq = 0; retries_ch_eq <= LINK_TRAINING_MAX_RETRY_COUNT; + retries_ch_eq++) { + + dp_set_hw_lane_settings(link, link_res, lt_settings, 0); + + vendor_lttpr_write_data_vs[3] = 0; + vendor_lttpr_write_data_pe[3] = 0; + + for (lane = 0; lane < lane_count; lane++) { + 
vendor_lttpr_write_data_vs[3] |= + lt_settings->dpcd_lane_settings[lane].bits.VOLTAGE_SWING_SET << (2 * lane); + vendor_lttpr_write_data_pe[3] |= + lt_settings->dpcd_lane_settings[lane].bits.PRE_EMPHASIS_SET << (2 * lane); + } + + /* Vendor specific: Update VS and PE to DPRX requested value */ + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_vs[0], + sizeof(vendor_lttpr_write_data_vs)); + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_pe[0], + sizeof(vendor_lttpr_write_data_pe)); + + /* 2. update DPCD*/ + if (!retries_ch_eq) { + /* EPR #361076 - write as a 5-byte burst, + * but only for the 1-st iteration + */ + + dpcd_set_lt_pattern_and_lane_settings( + link, + lt_settings, + tr_pattern, 0); + + core_link_write_dpcd( + link, + vendor_lttpr_write_address, + &vendor_lttpr_write_data_adicora_eq3[0], + sizeof(vendor_lttpr_write_data_adicora_eq3)); + } else + dpcd_set_lane_settings(link, lt_settings, 0); + + /* 3. wait for receiver to lock-on*/ + wait_time_microsec = lt_settings->eq_pattern_time; + + dp_wait_for_training_aux_rd_interval( + link, + wait_time_microsec); + + /* 4. Read lane status and requested + * drive settings as set by the sink + */ + dp_get_lane_status_and_lane_adjust( + link, + lt_settings, + dpcd_lane_status, + &dpcd_lane_status_updated, + dpcd_lane_adjust, + 0); + + /* 5. check CR done*/ + if (!dp_is_cr_done(lane_count, dpcd_lane_status)) { + status = LINK_TRAINING_EQ_FAIL_CR; + break; + } + + /* 6. check CHEQ done*/ + if (dp_is_ch_eq_done(lane_count, dpcd_lane_status) && + dp_is_symbol_locked(lane_count, dpcd_lane_status) && + dp_is_interlane_aligned(dpcd_lane_status_updated)) { + status = LINK_TRAINING_SUCCESS; + break; + } + + /* 7. 
update VS/PE/PC2 in lt_settings*/ + dp_decide_lane_settings(lt_settings, dpcd_lane_adjust, + lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings); + } + } + + return status; +} diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.h index e61970e27661..c0d6ea329504 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.h @@ -28,6 +28,11 @@ #define __DC_LINK_DP_FIXED_VS_PE_RETIMER_H__ #include "link_dp_training.h" +enum link_training_result dp_perform_fixed_vs_pe_training_sequence_legacy( + struct dc_link *link, + const struct link_resource *link_res, + struct link_training_settings *lt_settings); + enum link_training_result dp_perform_fixed_vs_pe_training_sequence( struct dc_link *link, const struct link_resource *link_res, -- cgit v1.2.3 From 9dce8c2a5f1bf5a304aae39342816f099247d7da Mon Sep 17 00:00:00 2001 From: Anthony Koo Date: Sat, 25 Mar 2023 09:55:13 -0400 Subject: drm/amd/display: [FW Promotion] Release 0.0.161.0 - Add command to idle opt. - Rename d3 entry event and add idle trigger param on notify event. - Add bit to fw boot status to notify status when hardware is powered up. 
Reviewed-by: Aric Cyr Acked-by: Qingqing Zhuo Signed-off-by: Anthony Koo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 28 +++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 15d26222597a..598fa1de54ce 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -362,7 +362,7 @@ union dmub_fw_boot_status { uint32_t defer_load : 1; /**< 1 if VBIOS data is deferred programmed */ uint32_t reserved : 1; uint32_t detection_required: 1; /**< if detection need to be triggered by driver */ - + uint32_t hw_power_init_done: 1; /**< 1 if hw power init is completed */ } bits; /**< status bits */ uint32_t all; /**< 32-bit access to status bits */ }; @@ -377,6 +377,7 @@ enum dmub_fw_boot_status_bit { DMUB_FW_BOOT_STATUS_BIT_RESTORE_REQUIRED = (1 << 3), /**< 1 if driver should call restore */ DMUB_FW_BOOT_STATUS_BIT_DEFERRED_LOADED = (1 << 4), /**< 1 if VBIOS data is deferred programmed */ DMUB_FW_BOOT_STATUS_BIT_DETECTION_REQUIRED = (1 << 6), /**< 1 if detection need to be triggered by driver*/ + DMUB_FW_BOOT_STATUS_BIT_HW_POWER_INIT_DONE = (1 << 7), /**< 1 if hw power init is completed */ }; /* Register bit definition for SCRATCH5 */ @@ -1104,7 +1105,12 @@ enum dmub_cmd_idle_opt_type { /** * DCN hardware save. */ - DMUB_CMD__IDLE_OPT_DCN_SAVE_INIT = 1 + DMUB_CMD__IDLE_OPT_DCN_SAVE_INIT = 1, + + /** + * DCN hardware notify idle. + */ + DMUB_CMD__IDLE_OPT_DCN_NOTIFY_IDLE = 2 }; /** @@ -1114,6 +1120,24 @@ struct dmub_rb_cmd_idle_opt_dcn_restore { struct dmub_cmd_header header; /**< header */ }; +/** + * struct dmub_dcn_notify_idle_cntl_data - Data passed to FW in a DMUB_CMD__IDLE_OPT_DCN_NOTIFY_IDLE command. 
+ */ +struct dmub_dcn_notify_idle_cntl_data { + uint8_t driver_idle; + uint8_t d3_entry; + uint8_t trigger; + uint8_t pad[1]; +}; + +/** + * struct dmub_rb_cmd_idle_opt_dcn_notify_idle - Data passed to FW in a DMUB_CMD__IDLE_OPT_DCN_NOTIFY_IDLE command. + */ +struct dmub_rb_cmd_idle_opt_dcn_notify_idle { + struct dmub_cmd_header header; /**< header */ + struct dmub_dcn_notify_idle_cntl_data cntl_data; +}; + /** * struct dmub_clocks - Clock update notification. */ -- cgit v1.2.3 From d116db180decec1b21bba31d2ff495ac4d8e1b83 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 23 Mar 2023 14:07:06 -0500 Subject: drm/amd: Fix an out of bounds error in BIOS parser The array is hardcoded to 8 in atomfirmware.h, but firmware provides a bigger one sometimes. Dereferencing the larger array causes an out of bounds error. commit 4fc1ba4aa589 ("drm/amd/display: fix array index out of bound error in bios parser") fixed some of this, but there are two other cases not covered by it. Fix those as well. Reported-by: erhard_f@mailbox.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=214853 Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2473 Signed-off-by: Mario Limonciello Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index f0f948501e9a..cce47d3f1a13 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -515,11 +515,8 @@ static enum bp_result get_gpio_i2c_info( info->i2c_slave_address = record->i2c_slave_addr; /* TODO: check how to get register offset for en, Y, etc.
*/ - info->gpio_info.clk_a_register_index = - le16_to_cpu( - header->gpio_pin[table_index].data_a_reg_index); - info->gpio_info.clk_a_shift = - header->gpio_pin[table_index].gpio_bitshift; + info->gpio_info.clk_a_register_index = le16_to_cpu(pin->data_a_reg_index); + info->gpio_info.clk_a_shift = pin->gpio_bitshift; return BP_RESULT_OK; } -- cgit v1.2.3 From e38dddcaed60c0692b77a7af355d34a13183cee1 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Mon, 27 Mar 2023 01:45:02 -0400 Subject: drm/amd/display: 3.2.230 This DC version brings along: - FW Release 0.0.161.0 - Improvements on FPO/FAMS - Correction to DML calculation - Fix to multiple clock related issues Acked-by: Qingqing Zhuo Signed-off-by: Aric Cyr Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 190e6a2e1334..23ee63b98dcd 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -45,7 +45,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.229" +#define DC_VER "3.2.230" #define MAX_SURFACES 3 #define MAX_PLANES 6 -- cgit v1.2.3 From 52f1783ff4146344342422c1cd94fcb4ce39b6fe Mon Sep 17 00:00:00 2001 From: Igor Artemiev Date: Mon, 3 Apr 2023 16:10:37 +0300 Subject: drm/amd/display: Fix potential null dereference The adev->dm.dc pointer can be NULL and dereferenced in amdgpu_dm_fini() without checking. Add a NULL pointer check before calling dc_dmub_srv_destroy(). Found by Linux Verification Center (linuxtesting.org) with SVACE. 
Fixes: 9a71c7d31734 ("drm/amd/display: Register DMUB service with DC") Signed-off-by: Igor Artemiev Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e29655a26dd4..6cacb76f389e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1890,7 +1890,8 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) if (adev->dm.dc) dc_deinit_callbacks(adev->dm.dc); - dc_dmub_srv_destroy(&adev->dm.dc->ctx->dmub_srv); + if (adev->dm.dc) + dc_dmub_srv_destroy(&adev->dm.dc->ctx->dmub_srv); if (dc_enable_dmub_notifications(adev->dm.dc)) { kfree(adev->dm.dmub_notify); -- cgit v1.2.3 From 11f25c844e29f85abb0b3ffdb360a2f82a2c4ed0 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 5 Apr 2023 20:41:09 +0530 Subject: drm/amd/amdgpu: Drop the hang limit parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver doesn't resubmit jobs on hangs any more, hence drop the hang limit parameter - amdgpu_job_hang_limit, wherever it is used. 
Suggested-by: Christian König Cc: Alex Deucher Cc: Mario Limonciello Cc: Kent Russell Signed-off-by: Srinivasan Shanmugam Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 -------- 3 files changed, 1 insertion(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8cf2cc50b3de..833996291dee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -185,7 +185,6 @@ extern char *amdgpu_disable_cu; extern char *amdgpu_virtual_display; extern uint amdgpu_pp_feature_mask; extern uint amdgpu_force_long_training; -extern int amdgpu_job_hang_limit; extern int amdgpu_lbpw; extern int amdgpu_compute_multipipe; extern int amdgpu_gpu_recovery; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index fac9312b1695..4819b3f86750 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2365,7 +2365,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) } r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, - ring->num_hw_submission, amdgpu_job_hang_limit, + ring->num_hw_submission, 0, timeout, adev->reset_domain->wq, ring->sched_score, ring->name, adev->dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b4189d669b54..b1ca1ab6d6ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -157,7 +157,6 @@ char *amdgpu_virtual_display; */ uint amdgpu_pp_feature_mask = 0xfff7bfff; uint amdgpu_force_long_training; -int amdgpu_job_hang_limit; int amdgpu_lbpw = -1; int amdgpu_compute_multipipe = -1; int amdgpu_gpu_recovery = -1; /* auto */ @@ -520,13 +519,6 @@ MODULE_PARM_DESC(virtual_display, "Enable 
virtual display feature (the virtual_display will be set like xxxx:xx:xx.x,x;xxxx:xx:xx.x,x)"); module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444); -/** - * DOC: job_hang_limit (int) - * Set how much time allow a job hang and not drop it. The default is 0. - */ -MODULE_PARM_DESC(job_hang_limit, "how much time allow a job hang and not drop it (default 0)"); -module_param_named(job_hang_limit, amdgpu_job_hang_limit, int ,0444); - /** * DOC: lbpw (int) * Override Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable). The default is -1 (auto, enabled). -- cgit v1.2.3 From 207bbfb63dc0eb491f71e96baa5db9c25626a247 Mon Sep 17 00:00:00 2001 From: Shane Xiao Date: Wed, 5 Apr 2023 22:33:11 +0800 Subject: drm/amdgpu: Add userptr bo support for mGPUs when iommu is on For userptr bo with iommu on, multiple GPUs use same system memory dma mapping address when both adev and bo_adev are in identity mode or in the same iommu group. If RAM direct map to one GPU, other GPUs can share the original BO in order to reduce dma address array usage when RAM can direct map to these GPUs. However, we should explicitly check whether RAM can direct map to all these GPUs. This patch fixes a potential issue where RAM is direct mapped on some but not all GPUs. v2: 1. Update comment 2.
Add helper function reuse_dmamap Signed-off-by: Shane Xiao Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 27 ++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index c87515210c4f..ed58fa5af056 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -82,6 +82,25 @@ static bool kfd_mem_is_attached(struct amdgpu_vm *avm, return false; } +/** + * reuse_dmamap() - Check whether adev can share the original + * userptr BO + * + * If both adev and bo_adev are in direct mapping or + * in the same iommu group, they can share the original BO. + * + * @adev: Device to which can or cannot share the original BO + * @bo_adev: Device to which allocated BO belongs to + * + * Return: returns true if adev can share original userptr BO, + * false otherwise. + */ +static bool reuse_dmamap(struct amdgpu_device *adev, struct amdgpu_device *bo_adev) +{ + return (adev->ram_is_direct_mapped && bo_adev->ram_is_direct_mapped) || + (adev->dev->iommu_group == bo_adev->dev->iommu_group); +} + /* Set memory usage limits. 
Current, limits are * System (TTM + userptr) memory - 15/16th System RAM * TTM memory - 3/8th System RAM @@ -805,11 +824,11 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, va + bo_size, vm); if ((adev == bo_adev && !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) || - (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && adev->ram_is_direct_mapped) || - same_hive) { + (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && reuse_dmamap(adev, bo_adev)) || + same_hive) { /* Mappings on the local GPU, or VRAM mappings in the - * local hive, or userptr mapping IOMMU direct map mode - * share the original BO + * local hive, or userptr mapping can reuse dma map + * address space share the original BO */ attachment[i]->type = KFD_MEM_ATT_SHARED; bo[i] = mem->bo; -- cgit v1.2.3 From 89317d4255122f05aaa0ac16d189a9ab3022653c Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Thu, 30 Mar 2023 11:40:45 -0300 Subject: drm/amd/pm: Fix incorrect comment about Vangogh power cap support The comment mentions that power1 cap attributes are not supported on Vangogh, but the opposite is indeed valid: for APUs, only Vangogh is supported. While at it, also fixed the Renoir comment below (thanks Melissa for noticing that!). Cc: Lijo Lazar Cc: Melissa Wen Signed-off-by: Guilherme G. 
Piccoli Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index e011041e3ec6..58c2246918fd 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -3395,7 +3395,7 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't manage state */ effective_mode &= ~S_IWUSR; - /* not implemented yet for GC 10.3.1 APUs */ + /* In the case of APUs, this is only implemented on Vangogh */ if (((adev->family == AMDGPU_FAMILY_SI) || ((adev->flags & AMD_IS_APU) && (gc_ver != IP_VERSION(10, 3, 1)))) && (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || @@ -3404,7 +3404,7 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_power1_cap_default.dev_attr.attr)) return 0; - /* not implemented yet for APUs having <= GC 9.3.0 */ + /* not implemented yet for APUs having < GC 9.3.0 (Renoir) */ if (((adev->family == AMDGPU_FAMILY_SI) || ((adev->flags & AMD_IS_APU) && (gc_ver < IP_VERSION(9, 3, 0)))) && (attr == &sensor_dev_attr_power1_average.dev_attr.attr)) -- cgit v1.2.3 From af152c2120587b02e03dfe370b52ba75c40f8952 Mon Sep 17 00:00:00 2001 From: Shane Xiao Date: Thu, 6 Apr 2023 12:37:16 +0800 Subject: amd/amdgpu: Inherit coherence flags base on original BO flags For SG BO to DMA-map userptrs on other GPUs, the SG BO need inherit MTYPEs in PTEs from original BO. If we set the flags, the device can be coherent with the CPUs and other GPUs. v2: 1. Drop unnecessary flags check 2. 
Remove local variable align Signed-off-by: Shane Xiao Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index ed58fa5af056..82a8b435148c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -272,15 +272,19 @@ create_dmamap_sg_bo(struct amdgpu_device *adev, struct kgd_mem *mem, struct amdgpu_bo **bo_out) { struct drm_gem_object *gem_obj; - int ret, align; + int ret; + uint64_t flags = 0; ret = amdgpu_bo_reserve(mem->bo, false); if (ret) return ret; - align = 1; - ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, align, - AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE, + if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) + flags |= mem->bo->flags & (AMDGPU_GEM_CREATE_COHERENT | + AMDGPU_GEM_CREATE_UNCACHED); + + ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, 1, + AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE | flags, ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj); amdgpu_bo_unreserve(mem->bo); -- cgit v1.2.3 From e86bd8b21d57670e38c23ed435a053e7e4cc9e21 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 27 Mar 2023 14:23:30 +0800 Subject: drm/amdgpu: optimize redundant code in umc_v8_10 Optimize redundant code in umc_v8_10 Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 31 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 7 ++ drivers/gpu/drm/amd/amdgpu/umc_v8_10.c | 197 +++++++++++++------------------- 3 files changed, 115 insertions(+), 120 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 9e2e97207e53..1edf8e6aeb16 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -302,3 +302,34 @@ void amdgpu_umc_fill_error_record(struct ras_err_data *err_data, err_data->err_addr_cnt++; } + +int amdgpu_umc_loop_channels(struct amdgpu_device *adev, + umc_func func, void *data) +{ + uint32_t node_inst = 0; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + int ret = 0; + + if (adev->umc.node_inst_num) { + LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { + ret = func(adev, node_inst, umc_inst, ch_inst, data); + if (ret) { + dev_err(adev->dev, "Node %d umc %d ch %d func returns %d\n", + node_inst, umc_inst, ch_inst, ret); + return ret; + } + } + } else { + LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { + ret = func(adev, 0, umc_inst, ch_inst, data); + if (ret) { + dev_err(adev->dev, "Umc %d ch %d func returns %d\n", + umc_inst, ch_inst, ret); + return ret; + } + } + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index d7f1229ff11f..86133f77a9a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -47,6 +47,10 @@ #define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) \ LOOP_UMC_NODE_INST((node_inst)) LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst)) + +typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst, + uint32_t umc_inst, uint32_t ch_inst, void *data); + struct amdgpu_umc_ras { struct amdgpu_ras_block_object ras_block; void (*err_cnt_init)(struct amdgpu_device *adev); @@ -104,4 +108,7 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry); int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst); + +int amdgpu_umc_loop_channels(struct amdgpu_device *adev, + umc_func func, void *data); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c index 
fb55e8cb9967..d51ae0bc36f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c @@ -76,10 +76,13 @@ static inline uint32_t get_umc_v8_10_reg_offset(struct amdgpu_device *adev, UMC_8_NODE_DIST * node_inst; } -static void umc_v8_10_clear_error_count_per_channel(struct amdgpu_device *adev, - uint32_t umc_reg_offset) +static int umc_v8_10_clear_error_count_per_channel(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) { uint32_t ecc_err_cnt_addr; + uint32_t umc_reg_offset = + get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst); ecc_err_cnt_addr = SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); @@ -87,24 +90,14 @@ static void umc_v8_10_clear_error_count_per_channel(struct amdgpu_device *adev, /* clear error count */ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_10_CE_CNT_INIT); + + return 0; } static void umc_v8_10_clear_error_count(struct amdgpu_device *adev) { - uint32_t node_inst = 0; - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; - uint32_t umc_reg_offset = 0; - - LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { - umc_reg_offset = get_umc_v8_10_reg_offset(adev, - node_inst, - umc_inst, - ch_inst); - - umc_v8_10_clear_error_count_per_channel(adev, - umc_reg_offset); - } + amdgpu_umc_loop_channels(adev, + umc_v8_10_clear_error_count_per_channel, NULL); } static void umc_v8_10_query_correctable_error_count(struct amdgpu_device *adev, @@ -147,29 +140,29 @@ static void umc_v8_10_query_uncorrectable_error_count(struct amdgpu_device *adev *error_count += 1; } +static int umc_v8_10_query_ecc_error_count(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) +{ + struct ras_err_data *err_data = (struct ras_err_data *)data; + uint32_t umc_reg_offset = + get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst); + + umc_v8_10_query_correctable_error_count(adev, + umc_reg_offset, + 
&(err_data->ce_count)); + umc_v8_10_query_uncorrectable_error_count(adev, + umc_reg_offset, + &(err_data->ue_count)); + + return 0; +} + static void umc_v8_10_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - - uint32_t node_inst = 0; - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; - uint32_t umc_reg_offset = 0; - - LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { - umc_reg_offset = get_umc_v8_10_reg_offset(adev, - node_inst, - umc_inst, - ch_inst); - - umc_v8_10_query_correctable_error_count(adev, - umc_reg_offset, - &(err_data->ce_count)); - umc_v8_10_query_uncorrectable_error_count(adev, - umc_reg_offset, - &(err_data->ue_count)); - } + amdgpu_umc_loop_channels(adev, + umc_v8_10_query_ecc_error_count, ras_error_status); umc_v8_10_clear_error_count(adev); } @@ -248,28 +241,28 @@ static void umc_v8_10_convert_error_address(struct amdgpu_device *adev, } } -static void umc_v8_10_query_error_address(struct amdgpu_device *adev, - struct ras_err_data *err_data, - uint32_t umc_reg_offset, - uint32_t node_inst, - uint32_t ch_inst, - uint32_t umc_inst) +static int umc_v8_10_query_error_address(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) { uint64_t mc_umc_status_addr; uint64_t mc_umc_status, err_addr; uint64_t mc_umc_addrt0; + struct ras_err_data *err_data = (struct ras_err_data *)data; + uint32_t umc_reg_offset = + get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst); mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); if (mc_umc_status == 0) - return; + return 0; if (!err_data->err_addr) { /* clear umc status */ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); - return; + return 0; } /* calculate error address if ue error is detected */ @@ -287,37 +280,25 @@ static void 
umc_v8_10_query_error_address(struct amdgpu_device *adev, /* clear umc status */ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); + + return 0; } static void umc_v8_10_query_ras_error_address(struct amdgpu_device *adev, void *ras_error_status) { - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - uint32_t node_inst = 0; - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; - uint32_t umc_reg_offset = 0; - - LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { - umc_reg_offset = get_umc_v8_10_reg_offset(adev, - node_inst, - umc_inst, - ch_inst); - - umc_v8_10_query_error_address(adev, - err_data, - umc_reg_offset, - node_inst, - ch_inst, - umc_inst); - } + amdgpu_umc_loop_channels(adev, + umc_v8_10_query_error_address, ras_error_status); } -static void umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev, - uint32_t umc_reg_offset) +static int umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) { uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; uint32_t ecc_err_cnt_addr; + uint32_t umc_reg_offset = + get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst); ecc_err_cnt_sel_addr = SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCntSel); @@ -332,23 +313,14 @@ static void umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev, WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); /* set error count to initial value */ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_10_CE_CNT_INIT); + + return 0; } static void umc_v8_10_err_cnt_init(struct amdgpu_device *adev) { - uint32_t node_inst = 0; - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; - uint32_t umc_reg_offset = 0; - - LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { - umc_reg_offset = get_umc_v8_10_reg_offset(adev, - node_inst, - umc_inst, - ch_inst); - - umc_v8_10_err_cnt_init_per_channel(adev, umc_reg_offset); - } + 
amdgpu_umc_loop_channels(adev, + umc_v8_10_err_cnt_init_per_channel, NULL); } static bool umc_v8_10_query_ras_poison_mode(struct amdgpu_device *adev) @@ -406,37 +378,35 @@ static void umc_v8_10_ecc_info_query_uncorrectable_error_count(struct amdgpu_dev } } +static int umc_v8_10_ecc_info_query_ecc_error_count(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) +{ + struct ras_err_data *err_data = (struct ras_err_data *)data; + + umc_v8_10_ecc_info_query_correctable_error_count(adev, + node_inst, umc_inst, ch_inst, + &(err_data->ce_count)); + umc_v8_10_ecc_info_query_uncorrectable_error_count(adev, + node_inst, umc_inst, ch_inst, + &(err_data->ue_count)); + return 0; +} + static void umc_v8_10_ecc_info_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - - uint32_t node_inst = 0; - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; - - /* TODO: driver needs to toggle DF Cstate to ensure - * safe access of UMC registers. 
Will add the protection - */ - LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { - umc_v8_10_ecc_info_query_correctable_error_count(adev, - node_inst, umc_inst, ch_inst, - &(err_data->ce_count)); - umc_v8_10_ecc_info_query_uncorrectable_error_count(adev, - node_inst, umc_inst, ch_inst, - &(err_data->ue_count)); - } + amdgpu_umc_loop_channels(adev, + umc_v8_10_ecc_info_query_ecc_error_count, ras_error_status); } -static void umc_v8_10_ecc_info_query_error_address(struct amdgpu_device *adev, - struct ras_err_data *err_data, - uint32_t ch_inst, - uint32_t umc_inst, - uint32_t node_inst) +static int umc_v8_10_ecc_info_query_error_address(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) { uint32_t eccinfo_table_idx; uint64_t mc_umc_status, err_addr; - + struct ras_err_data *err_data = (struct ras_err_data *)data; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); eccinfo_table_idx = node_inst * adev->umc.umc_inst_num * @@ -447,10 +417,10 @@ static void umc_v8_10_ecc_info_query_error_address(struct amdgpu_device *adev, mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status; if (mc_umc_status == 0) - return; + return 0; if (!err_data->err_addr) - return; + return 0; /* calculate error address if ue error is detected */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && @@ -463,28 +433,15 @@ static void umc_v8_10_ecc_info_query_error_address(struct amdgpu_device *adev, umc_v8_10_convert_error_address(adev, err_data, err_addr, ch_inst, umc_inst, node_inst, mc_umc_status); } + + return 0; } static void umc_v8_10_ecc_info_query_ras_error_address(struct amdgpu_device *adev, void *ras_error_status) { - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - - uint32_t node_inst = 0; - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; - - /* TODO: driver needs to toggle DF Cstate to ensure - * safe access of UMC resgisters. 
Will add the protection - * when firmware interface is ready - */ - LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { - umc_v8_10_ecc_info_query_error_address(adev, - err_data, - ch_inst, - umc_inst, - node_inst); - } + amdgpu_umc_loop_channels(adev, + umc_v8_10_ecc_info_query_error_address, ras_error_status); } const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = { -- cgit v1.2.3 From edd48e6d8f341dcaf1c0a45f4822172d33e75129 Mon Sep 17 00:00:00 2001 From: Shane Xiao Date: Wed, 5 Apr 2023 23:09:14 +0800 Subject: drm/amdgpu: DROP redundant drm_prime_sg_to_dma_addr_array For DMA-MAP userptr on other GPUs, the dma address array will be populated in amdgpu_ttm_backend_bind. Remove the redundant call from the driver. v2: update the comment Signed-off-by: Shane Xiao Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 82a8b435148c..83a83ced2439 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -504,9 +504,6 @@ kfd_mem_dmamap_userptr(struct kgd_mem *mem, if (unlikely(ret)) goto release_sg; - drm_prime_sg_to_dma_addr_array(ttm->sg, ttm->dma_address, - ttm->num_pages); - amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (ret) -- cgit v1.2.3 From 5e08e9c742a00384e5abe74bd40cf4dc15cb3a2e Mon Sep 17 00:00:00 2001 From: lyndonli Date: Thu, 6 Apr 2023 15:30:34 +0800 Subject: drm/amdgpu: Fix sdma v4 sw fini error Fix sdma v4 sw fini error for sdma 4.2.2 to solve the following general protection fault [ +0.108196] general protection fault, probably for non-canonical address 0xd5e5a4ae79d24a32: 0000 [#1] PREEMPT SMP PTI [ +0.000018] RIP: 0010:free_fw_priv+0xd/0x70 [ +0.000022] Call Trace: [ 
+0.000012] [ +0.000011] release_firmware+0x55/0x80 [ +0.000021] amdgpu_ucode_release+0x11/0x20 [amdgpu] [ +0.000415] amdgpu_sdma_destroy_inst_ctx+0x4f/0x90 [amdgpu] [ +0.000360] sdma_v4_0_sw_fini+0xce/0x110 [amdgpu] Signed-off-by: lyndonli Reviewed-by: Likun Gao Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index b5affba22156..96b0c3d42346 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1870,7 +1870,7 @@ static int sdma_v4_0_sw_fini(void *handle) amdgpu_ring_fini(&adev->sdma.instance[i].page); } - if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 0) || + if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) || adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 0)) amdgpu_sdma_destroy_inst_ctx(adev, true); else -- cgit v1.2.3 From fc926faefcb7fade1abc05043db540f5c0fef79e Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 27 Mar 2023 14:27:12 +0800 Subject: drm/amdgpu: optimize redundant code in umc_v6_7 Optimize redundant code in umc_v6_7. 
Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v6_7.c | 161 +++++++++++++++------------------- 1 file changed, 70 insertions(+), 91 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c index e08e25a3a1a9..530549314ce4 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c @@ -160,24 +160,28 @@ static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_dev } } +static int umc_v6_7_ecc_info_querry_ecc_error_count(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) +{ + struct ras_err_data *err_data = (struct ras_err_data *)data; + + umc_v6_7_ecc_info_query_correctable_error_count(adev, + umc_inst, ch_inst, + &(err_data->ce_count)); + + umc_v6_7_ecc_info_querry_uncorrectable_error_count(adev, + umc_inst, ch_inst, + &(err_data->ue_count)); + + return 0; +} + static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; - - /*TODO: driver needs to toggle DF Cstate to ensure - * safe access of UMC registers. 
Will add the protection */ - LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { - umc_v6_7_ecc_info_query_correctable_error_count(adev, - umc_inst, ch_inst, - &(err_data->ce_count)); - umc_v6_7_ecc_info_querry_uncorrectable_error_count(adev, - umc_inst, ch_inst, - &(err_data->ue_count)); - } + amdgpu_umc_loop_channels(adev, + umc_v6_7_ecc_info_querry_ecc_error_count, ras_error_status); } void umc_v6_7_convert_error_address(struct amdgpu_device *adev, @@ -215,23 +219,23 @@ void umc_v6_7_convert_error_address(struct amdgpu_device *adev, } } -static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev, - struct ras_err_data *err_data, - uint32_t ch_inst, - uint32_t umc_inst) +static int umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) { uint64_t mc_umc_status, err_addr; uint32_t eccinfo_table_idx; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + struct ras_err_data *err_data = (struct ras_err_data *)data; eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst; mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status; if (mc_umc_status == 0) - return; + return 0; if (!err_data->err_addr) - return; + return 0; /* calculate error address if ue error is detected */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && @@ -243,25 +247,15 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev, umc_v6_7_convert_error_address(adev, err_data, err_addr, ch_inst, umc_inst); } + + return 0; } static void umc_v6_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev, void *ras_error_status) { - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; - - /*TODO: driver needs to toggle DF Cstate to ensure - * safe access of UMC resgisters. 
Will add the protection - * when firmware interface is ready */ - LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { - umc_v6_7_ecc_info_query_error_address(adev, - err_data, - ch_inst, - umc_inst); - } + amdgpu_umc_loop_channels(adev, + umc_v6_7_ecc_info_query_error_address, ras_error_status); } static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev, @@ -364,11 +358,14 @@ static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev } } -static void umc_v6_7_reset_error_count_per_channel(struct amdgpu_device *adev, - uint32_t umc_reg_offset) +static int umc_v6_7_reset_error_count_per_channel(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) { uint32_t ecc_err_cnt_addr; uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; + uint32_t umc_reg_offset = + get_umc_v6_7_reg_offset(adev, umc_inst, ch_inst); ecc_err_cnt_sel_addr = SOC15_REG_OFFSET(UMC, 0, @@ -402,58 +399,54 @@ static void umc_v6_7_reset_error_count_per_channel(struct amdgpu_device *adev, /* clear higher chip error count */ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_7_CE_CNT_INIT); + + return 0; } static void umc_v6_7_reset_error_count(struct amdgpu_device *adev) { - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; - uint32_t umc_reg_offset = 0; + amdgpu_umc_loop_channels(adev, + umc_v6_7_reset_error_count_per_channel, NULL); +} + +static int umc_v6_7_query_ecc_error_count(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) +{ + struct ras_err_data *err_data = (struct ras_err_data *)data; + uint32_t umc_reg_offset = + get_umc_v6_7_reg_offset(adev, umc_inst, ch_inst); + + umc_v6_7_query_correctable_error_count(adev, + umc_reg_offset, + &(err_data->ce_count), + ch_inst, umc_inst); - LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { - umc_reg_offset = get_umc_v6_7_reg_offset(adev, - umc_inst, - ch_inst); + umc_v6_7_querry_uncorrectable_error_count(adev, + umc_reg_offset, + 
&(err_data->ue_count)); - umc_v6_7_reset_error_count_per_channel(adev, - umc_reg_offset); - } + return 0; } static void umc_v6_7_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; - uint32_t umc_reg_offset = 0; - - /*TODO: driver needs to toggle DF Cstate to ensure - * safe access of UMC registers. Will add the protection */ - LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { - umc_reg_offset = get_umc_v6_7_reg_offset(adev, - umc_inst, - ch_inst); - umc_v6_7_query_correctable_error_count(adev, - umc_reg_offset, - &(err_data->ce_count), - ch_inst, umc_inst); - umc_v6_7_querry_uncorrectable_error_count(adev, - umc_reg_offset, - &(err_data->ue_count)); - } + amdgpu_umc_loop_channels(adev, + umc_v6_7_query_ecc_error_count, ras_error_status); umc_v6_7_reset_error_count(adev); } -static void umc_v6_7_query_error_address(struct amdgpu_device *adev, - struct ras_err_data *err_data, - uint32_t umc_reg_offset, uint32_t ch_inst, - uint32_t umc_inst) +static int umc_v6_7_query_error_address(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) { uint32_t mc_umc_status_addr; uint64_t mc_umc_status = 0, mc_umc_addrt0, err_addr; + struct ras_err_data *err_data = (struct ras_err_data *)data; + uint32_t umc_reg_offset = + get_umc_v6_7_reg_offset(adev, umc_inst, ch_inst); mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); @@ -463,12 +456,12 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev, mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); if (mc_umc_status == 0) - return; + return 0; if (!err_data->err_addr) { /* clear umc status */ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); - return; + return 0; } /* calculate error address if ue error is detected */ @@ -484,29 +477,15 @@ static void 
umc_v6_7_query_error_address(struct amdgpu_device *adev, /* clear umc status */ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); + + return 0; } static void umc_v6_7_query_ras_error_address(struct amdgpu_device *adev, void *ras_error_status) { - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; - uint32_t umc_reg_offset = 0; - - /*TODO: driver needs to toggle DF Cstate to ensure - * safe access of UMC resgisters. Will add the protection - * when firmware interface is ready */ - LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { - umc_reg_offset = get_umc_v6_7_reg_offset(adev, - umc_inst, - ch_inst); - umc_v6_7_query_error_address(adev, - err_data, - umc_reg_offset, ch_inst, - umc_inst); - } + amdgpu_umc_loop_channels(adev, + umc_v6_7_query_error_address, ras_error_status); } static uint32_t umc_v6_7_query_ras_poison_mode_per_channel( -- cgit v1.2.3 From 5591a051b86be170a84943698ab140342602ff7b Mon Sep 17 00:00:00 2001 From: Tong Liu01 Date: Thu, 6 Apr 2023 15:58:31 +0800 Subject: drm/amdgpu: refine get gpu clock counter method [why] regGOLDEN_TSC_COUNT_LOWER/regGOLDEN_TSC_COUNT_UPPER are protected and inaccessible under sriov. The clock counter high bit may update during the reading process. [How] Replace regGOLDEN_TSC_COUNT_LOWER/regGOLDEN_TSC_COUNT_UPPER with regCP_MES_MTIME_LO/regCP_MES_MTIME_HI to get gpu clock under sriov. Refine get gpu clock counter method to make the result more precise.
Signed-off-by: Tong Liu01 Acked-by: Luben Tuikov Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index ecf8ceb53311..107c487c0c37 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4671,11 +4671,24 @@ static int gfx_v11_0_post_soft_reset(void *handle) static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev) { uint64_t clock; + uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after; amdgpu_gfx_off_ctrl(adev, false); mutex_lock(&adev->gfx.gpu_clock_mutex); - clock = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER) | - ((uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER) << 32ULL); + if (amdgpu_sriov_vf(adev)) { + clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI); + clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO); + clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI); + if (clock_counter_hi_pre != clock_counter_hi_after) + clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO); + } else { + clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); + clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); + clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); + if (clock_counter_hi_pre != clock_counter_hi_after) + clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); + } + clock = clock_counter_lo | (clock_counter_hi_after << 32ULL); mutex_unlock(&adev->gfx.gpu_clock_mutex); amdgpu_gfx_off_ctrl(adev, true); return clock; -- cgit v1.2.3 From 52a3a40ee4f89c89026837838f7df386d64c2892 Mon Sep 17 00:00:00 2001 From: Horatio Zhang Date: Thu, 6 Apr 2023 11:17:38 +0800 
Subject: drm/amd/pm: correct SMU13.0.7 pstate profiling clock settings Correct the pstate standard/peak profiling mode clock settings for SMU13.0.7. Signed-off-by: Horatio Zhang Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 1b2c82449f20..da05047d7f6e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -1329,9 +1329,17 @@ static int smu_v13_0_7_populate_umd_state_clk(struct smu_context *smu) &dpm_context->dpm_tables.fclk_table; struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; + struct smu_table_context *table_context = &smu->smu_table; + PPTable_t *pptable = table_context->driver_pptable; + DriverReportedClocks_t driver_clocks = + pptable->SkuTable.DriverReportedClocks; pstate_table->gfxclk_pstate.min = gfx_table->min; - pstate_table->gfxclk_pstate.peak = gfx_table->max; + if (driver_clocks.GameClockAc && + (driver_clocks.GameClockAc < gfx_table->max)) + pstate_table->gfxclk_pstate.peak = driver_clocks.GameClockAc; + else + pstate_table->gfxclk_pstate.peak = gfx_table->max; pstate_table->uclk_pstate.min = mem_table->min; pstate_table->uclk_pstate.peak = mem_table->max; @@ -1348,12 +1356,12 @@ static int smu_v13_0_7_populate_umd_state_clk(struct smu_context *smu) pstate_table->fclk_pstate.min = fclk_table->min; pstate_table->fclk_pstate.peak = fclk_table->max; - /* - * For now, just use the mininum clock frequency. 
- * TODO: update them when the real pstate settings available - */ - pstate_table->gfxclk_pstate.standard = gfx_table->min; - pstate_table->uclk_pstate.standard = mem_table->min; + if (driver_clocks.BaseClockAc && + driver_clocks.BaseClockAc < gfx_table->max) + pstate_table->gfxclk_pstate.standard = driver_clocks.BaseClockAc; + else + pstate_table->gfxclk_pstate.standard = gfx_table->max; + pstate_table->uclk_pstate.standard = mem_table->max; pstate_table->socclk_pstate.standard = soc_table->min; pstate_table->vclk_pstate.standard = vclk_table->min; pstate_table->dclk_pstate.standard = dclk_table->min; -- cgit v1.2.3 From 86f3a961f367f5796ed1915cc8253e21c2a329fa Mon Sep 17 00:00:00 2001 From: Horatio Zhang Date: Thu, 6 Apr 2023 13:32:14 +0800 Subject: drm/amd/pm: correct SMU13.0.7 max shader clock reporting Correct the max shader clock reporting on SMU 13.0.7. Signed-off-by: Horatio Zhang Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 61 +++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index da05047d7f6e..c8c7133626ec 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -575,6 +575,14 @@ static int smu_v13_0_7_set_default_dpm_table(struct smu_context *smu) dpm_table); if (ret) return ret; + + if (skutable->DriverReportedClocks.GameClockAc && + (dpm_table->dpm_levels[dpm_table->count - 1].value > + skutable->DriverReportedClocks.GameClockAc)) { + dpm_table->dpm_levels[dpm_table->count - 1].value = + skutable->DriverReportedClocks.GameClockAc; + dpm_table->max = skutable->DriverReportedClocks.GameClockAc; + } } else { dpm_table->count = 1; dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.gfxclk / 100; @@ -828,6 +836,57 @@ static int 
smu_v13_0_7_get_smu_metrics_data(struct smu_context *smu, return ret; } +static int smu_v13_0_7_get_dpm_ultimate_freq(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t *min, + uint32_t *max) +{ + struct smu_13_0_dpm_context *dpm_context = + smu->smu_dpm.dpm_context; + struct smu_13_0_dpm_table *dpm_table; + + switch (clk_type) { + case SMU_MCLK: + case SMU_UCLK: + /* uclk dpm table */ + dpm_table = &dpm_context->dpm_tables.uclk_table; + break; + case SMU_GFXCLK: + case SMU_SCLK: + /* gfxclk dpm table */ + dpm_table = &dpm_context->dpm_tables.gfx_table; + break; + case SMU_SOCCLK: + /* socclk dpm table */ + dpm_table = &dpm_context->dpm_tables.soc_table; + break; + case SMU_FCLK: + /* fclk dpm table */ + dpm_table = &dpm_context->dpm_tables.fclk_table; + break; + case SMU_VCLK: + case SMU_VCLK1: + /* vclk dpm table */ + dpm_table = &dpm_context->dpm_tables.vclk_table; + break; + case SMU_DCLK: + case SMU_DCLK1: + /* dclk dpm table */ + dpm_table = &dpm_context->dpm_tables.dclk_table; + break; + default: + dev_err(smu->adev->dev, "Unsupported clock type!\n"); + return -EINVAL; + } + + if (min) + *min = dpm_table->min; + if (max) + *max = dpm_table->max; + + return 0; +} + static int smu_v13_0_7_read_sensor(struct smu_context *smu, enum amd_pp_sensors sensor, void *data, @@ -1686,7 +1745,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .dpm_set_jpeg_enable = smu_v13_0_set_jpeg_enable, .init_pptable_microcode = smu_v13_0_init_pptable_microcode, .populate_umd_state_clk = smu_v13_0_7_populate_umd_state_clk, - .get_dpm_ultimate_freq = smu_v13_0_get_dpm_ultimate_freq, + .get_dpm_ultimate_freq = smu_v13_0_7_get_dpm_ultimate_freq, .get_vbios_bootup_values = smu_v13_0_get_vbios_bootup_values, .read_sensor = smu_v13_0_7_read_sensor, .feature_is_enabled = smu_cmn_feature_is_enabled, -- cgit v1.2.3 From 418431bcc9ae6509263f4f3bf4b6a80f39da0772 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 5 Apr 2023 15:07:06 +0530 Subject: drm/amdgpu: Fix 
warnings Fix below warning due to incompatible types in conditional operator ../pm/swsmu/smu13/smu_v13_0_6_ppt.c:315:17: sparse: sparse: incompatible types in conditional expression (different base types): Signed-off-by: Lijo Lazar Reported-by: kernel test robot Reviewed-by: Luben Tuikov Reviewed-by: Guchun Chen Link: https://lore.kernel.org/oe-kbuild-all/202303082135.NjdX1Bij-lkp@intel.com/ Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 833996291dee..a831e2b428e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1221,7 +1221,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev); ((adev)->asic_funcs->flush_hdp ? (adev)->asic_funcs->flush_hdp((adev), (r)) : (adev)->hdp.funcs->flush_hdp((adev), (r))) #define amdgpu_asic_invalidate_hdp(adev, r) \ ((adev)->asic_funcs->invalidate_hdp ? (adev)->asic_funcs->invalidate_hdp((adev), (r)) : \ - ((adev)->hdp.funcs->invalidate_hdp ? (adev)->hdp.funcs->invalidate_hdp((adev), (r)) : 0)) + ((adev)->hdp.funcs->invalidate_hdp ? (adev)->hdp.funcs->invalidate_hdp((adev), (r)) : (void)0)) #define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev)) #define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev)) #define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1))) -- cgit v1.2.3 From ff38d974bc2842797d1d75f5060afd1cea4a76a9 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Thu, 6 Apr 2023 14:44:34 -0400 Subject: drm/amd/display: set variable dcn3_14_soc storage-class-specifier to static smatch reports drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn314/dcn314_fpu.c:100:37: warning: symbol 'dcn3_14_soc' was not declared. Should it be static? 
This variable is only used in one file so should be static. Signed-off-by: Tom Rix Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c index c52b76610bd2..44082f65de1f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c @@ -97,7 +97,7 @@ struct _vcs_dpi_ip_params_st dcn3_14_ip = { .dcc_supported = true, }; -struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = { +static struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = { /*TODO: correct dispclk/dppclk voltage level determination*/ .clock_limits = { { -- cgit v1.2.3 From 75bf1df75d5e9a22898c5b6c3410ef8ec3a0de70 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Thu, 6 Apr 2023 15:58:18 -0400 Subject: drm/amd/display: set variables aperture_default_system and context0_default_system storage-class-specifier to static smatch reports drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_hubp.c:758:10: warning: symbol 'aperture_default_system' was not declared. Should it be static? drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_hubp.c:759:10: warning: symbol 'context0_default_system' was not declared. Should it be static? These variables are only used in one file so should be static. 
Signed-off-by: Tom Rix Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index a142a00bc432..bf399819ca80 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -755,8 +755,8 @@ bool hubp1_is_flip_pending(struct hubp *hubp) return false; } -uint32_t aperture_default_system = 1; -uint32_t context0_default_system; /* = 0;*/ +static uint32_t aperture_default_system = 1; +static uint32_t context0_default_system; /* = 0;*/ static void hubp1_set_vm_system_aperture_settings(struct hubp *hubp, struct vm_system_aperture_param *apt) -- cgit v1.2.3 From 94aec514c872250887e435faee333c9da741cd72 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sat, 8 Apr 2023 09:43:48 -0400 Subject: drm/amd/display: set variables dml*_funcs storage-class-specifier to static smatch reports drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.c:44:24: warning: symbol 'dml20_funcs' was not declared. Should it be static? drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.c:51:24: warning: symbol 'dml20v2_funcs' was not declared. Should it be static? drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.c:58:24: warning: symbol 'dml21_funcs' was not declared. Should it be static? drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.c:65:24: warning: symbol 'dml30_funcs' was not declared. Should it be static? drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.c:72:24: warning: symbol 'dml31_funcs' was not declared. Should it be static? drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.c:79:24: warning: symbol 'dml314_funcs' was not declared. Should it be static? 
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.c:86:24: warning: symbol 'dml32_funcs' was not declared. Should it be static? These variables are only used in one file so should be static. Cleanup whitespace, use tabs consistently for indents. Signed-off-by: Tom Rix Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml/display_mode_lib.c | 24 +++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c index 4125d3d111d1..bdf3ac6cadd5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c @@ -41,51 +41,51 @@ #include "dcn32/display_rq_dlg_calc_32.h" #include "dml_logger.h" -const struct dml_funcs dml20_funcs = { +static const struct dml_funcs dml20_funcs = { .validate = dml20_ModeSupportAndSystemConfigurationFull, .recalculate = dml20_recalculate, .rq_dlg_get_dlg_reg = dml20_rq_dlg_get_dlg_reg, .rq_dlg_get_rq_reg = dml20_rq_dlg_get_rq_reg }; -const struct dml_funcs dml20v2_funcs = { +static const struct dml_funcs dml20v2_funcs = { .validate = dml20v2_ModeSupportAndSystemConfigurationFull, .recalculate = dml20v2_recalculate, .rq_dlg_get_dlg_reg = dml20v2_rq_dlg_get_dlg_reg, .rq_dlg_get_rq_reg = dml20v2_rq_dlg_get_rq_reg }; -const struct dml_funcs dml21_funcs = { - .validate = dml21_ModeSupportAndSystemConfigurationFull, - .recalculate = dml21_recalculate, - .rq_dlg_get_dlg_reg = dml21_rq_dlg_get_dlg_reg, - .rq_dlg_get_rq_reg = dml21_rq_dlg_get_rq_reg +static const struct dml_funcs dml21_funcs = { + .validate = dml21_ModeSupportAndSystemConfigurationFull, + .recalculate = dml21_recalculate, + .rq_dlg_get_dlg_reg = dml21_rq_dlg_get_dlg_reg, + .rq_dlg_get_rq_reg = dml21_rq_dlg_get_rq_reg }; -const struct dml_funcs dml30_funcs = { +static const struct dml_funcs dml30_funcs = { .validate = 
dml30_ModeSupportAndSystemConfigurationFull, .recalculate = dml30_recalculate, .rq_dlg_get_dlg_reg = dml30_rq_dlg_get_dlg_reg, .rq_dlg_get_rq_reg = dml30_rq_dlg_get_rq_reg }; -const struct dml_funcs dml31_funcs = { +static const struct dml_funcs dml31_funcs = { .validate = dml31_ModeSupportAndSystemConfigurationFull, .recalculate = dml31_recalculate, .rq_dlg_get_dlg_reg = dml31_rq_dlg_get_dlg_reg, .rq_dlg_get_rq_reg = dml31_rq_dlg_get_rq_reg }; -const struct dml_funcs dml314_funcs = { +static const struct dml_funcs dml314_funcs = { .validate = dml314_ModeSupportAndSystemConfigurationFull, .recalculate = dml314_recalculate, .rq_dlg_get_dlg_reg = dml314_rq_dlg_get_dlg_reg, .rq_dlg_get_rq_reg = dml314_rq_dlg_get_rq_reg }; -const struct dml_funcs dml32_funcs = { +static const struct dml_funcs dml32_funcs = { .validate = dml32_ModeSupportAndSystemConfigurationFull, - .recalculate = dml32_recalculate, + .recalculate = dml32_recalculate, .rq_dlg_get_dlg_reg_v2 = dml32_rq_dlg_get_dlg_reg, .rq_dlg_get_rq_reg_v2 = dml32_rq_dlg_get_rq_reg }; -- cgit v1.2.3 From fdf8ea814ae48d7f5670bc7c3bf34101fb58f7c8 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sat, 25 Mar 2023 09:45:03 -0400 Subject: drm/amd/display: remove unused matching_stream_ptrs variable clang with W=1 reports drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_enc_cfg.c:625:6: error: variable 'matching_stream_ptrs' set but not used [-Werror,-Wunused-but-set-variable] int matching_stream_ptrs = 0; ^ This variable is not used so remove it. 
Reviewed-by: Nick Desaulniers Signed-off-by: Tom Rix Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c index 41198c729d90..30c0644d4418 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c @@ -622,7 +622,6 @@ bool link_enc_cfg_validate(struct dc *dc, struct dc_state *state) int i, j; uint8_t valid_count = 0; uint8_t dig_stream_count = 0; - int matching_stream_ptrs = 0; int eng_ids_per_ep_id[MAX_PIPES] = {0}; int ep_ids_per_eng_id[MAX_PIPES] = {0}; int valid_bitmap = 0; @@ -645,9 +644,7 @@ bool link_enc_cfg_validate(struct dc *dc, struct dc_state *state) struct link_enc_assignment assignment = state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i]; if (assignment.valid) { - if (assignment.stream == state->streams[i]) - matching_stream_ptrs++; - else + if (assignment.stream != state->streams[i]) valid_stream_ptrs = false; } } -- cgit v1.2.3 From aee89b7d0929bfc6530a1468d34e0befc1991389 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Sun, 9 Apr 2023 22:35:38 +0530 Subject: drm/amd/display : Log DP link training downspread info Update the existing log with DP LT downspread info: [Downstream devices shall support down spreading of the link clock. The down-spread amplitude shall either be disabled (0.0%) or up to 0.5%, as written by the upstream device to the DOWNSPREAD_CTRL register (DPCD 00107h). 
The modulation frequency range shall be 30 to 33 kHz] Besides, fix checkpatch warning: CHECK: Alignment should match open parenthesis Cc: Mario Limonciello Cc: Harry Wentland Cc: Leo Li Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/link/protocols/link_dp_training.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c index 70fc0ddf2d7e..2d067a4a8517 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c @@ -1560,9 +1560,10 @@ bool perform_link_training_with_retries( j = 0; while (j < attempts && fail_count < (attempts * 10)) { - DC_LOG_HW_LINK_TRAINING("%s: Beginning link(%d) training attempt %u of %d @ rate(%d) x lane(%d)\n", - __func__, link->link_index, (unsigned int)j + 1, attempts, cur_link_settings.link_rate, - cur_link_settings.lane_count); + DC_LOG_HW_LINK_TRAINING("%s: Beginning link(%d) training attempt %u of %d @ rate(%d) x lane(%d) @ spread = %x\n", + __func__, link->link_index, (unsigned int)j + 1, attempts, + cur_link_settings.link_rate, cur_link_settings.lane_count, + cur_link_settings.link_spread); dp_enable_link_phy( link, @@ -1640,9 +1641,10 @@ bool perform_link_training_with_retries( break; } - DC_LOG_WARNING("%s: Link(%d) training attempt %u of %d failed @ rate(%d) x lane(%d) : fail reason:(%d)\n", - __func__, link->link_index, (unsigned int)j + 1, attempts, cur_link_settings.link_rate, - cur_link_settings.lane_count, status); + DC_LOG_WARNING("%s: Link(%d) training attempt %u of %d failed @ rate(%d) x lane(%d) @ spread = %x : fail reason:(%d)\n", + __func__, link->link_index, (unsigned int)j + 1, attempts, + cur_link_settings.link_rate, cur_link_settings.lane_count, + 
cur_link_settings.link_spread, status); dp_disable_link_phy(link, &pipe_ctx->link_res, signal); -- cgit v1.2.3 From febc9c65b37f6f5cc03ed0b6d613fb045c39b376 Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Wed, 5 Apr 2023 10:23:31 +0200 Subject: drm/amdgpu: use sdma_v6 single packet invalidation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This achieves the same result as the sequence used in emit_flush_gpu_tlb but the invalidation is now a single packet instead of the 3 packets required to implement reg_write_reg_wait. Signed-off-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Christian König Reviewed-by: Monk Liu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 40e6b22daa22..fdc6dfd60621 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1173,7 +1173,28 @@ static void sdma_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) static void sdma_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0); + + /* Update the PD address for this VMID. */ + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + + (hub->ctx_addr_distance * vmid), + lower_32_bits(pd_addr)); + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + + (hub->ctx_addr_distance * vmid), + upper_32_bits(pd_addr)); + + /* Trigger invalidation. 
*/ + amdgpu_ring_write(ring, + SDMA_PKT_VM_INVALIDATION_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(SDMA_SUBOP_VM_INVALIDATION) | + SDMA_PKT_VM_INVALIDATION_HEADER_GFX_ENG_ID(ring->vm_inv_eng) | + SDMA_PKT_VM_INVALIDATION_HEADER_MM_ENG_ID(0x1f)); + amdgpu_ring_write(ring, req); + amdgpu_ring_write(ring, 0xFFFFFFFF); + amdgpu_ring_write(ring, + SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(1 << vmid) | + SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(0x1F)); } static void sdma_v6_0_ring_emit_wreg(struct amdgpu_ring *ring, -- cgit v1.2.3 From caa4dffa9abd80f3360432cf89236f018be355ca Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Mon, 10 Apr 2023 18:20:23 +0800 Subject: drm/amdgpu: fix unexpected block id Aldebaran supports VCN and JPEG RAS, it reports unexpected block id message during VCN and JPEG RAS initialization if VCN and JPEG block id not defined. Signed-off-by: Stanley.Yang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 4 ++++ drivers/gpu/drm/amd/amdgpu/ta_ras_if.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index ef38f4c93df0..17b3d1992e80 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -583,6 +583,10 @@ amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) { return TA_RAS_BLOCK__FUSE; case AMDGPU_RAS_BLOCK__MCA: return TA_RAS_BLOCK__MCA; + case AMDGPU_RAS_BLOCK__VCN: + return TA_RAS_BLOCK__VCN; + case AMDGPU_RAS_BLOCK__JPEG: + return TA_RAS_BLOCK__JPEG; default: WARN_ONCE(1, "RAS ERROR: unexpected block id %d\n", block); return TA_RAS_BLOCK__UMC; diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h index 509d8a1945eb..30d0482ac466 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -84,6 +84,8 @@ enum 
ta_ras_block { TA_RAS_BLOCK__MP1, TA_RAS_BLOCK__FUSE, TA_RAS_BLOCK__MCA, + TA_RAS_BLOCK__VCN, + TA_RAS_BLOCK__JPEG, TA_NUM_BLOCK_MAX }; -- cgit v1.2.3 From 58bc2a9cbfdd4abdbfaafd835a0cd78bdad11423 Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Mon, 10 Apr 2023 19:43:16 +0800 Subject: drm/amdgpu: correct ras enabled flag XGMI RAS should follow the gmc xgmi physical node count: it should not be enabled if xgmi num_physical_nodes is zero. Signed-off-by: Stanley.Yang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 4069bce9479f..ad5d456918f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2430,6 +2430,13 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) else adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN | 1 << AMDGPU_RAS_BLOCK__JPEG); + + /* + * XGMI RAS is not supported if xgmi num physical nodes + * is zero + */ + if (!adev->gmc.xgmi.num_physical_nodes) + adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__XGMI_WAFL); } else { dev_info(adev->dev, "SRAM ECC is not presented.\n"); } -- cgit v1.2.3 From 3e3320a7d96c1a5c66b60fbabb38af1f4c4fae1a Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Fri, 7 Apr 2023 14:22:53 +0530 Subject: drm/amd/display: Add logging for DP link training Test Pattern Sequences Add some more logging for DP link training test pattern sequences for better debugging.
Cc: Fangzhi Zuo Cc: Harry Wentland Cc: Leo Li Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Fangzhi Zuo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c index 2d067a4a8517..170f33835930 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c @@ -212,27 +212,36 @@ enum dpcd_training_patterns switch (pattern) { case DP_TRAINING_PATTERN_SEQUENCE_1: + DC_LOG_HW_LINK_TRAINING("%s: Using DP training pattern TPS1\n", __func__); dpcd_tr_pattern = DPCD_TRAINING_PATTERN_1; break; case DP_TRAINING_PATTERN_SEQUENCE_2: + DC_LOG_HW_LINK_TRAINING("%s: Using DP training pattern TPS2\n", __func__); dpcd_tr_pattern = DPCD_TRAINING_PATTERN_2; break; case DP_TRAINING_PATTERN_SEQUENCE_3: + DC_LOG_HW_LINK_TRAINING("%s: Using DP training pattern TPS3\n", __func__); dpcd_tr_pattern = DPCD_TRAINING_PATTERN_3; break; case DP_TRAINING_PATTERN_SEQUENCE_4: + DC_LOG_HW_LINK_TRAINING("%s: Using DP training pattern TPS4\n", __func__); dpcd_tr_pattern = DPCD_TRAINING_PATTERN_4; break; case DP_128b_132b_TPS1: + DC_LOG_HW_LINK_TRAINING("%s: Using DP 128b/132b training pattern TPS1\n", __func__); dpcd_tr_pattern = DPCD_128b_132b_TPS1; break; case DP_128b_132b_TPS2: + DC_LOG_HW_LINK_TRAINING("%s: Using DP 128b/132b training pattern TPS2\n", __func__); dpcd_tr_pattern = DPCD_128b_132b_TPS2; break; case DP_128b_132b_TPS2_CDS: + DC_LOG_HW_LINK_TRAINING("%s: Using DP 128b/132b training pattern TPS2 CDS\n", + __func__); dpcd_tr_pattern = DPCD_128b_132b_TPS2_CDS; break; case DP_TRAINING_PATTERN_VIDEOIDLE: + DC_LOG_HW_LINK_TRAINING("%s: Using DP training pattern videoidle\n", __func__); dpcd_tr_pattern = DPCD_TRAINING_PATTERN_VIDEOIDLE; 
break; default: -- cgit v1.2.3 From 00fa40353bf3894adb495f8cce10a8bce43cd375 Mon Sep 17 00:00:00 2001 From: Sreekant Somasekharan Date: Mon, 13 Mar 2023 18:05:41 -0400 Subject: drm/amdkfd: Check PCIe atomics support on GFX11 to set CP_HQD_HQ_STATUS0[29] CP_HQD_HQ_STATUS0[29] bit will be used by CPFW to acknowledge whether PCIe atomics are supported. The default value of this bit is set to 0. Driver will check whether PCIe atomics are supported and set the bit to 1 if supported. This will force CPFW to use real atomic ops. If the bit is not set, CPFW will default to read/modify/write using the firmware itself. This is applicable only to GFX11 RS64 CP with MEC FW >= 509. If MEC FW < 509 and for all GFX11 F32 CP, PCIe atomics needs to be supported else it will skip the device. This commit also involves moving amdgpu_amdkfd_device_probe() function call after per-IP early_init loop in amdgpu_device_ip_early_init() function so as to check for RS64 enabled device. Signed-off-by: Sreekant Somasekharan Reviewed-by: Graham Sider Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 8 ++++++++ drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 7 +++++++ 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 4819b3f86750..efdc6b73a55c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2184,7 +2184,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) adev->has_pr3 = parent ? 
pci_pr3_present(parent) : false; } - amdgpu_amdkfd_device_probe(adev); adev->pm.pp_feature = amdgpu_pp_feature_mask; if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS) @@ -2240,6 +2239,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (!total) return -ENODEV; + amdgpu_amdkfd_device_probe(adev); adev->cg_flags &= amdgpu_cg_mask; adev->pg_flags &= amdgpu_pg_mask; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 0bf8d75950f7..e12ff49d01e1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -204,6 +204,14 @@ static void kfd_device_info_init(struct kfd_dev *kfd, /* Navi1x+ */ if (gc_version >= IP_VERSION(10, 1, 1)) kfd->device_info.needs_pci_atomics = true; + } else if (gc_version < IP_VERSION(12, 0, 0)) { + /* + * PCIe atomics support acknowledgment in GFX11 RS64 CPFW requires + * MEC version >= 509. Prior RS64 CPFW versions (and all F32) require + * PCIe atomics support. + */ + kfd->device_info.needs_pci_atomics = true; + kfd->device_info.no_atomic_fw_version = kfd->adev->gfx.rs64_enable ? 509 : 0; } } else { kfd->device_info.doorbell_size = 4; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index 4a9af800b1f1..04a73324174b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -143,6 +143,13 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT | 1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT; + /* + * GFX11 RS64 CPFW version >= 509 supports PCIe atomics support + * acknowledgment. 
+ */ + if (amdgpu_amdkfd_have_atomics_support(mm->dev->adev)) + m->cp_hqd_hq_status0 |= 1 << 29; + if (q->format == KFD_QUEUE_FORMAT_AQL) { m->cp_hqd_aql_control = 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT; -- cgit v1.2.3 From 27488686cb1835f1c69d3efb0eedeb411f675d73 Mon Sep 17 00:00:00 2001 From: Graham Sider Date: Thu, 30 Mar 2023 13:47:05 -0400 Subject: drm/amdgpu: Enable GFX11 SDMA context empty interrupt Enable SDMA queue empty context switching. SDMA context switch due to quantum programming no longer done here (as of sdma v6), so re-name sdma_v6_0_ctx_switch_enable to sdma_v6_0_ctxempty_int_enable to reflect this. Also program SDMAx_QUEUEx_SCHEDULE_CNTL for context switch due to quantum in KFD. Set to amdgpu_sdma_phase_quantum (defaults to 32 i.e. 3200us). Signed-off-by: Graham Sider Reviewed-by: Harish Kasiviswanathan Reviewed-by: Stanley Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 28 +++++++++++++++--------- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 4 ++++ 2 files changed, 22 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index fdc6dfd60621..fc0f14ed93d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -403,15 +403,26 @@ static void sdma_v6_0_rlc_stop(struct amdgpu_device *adev) } /** - * sdma_v6_0_ctx_switch_enable - stop the async dma engines context switch + * sdma_v6_0_ctxempty_int_enable - enable or disable context empty interrupts * * @adev: amdgpu_device pointer - * @enable: enable/disable the DMA MEs context switch. + * @enable: enable/disable context switching due to queue empty conditions * - * Halt or unhalt the async dma engines context switch. + * Enable or disable the async dma engines queue empty context switch. 
*/ -static void sdma_v6_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) +static void sdma_v6_0_ctxempty_int_enable(struct amdgpu_device *adev, bool enable) { + u32 f32_cntl; + int i; + + if (!amdgpu_sriov_vf(adev)) { + for (i = 0; i < adev->sdma.num_instances; i++) { + f32_cntl = RREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_CNTL)); + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, + CTXEMPTY_INT_ENABLE, enable ? 1 : 0); + WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_CNTL), f32_cntl); + } + } } /** @@ -579,10 +590,8 @@ static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev) ring->sched.ready = true; - if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */ - sdma_v6_0_ctx_switch_enable(adev, true); + if (amdgpu_sriov_vf(adev)) sdma_v6_0_enable(adev, true); - } r = amdgpu_ring_test_helper(ring); if (r) { @@ -778,7 +787,6 @@ static int sdma_v6_0_start(struct amdgpu_device *adev) int r = 0; if (amdgpu_sriov_vf(adev)) { - sdma_v6_0_ctx_switch_enable(adev, false); sdma_v6_0_enable(adev, false); /* set RB registers */ @@ -799,7 +807,7 @@ static int sdma_v6_0_start(struct amdgpu_device *adev) /* unhalt the MEs */ sdma_v6_0_enable(adev, true); /* enable sdma ring preemption */ - sdma_v6_0_ctx_switch_enable(adev, true); + sdma_v6_0_ctxempty_int_enable(adev, true); /* start the gfx rings and rlc compute queues */ r = sdma_v6_0_gfx_resume(adev); @@ -1340,7 +1348,7 @@ static int sdma_v6_0_hw_fini(void *handle) return 0; } - sdma_v6_0_ctx_switch_enable(adev, false); + sdma_v6_0_ctxempty_int_enable(adev, false); sdma_v6_0_enable(adev, false); return 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index 04a73324174b..5aa75f72caa1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -357,6 +357,10 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd, m->sdmax_rlcx_doorbell_offset = 
q->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT; + m->sdmax_rlcx_sched_cntl = (amdgpu_sdma_phase_quantum + << SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM__SHIFT) + & SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM_MASK; + m->sdma_engine_id = q->sdma_engine_id; m->sdma_queue_id = q->sdma_queue_id; m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT; -- cgit v1.2.3 From 6246059a19d4cd32ef1af42a6ab016b779cd68c4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 27 Mar 2023 20:09:08 -0400 Subject: drm/amdgpu: simplify amdgpu_ras_eeprom.c All chips that support RAS also support IP discovery, so use the IP versions rather than a mix of IP versions and asic types. Checking the validity of the atom_ctx pointer is not required as the vbios is already fetched at this point. v2: add comments to id asic types based on feedback from Luben Reviewed-by: Luben Tuikov Signed-off-by: Alex Deucher Cc: Luben Tuikov --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 72 +++++++------------------- 1 file changed, 20 insertions(+), 52 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 3106fa8a15ef..c2c2a7718613 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -106,48 +106,13 @@ #define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control))->adev static bool __is_ras_eeprom_supported(struct amdgpu_device *adev) -{ - if (adev->asic_type == CHIP_IP_DISCOVERY) { - switch (adev->ip_versions[MP1_HWIP][0]) { - case IP_VERSION(13, 0, 0): - case IP_VERSION(13, 0, 10): - return true; - default: - return false; - } - } - - return adev->asic_type == CHIP_VEGA20 || - adev->asic_type == CHIP_ARCTURUS || - adev->asic_type == CHIP_SIENNA_CICHLID || - adev->asic_type == CHIP_ALDEBARAN; -} - -static bool __get_eeprom_i2c_addr_arct(struct amdgpu_device *adev, - struct amdgpu_ras_eeprom_control *control) -{ - 
struct atom_context *atom_ctx = adev->mode_info.atom_context; - - if (!control || !atom_ctx) - return false; - - if (strnstr(atom_ctx->vbios_version, - "D342", - sizeof(atom_ctx->vbios_version))) - control->i2c_address = EEPROM_I2C_MADDR_0; - else - control->i2c_address = EEPROM_I2C_MADDR_4; - - return true; -} - -static bool __get_eeprom_i2c_addr_ip_discovery(struct amdgpu_device *adev, - struct amdgpu_ras_eeprom_control *control) { switch (adev->ip_versions[MP1_HWIP][0]) { + case IP_VERSION(11, 0, 2): /* VEGA20 and ARCTURUS */ + case IP_VERSION(11, 0, 7): /* Sienna cichlid */ case IP_VERSION(13, 0, 0): + case IP_VERSION(13, 0, 2): /* Aldebaran */ case IP_VERSION(13, 0, 10): - control->i2c_address = EEPROM_I2C_MADDR_4; return true; default: return false; @@ -178,29 +143,32 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, return true; } - switch (adev->asic_type) { - case CHIP_VEGA20: - control->i2c_address = EEPROM_I2C_MADDR_0; + switch (adev->ip_versions[MP1_HWIP][0]) { + case IP_VERSION(11, 0, 2): + /* VEGA20 and ARCTURUS */ + if (adev->asic_type == CHIP_VEGA20) + control->i2c_address = EEPROM_I2C_MADDR_0; + else if (strnstr(atom_ctx->vbios_version, + "D342", + sizeof(atom_ctx->vbios_version))) + control->i2c_address = EEPROM_I2C_MADDR_0; + else + control->i2c_address = EEPROM_I2C_MADDR_4; return true; - - case CHIP_ARCTURUS: - return __get_eeprom_i2c_addr_arct(adev, control); - - case CHIP_SIENNA_CICHLID: + case IP_VERSION(11, 0, 7): control->i2c_address = EEPROM_I2C_MADDR_0; return true; - - case CHIP_ALDEBARAN: + case IP_VERSION(13, 0, 2): if (strnstr(atom_ctx->vbios_version, "D673", sizeof(atom_ctx->vbios_version))) control->i2c_address = EEPROM_I2C_MADDR_4; else control->i2c_address = EEPROM_I2C_MADDR_0; return true; - - case CHIP_IP_DISCOVERY: - return __get_eeprom_i2c_addr_ip_discovery(adev, control); - + case IP_VERSION(13, 0, 0): + case IP_VERSION(13, 0, 10): + control->i2c_address = EEPROM_I2C_MADDR_4; + return true; default: return 
false; } -- cgit v1.2.3 From e69c373c3f0c1888b4b758e37d05e2e7b76585f2 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 31 Mar 2023 12:40:41 -0400 Subject: drm/amd/pm: remove unused num_of_active_display variable clang with W=1 reports drivers/gpu/drm/amd/amdgpu/../pm/swsmu/amdgpu_smu.c:1700:6: error: variable 'num_of_active_display' set but not used [-Werror,-Wunused-but-set-variable] int num_of_active_display = 0; ^ This variable is not used so remove it. Fixes: 75145aab7a0d ("drm/amdgpu/swsmu: clean up a bunch of stale interfaces") Reviewed-by: Nick Desaulniers Reviewed-by: Evan Quan Signed-off-by: Tom Rix Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 056ac2b512eb..5633c5797e85 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1712,8 +1712,6 @@ static int smu_display_configuration_change(void *handle, const struct amd_pp_display_configuration *display_config) { struct smu_context *smu = handle; - int index = 0; - int num_of_active_display = 0; if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) return -EOPNOTSUPP; @@ -1724,11 +1722,6 @@ static int smu_display_configuration_change(void *handle, smu_set_min_dcef_deep_sleep(smu, display_config->min_dcef_deep_sleep_set_clk / 100); - for (index = 0; index < display_config->num_path_including_non_display; index++) { - if (display_config->displays[index].controller_id != 0) - num_of_active_display++; - } - return 0; } -- cgit v1.2.3 From 318e431b306e966d2ee99e900a11bdc9a701ee83 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Tue, 11 Apr 2023 16:32:29 -0400 Subject: drm/amdgpu: Enable IH retry CAM on GFX9 This patch enables the IH retry CAM on GFX9 series cards. 
This retry filter is used to prevent sending lots of retry interrupts in a short span of time and overflowing the IH ring buffer. This will also help reduce CPU interrupt workload. Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h | 2 + drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 51 +++++++++++++------- drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 2 +- drivers/gpu/drm/amd/amdgpu/vega20_ih.c | 55 ++++++++++------------ drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 10 +++- .../amd/include/asic_reg/oss/osssys_4_2_0_offset.h | 6 +++ .../include/asic_reg/oss/osssys_4_2_0_sh_mask.h | 11 +++++ 7 files changed, 88 insertions(+), 49 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h index e9f2c11ea416..be243adf3e65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h @@ -98,6 +98,8 @@ struct amdgpu_irq { struct irq_domain *domain; /* GPU irq controller domain */ unsigned virq[AMDGPU_MAX_IRQ_SRC_ID]; uint32_t srbm_soft_reset; + u32 retry_cam_doorbell_index; + bool retry_cam_enabled; }; void amdgpu_irq_disable_all(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 67ad92097a65..7ec70666c648 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -555,32 +555,49 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, const char *mmhub_cid; const char *hub_name; u64 addr; + uint32_t cam_index = 0; + int ret; addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; if (retry_fault) { - /* Returning 1 here also prevents sending the IV to the KFD */ + if (adev->irq.retry_cam_enabled) { + /* Delegate it to a different ring if the hardware hasn't + * already done it. 
+ */ + if (entry->ih == &adev->irq.ih) { + amdgpu_irq_delegate(adev, entry, 8); + return 1; + } + + cam_index = entry->src_data[2] & 0x3ff; - /* Process it onyl if it's the first fault for this address */ - if (entry->ih != &adev->irq.ih_soft && - amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid, + ret = amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault); + WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index); + if (ret) + return 1; + } else { + /* Process it onyl if it's the first fault for this address */ + if (entry->ih != &adev->irq.ih_soft && + amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid, entry->timestamp)) - return 1; + return 1; - /* Delegate it to a different ring if the hardware hasn't - * already done it. - */ - if (entry->ih == &adev->irq.ih) { - amdgpu_irq_delegate(adev, entry, 8); - return 1; - } + /* Delegate it to a different ring if the hardware hasn't + * already done it. + */ + if (entry->ih == &adev->irq.ih) { + amdgpu_irq_delegate(adev, entry, 8); + return 1; + } - /* Try to handle the recoverable page faults by filling page - * tables - */ - if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault)) - return 1; + /* Try to handle the recoverable page faults by filling page + * tables + */ + if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault)) + return 1; + } } if (!printk_ratelimit()) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 19455a725939..685abf57ffdd 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -238,7 +238,7 @@ static void nbio_v7_4_ih_doorbell_range(struct amdgpu_device *adev, if (use_doorbell) { ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index); - ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 4); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 
8); } else ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index 827e2768f867..536128447b71 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -38,6 +38,11 @@ #define mmIH_CHICKEN_ALDEBARAN 0x18d #define mmIH_CHICKEN_ALDEBARAN_BASE_IDX 0 +#define mmIH_RETRY_INT_CAM_CNTL_ALDEBARAN 0x00ea +#define mmIH_RETRY_INT_CAM_CNTL_ALDEBARAN_BASE_IDX 0 +#define IH_RETRY_INT_CAM_CNTL_ALDEBARAN__ENABLE__SHIFT 0x10 +#define IH_RETRY_INT_CAM_CNTL_ALDEBARAN__ENABLE_MASK 0x00010000L + static void vega20_ih_set_interrupt_funcs(struct amdgpu_device *adev); /** @@ -251,36 +256,14 @@ static int vega20_ih_enable_ring(struct amdgpu_device *adev, return 0; } -/** - * vega20_ih_reroute_ih - reroute VMC/UTCL2 ih to an ih ring - * - * @adev: amdgpu_device pointer - * - * Reroute VMC and UMC interrupts on primary ih ring to - * ih ring 1 so they won't lose when bunches of page faults - * interrupts overwhelms the interrupt handler(VEGA20) - */ -static void vega20_ih_reroute_ih(struct amdgpu_device *adev) +static uint32_t vega20_setup_retry_doorbell(u32 doorbell_index) { - uint32_t tmp; + u32 val = 0; - /* vega20 ih reroute will go through psp this - * function is used for newer asics starting arcturus - */ - if (adev->ip_versions[OSSSYS_HWIP][0] >= IP_VERSION(4, 2, 1)) { - /* Reroute to IH ring 1 for VMC */ - WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_INDEX, 0x12); - tmp = RREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA); - tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, CLIENT_TYPE, 1); - tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); - WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA, tmp); - - /* Reroute IH ring 1 for UTCL2 */ - WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_INDEX, 0x1B); - tmp = RREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA); - tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); - WREG32_SOC15(OSSSYS, 
0, mmIH_CLIENT_CFG_DATA, tmp); - } + val = REG_SET_FIELD(val, IH_DOORBELL_RPTR, OFFSET, doorbell_index); + val = REG_SET_FIELD(val, IH_DOORBELL_RPTR, ENABLE, 1); + + return val; } /** @@ -333,8 +316,6 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) for (i = 0; i < ARRAY_SIZE(ih); i++) { if (ih[i]->ring_size) { - if (i == 1) - vega20_ih_reroute_ih(adev); ret = vega20_ih_enable_ring(adev, ih[i]); if (ret) return ret; @@ -347,6 +328,20 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) pci_set_master(adev->pdev); + /* Allocate the doorbell for IH Retry CAM */ + adev->irq.retry_cam_doorbell_index = (adev->doorbell_index.ih + 3) << 1; + WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RETRY_CAM, + vega20_setup_retry_doorbell(adev->irq.retry_cam_doorbell_index)); + + /* Enable IH Retry CAM */ + if (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 0)) + WREG32_FIELD15(OSSSYS, 0, IH_RETRY_INT_CAM_CNTL_ALDEBARAN, + ENABLE, 1); + else + WREG32_FIELD15(OSSSYS, 0, IH_RETRY_INT_CAM_CNTL, ENABLE, 1); + + adev->irq.retry_cam_enabled = true; + /* enable interrupts */ ret = vega20_ih_toggle_interrupts(adev, true); if (ret) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index dc6fd6967050..96a138a39515 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -2172,7 +2172,15 @@ restart: pr_debug("drain retry fault gpu %d svms %p\n", i, svms); amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev, - &pdd->dev->adev->irq.ih1); + pdd->dev->adev->irq.retry_cam_enabled ? 
+ &pdd->dev->adev->irq.ih : + &pdd->dev->adev->irq.ih1); + + if (pdd->dev->adev->irq.retry_cam_enabled) + amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev, + &pdd->dev->adev->irq.ih_soft); + + pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms); } if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain) diff --git a/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_offset.h index bd129266ebfd..a84a7cfaf71e 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_offset.h @@ -135,6 +135,8 @@ #define mmIH_RB_WPTR_ADDR_LO_BASE_IDX 0 #define mmIH_DOORBELL_RPTR 0x0087 #define mmIH_DOORBELL_RPTR_BASE_IDX 0 +#define mmIH_DOORBELL_RETRY_CAM 0x0088 +#define mmIH_DOORBELL_RETRY_CAM_BASE_IDX 0 #define mmIH_RB_CNTL_RING1 0x008c #define mmIH_RB_CNTL_RING1_BASE_IDX 0 #define mmIH_RB_BASE_RING1 0x008d @@ -159,6 +161,8 @@ #define mmIH_RB_WPTR_RING2_BASE_IDX 0 #define mmIH_DOORBELL_RPTR_RING2 0x009f #define mmIH_DOORBELL_RPTR_RING2_BASE_IDX 0 +#define mmIH_RETRY_CAM_ACK 0x00a4 +#define mmIH_RETRY_CAM_ACK_BASE_IDX 0 #define mmIH_VERSION 0x00a5 #define mmIH_VERSION_BASE_IDX 0 #define mmIH_CNTL 0x00c0 @@ -235,6 +239,8 @@ #define mmIH_MMHUB_ERROR_BASE_IDX 0 #define mmIH_MEM_POWER_CTRL 0x00e8 #define mmIH_MEM_POWER_CTRL_BASE_IDX 0 +#define mmIH_RETRY_INT_CAM_CNTL 0x00e9 +#define mmIH_RETRY_INT_CAM_CNTL_BASE_IDX 0 #define mmIH_REGISTER_LAST_PART2 0x00ff #define mmIH_REGISTER_LAST_PART2_BASE_IDX 0 #define mmSEM_CLK_CTRL 0x0100 diff --git a/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_sh_mask.h index 3ea83ea9ce3a..75c04fc275a0 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_sh_mask.h @@ -349,6 +349,17 @@ #define IH_DOORBELL_RPTR_RING2__ENABLE__SHIFT 0x1c 
#define IH_DOORBELL_RPTR_RING2__OFFSET_MASK 0x03FFFFFFL #define IH_DOORBELL_RPTR_RING2__ENABLE_MASK 0x10000000L +//IH_RETRY_INT_CAM_CNTL +#define IH_RETRY_INT_CAM_CNTL__CAM_SIZE__SHIFT 0x0 +#define IH_RETRY_INT_CAM_CNTL__BACK_PRESSURE_SKID_VALUE__SHIFT 0x8 +#define IH_RETRY_INT_CAM_CNTL__ENABLE__SHIFT 0x10 +#define IH_RETRY_INT_CAM_CNTL__BACK_PRESSURE_ENABLE__SHIFT 0x11 +#define IH_RETRY_INT_CAM_CNTL__PER_VF_ENTRY_SIZE__SHIFT 0x14 +#define IH_RETRY_INT_CAM_CNTL__CAM_SIZE_MASK 0x0000001FL +#define IH_RETRY_INT_CAM_CNTL__BACK_PRESSURE_SKID_VALUE_MASK 0x00003F00L +#define IH_RETRY_INT_CAM_CNTL__ENABLE_MASK 0x00010000L +#define IH_RETRY_INT_CAM_CNTL__BACK_PRESSURE_ENABLE_MASK 0x00020000L +#define IH_RETRY_INT_CAM_CNTL__PER_VF_ENTRY_SIZE_MASK 0x00300000L //IH_VERSION #define IH_VERSION__MINVER__SHIFT 0x0 #define IH_VERSION__MAJVER__SHIFT 0x8 -- cgit v1.2.3 From dd299441654fd8209056c7985ddf2373ebaba6ed Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Tue, 11 Apr 2023 16:32:38 -0400 Subject: drm/amdgpu: Rework retry fault removal Rework retry fault removal from the software filter by storing an expiry timestamp for a fault that is being removed. When a new fault arrives and matches an entry in the sw filter, it will be added as a new fault only when its timestamp is greater than the timestamp expiry of the fault in the sw filter. This helps avoid stale faults being added back into the filter, where they would prevent legitimate faults from being handled.
Suggested-by: Felix Kuehling Signed-off-by: Mukul Joshi Reviewed-by: Philip Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 36 ++++++++++++++++++++++++++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 1 + 2 files changed, 34 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 88bc7f5f46e6..9b0ccb1b84c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -395,8 +395,21 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, while (fault->timestamp >= stamp) { uint64_t tmp; - if (atomic64_read(&fault->key) == key) - return true; + if (atomic64_read(&fault->key) == key) { + /* + * if we get a fault which is already present in + * the fault_ring and the timestamp of + * the fault is after the expired timestamp, + * then this is a new fault that needs to be added + * into the fault ring. + */ + if (fault->timestamp_expiry != 0 && + amdgpu_ih_ts_after(fault->timestamp_expiry, + timestamp)) + break; + else + return true; + } tmp = fault->timestamp; fault = &gmc->fault_ring[fault->next]; @@ -432,15 +445,32 @@ void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr, { struct amdgpu_gmc *gmc = &adev->gmc; uint64_t key = amdgpu_gmc_fault_key(addr, pasid); + struct amdgpu_ih_ring *ih; struct amdgpu_gmc_fault *fault; + uint32_t last_wptr; + uint64_t last_ts; uint32_t hash; uint64_t tmp; + ih = adev->irq.retry_cam_enabled ? &adev->irq.ih_soft : &adev->irq.ih1; + /* Get the WPTR of the last entry in IH ring */ + last_wptr = amdgpu_ih_get_wptr(adev, ih); + /* Order wptr with ring data. 
*/ + rmb(); + /* Get the timetamp of the last entry in IH ring */ + last_ts = amdgpu_ih_decode_iv_ts(adev, ih, last_wptr, -1); + hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER); fault = &gmc->fault_ring[gmc->fault_hash[hash].idx]; do { - if (atomic64_cmpxchg(&fault->key, key, 0) == key) + if (atomic64_read(&fault->key) == key) { + /* + * Update the timestamp when this fault + * expired. + */ + fault->timestamp_expiry = last_ts; break; + } tmp = fault->timestamp; fault = &gmc->fault_ring[fault->next]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 232523e3e270..6d105d7fb98b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -70,6 +70,7 @@ struct amdgpu_gmc_fault { uint64_t timestamp:48; uint64_t next:AMDGPU_GMC_FAULT_RING_ORDER; atomic64_t key; + uint64_t timestamp_expiry:48; }; /* -- cgit v1.2.3 From fd784a418ed832fbadfa846cef61741fdab0377a Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Tue, 11 Apr 2023 10:27:12 +0800 Subject: drm/amdgpu: add gfx v11_0_3 fed irq handling for sriov Add gfx v11_0_3 fed irq handling for sriov. 
Signed-off-by: YiPeng Chai Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c index b07a72ca25d9..068b9586a223 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c @@ -62,10 +62,18 @@ static int gfx_v11_0_3_rlc_gc_fed_irq(struct amdgpu_device *adev, return -EINVAL; } - ih_data.head = *ras_if; - dev_warn(adev->dev, "RLC %s FED IRQ\n", ras_if->name); - amdgpu_ras_interrupt_dispatch(adev, &ih_data); + + if (!amdgpu_sriov_vf(adev)) { + ih_data.head = *ras_if; + amdgpu_ras_interrupt_dispatch(adev, &ih_data); + } else { + if (adev->virt.ops && adev->virt.ops->ras_poison_handler) + adev->virt.ops->ras_poison_handler(adev); + else + dev_warn(adev->dev, + "No ras_poison_handler interface in SRIOV for %s!\n", ras_if->name); + } return 0; } -- cgit v1.2.3 From 6fe2ecdba34445a17049cda73a399d9685189efc Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 7 Apr 2023 17:12:15 +0800 Subject: drm/amd/pm: correct the pcie link state check for SMU13 Update the driver implementations to fit those data exposed by PMFW. 
Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 6 ++++++ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 4 ++-- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 4 ++-- 3 files changed, 10 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index 0ef37837b164..7944ce80e5c3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -62,6 +62,12 @@ #define CTF_OFFSET_HOTSPOT 5 #define CTF_OFFSET_MEM 5 +static const int pmfw_decoded_link_speed[5] = {1, 2, 3, 4, 5}; +static const int pmfw_decoded_link_width[7] = {0, 1, 2, 4, 8, 12, 16}; + +#define DECODE_GEN_SPEED(gen_speed_idx) (pmfw_decoded_link_speed[gen_speed_idx]) +#define DECODE_LANE_WIDTH(lane_width_idx) (pmfw_decoded_link_width[lane_width_idx]) + struct smu_13_0_max_sustainable_clocks { uint32_t display_clock; uint32_t phy_clock; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index e9766fe5656e..09405ef1e3c8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -1144,8 +1144,8 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu, (pcie_table->pcie_lane[i] == 5) ? "x12" : (pcie_table->pcie_lane[i] == 6) ? "x16" : "", pcie_table->clk_freq[i], - ((gen_speed - 1) == pcie_table->pcie_gen[i]) && - (lane_width == link_width[pcie_table->pcie_lane[i]]) ? + (gen_speed == DECODE_GEN_SPEED(pcie_table->pcie_gen[i])) && + (lane_width == DECODE_LANE_WIDTH(link_width[pcie_table->pcie_lane[i]])) ? 
"*" : ""); break; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index c8c7133626ec..3d9ff46706fb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -1133,8 +1133,8 @@ static int smu_v13_0_7_print_clk_levels(struct smu_context *smu, (pcie_table->pcie_lane[i] == 5) ? "x12" : (pcie_table->pcie_lane[i] == 6) ? "x16" : "", pcie_table->clk_freq[i], - (gen_speed == pcie_table->pcie_gen[i]) && - (lane_width == pcie_table->pcie_lane[i]) ? + (gen_speed == DECODE_GEN_SPEED(pcie_table->pcie_gen[i])) && + (lane_width == DECODE_LANE_WIDTH(pcie_table->pcie_lane[i])) ? "*" : ""); break; -- cgit v1.2.3 From f03eb1d26c2739b75580f58bbab4ab2d5d3eba46 Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Wed, 12 Apr 2023 18:17:32 +0800 Subject: drm/amdgpu: switch to golden tsc registers for raven/raven2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Due to raven/raven2 maybe enable  sclk slow down, they cannot get clock count by the RLC at the auto level of dpm performance. So switch to golden tsc register. 
Suggested-by: shanshengwang Reviewed-by: Evan Quan Signed-off-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 40 +++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index ae09fc1cfe6b..c99d9e642e51 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -149,6 +149,16 @@ MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin"); #define mmGOLDEN_TSC_COUNT_LOWER_Renoir 0x0026 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX 1 +#define mmGOLDEN_TSC_COUNT_UPPER_Raven 0x007a +#define mmGOLDEN_TSC_COUNT_UPPER_Raven_BASE_IDX 0 +#define mmGOLDEN_TSC_COUNT_LOWER_Raven 0x007b +#define mmGOLDEN_TSC_COUNT_LOWER_Raven_BASE_IDX 0 + +#define mmGOLDEN_TSC_COUNT_UPPER_Raven2 0x0068 +#define mmGOLDEN_TSC_COUNT_UPPER_Raven2_BASE_IDX 0 +#define mmGOLDEN_TSC_COUNT_LOWER_Raven2 0x0069 +#define mmGOLDEN_TSC_COUNT_LOWER_Raven2_BASE_IDX 0 + enum ta_ras_gfx_subblock { /*CPC*/ TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, @@ -3988,6 +3998,36 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) preempt_enable(); clock = clock_lo | (clock_hi << 32ULL); break; + case IP_VERSION(9, 1, 0): + preempt_disable(); + clock_hi = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven); + clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven); + hi_check = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven); + /* The PWR TSC clock frequency is 100MHz, which sets 32-bit carry over + * roughly every 42 seconds. 
+ */ + if (hi_check != clock_hi) { + clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven); + clock_hi = hi_check; + } + preempt_enable(); + clock = clock_lo | (clock_hi << 32ULL); + break; + case IP_VERSION(9, 2, 2): + preempt_disable(); + clock_hi = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven2); + clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven2); + hi_check = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven2); + /* The PWR TSC clock frequency is 100MHz, which sets 32-bit carry over + * roughly every 42 seconds. + */ + if (hi_check != clock_hi) { + clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven2); + clock_hi = hi_check; + } + preempt_enable(); + clock = clock_lo | (clock_hi << 32ULL); + break; default: amdgpu_gfx_off_ctrl(adev, false); mutex_lock(&adev->gfx.gpu_clock_mutex); -- cgit v1.2.3 From 0512e9ffebca0f9a91f6e54b0da90976dce2b025 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Fri, 24 Feb 2023 11:25:07 +0100 Subject: drm/amdgpu: rename num_doorbells Rename doorbell.num_doorbells to doorbell.num_kernel_doorbells to make it more readable. Cc: Alex Deucher Cc: Christian Koenig Acked-by: Christian Koenig Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 22 +++++++++++----------- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 4 +++- 3 files changed, 17 insertions(+), 15 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index f99d4873bf22..0385f7f69278 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -96,7 +96,7 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, size_t *start_offset) { /* - * The first num_doorbells are used by amdgpu. + * The first num_kernel_doorbells are used by amdgpu. 
* amdkfd takes whatever's left in the aperture. */ if (adev->enable_mes) { @@ -109,11 +109,11 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, *aperture_base = adev->doorbell.base; *aperture_size = 0; *start_offset = 0; - } else if (adev->doorbell.size > adev->doorbell.num_doorbells * + } else if (adev->doorbell.size > adev->doorbell.num_kernel_doorbells * sizeof(u32)) { *aperture_base = adev->doorbell.base; *aperture_size = adev->doorbell.size; - *start_offset = adev->doorbell.num_doorbells * sizeof(u32); + *start_offset = adev->doorbell.num_kernel_doorbells * sizeof(u32); } else { *aperture_base = 0; *aperture_size = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index efdc6b73a55c..97068c4dee74 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -602,7 +602,7 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index) if (amdgpu_device_skip_hw_access(adev)) return 0; - if (index < adev->doorbell.num_doorbells) { + if (index < adev->doorbell.num_kernel_doorbells) { return readl(adev->doorbell.ptr + index); } else { DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); @@ -625,7 +625,7 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v) if (amdgpu_device_skip_hw_access(adev)) return; - if (index < adev->doorbell.num_doorbells) { + if (index < adev->doorbell.num_kernel_doorbells) { writel(v, adev->doorbell.ptr + index); } else { DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); @@ -646,7 +646,7 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index) if (amdgpu_device_skip_hw_access(adev)) return 0; - if (index < adev->doorbell.num_doorbells) { + if (index < adev->doorbell.num_kernel_doorbells) { return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index)); } else { DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); @@ -669,7 +669,7 @@ void 
amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) if (amdgpu_device_skip_hw_access(adev)) return; - if (index < adev->doorbell.num_doorbells) { + if (index < adev->doorbell.num_kernel_doorbells) { atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v); } else { DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); @@ -1060,7 +1060,7 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) if (adev->asic_type < CHIP_BONAIRE) { adev->doorbell.base = 0; adev->doorbell.size = 0; - adev->doorbell.num_doorbells = 0; + adev->doorbell.num_kernel_doorbells = 0; adev->doorbell.ptr = NULL; return 0; } @@ -1075,27 +1075,27 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) adev->doorbell.size = pci_resource_len(adev->pdev, 2); if (adev->enable_mes) { - adev->doorbell.num_doorbells = + adev->doorbell.num_kernel_doorbells = adev->doorbell.size / sizeof(u32); } else { - adev->doorbell.num_doorbells = + adev->doorbell.num_kernel_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32), adev->doorbell_index.max_assignment+1); - if (adev->doorbell.num_doorbells == 0) + if (adev->doorbell.num_kernel_doorbells == 0) return -EINVAL; /* For Vega, reserve and map two pages on doorbell BAR since SDMA * paging queue doorbell use the second page. The * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the * doorbells are in the first page. 
So with paging queue enabled, - * the max num_doorbells should + 1 page (0x400 in dword) + * the max num_kernel_doorbells should + 1 page (0x400 in dword) */ if (adev->asic_type >= CHIP_VEGA10) - adev->doorbell.num_doorbells += 0x400; + adev->doorbell.num_kernel_doorbells += 0x400; } adev->doorbell.ptr = ioremap(adev->doorbell.base, - adev->doorbell.num_doorbells * + adev->doorbell.num_kernel_doorbells * sizeof(u32)); if (adev->doorbell.ptr == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index 7199b6b0be81..12263986f889 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -29,7 +29,9 @@ struct amdgpu_doorbell { resource_size_t base; resource_size_t size; u32 __iomem *ptr; - u32 num_doorbells; /* Number of doorbells actually reserved for amdgpu. */ + + /* Number of doorbells reserved for amdgpu kernel driver */ + u32 num_kernel_doorbells; }; /* Reserved doorbells for amdgpu (including multimedia). 
-- cgit v1.2.3 From ff742e0ca3db876eb152a5d6bdcf7654ef6f9398 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Fri, 24 Feb 2023 21:27:57 +0100 Subject: drm/amdgpu: include protection for doorbell.h This patch adds double include protection for doorbell.h Cc: Christian Koenig Cc: Alex Deucher Reviewed-by: Christian Koenig Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index 12263986f889..8fd11497faba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -21,6 +21,9 @@ * */ +#ifndef AMDGPU_DOORBELL_H +#define AMDGPU_DOORBELL_H + /* * GPU doorbell structures, functions & helpers */ @@ -308,3 +311,4 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v); #define RDOORBELL64(index) amdgpu_mm_rdoorbell64(adev, (index)) #define WDOORBELL64(index, v) amdgpu_mm_wdoorbell64(adev, (index), (v)) +#endif -- cgit v1.2.3 From 73c4b0f83693604ae5964c68fb23159b823b12ac Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Wed, 12 Apr 2023 00:01:13 +0800 Subject: drm/amdgpu: drop temp programming for pagefault handling Was introduced as workaround. 
not needed anymore Signed-off-by: Hawking Zhang Reviewed-by: Jack Gui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c index be0d0f47415e..13712640fa46 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c @@ -417,34 +417,12 @@ static void gfxhub_v3_0_set_fault_enable_default(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, CP_DEBUG, CPG_UTCL1_ERROR_HALT_DISABLE, 1); WREG32_SOC15(GC, 0, regCP_DEBUG, tmp); - /** - * Set GRBM_GFX_INDEX in broad cast mode - * before programming GL1C_UTCL0_CNTL1 and SQG_CONFIG - */ - WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, regGRBM_GFX_INDEX_DEFAULT); - - /** - * Retry respond mode: RETRY - * Error (no retry) respond mode: SUCCESS - */ - tmp = RREG32_SOC15(GC, 0, regGL1C_UTCL0_CNTL1); - tmp = REG_SET_FIELD(tmp, GL1C_UTCL0_CNTL1, RESP_MODE, 0); - tmp = REG_SET_FIELD(tmp, GL1C_UTCL0_CNTL1, RESP_FAULT_MODE, 0x2); - WREG32_SOC15(GC, 0, regGL1C_UTCL0_CNTL1, tmp); - /* These registers are not accessible to VF-SRIOV. * The PF will program them instead. 
*/ if (amdgpu_sriov_vf(adev)) return; - /* Disable SQ XNACK interrupt for all VMIDs */ - tmp = RREG32_SOC15(GC, 0, regSQG_CONFIG); - tmp = REG_SET_FIELD(tmp, SQG_CONFIG, XNACK_INTR_MASK, - SQG_CONFIG__XNACK_INTR_MASK_MASK >> - SQG_CONFIG__XNACK_INTR_MASK__SHIFT); - WREG32_SOC15(GC, 0, regSQG_CONFIG, tmp); - tmp = RREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL); tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); -- cgit v1.2.3 From 7c0f7ee00c7d52e7ef1953e151a7f5d5fd5db64b Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Thu, 14 Oct 2021 11:45:38 +0800 Subject: drm/amdgpu: add gc v9_4_3 rlc_funcs implementation all the gc v9_4_3 registers fall in gc_rlcpdec address range have different relative offsets and base_idx from the ones defined in gc v9_0 ip headers. gc_v9_0_rlc_funcs can not be reused anymore for gc v9_4_3 v2: drop unused handshake function (Alex) Signed-off-by: Hawking Zhang Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 334 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h | 29 +++ 3 files changed, 364 insertions(+) create mode 100644 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c create mode 100644 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index dd8bc53411bd..415a7fa395c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -136,6 +136,7 @@ amdgpu-y += \ gfx_v9_0.o \ gfx_v9_4.o \ gfx_v9_4_2.o \ + gfx_v9_4_3.o \ gfx_v10_0.o \ imu_v11_0.o \ gfx_v11_0.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c new file mode 100644 index 000000000000..a7246d38d5bb --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -0,0 +1,334 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include + +#include "amdgpu.h" +#include "amdgpu_gfx.h" +#include "soc15.h" +#include "soc15_common.h" + +#include "gc/gc_9_4_3_offset.h" +#include "gc/gc_9_4_3_sh_mask.h" + +#include "gfx_v9_4_3.h" + +#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L + +static bool gfx_v9_4_3_is_rlc_enabled(struct amdgpu_device *adev) +{ + uint32_t rlc_setting; + + /* if RLC is not enabled, do nothing */ + rlc_setting = RREG32_SOC15(GC, 0, regRLC_CNTL); + if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) + return false; + + return true; +} + +static void gfx_v9_4_3_set_safe_mode(struct amdgpu_device *adev) +{ + uint32_t data; + unsigned i; + + data = RLC_SAFE_MODE__CMD_MASK; + data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); + WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data); + + /* wait for RLC_SAFE_MODE */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) + break; + udelay(1); + } +} + +static void gfx_v9_4_3_unset_safe_mode(struct amdgpu_device *adev) +{ + uint32_t data; + + data = RLC_SAFE_MODE__CMD_MASK; + WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data); +} + +static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev) +{ + /* init spm vmid with 0xf */ + if (adev->gfx.rlc.funcs->update_spm_vmid) + adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); + + return 0; +} + +static void gfx_v9_4_3_select_se_sh(struct amdgpu_device *adev, + u32 se_num, + u32 sh_num, + u32 instance) +{ + u32 data; + + if (instance == 0xffffffff) + data = REG_SET_FIELD(0, GRBM_GFX_INDEX, + INSTANCE_BROADCAST_WRITES, 1); + else + data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, + instance); + + if (se_num == 0xffffffff) + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, + 1); + else + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); + + if (sh_num == 0xffffffff) + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, + 1); + else + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, 
sh_num); + + WREG32_SOC15_RLC_SHADOW_EX(reg, GC, 0, regGRBM_GFX_INDEX, data); +} + +static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev) +{ + u32 i, j, k; + u32 mask; + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff); + for (k = 0; k < adev->usec_timeout; k++) { + if (RREG32_SOC15(GC, 0, regRLC_SERDES_CU_MASTER_BUSY) == 0) + break; + udelay(1); + } + if (k == adev->usec_timeout) { + gfx_v9_4_3_select_se_sh(adev, 0xffffffff, + 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + DRM_INFO("Timeout wait for RLC serdes %u,%u\n", + i, j); + return; + } + } + } + gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + + mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | + RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | + RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | + RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; + for (k = 0; k < adev->usec_timeout; k++) { + if ((RREG32_SOC15(GC, 0, regRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) + break; + udelay(1); + } +} + +static void gfx_v9_4_3_enable_gui_idle_interrupt(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp; + + /* These interrupts should be enabled to drive DS clock */ + + tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0); + + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); + if (adev->gfx.num_gfx_rings) + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 
1 : 0); + + WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp); +} + +static void gfx_v9_4_3_rlc_stop(struct amdgpu_device *adev) +{ + WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); + gfx_v9_4_3_enable_gui_idle_interrupt(adev, false); + gfx_v9_4_3_wait_for_rlc_serdes(adev); +} + +static void gfx_v9_4_3_rlc_reset(struct amdgpu_device *adev) +{ + WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); + udelay(50); + WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); + udelay(50); +} + +static void gfx_v9_4_3_rlc_start(struct amdgpu_device *adev) +{ +#ifdef AMDGPU_RLC_DEBUG_RETRY + u32 rlc_ucode_ver; +#endif + + WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); + udelay(50); + + /* carrizo do enable cp interrupt after cp inited */ + if (!(adev->flags & AMD_IS_APU)) { + gfx_v9_4_3_enable_gui_idle_interrupt(adev, true); + udelay(50); + } + +#ifdef AMDGPU_RLC_DEBUG_RETRY + /* RLC_GPM_GENERAL_6 : RLC Ucode version */ + rlc_ucode_ver = RREG32_SOC15(GC, 0, regRLC_GPM_GENERAL_6); + if (rlc_ucode_ver == 0x108) { + dev_info(adev->dev, + "Using rlc debug ucode. 
regRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", + rlc_ucode_ver, adev->gfx.rlc_fw_version); + /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, + * default is 0x9C4 to create a 100us interval */ + WREG32_SOC15(GC, 0, regRLC_GPM_TIMER_INT_3, 0x9C4); + /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr + * to disable the page fault retry interrupts, default is + * 0x100 (256) */ + WREG32_SOC15(GC, 0, regRLC_GPM_GENERAL_12, 0x100); + } +#endif +} + +static int gfx_v9_4_3_rlc_load_microcode(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_0 *hdr; + const __le32 *fw_data; + unsigned i, fw_size; + + if (!adev->gfx.rlc_fw) + return -EINVAL; + + hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + amdgpu_ucode_print_rlc_hdr(&hdr->header); + + fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; + + WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, + RLCG_UCODE_LOADING_START_ADDRESS); + for (i = 0; i < fw_size; i++) { + if (amdgpu_emu_mode == 1 && i % 100 == 0) { + dev_info(adev->dev, "Write RLC ucode data %u DWs\n", i); + msleep(1); + } + WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); + } + WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); + + return 0; +} + +static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev) +{ + int r; + + adev->gfx.rlc.funcs->stop(adev); + + /* disable CG */ + WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0); + + /* TODO: revisit pg function */ + /* gfx_v9_4_3_init_pg(adev);*/ + + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + /* legacy rlc firmware loading */ + r = gfx_v9_4_3_rlc_load_microcode(adev); + if (r) + return r; + } + + adev->gfx.rlc.funcs->start(adev); + + return 0; +} + +static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) +{ + u32 reg, data; + + reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); 
+ if (amdgpu_sriov_is_pp_one_vf(adev)) + data = RREG32_NO_KIQ(reg); + else + data = RREG32(reg); + + data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; + data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; + + if (amdgpu_sriov_is_pp_one_vf(adev)) + WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); + else + WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data); +} + +static const struct soc15_reg_rlcg rlcg_access_gc_9_4_3[] = { + {SOC15_REG_ENTRY(GC, 0, regGRBM_GFX_INDEX)}, + {SOC15_REG_ENTRY(GC, 0, regSQ_IND_INDEX)}, +}; + +static bool gfx_v9_4_3_check_rlcg_range(struct amdgpu_device *adev, + uint32_t offset, + struct soc15_reg_rlcg *entries, int arr_size) +{ + int i; + uint32_t reg; + + if (!entries) + return false; + + for (i = 0; i < arr_size; i++) { + const struct soc15_reg_rlcg *entry; + + entry = &entries[i]; + reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg; + if (offset == reg) + return true; + } + + return false; +} + +static bool gfx_v9_4_3_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset) +{ + return gfx_v9_4_3_check_rlcg_range(adev, offset, + (void *)rlcg_access_gc_9_4_3, + ARRAY_SIZE(rlcg_access_gc_9_4_3)); +} + +const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs = { + .is_rlc_enabled = gfx_v9_4_3_is_rlc_enabled, + .set_safe_mode = gfx_v9_4_3_set_safe_mode, + .unset_safe_mode = gfx_v9_4_3_unset_safe_mode, + .init = gfx_v9_4_3_rlc_init, + .resume = gfx_v9_4_3_rlc_resume, + .stop = gfx_v9_4_3_rlc_stop, + .reset = gfx_v9_4_3_rlc_reset, + .start = gfx_v9_4_3_rlc_start, + .update_spm_vmid = gfx_v9_4_3_update_spm_vmid, + .is_rlcg_access_range = gfx_v9_4_3_is_rlcg_access_range, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h new file mode 100644 index 000000000000..d21ea9ebf146 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h @@ -0,0 +1,29 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __GFX_V9_4_3_H__ +#define __GFX_V9_4_3_H__ + +extern const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs; + +#endif /* __GFX_V9_4_3_H__ */ -- cgit v1.2.3 From f22067419e9683f8fba40ca3a0d56fb3106c7c6f Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Wed, 5 Apr 2023 19:22:20 +0800 Subject: drm/amdgpu: skip kfd-iommu suspend/resume for S0ix GFX is in gfxoff mode during s0ix so we shouldn't need to actually execute kfd_iommu_suspend/kfd_iommu_resume operation. 
Signed-off-by: Aaron Liu Acked-by: Alex Deucher Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 97068c4dee74..a2292acf06d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3305,9 +3305,11 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev) { int r; - r = amdgpu_amdkfd_resume_iommu(adev); - if (r) - return r; + if (!adev->in_s0ix) { + r = amdgpu_amdkfd_resume_iommu(adev); + if (r) + return r; + } r = amdgpu_device_ip_resume_phase1(adev); if (r) -- cgit v1.2.3 From fbc24293ca16b3b9ef891fe32ccd04735a6f8dc1 Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Wed, 12 Apr 2023 17:04:03 +0800 Subject: drm/amdgpu: change the reference clock for raven/raven2 Due to the switch to the golden tsc register to get the clock counter for raven/raven2, change the reference clock from 25MHz to 100MHz.
Suggested-by: shanshengwang Reviewed-by: Evan Quan Signed-off-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 7d04c39332ad..0367a97c606b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -301,11 +301,10 @@ static u32 soc15_get_xclk(struct amdgpu_device *adev) u32 reference_clock = adev->clock.spll.reference_freq; if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 0) || - adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 1)) - return 10000; - if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(10, 0, 0) || + adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 1) || + adev->ip_versions[MP1_HWIP][0] == IP_VERSION(10, 0, 0) || adev->ip_versions[MP1_HWIP][0] == IP_VERSION(10, 0, 1)) - return reference_clock / 4; + return 10000; return reference_clock; } -- cgit v1.2.3 From 8855818ce7554fb7420200187fac9c3b69500da0 Mon Sep 17 00:00:00 2001 From: Li Ma Date: Wed, 12 Apr 2023 22:06:34 +0800 Subject: drm/amdgpu: reserve the old gc_11_0_*_mes.bin Reserve the MODULE_FIRMWARE declaration of gc_11_0_*_mes.bin to fix falling back to old mes bin on failure via autoload.
Fixes: 97998b893c30 ("drm/amd/amdgpu: introduce gc_*_mes_2.bin v2") Signed-off-by: Li Ma Reviewed-by: Yifan Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 7e8c15cd8dc2..45280f047180 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -32,14 +32,19 @@ #include "v11_structs.h" #include "mes_v11_api_def.h" +MODULE_FIRMWARE("amdgpu/gc_11_0_0_mes.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_mes1.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes1.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes1.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes1.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes1.bin"); -- cgit v1.2.3 From 83688771400895ce39994f158362a3c666993504 Mon Sep 17 00:00:00 2001 From: Amber Lin Date: Fri, 24 Sep 2021 12:15:48 -0400 Subject: drm/amdkfd: Enable HW_UPDATE_RPTR on GC 9.4.3 GC 9.4.3 uses the hardware to update AQL queues read pointer, so remove CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK flag from MQD if it's GC 9.4.3, and keep it for other existing gfx9 ASICs. 
Signed-off-by: Amber Lin Acked-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 4dfae19714ab..fdbfd725841f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -224,6 +224,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, struct queue_properties *q, struct mqd_update_info *minfo) { + struct amdgpu_device *adev = (struct amdgpu_device *)mm->dev->adev; struct v9_mqd *m; m = get_mqd(mqd); @@ -269,10 +270,13 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_vmid = q->vmid; if (q->format == KFD_QUEUE_FORMAT_AQL) { - m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK | + m->cp_hqd_pq_control |= 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT | 1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT | 1 << CP_HQD_PQ_CONTROL__WPP_CLAMP_EN__SHIFT; + if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3)) + m->cp_hqd_pq_control |= + CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK; m->cp_hqd_pq_doorbell_control |= 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT; } -- cgit v1.2.3 From 70bdfedaaec12dd47b24f16a59d31ae1bafffd99 Mon Sep 17 00:00:00 2001 From: Graham Sider Date: Wed, 20 Oct 2021 11:31:03 -0400 Subject: drm/amdkfd: Add gfx_target_version for GC 9.4.3 Required for Thunk GFX version sysfs query. 
Signed-off-by: Graham Sider Reviewed-by: Amber Lin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index e12ff49d01e1..00f528eb9812 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -323,10 +323,13 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) break; /* Aldebaran */ case IP_VERSION(9, 4, 2): - case IP_VERSION(9, 4, 3): gfx_target_version = 90010; f2g = &aldebaran_kfd2kgd; break; + case IP_VERSION(9, 4, 3): + gfx_target_version = 90400; + f2g = &aldebaran_kfd2kgd; + break; /* Navi10 */ case IP_VERSION(10, 1, 10): gfx_target_version = 100100; -- cgit v1.2.3 From ca9beb8aac68468f1778ad0e0fdad4e204f91393 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 13 Apr 2023 21:19:15 +0530 Subject: drm/amd/display: Add logging when setting DP sink power state fails Log if we fail to setup sink power states. Cc: Fangzhi Zuo Cc: Aurabindo Pillai Signed-off-by: Srinivasan Shanmugam Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index c840ef17802e..ba98013fecd0 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -1043,6 +1043,9 @@ static enum dc_status wake_up_aux_channel(struct dc_link *link) DP_SET_POWER, &dpcd_power_state, sizeof(dpcd_power_state)); + if (status < 0) + DC_LOG_DC("%s: Failed to power up sink: %s\n", __func__, + dpcd_power_state == DP_SET_POWER_D0 ? 
"D0" : "D3"); return DC_ERROR_UNEXPECTED; } -- cgit v1.2.3 From de7511aef767656950d1c236a294c1b941f14ae7 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Mon, 25 Oct 2021 16:45:45 +0800 Subject: drm/amdgpu: switch to v9_4_3 gfx_funcs callbacks for GC 9.4.3 add gfx_funcs callbacks implemenation based on gc_v9_4_3 ip headers Signed-off-by: Hawking Zhang Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 154 ++++++++++++++++++++++++++------ drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h | 1 + 2 files changed, 126 insertions(+), 29 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index a7246d38d5bb..5f8500577c02 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -26,6 +26,7 @@ #include "amdgpu_gfx.h" #include "soc15.h" #include "soc15_common.h" +#include "vega10_enum.h" #include "gc/gc_9_4_3_offset.h" #include "gc/gc_9_4_3_sh_mask.h" @@ -34,6 +35,121 @@ #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L +static uint64_t gfx_v9_4_3_get_gpu_clock_counter(struct amdgpu_device *adev) +{ + uint64_t clock; + + amdgpu_gfx_off_ctrl(adev, false); + mutex_lock(&adev->gfx.gpu_clock_mutex); + WREG32_SOC15(GC, 0, regRLC_CAPTURE_GPU_CLOCK_COUNT, 1); + clock = (uint64_t)RREG32_SOC15(GC, 0, regRLC_GPU_CLOCK_COUNT_LSB) | + ((uint64_t)RREG32_SOC15(GC, 0, regRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + mutex_unlock(&adev->gfx.gpu_clock_mutex); + amdgpu_gfx_off_ctrl(adev, true); + + return clock; +} + +static void gfx_v9_4_3_select_se_sh(struct amdgpu_device *adev, + u32 se_num, + u32 sh_num, + u32 instance) +{ + u32 data; + + if (instance == 0xffffffff) + data = REG_SET_FIELD(0, GRBM_GFX_INDEX, + INSTANCE_BROADCAST_WRITES, 1); + else + data = REG_SET_FIELD(0, GRBM_GFX_INDEX, + INSTANCE_INDEX, instance); + + if (se_num == 0xffffffff) + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + SE_BROADCAST_WRITES, 1); + else + data = 
REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); + + if (sh_num == 0xffffffff) + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + SH_BROADCAST_WRITES, 1); + else + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); + + WREG32_SOC15_RLC_SHADOW_EX(reg, GC, 0, regGRBM_GFX_INDEX, data); +} + +static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) +{ + WREG32_SOC15_RLC(GC, 0, regSQ_IND_INDEX, + (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | + (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | + (address << SQ_IND_INDEX__INDEX__SHIFT) | + (SQ_IND_INDEX__FORCE_READ_MASK)); + return RREG32_SOC15(GC, 0, regSQ_IND_DATA); +} + +static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, + uint32_t wave, uint32_t thread, + uint32_t regno, uint32_t num, uint32_t *out) +{ + WREG32_SOC15_RLC(GC, 0, regSQ_IND_INDEX, + (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | + (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | + (regno << SQ_IND_INDEX__INDEX__SHIFT) | + (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | + (SQ_IND_INDEX__FORCE_READ_MASK) | + (SQ_IND_INDEX__AUTO_INCR_MASK)); + while (num--) + *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA); +} + +static void gfx_v9_4_3_read_wave_data(struct amdgpu_device *adev, + uint32_t simd, uint32_t wave, + uint32_t *dst, int *no_fields) +{ + /* type 1 wave data */ + dst[(*no_fields)++] = 1; + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); + dst[(*no_fields)++] 
= wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); +} + +static void gfx_v9_4_3_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, + uint32_t wave, uint32_t start, + uint32_t size, uint32_t *dst) +{ + wave_read_regs(adev, simd, wave, 0, + start + SQIND_WAVE_SGPRS_OFFSET, size, dst); +} + +static void gfx_v9_4_3_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, + uint32_t wave, uint32_t thread, + uint32_t start, uint32_t size, + uint32_t *dst) +{ + wave_read_regs(adev, simd, wave, thread, + start + SQIND_WAVE_VGPRS_OFFSET, size, dst); +} + +static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev, + u32 me, u32 pipe, u32 q, u32 vm) +{ + soc15_grbm_select(adev, me, pipe, q, vm); +} + static bool gfx_v9_4_3_is_rlc_enabled(struct amdgpu_device *adev) { uint32_t rlc_setting; @@ -80,35 +196,6 @@ static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev) return 0; } -static void gfx_v9_4_3_select_se_sh(struct amdgpu_device *adev, - u32 se_num, - u32 sh_num, - u32 instance) -{ - u32 data; - - if (instance == 0xffffffff) - data = REG_SET_FIELD(0, GRBM_GFX_INDEX, - INSTANCE_BROADCAST_WRITES, 1); - else - data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, - instance); - - if (se_num == 0xffffffff) - data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, - 1); - else - data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); - - if (sh_num == 0xffffffff) - data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, - 1); - else - data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, 
sh_num); - - WREG32_SOC15_RLC_SHADOW_EX(reg, GC, 0, regGRBM_GFX_INDEX, data); -} - static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev) { u32 i, j, k; @@ -320,6 +407,15 @@ static bool gfx_v9_4_3_is_rlcg_access_range(struct amdgpu_device *adev, u32 offs ARRAY_SIZE(rlcg_access_gc_9_4_3)); } +const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { + .get_gpu_clock_counter = &gfx_v9_4_3_get_gpu_clock_counter, + .select_se_sh = &gfx_v9_4_3_select_se_sh, + .read_wave_data = &gfx_v9_4_3_read_wave_data, + .read_wave_sgprs = &gfx_v9_4_3_read_wave_sgprs, + .read_wave_vgprs = &gfx_v9_4_3_read_wave_vgprs, + .select_me_pipe_q = &gfx_v9_4_3_select_me_pipe_q, +}; + const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs = { .is_rlc_enabled = gfx_v9_4_3_is_rlc_enabled, .set_safe_mode = gfx_v9_4_3_set_safe_mode, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h index d21ea9ebf146..84e69701b81a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h @@ -24,6 +24,7 @@ #ifndef __GFX_V9_4_3_H__ #define __GFX_V9_4_3_H__ +extern const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs; extern const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs; #endif /* __GFX_V9_4_3_H__ */ -- cgit v1.2.3 From 6a929fea7f80fc968f26baceecfdb5129d159c98 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Fri, 26 Nov 2021 17:20:32 +0800 Subject: drm/amdgpu: add common early init support for GC 9.4.3 init asic funcs and cp/pg flags for GC 9.4.3 Signed-off-by: Hawking Zhang Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 0367a97c606b..bc5dd80f10c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1100,6 +1100,11 @@ static int soc15_common_early_init(void *handle) adev->pg_flags = 
AMD_PG_SUPPORT_VCN_DPG; adev->external_rev_id = adev->rev_id + 0x3c; break; + case IP_VERSION(9, 4, 3): + adev->asic_funcs = &vega20_asic_funcs; + adev->cg_flags = 0; + adev->pg_flags = 0; + break; default: /* FIXME: not supported yet */ return -EINVAL; -- cgit v1.2.3 From 88c7ad91e378775a08f54b4a85068d51b5cf52f3 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Fri, 14 Apr 2023 01:20:18 +0530 Subject: drm/amd/display: Add logging when DP link training Clock recovery is Successful Log when Clock recovery is successful, as part of DP link training process. Cc: Aurabindo Pillai Cc: Fangzhi Zuo Signed-off-by: Srinivasan Shanmugam Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c | 4 +++- drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c index 14b98e096d39..3889ebb2256b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c @@ -225,8 +225,10 @@ enum link_training_result perform_8b_10b_clock_recovery_sequence( offset); /* 5. check CR done*/ - if (dp_is_cr_done(lane_count, dpcd_lane_status)) + if (dp_is_cr_done(lane_count, dpcd_lane_status)) { + DC_LOG_HW_LINK_TRAINING("%s: Clock recovery OK\n", __func__); return LINK_TRAINING_SUCCESS; + } /* 6. 
max VS reached*/ if ((link_dp_get_encoding_format(<_settings->link_settings) == diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c index ab4aafdb5e5c..4f4e899e5c46 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c @@ -401,6 +401,7 @@ static enum link_training_result dpia_training_cr_non_transparent( /* Check if clock recovery successful. */ if (dp_is_cr_done(lane_count, dpcd_lane_status)) { + DC_LOG_HW_LINK_TRAINING("%s: Clock recovery OK\n", __func__); result = LINK_TRAINING_SUCCESS; break; } @@ -508,6 +509,7 @@ static enum link_training_result dpia_training_cr_transparent( /* Check if clock recovery successful. */ if (dp_is_cr_done(lane_count, dpcd_lane_status)) { + DC_LOG_HW_LINK_TRAINING("%s: Clock recovery OK\n", __func__); result = LINK_TRAINING_SUCCESS; break; } -- cgit v1.2.3 From 5e5d4b39ce2098a1d09064eb8b4e6b6b9a0cbd57 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Mon, 3 Oct 2022 15:37:47 -0400 Subject: drm/amdgpu: add common ip block for GC 9.4.3 Add common IP handling for GC 9.4.3 Signed-off-by: Hawking Zhang Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 5139334925ea..0ecce0b92b82 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1502,6 +1502,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(9, 4, 0): case IP_VERSION(9, 4, 1): case IP_VERSION(9, 4, 2): + case IP_VERSION(9, 4, 3): amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); break; case IP_VERSION(10, 1, 10): -- cgit v1.2.3 From 
b805d8d785e49cb3ee9279dad1402d5dcf902166 Mon Sep 17 00:00:00 2001 From: Jane Jian Date: Fri, 14 Apr 2023 11:33:19 +0800 Subject: Revert "drm/amdgpu: enable ras for mp0 v13_0_10 on SRIOV" This reverts commit fe120b9f5ce873516a2604e4ff0c19084be94e8c. This patch impacts sriov multi-vf stability Signed-off-by: Jane Jian Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index ad5d456918f4..3ab8a88789c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2341,7 +2341,6 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) { switch (adev->ip_versions[MP0_HWIP][0]) { case IP_VERSION(13, 0, 2): - case IP_VERSION(13, 0, 10): return true; default: return false; -- cgit v1.2.3 From 0530553ba842884737a689ae5fac11154dcf3122 Mon Sep 17 00:00:00 2001 From: Le Ma Date: Thu, 19 May 2022 19:49:59 +0800 Subject: drm/amdgpu: move vmhub out of amdgpu_ring_funcs (v4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It looks better to place this field in ring structure. Also drop the repeated ring funcs definitions if there's no difference except for vmhub field. 
v2: rename the field to vm_hub like others (Le) v3: apply the changes to new ip blocks (Hawking) v4: fix vcn sw ring (Alex) Signed-off-by: Le Ma Reviewed-by: Christian König Reviewed-by: Hawking Zhang Signed-off-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 8 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 +- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 5 +- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 5 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 7 +-- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 +-- drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c | 4 +- drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 4 +- drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 6 +- drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 99 +++++-------------------------- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 4 +- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 4 +- drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 8 +-- drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 4 +- drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.c | 2 +- drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 8 +-- drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 8 +-- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 84 ++++---------------------- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 5 +- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 3 +- 31 files changed, 93 insertions(+), 220 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index c50d59855011..9d3a0542c996 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -305,6 +305,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, ring->ring_obj = NULL; ring->use_doorbell = true; ring->doorbell_index = adev->doorbell_index.kiq; + ring->vm_hub = AMDGPU_GFXHUB_0; r = amdgpu_gfx_kiq_acquire(adev, ring); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 9b0ccb1b84c6..4e2531758866 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -554,7 +554,7 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) for (i = 0; i < adev->num_rings; ++i) { ring = adev->rings[i]; - vmhub = ring->funcs->vmhub; + vmhub = ring->vm_hub; if (ring == &adev->mes.ring) continue; @@ -570,7 +570,7 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng); dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n", - ring->name, ring->vm_inv_eng, ring->funcs->vmhub); + ring->name, ring->vm_inv_eng, ring->vm_hub); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index df7eb0b7c4b9..4ff348e10e4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -267,7 +267,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (r) { dev_err(adev->dev, "failed to emit fence (%d)\n", r); if (job && job->vmid) - amdgpu_vmid_reset(adev, ring->funcs->vmhub, job->vmid); + amdgpu_vmid_reset(adev, ring->vm_hub, job->vmid); amdgpu_ring_undo(ring); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 3f07b1a2ce47..c991ca0b7a1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -202,7 +202,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, struct dma_fence **fence) { struct 
amdgpu_device *adev = ring->adev; - unsigned vmhub = ring->funcs->vmhub; + unsigned vmhub = ring->vm_hub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; struct dma_fence **fences; unsigned i; @@ -277,7 +277,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; - unsigned vmhub = ring->funcs->vmhub; + unsigned vmhub = ring->vm_hub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; uint64_t fence_context = adev->fence_context + ring->idx; bool needs_flush = vm->use_cpu_for_update; @@ -338,7 +338,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; - unsigned vmhub = ring->funcs->vmhub; + unsigned vmhub = ring->vm_hub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; uint64_t fence_context = adev->fence_context + ring->idx; uint64_t updates = amdgpu_vm_tlb_seq(vm); @@ -398,7 +398,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, struct amdgpu_job *job, struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; - unsigned vmhub = ring->funcs->vmhub; + unsigned vmhub = ring->vm_hub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; struct amdgpu_vmid *idle = NULL; struct amdgpu_vmid *id = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 018f36b10de8..ffa9cd55bf7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -165,7 +165,6 @@ struct amdgpu_ring_funcs { bool support_64bit_ptrs; bool no_user_fence; bool secure_submission_supported; - unsigned vmhub; unsigned extra_dw; /* ring read/write ptr handling */ @@ -275,6 +274,7 @@ struct amdgpu_ring { unsigned cond_exe_offs; u64 cond_exe_gpu_addr; volatile u32 *cond_exe_cpu_addr; + unsigned vm_hub; unsigned vm_inv_eng; struct dma_fence *vmid_wait; bool 
has_compute_vm_bug; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 98d91ebf5c26..525dffbe046a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -233,7 +233,7 @@ TRACE_EVENT(amdgpu_vm_grab_id, __entry->pasid = vm->pasid; __assign_str(ring, ring->name); __entry->vmid = job->vmid; - __entry->vm_hub = ring->funcs->vmhub, + __entry->vm_hub = ring->vm_hub, __entry->pd_addr = job->vm_pd_addr; __entry->needs_flush = job->vm_needs_flush; ), @@ -427,7 +427,7 @@ TRACE_EVENT(amdgpu_vm_flush, TP_fast_assign( __assign_str(ring, ring->name); __entry->vmid = vmid; - __entry->vm_hub = ring->funcs->vmhub; + __entry->vm_hub = ring->vm_hub; __entry->pd_addr = pd_addr; ), TP_printk("ring=%s, id=%u, hub=%u, pd_addr=%010Lx", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 286e326bb4bd..3c0310576b3b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -483,7 +483,7 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, struct amdgpu_job *job) { struct amdgpu_device *adev = ring->adev; - unsigned vmhub = ring->funcs->vmhub; + unsigned vmhub = ring->vm_hub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; if (job->vmid == 0) @@ -517,7 +517,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync) { struct amdgpu_device *adev = ring->adev; - unsigned vmhub = ring->funcs->vmhub; + unsigned vmhub = ring->vm_hub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; struct amdgpu_vmid *id = &id_mgr->ids[job->vmid]; bool spm_update_needed = job->spm_update_needed; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 516409989235..f5b5ce1051a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4461,6 +4461,7 @@ static 
int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; else ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1; + ring->vm_hub = AMDGPU_GFXHUB_0; sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; @@ -4489,6 +4490,7 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX10_MEC_HPD_SIZE); + ring->vm_hub = AMDGPU_GFXHUB_0; sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP @@ -9249,7 +9251,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, .secure_submission_supported = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v10_0_ring_get_rptr_gfx, .get_wptr = gfx_v10_0_ring_get_wptr_gfx, .set_wptr = gfx_v10_0_ring_set_wptr_gfx, @@ -9304,7 +9305,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v10_0_ring_get_rptr_compute, .get_wptr = gfx_v10_0_ring_get_wptr_compute, .set_wptr = gfx_v10_0_ring_set_wptr_compute, @@ -9340,7 +9340,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v10_0_ring_get_rptr_compute, .get_wptr = gfx_v10_0_ring_get_wptr_compute, .set_wptr = gfx_v10_0_ring_set_wptr_compute, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 107c487c0c37..a9da0486467a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -866,6 +866,7 @@ static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; else ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1; + ring->vm_hub = AMDGPU_GFXHUB_0; sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; @@ -896,6 +897,7 @@ static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX11_MEC_HPD_SIZE); + ring->vm_hub = AMDGPU_GFXHUB_0; sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP @@ -6204,7 +6206,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, .secure_submission_supported = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v11_0_ring_get_rptr_gfx, .get_wptr = gfx_v11_0_ring_get_wptr_gfx, .set_wptr = gfx_v11_0_ring_set_wptr_gfx, @@ -6252,7 +6253,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v11_0_ring_get_rptr_compute, .get_wptr = gfx_v11_0_ring_get_wptr_compute, .set_wptr = gfx_v11_0_ring_set_wptr_compute, @@ -6288,7 +6288,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v11_0_ring_get_rptr_compute, .get_wptr = gfx_v11_0_ring_get_wptr_compute, .set_wptr = gfx_v11_0_ring_set_wptr_compute, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index c99d9e642e51..adbcd8127c82 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2005,6 +2005,7 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX9_MEC_HPD_SIZE); + ring->vm_hub = AMDGPU_GFXHUB_0; sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP @@ -2104,6 +2105,7 @@ static int gfx_v9_0_sw_init(void *handle) /* disable scheduler on the real ring */ ring->no_scheduler = true; + ring->vm_hub = AMDGPU_GFXHUB_0; r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -2121,6 +2123,7 @@ static int gfx_v9_0_sw_init(void *handle) ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; ring->is_sw_ring = true; hw_prio = amdgpu_sw_ring_priority(i); + ring->vm_hub = AMDGPU_GFXHUB_0; r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio, NULL); @@ -6790,7 +6793,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, .secure_submission_supported = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v9_0_ring_get_rptr_gfx, .get_wptr = gfx_v9_0_ring_get_wptr_gfx, .set_wptr = gfx_v9_0_ring_set_wptr_gfx, @@ -6844,7 +6846,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, .secure_submission_supported = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = amdgpu_sw_ring_get_rptr_gfx, .get_wptr = amdgpu_sw_ring_get_wptr_gfx, .set_wptr = amdgpu_sw_ring_set_wptr_gfx, @@ -6898,7 +6899,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = 
AMDGPU_GFXHUB_0, .get_rptr = gfx_v9_0_ring_get_rptr_compute, .get_wptr = gfx_v9_0_ring_get_wptr_compute, .set_wptr = gfx_v9_0_ring_set_wptr_compute, @@ -6937,7 +6937,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v9_0_ring_get_rptr_compute, .get_wptr = gfx_v9_0_ring_get_wptr_compute, .set_wptr = gfx_v9_0_ring_set_wptr_compute, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index d99821692ba3..7d6f4a68f416 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -479,8 +479,8 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub); - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(ring->adev, ring->vm_hub); + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0); unsigned eng = ring->vm_inv_eng; @@ -534,7 +534,7 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid if (ring->is_mes_queue) return; - if (ring->funcs->vmhub == AMDGPU_GFXHUB_0) + if (ring->vm_hub == AMDGPU_GFXHUB_0) reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; else reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 9f4f28192c60..d809f2ed5600 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -378,8 +378,8 @@ static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, static uint64_t 
gmc_v11_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - bool use_semaphore = gmc_v11_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub); - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + bool use_semaphore = gmc_v11_0_use_invalidate_semaphore(ring->adev, ring->vm_hub); + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0); unsigned eng = ring->vm_inv_eng; @@ -433,7 +433,7 @@ static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid if (ring->is_mes_queue) return; - if (ring->funcs->vmhub == AMDGPU_GFXHUB_0) + if (ring->vm_hub == AMDGPU_GFXHUB_0) reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid; else reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 7ec70666c648..64ab1a306dfe 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1007,9 +1007,9 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub); + bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->vm_hub); struct amdgpu_device *adev = ring->adev; - struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub]; + struct amdgpu_vmhub *hub = &adev->vmhub[ring->vm_hub]; uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0); unsigned eng = ring->vm_inv_eng; @@ -1060,10 +1060,10 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, uint32_t reg; /* Do nothing because there's no lut register for mmhub1. 
*/ - if (ring->funcs->vmhub == AMDGPU_MMHUB_1) + if (ring->vm_hub == AMDGPU_MMHUB_1) return; - if (ring->funcs->vmhub == AMDGPU_GFXHUB_0) + if (ring->vm_hub == AMDGPU_GFXHUB_0) reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; else reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c index 9360204da7fb..a3076eb8af6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c @@ -376,7 +376,7 @@ static void jpeg_v1_0_decode_ring_emit_reg_wait(struct amdgpu_ring *ring, static void jpeg_v1_0_decode_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; uint32_t data0, data1, mask; pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); @@ -485,6 +485,7 @@ int jpeg_v1_0_sw_init(void *handle) return r; ring = &adev->jpeg.inst->ring_dec; + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, "jpeg_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -548,7 +549,6 @@ static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = { .nop = PACKET0(0x81ff, 0), .support_64bit_ptrs = false, .no_user_fence = true, - .vmhub = AMDGPU_MMHUB_0, .extra_dw = 64, .get_rptr = jpeg_v1_0_decode_ring_get_rptr, .get_wptr = jpeg_v1_0_decode_ring_get_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index f3c1af5130ab..0eddf7c824a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -86,6 +86,7 @@ static int jpeg_v2_0_sw_init(void *handle) ring = &adev->jpeg.inst->ring_dec; ring->use_doorbell = true; ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, 
"jpeg_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -613,7 +614,7 @@ void jpeg_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; uint32_t data0, data1, mask; pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); @@ -762,7 +763,6 @@ static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = jpeg_v2_0_dec_ring_get_rptr, .get_wptr = jpeg_v2_0_dec_ring_get_wptr, .set_wptr = jpeg_v2_0_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 6b1887808782..b040f51d9aa9 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -127,6 +127,10 @@ static int jpeg_v2_5_sw_init(void *handle) ring = &adev->jpeg.inst[i].ring_dec; ring->use_doorbell = true; + if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0)) + ring->vm_hub = AMDGPU_MMHUB_1; + else + ring->vm_hub = AMDGPU_MMHUB_0; ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8 * i; sprintf(ring->name, "jpeg_dec_%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst[i].irq, @@ -645,7 +649,6 @@ static const struct amd_ip_funcs jpeg_v2_6_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, - .vmhub = AMDGPU_MMHUB_1, .get_rptr = jpeg_v2_5_dec_ring_get_rptr, .get_wptr = jpeg_v2_5_dec_ring_get_wptr, .set_wptr = jpeg_v2_5_dec_ring_set_wptr, @@ -675,7 +678,6 @@ static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = { static const 
struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = jpeg_v2_5_dec_ring_get_rptr, .get_wptr = jpeg_v2_5_dec_ring_get_wptr, .set_wptr = jpeg_v2_5_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index a1b751d9ac06..c55e09432e26 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -100,6 +100,7 @@ static int jpeg_v3_0_sw_init(void *handle) ring = &adev->jpeg.inst->ring_dec; ring->use_doorbell = true; ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, "jpeg_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -559,7 +560,6 @@ static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = jpeg_v3_0_dec_ring_get_rptr, .get_wptr = jpeg_v3_0_dec_ring_get_wptr, .set_wptr = jpeg_v3_0_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index 5f2a034b9ec0..a6ad678fd507 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -108,6 +108,7 @@ static int jpeg_v4_0_sw_init(void *handle) ring = &adev->jpeg.inst->ring_dec; ring->use_doorbell = true; ring->doorbell_index = amdgpu_sriov_vf(adev) ? 
(((adev->doorbell_index.vcn.vcn_ring0_1) << 1) + 4) : ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1); + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, "jpeg_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, @@ -715,7 +716,6 @@ static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = jpeg_v4_0_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 96b0c3d42346..b3cc04dd8653 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1823,6 +1823,15 @@ static int sdma_v4_0_sw_init(void *handle) /* doorbell size is 2 dwords, get DWORD offset */ ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1; + /* + * On Arcturus, SDMA instance 5~7 has a different vmhub + * type(AMDGPU_MMHUB_1). 
+ */ + if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) && i >= 5) + ring->vm_hub = AMDGPU_MMHUB_1; + else + ring->vm_hub = AMDGPU_MMHUB_0; + sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, AMDGPU_SDMA_IRQ_INSTANCE0 + i, @@ -1841,6 +1850,11 @@ static int sdma_v4_0_sw_init(void *handle) ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1; ring->doorbell_index += 0x400; + if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) && i >= 5) + ring->vm_hub = AMDGPU_MMHUB_1; + else + ring->vm_hub = AMDGPU_MMHUB_0; + sprintf(ring->name, "page%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, @@ -2294,44 +2308,6 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, .secure_submission_supported = true, - .vmhub = AMDGPU_MMHUB_0, - .get_rptr = sdma_v4_0_ring_get_rptr, - .get_wptr = sdma_v4_0_ring_get_wptr, - .set_wptr = sdma_v4_0_ring_set_wptr, - .emit_frame_size = - 6 + /* sdma_v4_0_ring_emit_hdp_flush */ - 3 + /* hdp invalidate */ - 6 + /* sdma_v4_0_ring_emit_pipeline_sync */ - /* sdma_v4_0_ring_emit_vm_flush */ - SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + - 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ - .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ - .emit_ib = sdma_v4_0_ring_emit_ib, - .emit_fence = sdma_v4_0_ring_emit_fence, - .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync, - .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush, - .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush, - .test_ring = sdma_v4_0_ring_test_ring, - .test_ib = sdma_v4_0_ring_test_ib, - .insert_nop = sdma_v4_0_ring_insert_nop, - .pad_ib = sdma_v4_0_ring_pad_ib, - .emit_wreg = sdma_v4_0_ring_emit_wreg, - .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, - .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, -}; - -/* - * On Arcturus, SDMA 
instance 5~7 has a different vmhub type(AMDGPU_MMHUB_1). - * So create a individual constant ring_funcs for those instances. - */ -static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs_2nd_mmhub = { - .type = AMDGPU_RING_TYPE_SDMA, - .align_mask = 0xf, - .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), - .support_64bit_ptrs = true, - .secure_submission_supported = true, - .vmhub = AMDGPU_MMHUB_1, .get_rptr = sdma_v4_0_ring_get_rptr, .get_wptr = sdma_v4_0_ring_get_wptr, .set_wptr = sdma_v4_0_ring_set_wptr, @@ -2364,40 +2340,6 @@ static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = { .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, .secure_submission_supported = true, - .vmhub = AMDGPU_MMHUB_0, - .get_rptr = sdma_v4_0_ring_get_rptr, - .get_wptr = sdma_v4_0_page_ring_get_wptr, - .set_wptr = sdma_v4_0_page_ring_set_wptr, - .emit_frame_size = - 6 + /* sdma_v4_0_ring_emit_hdp_flush */ - 3 + /* hdp invalidate */ - 6 + /* sdma_v4_0_ring_emit_pipeline_sync */ - /* sdma_v4_0_ring_emit_vm_flush */ - SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + - 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ - .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ - .emit_ib = sdma_v4_0_ring_emit_ib, - .emit_fence = sdma_v4_0_ring_emit_fence, - .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync, - .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush, - .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush, - .test_ring = sdma_v4_0_ring_test_ring, - .test_ib = sdma_v4_0_ring_test_ib, - .insert_nop = sdma_v4_0_ring_insert_nop, - .pad_ib = sdma_v4_0_ring_pad_ib, - .emit_wreg = sdma_v4_0_ring_emit_wreg, - .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, - .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, -}; - -static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs_2nd_mmhub = { - .type = AMDGPU_RING_TYPE_SDMA, - .align_mask = 0xf, - .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), - 
.support_64bit_ptrs = true, - .secure_submission_supported = true, - .vmhub = AMDGPU_MMHUB_1, .get_rptr = sdma_v4_0_ring_get_rptr, .get_wptr = sdma_v4_0_page_ring_get_wptr, .set_wptr = sdma_v4_0_page_ring_set_wptr, @@ -2429,19 +2371,10 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev) int i; for (i = 0; i < adev->sdma.num_instances; i++) { - if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) && i >= 5) - adev->sdma.instance[i].ring.funcs = - &sdma_v4_0_ring_funcs_2nd_mmhub; - else - adev->sdma.instance[i].ring.funcs = - &sdma_v4_0_ring_funcs; + adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs; adev->sdma.instance[i].ring.me = i; if (adev->sdma.has_page_queue) { - if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) && i >= 5) - adev->sdma.instance[i].page.funcs = - &sdma_v4_0_page_ring_funcs_2nd_mmhub; - else - adev->sdma.instance[i].page.funcs = + adev->sdma.instance[i].page.funcs = &sdma_v4_0_page_ring_funcs; adev->sdma.instance[i].page.me = i; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 1b04700a4d55..64dcaa2670dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -1309,6 +1309,7 @@ static int sdma_v4_4_2_sw_init(void *handle) /* doorbell size is 2 dwords, get DWORD offset */ ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1; + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, @@ -1327,6 +1328,7 @@ static int sdma_v4_4_2_sw_init(void *handle) */ ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1; ring->doorbell_index += 0x400; + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, "page%d", i); r = amdgpu_ring_init(adev, ring, 1024, @@ -1741,7 +1743,6 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = { .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, - 
.vmhub = AMDGPU_MMHUB_0, .get_rptr = sdma_v4_4_2_ring_get_rptr, .get_wptr = sdma_v4_4_2_ring_get_wptr, .set_wptr = sdma_v4_4_2_ring_set_wptr, @@ -1773,7 +1774,6 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = { .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = sdma_v4_4_2_ring_get_rptr, .get_wptr = sdma_v4_4_2_page_ring_get_wptr, .set_wptr = sdma_v4_4_2_page_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 1941b3b7c5d9..92e1299be021 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1389,6 +1389,7 @@ static int sdma_v5_0_sw_init(void *handle) (adev->doorbell_index.sdma_engine[0] << 1) //get DWORD offset : (adev->doorbell_index.sdma_engine[1] << 1); // get DWORD offset + ring->vm_hub = AMDGPU_GFXHUB_0; sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, (i == 0) ? 
AMDGPU_SDMA_IRQ_INSTANCE0 : @@ -1765,7 +1766,6 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, .secure_submission_supported = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = sdma_v5_0_ring_get_rptr, .get_wptr = sdma_v5_0_ring_get_wptr, .set_wptr = sdma_v5_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 8e445eb9dd49..ca7e8757d78e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1253,6 +1253,7 @@ static int sdma_v5_2_sw_init(void *handle) ring->doorbell_index = (adev->doorbell_index.sdma_engine[i] << 1); //get DWORD offset + ring->vm_hub = AMDGPU_GFXHUB_0; sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, AMDGPU_SDMA_IRQ_INSTANCE0 + i, @@ -1653,7 +1654,6 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = { .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, .secure_submission_supported = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = sdma_v5_2_ring_get_rptr, .get_wptr = sdma_v5_2_ring_get_wptr, .set_wptr = sdma_v5_2_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index fc0f14ed93d5..eb722830531f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1181,7 +1181,7 @@ static void sdma_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) static void sdma_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0); /* Update the PD address for this VMID. 
*/ @@ -1301,6 +1301,7 @@ static int sdma_v6_0_sw_init(void *handle) ring->doorbell_index = (adev->doorbell_index.sdma_engine[i] << 1); // get DWORD offset + ring->vm_hub = AMDGPU_GFXHUB_0; sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, @@ -1557,7 +1558,6 @@ static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = { .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, .secure_submission_supported = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = sdma_v6_0_ring_get_rptr, .get_wptr = sdma_v6_0_ring_get_wptr, .set_wptr = sdma_v6_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index e407be6cb63c..e32b656b3dab 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -444,6 +444,7 @@ static int uvd_v7_0_sw_init(void *handle) continue; if (!amdgpu_sriov_vf(adev)) { ring = &adev->uvd.inst[j].ring; + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, "uvd_%d", ring->me); r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0, @@ -454,6 +455,7 @@ static int uvd_v7_0_sw_init(void *handle) for (i = 0; i < adev->uvd.num_enc_rings; ++i) { ring = &adev->uvd.inst[j].ring_enc[i]; + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, "uvd_enc_%d.%d", ring->me, i); if (amdgpu_sriov_vf(adev)) { ring->use_doorbell = true; @@ -1397,7 +1399,7 @@ static void uvd_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; uint32_t data0, data1, mask; pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); @@ -1440,7 +1442,7 @@ static void uvd_v7_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int 
vmid, uint64_t pd_addr) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); @@ -1802,7 +1804,6 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { .align_mask = 0xf, .support_64bit_ptrs = false, .no_user_fence = true, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = uvd_v7_0_ring_get_rptr, .get_wptr = uvd_v7_0_ring_get_wptr, .set_wptr = uvd_v7_0_ring_set_wptr, @@ -1835,7 +1836,6 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { .nop = HEVC_ENC_CMD_NO_OP, .support_64bit_ptrs = false, .no_user_fence = true, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = uvd_v7_0_enc_ring_get_rptr, .get_wptr = uvd_v7_0_enc_ring_get_wptr, .set_wptr = uvd_v7_0_enc_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 66cd3d11aa4b..57b85bb6a1e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -466,6 +466,7 @@ static int vce_v4_0_sw_init(void *handle) enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i); ring = &adev->vce.ring[i]; + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, "vce%d", i); if (amdgpu_sriov_vf(adev)) { /* DOORBELL only works under SRIOV */ @@ -1021,7 +1022,7 @@ static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid, uint64_t pd_addr) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); @@ -1103,7 +1104,6 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { .nop = VCE_CMD_NO_OP, .support_64bit_ptrs = false, .no_user_fence = true, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = vce_v4_0_ring_get_rptr, .get_wptr = vce_v4_0_ring_get_wptr, 
.set_wptr = vce_v4_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.c b/drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.c index 1ceda3d0cd5b..2b9ddb3d2fe1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.c @@ -65,7 +65,7 @@ void vcn_dec_sw_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, void vcn_dec_sw_ring_emit_vm_flush(struct amdgpu_ring *ring, uint32_t vmid, uint64_t pd_addr) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; uint32_t data0, data1, mask; pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index c305b2cb8490..761c28fa6ec1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -120,6 +120,7 @@ static int vcn_v1_0_sw_init(void *handle) return r; ring = &adev->vcn.inst->ring_dec; + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, "vcn_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -141,6 +142,7 @@ static int vcn_v1_0_sw_init(void *handle) enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(i); ring = &adev->vcn.inst->ring_enc[i]; + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, "vcn_enc%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, hw_prio, NULL); @@ -1548,7 +1550,7 @@ static void vcn_v1_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; uint32_t data0, data1, mask; pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); @@ -1693,7 +1695,7 @@ static void vcn_v1_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, 
static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid, uint64_t pd_addr) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); @@ -1977,7 +1979,6 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .support_64bit_ptrs = false, .no_user_fence = true, .secure_submission_supported = true, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v1_0_dec_ring_get_rptr, .get_wptr = vcn_v1_0_dec_ring_get_wptr, .set_wptr = vcn_v1_0_dec_ring_set_wptr, @@ -2012,7 +2013,6 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = { .nop = VCN_ENC_CMD_NO_OP, .support_64bit_ptrs = false, .no_user_fence = true, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v1_0_enc_ring_get_rptr, .get_wptr = vcn_v1_0_enc_ring_get_wptr, .set_wptr = vcn_v1_0_enc_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 4b4cd88414e0..7c2b3aa48083 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -129,6 +129,7 @@ static int vcn_v2_0_sw_init(void *handle) ring->use_doorbell = true; ring->doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 << 1; + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, "vcn_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, @@ -159,6 +160,7 @@ static int vcn_v2_0_sw_init(void *handle) ring = &adev->vcn.inst->ring_enc[i]; ring->use_doorbell = true; + ring->vm_hub = AMDGPU_MMHUB_0; if (!amdgpu_sriov_vf(adev)) ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i; else @@ -1511,7 +1513,7 @@ void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + struct 
amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; uint32_t data0, data1, mask; pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); @@ -1671,7 +1673,7 @@ void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid, uint64_t pd_addr) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); @@ -2014,7 +2016,6 @@ static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, .secure_submission_supported = true, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v2_0_dec_ring_get_rptr, .get_wptr = vcn_v2_0_dec_ring_get_wptr, .set_wptr = vcn_v2_0_dec_ring_set_wptr, @@ -2045,7 +2046,6 @@ static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v2_0_enc_ring_get_rptr, .get_wptr = vcn_v2_0_enc_ring_get_wptr, .set_wptr = vcn_v2_0_enc_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 223e7dfe4618..ab0b45d0ead1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -186,6 +186,12 @@ static int vcn_v2_5_sw_init(void *handle) ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + (amdgpu_sriov_vf(adev) ? 
2*j : 8*j); + + if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0)) + ring->vm_hub = AMDGPU_MMHUB_1; + else + ring->vm_hub = AMDGPU_MMHUB_0; + sprintf(ring->name, "vcn_dec_%d", j); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -201,6 +207,11 @@ static int vcn_v2_5_sw_init(void *handle) ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + (amdgpu_sriov_vf(adev) ? (1 + i + 2*j) : (2 + i + 8*j)); + if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0)) + ring->vm_hub = AMDGPU_MMHUB_1; + else + ring->vm_hub = AMDGPU_MMHUB_0; + sprintf(ring->name, "vcn_enc_%d.%d", j, i); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0, @@ -1562,38 +1573,6 @@ static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, .secure_submission_supported = true, - .vmhub = AMDGPU_MMHUB_1, - .get_rptr = vcn_v2_5_dec_ring_get_rptr, - .get_wptr = vcn_v2_5_dec_ring_get_wptr, - .set_wptr = vcn_v2_5_dec_ring_set_wptr, - .emit_frame_size = - SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + - 8 + /* vcn_v2_0_dec_ring_emit_vm_flush */ - 14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */ - 6, - .emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */ - .emit_ib = vcn_v2_0_dec_ring_emit_ib, - .emit_fence = vcn_v2_0_dec_ring_emit_fence, - .emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush, - .test_ring = vcn_v2_0_dec_ring_test_ring, - .test_ib = amdgpu_vcn_dec_ring_test_ib, - .insert_nop = vcn_v2_0_dec_ring_insert_nop, - .insert_start = vcn_v2_0_dec_ring_insert_start, - .insert_end = vcn_v2_0_dec_ring_insert_end, - .pad_ib = amdgpu_ring_generic_pad_ib, - .begin_use = amdgpu_vcn_ring_begin_use, - .end_use = amdgpu_vcn_ring_end_use, - .emit_wreg = vcn_v2_0_dec_ring_emit_wreg, - .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait, - .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, -}; - -static const struct 
amdgpu_ring_funcs vcn_v2_6_dec_ring_vm_funcs = { - .type = AMDGPU_RING_TYPE_VCN_DEC, - .align_mask = 0xf, - .secure_submission_supported = true, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v2_5_dec_ring_get_rptr, .get_wptr = vcn_v2_5_dec_ring_get_wptr, .set_wptr = vcn_v2_5_dec_ring_set_wptr, @@ -1693,7 +1672,6 @@ static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, - .vmhub = AMDGPU_MMHUB_1, .get_rptr = vcn_v2_5_enc_ring_get_rptr, .get_wptr = vcn_v2_5_enc_ring_get_wptr, .set_wptr = vcn_v2_5_enc_ring_set_wptr, @@ -1719,36 +1697,6 @@ static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = { .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; -static const struct amdgpu_ring_funcs vcn_v2_6_enc_ring_vm_funcs = { - .type = AMDGPU_RING_TYPE_VCN_ENC, - .align_mask = 0x3f, - .nop = VCN_ENC_CMD_NO_OP, - .vmhub = AMDGPU_MMHUB_0, - .get_rptr = vcn_v2_5_enc_ring_get_rptr, - .get_wptr = vcn_v2_5_enc_ring_get_wptr, - .set_wptr = vcn_v2_5_enc_ring_set_wptr, - .emit_frame_size = - SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + - 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */ - 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */ - 1, /* vcn_v2_0_enc_ring_insert_end */ - .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ - .emit_ib = vcn_v2_0_enc_ring_emit_ib, - .emit_fence = vcn_v2_0_enc_ring_emit_fence, - .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, - .test_ring = amdgpu_vcn_enc_ring_test_ring, - .test_ib = amdgpu_vcn_enc_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, - .insert_end = vcn_v2_0_enc_ring_insert_end, - .pad_ib = amdgpu_ring_generic_pad_ib, - .begin_use = amdgpu_vcn_ring_begin_use, - .end_use = amdgpu_vcn_ring_end_use, - .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, - .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, - .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, -}; - static void 
vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev) { int i; @@ -1756,10 +1704,7 @@ static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev) for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; - if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0)) - adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_5_dec_ring_vm_funcs; - else /* CHIP_ALDEBARAN */ - adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_6_dec_ring_vm_funcs; + adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_5_dec_ring_vm_funcs; adev->vcn.inst[i].ring_dec.me = i; DRM_INFO("VCN(%d) decode is enabled in VM mode\n", i); } @@ -1773,10 +1718,7 @@ static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev) if (adev->vcn.harvest_config & (1 << j)) continue; for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0)) - adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs; - else /* CHIP_ALDEBARAN */ - adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_6_enc_ring_vm_funcs; + adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs; adev->vcn.inst[j].ring_enc[i].me = j; } DRM_INFO("VCN(%d) encode is enabled in VM mode\n", j); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 66439388faee..3eab186261aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -189,6 +189,7 @@ static int vcn_v3_0_sw_init(void *handle) } else { ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i; } + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, "vcn_dec_%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, AMDGPU_RING_PRIO_DEFAULT, @@ -212,6 +213,7 @@ static int vcn_v3_0_sw_init(void *handle) } else { ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i; } + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, "vcn_enc_%d.%d", i, j); r = 
amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, hw_prio, &adev->vcn.inst[i].sched_score); @@ -1738,7 +1740,6 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = { .align_mask = 0x3f, .nop = VCN_DEC_SW_CMD_NO_OP, .secure_submission_supported = true, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v3_0_dec_ring_get_rptr, .get_wptr = vcn_v3_0_dec_ring_get_wptr, .set_wptr = vcn_v3_0_dec_ring_set_wptr, @@ -1899,7 +1900,6 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, .secure_submission_supported = true, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v3_0_dec_ring_get_rptr, .get_wptr = vcn_v3_0_dec_ring_get_wptr, .set_wptr = vcn_v3_0_dec_ring_set_wptr, @@ -2000,7 +2000,6 @@ static const struct amdgpu_ring_funcs vcn_v3_0_enc_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v3_0_enc_ring_get_rptr, .get_wptr = vcn_v3_0_enc_ring_get_wptr, .set_wptr = vcn_v3_0_enc_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 720ab36f9c92..bf0674039598 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -149,7 +149,7 @@ static int vcn_v4_0_sw_init(void *handle) ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + i * (adev->vcn.num_enc_rings + 1) + 1; else ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; - + ring->vm_hub = AMDGPU_MMHUB_0; sprintf(ring->name, "vcn_unified_%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, @@ -1798,7 +1798,6 @@ static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, - .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v4_0_unified_ring_get_rptr, .get_wptr = vcn_v4_0_unified_ring_get_wptr, .set_wptr = 
vcn_v4_0_unified_ring_set_wptr, -- cgit v1.2.3 From 541372bb62f289f4402cf55be51fb9cec7373627 Mon Sep 17 00:00:00 2001 From: Le Ma Date: Tue, 16 Nov 2021 21:42:28 +0800 Subject: drm/amdgpu: add some basic elements for multiple XCD case Add some basic definitions and structure member. Increase MAX_WB slots to 1024 to support the increasing number of rings for multiple partitions. v2: unify naming style Signed-off-by: Le Ma Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 17 ++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 + 3 files changed, 18 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index a831e2b428e3..02b827785e39 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -470,7 +470,7 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv); /* * Writeback */ -#define AMDGPU_MAX_WB 256 /* Reserve at most 256 WB slots for amdgpu-owned rings. */ +#define AMDGPU_MAX_WB 1024 /* Reserve at most 1024 WB slots for amdgpu-owned rings. 
*/ struct amdgpu_wb { struct amdgpu_bo *wb_obj; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index de9e7a00bb15..bfabea76d166 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -42,6 +42,8 @@ #define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L #define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L +#define AMDGPU_MAX_GC_INSTANCES 8 + #define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES #define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES @@ -53,6 +55,15 @@ enum amdgpu_gfx_pipe_priority { #define AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM 0 #define AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM 15 +enum amdgpu_gfx_partition { + AMDGPU_SPX_PARTITION_MODE = 0, + AMDGPU_DPX_PARTITION_MODE = 1, + AMDGPU_TPX_PARTITION_MODE = 2, + AMDGPU_QPX_PARTITION_MODE = 3, + AMDGPU_CPX_PARTITION_MODE = 4, + AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE, +}; + struct amdgpu_mec { struct amdgpu_bo *hpd_eop_obj; u64 hpd_eop_gpu_addr; @@ -323,7 +334,7 @@ struct amdgpu_gfx { bool cp_fw_write_wait; struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS]; unsigned num_gfx_rings; - struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS]; + struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES]; unsigned num_compute_rings; struct amdgpu_irq_src eop_irq; struct amdgpu_irq_src priv_reg_irq; @@ -364,6 +375,10 @@ struct amdgpu_gfx { struct amdgpu_ring sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS]; struct amdgpu_ring_mux muxer; + + enum amdgpu_gfx_partition partition_mode; + uint32_t num_xcd; + uint32_t num_xcc_per_xcp; }; #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index ffa9cd55bf7e..d8749444b689 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -249,6 +249,7 @@ struct amdgpu_ring { uint64_t ptr_mask; uint32_t buf_mask; u32 idx; + 
u32 xcc_id; u32 me; u32 pipe; u32 queue; -- cgit v1.2.3