| author | Dave Airlie <airlied@redhat.com> | 2026-03-21 02:21:54 +1000 |
|---|---|---|
| committer | Dave Airlie <airlied@redhat.com> | 2026-03-21 02:22:03 +1000 |
| commit | b3970e97490abfb040305f14327d75e7568f31c4 (patch) | |
| tree | 4484c8a0e3337839fcca5555dd11c2cfa2b33939 | |
| parent | d93f8ea0e5ad41d661496d205fac3e2fbd9358c0 (diff) | |
| parent | daf9f9dd509113d6f49ce7c00d92bc17db58f215 (diff) | |
Merge tag 'amd-drm-next-7.1-2026-03-19' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-7.1-2026-03-19:
amdgpu:
- Fix gamma 2.2 colorop TFs
- BO list fix
- LTO fix
- DC FP fix
- DisplayID handling fix
- DCN 2.01 fix
- MMHUB boundary fixes
- ISP fix
- TLB fence fix
- Hainan pm fix
- UserQ fixes
- MES 12.1 updates
- GC 12.1 updates
- RAS fixes
- DML updates
- Cursor fixes
- SWSMU cleanups
- Misc cleanups
- Clean up duplicate format modifiers
- Devcoredump updates
- Cleanup mmhub cid handling
- Initial VCN 5.0.2 support
- Initial JPEG 5.0.2 support
- PSP 13.0.15 updates
amdkfd:
- Queue properties fix
- GC 12.1 updates
radeon:
- Hainan pm fix
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patch.msgid.link/20260319173334.479766-1-alexander.deucher@amd.com
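Two of the fixes listed above ("BO list fix" and the wait-fences change in amdgpu_cs.c) follow the same pattern: validate a userspace-supplied element count before copying the array into the kernel, rejecting both an oversized count (new AMDGPU_BO_LIST_MAX_ENTRIES cap) and, for the fence path, an empty one (dma_fence_wait_any_timeout() does not accept an empty fence array). A minimal userspace sketch of that shape, assuming illustrative names rather than the kernel helpers:

```c
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_ENTRIES (128 * 1024) /* cap in the spirit of AMDGPU_BO_LIST_MAX_ENTRIES */

/*
 * Userspace stand-in for the memdup_array_user() call sites: check the
 * element count before multiplying and allocating, the same shape as the
 * new checks in amdgpu_bo_create_list_entry_array() and
 * amdgpu_cs_wait_fences_ioctl().
 */
static void *dup_array_checked(const void *src, uint32_t count,
			       size_t elem_size, int *err)
{
	if (count == 0 || count > MAX_ENTRIES) {
		*err = EINVAL;	/* the new -EINVAL paths */
		return NULL;
	}
	void *dst = malloc((size_t)count * elem_size);
	if (!dst) {
		*err = ENOMEM;
		return NULL;
	}
	memcpy(dst, src, (size_t)count * elem_size);
	*err = 0;
	return dst;
}

int main(void)
{
	uint32_t handles[4] = { 1, 2, 3, 4 };
	int err;

	uint32_t *copy = dup_array_checked(handles, 4, sizeof(*handles), &err);
	printf("count=4 -> %s\n", copy ? "ok" : strerror(err));
	free(copy);

	copy = dup_array_checked(handles, 0, sizeof(*handles), &err);
	printf("count=0 -> %s\n", copy ? "ok" : strerror(err));
	return 0;
}
```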
126 files changed, 4556 insertions(+), 1389 deletions(-)
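The devcoredump rework in the diff below formats the dump once from a worker and then serves each read as a plain offset/length copy, because re-running drm_coredump_printer() for every read chunk is very slow. A minimal userspace sketch of that measure/render/read pattern (the function names here are illustrative, not the kernel API):

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

/*
 * First pass with buf == NULL only measures; second pass fills the buffer.
 * This mirrors "format once into coredump->formatted, then memcpy on read".
 */
static size_t format_dump(char *buf, size_t cap, const char *reason, int resets)
{
	return (size_t)snprintf(buf, buf ? cap : 0,
				"---- device coredump ----\n"
				"reason: %s\nresets: %d\n",
				reason, resets);
}

/* Read helper in the shape of amdgpu_devcoredump_read(): bounded copy at offset. */
static ssize_t dump_read(const char *dump, size_t dump_size,
			 char *out, size_t count, size_t offset)
{
	if (offset >= dump_size)
		return 0;
	size_t n = count < dump_size - offset ? count : dump_size - offset;
	memcpy(out, dump + offset, n);
	return (ssize_t)n;
}

int main(void)
{
	size_t size = format_dump(NULL, 0, "ring timeout", 1);	/* measure */
	char *dump = calloc(1, size + 1);
	if (!dump)
		return 1;
	format_dump(dump, size + 1, "ring timeout", 1);		/* render once */

	char chunk[16];
	ssize_t n;
	for (size_t off = 0;
	     (n = dump_read(dump, size, chunk, sizeof(chunk), off)) > 0;
	     off += (size_t)n)
		fwrite(chunk, 1, (size_t)n, stdout);

	free(dump);
	return 0;
}
```

In the kernel version this rendering additionally moves to a work item queued on system_unbound_wq, so the reset path only snapshots state and never pays the formatting cost itself.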
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 9c9c73b73ac8..6a7e9bfec59e 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -213,6 +213,7 @@ amdgpu-y += \ vcn_v4_0_5.o \ vcn_v5_0_0.o \ vcn_v5_0_1.o \ + vcn_v5_0_2.o \ amdgpu_jpeg.o \ jpeg_v1_0.o \ jpeg_v2_0.o \ @@ -223,6 +224,7 @@ amdgpu-y += \ jpeg_v4_0_5.o \ jpeg_v5_0_0.o \ jpeg_v5_0_1.o \ + jpeg_v5_0_2.o \ jpeg_v5_3_0.o # add VPE block diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 59731014a55a..892c90b8d063 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -327,6 +327,7 @@ struct kfd_vm_fault_info; struct amdgpu_hive_info; struct amdgpu_reset_context; struct amdgpu_reset_control; +struct amdgpu_coredump_info; enum amdgpu_cp_irq { AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP = 0, @@ -1147,6 +1148,11 @@ struct amdgpu_device { struct amdgpu_reset_domain *reset_domain; +#ifdef CONFIG_DEV_COREDUMP + struct amdgpu_coredump_info *coredump; + struct work_struct coredump_work; +#endif + struct mutex benchmark_mutex; bool scpm_enabled; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12_1.c index 965c7e688535..bcb180f9d3ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12_1.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12_1.c @@ -330,7 +330,7 @@ static uint32_t kgd_gfx_v12_1_set_address_watch(struct amdgpu_device *adev, watch_address_cntl = 0; watch_address_low = lower_32_bits(watch_address); - watch_address_high = upper_32_bits(watch_address) & 0xffff; + watch_address_high = upper_32_bits(watch_address) & 0x1ffffff; watch_address_cntl = REG_SET_FIELD(watch_address_cntl, TCP_WATCH0_CNTL, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 4662bfbe70b2..43864df8af04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -36,6 +36,7 @@ #define AMDGPU_BO_LIST_MAX_PRIORITY 32u #define AMDGPU_BO_LIST_NUM_BUCKETS (AMDGPU_BO_LIST_MAX_PRIORITY + 1) +#define AMDGPU_BO_LIST_MAX_ENTRIES (128 * 1024) static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu) { @@ -188,6 +189,9 @@ int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in, const uint32_t bo_number = in->bo_number; struct drm_amdgpu_bo_list_entry *info; + if (bo_number > AMDGPU_BO_LIST_MAX_ENTRIES) + return -EINVAL; + /* copy the handle array from userspace to a kernel buffer */ if (likely(info_size == bo_info_size)) { info = vmemdup_array_user(uptr, bo_number, info_size); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index aabe9d58c3dc..b04fa9fd90b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -1231,6 +1231,8 @@ static enum drm_mode_status amdgpu_connector_dvi_mode_valid(struct drm_connector case CONNECTOR_OBJECT_ID_HDMI_TYPE_B: max_digital_pixel_clock_khz = max_dvi_single_link_pixel_clock * 2; break; + default: + return MODE_BAD; } /* When the display EDID claims that it's an HDMI display, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 70ea9b0831a0..c048217615c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1740,6 +1740,13 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void 
*data, struct drm_amdgpu_fence *fences; int r; + /* + * fence_count must be non-zero; dma_fence_wait_any_timeout() + * does not accept an empty fence array. + */ + if (!wait->in.fence_count) + return -EINVAL; + /* Get the fences from userspace */ fences = memdup_array_user(u64_to_user_ptr(wait->in.fences), wait->in.fence_count, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c index 160f0704d1d3..2b54a67437c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c @@ -32,8 +32,13 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check, bool vram_lost, struct amdgpu_job *job) { } +void amdgpu_coredump_init(struct amdgpu_device *adev) +{ +} #else +#define AMDGPU_CORE_DUMP_SIZE_MAX (256 * 1024 * 1024) + const char *hw_ip_names[MAX_HWIP] = { [GC_HWIP] = "GC", [HDP_HWIP] = "HDP", @@ -196,11 +201,9 @@ static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev, } static ssize_t -amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, - void *data, size_t datalen) +amdgpu_devcoredump_format(char *buffer, size_t count, struct amdgpu_coredump_info *coredump) { struct drm_printer p; - struct amdgpu_coredump_info *coredump = data; struct drm_print_iterator iter; struct amdgpu_vm_fault_info *fault_info; struct amdgpu_ip_block *ip_block; @@ -208,7 +211,6 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, iter.data = buffer; iter.offset = 0; - iter.start = offset; iter.remain = count; p = drm_coredump_printer(&iter); @@ -322,9 +324,63 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, return count - iter.remain; } +static ssize_t +amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, + void *data, size_t datalen) +{ + struct amdgpu_coredump_info *coredump = data; + ssize_t byte_copied; + + if (!coredump) + return -ENODEV; + + if (!coredump->formatted) + return -ENODEV; + + if (offset >= coredump->formatted_size) + return 0; + + byte_copied = count < coredump->formatted_size - offset ? count : + coredump->formatted_size - offset; + memcpy(buffer, coredump->formatted + offset, byte_copied); + + return byte_copied; +} + static void amdgpu_devcoredump_free(void *data) { - kfree(data); + struct amdgpu_coredump_info *coredump = data; + + kvfree(coredump->formatted); + kvfree(data); +} + +static void amdgpu_devcoredump_deferred_work(struct work_struct *work) +{ + struct amdgpu_device *adev = container_of(work, typeof(*adev), coredump_work); + struct amdgpu_coredump_info *coredump = adev->coredump; + + /* Do a one-time preparation of the coredump output because + * repeatingly calling drm_coredump_printer is very slow. + */ + coredump->formatted_size = amdgpu_devcoredump_format( + NULL, AMDGPU_CORE_DUMP_SIZE_MAX, coredump); + coredump->formatted = kvzalloc(coredump->formatted_size, GFP_KERNEL); + if (!coredump->formatted) { + amdgpu_devcoredump_free(coredump); + goto end; + } + + amdgpu_devcoredump_format(coredump->formatted, coredump->formatted_size, coredump); + + /* If there's an existing coredump for this device, the free function will be + * called immediately so coredump might be invalid after the call to dev_coredumpm. 
+ */ + dev_coredumpm(coredump->adev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, + amdgpu_devcoredump_read, amdgpu_devcoredump_free); + +end: + adev->coredump = NULL; } void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check, @@ -334,6 +390,10 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check, struct amdgpu_coredump_info *coredump; struct drm_sched_job *s_job; + /* No need to generate a new coredump if there's one in progress already. */ + if (work_pending(&adev->coredump_work)) + return; + coredump = kzalloc_obj(*coredump, GFP_NOWAIT); if (!coredump) return; @@ -360,11 +420,20 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check, ktime_get_ts64(&coredump->reset_time); - dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, - amdgpu_devcoredump_read, amdgpu_devcoredump_free); + /* Update the current coredump pointer (no lock needed, this function can only be called + * from a single thread) + */ + adev->coredump = coredump; + /* Kick off coredump formatting to a worker thread. */ + queue_work(system_unbound_wq, &adev->coredump_work); drm_info(dev, "AMDGPU device coredump file has been created\n"); drm_info(dev, "Check your /sys/class/drm/card%d/device/devcoredump/data\n", dev->primary->index); } + +void amdgpu_coredump_init(struct amdgpu_device *adev) +{ + INIT_WORK(&adev->coredump_work, amdgpu_devcoredump_deferred_work); +} #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h index ef9772c6bcc9..b3582d0b4ca4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h @@ -35,12 +35,19 @@ struct amdgpu_coredump_info { struct amdgpu_device *adev; struct amdgpu_task_info reset_task_info; struct timespec64 reset_time; + bool skip_vram_check; bool reset_vram_lost; struct amdgpu_ring *ring; + /* Readable form of coredevdump, generate once to speed up + * reading it (see drm_coredump_printer's documentation). 
+ */ + ssize_t formatted_size; + char *formatted; }; #endif void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check, bool vram_lost, struct amdgpu_job *job); +void amdgpu_coredump_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 711b4502653a..ac5769d9e75c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3781,6 +3781,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); INIT_WORK(&adev->userq_reset_work, amdgpu_userq_reset_work); + amdgpu_coredump_init(adev); + adev->gfx.gfx_off_req_count = 1; adev->gfx.gfx_off_residency = 0; adev->gfx.gfx_off_entrycount = 0; @@ -3878,9 +3880,21 @@ int amdgpu_device_init(struct amdgpu_device *adev, amdgpu_gmc_noretry_set(adev); /* Need to get xgmi info early to decide the reset behavior*/ if (adev->gmc.xgmi.supported) { - r = adev->gfxhub.funcs->get_xgmi_info(adev); - if (r) - return r; + if (adev->gfxhub.funcs && + adev->gfxhub.funcs->get_xgmi_info) { + r = adev->gfxhub.funcs->get_xgmi_info(adev); + if (r) + return r; + } + } + + if (adev->gmc.xgmi.connected_to_cpu) { + if (adev->mmhub.funcs && + adev->mmhub.funcs->get_xgmi_info) { + r = adev->mmhub.funcs->get_xgmi_info(adev); + if (r) + return r; + } } /* enable PCIE atomic ops */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index f9f785c5d8ac..0eb0c62d2f4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -112,8 +112,10 @@ #include "smuio_v15_0_8.h" #include "vcn_v5_0_0.h" #include "vcn_v5_0_1.h" +#include "vcn_v5_0_2.h" #include "jpeg_v5_0_0.h" #include "jpeg_v5_0_1.h" +#include "jpeg_v5_0_2.h" #include "jpeg_v5_3_0.h" #include "amdgpu_ras_mgr.h" @@ -296,13 +298,17 @@ static int amdgpu_discovery_get_tmr_info(struct amdgpu_device *adev, if (vram_size) adev->discovery.offset = (vram_size << 20) - DISCOVERY_TMR_OFFSET; - if (amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled) { - adev->discovery.offset = - adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].offset; - adev->discovery.size = - adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb << 10; - if (!adev->discovery.offset || !adev->discovery.size) - return -EINVAL; + if (amdgpu_sriov_vf(adev)) { + if (adev->virt.is_dynamic_crit_regn_enabled) { + adev->discovery.offset = + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].offset; + adev->discovery.size = + adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb << 10; + if (!adev->discovery.offset || !adev->discovery.size) + return -EINVAL; + } else { + goto out; + } } else { tmr_size = RREG32(mmDRIVER_SCRATCH_2); if (tmr_size) { @@ -322,7 +328,7 @@ static int amdgpu_discovery_get_tmr_info(struct amdgpu_device *adev, adev->discovery.offset = tmr_offset + tmr_size - DISCOVERY_TMR_OFFSET; } } - +out: adev->discovery.bin = kzalloc(adev->discovery.size, GFP_KERNEL); if (!adev->discovery.bin) return -ENOMEM; @@ -556,7 +562,7 @@ static int amdgpu_discovery_table_check(struct amdgpu_device *adev, checksum = le16_to_cpu(info->checksum); switch (table_id) { - case IP_DISCOVERY: + case IP_DISCOVERY: { struct ip_discovery_header *ihdr = (struct ip_discovery_header *)(discovery_bin + offset); act_val = le32_to_cpu(ihdr->signature); @@ -564,7 +570,8 @@ static int amdgpu_discovery_table_check(struct amdgpu_device *adev, 
table_size = le16_to_cpu(ihdr->size); table_name = "data table"; break; - case GC: + } + case GC: { struct gpu_info_header *ghdr = (struct gpu_info_header *)(discovery_bin + offset); act_val = le32_to_cpu(ghdr->table_id); @@ -572,7 +579,8 @@ static int amdgpu_discovery_table_check(struct amdgpu_device *adev, table_size = le16_to_cpu(ghdr->size); table_name = "gc table"; break; - case HARVEST_INFO: + } + case HARVEST_INFO: { struct harvest_info_header *hhdr = (struct harvest_info_header *)(discovery_bin + offset); act_val = le32_to_cpu(hhdr->signature); @@ -580,7 +588,8 @@ static int amdgpu_discovery_table_check(struct amdgpu_device *adev, table_size = sizeof(struct harvest_table); table_name = "harvest table"; break; - case VCN_INFO: + } + case VCN_INFO: { struct vcn_info_header *vhdr = (struct vcn_info_header *)(discovery_bin + offset); act_val = le32_to_cpu(vhdr->table_id); @@ -588,7 +597,8 @@ static int amdgpu_discovery_table_check(struct amdgpu_device *adev, table_size = le32_to_cpu(vhdr->size_bytes); table_name = "vcn table"; break; - case MALL_INFO: + } + case MALL_INFO: { struct mall_info_header *mhdr = (struct mall_info_header *)(discovery_bin + offset); act_val = le32_to_cpu(mhdr->table_id); @@ -597,6 +607,7 @@ static int amdgpu_discovery_table_check(struct amdgpu_device *adev, table_name = "mall table"; check_table = false; break; + } default: dev_err(adev->dev, "invalid ip discovery table id %d specified\n", table_id); check_table = false; @@ -2640,6 +2651,10 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vcn_v5_0_1_ip_block); amdgpu_device_ip_block_add(adev, &jpeg_v5_0_1_ip_block); break; + case IP_VERSION(5, 0, 2): + amdgpu_device_ip_block_add(adev, &vcn_v5_0_2_ip_block); + amdgpu_device_ip_block_add(adev, &jpeg_v5_0_2_ip_block); + break; default: dev_err(adev->dev, "Failed to add vcn/jpeg ip block(UVD_HWIP:0x%x)\n", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 03814a23eb54..a44baa9ee78d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -839,8 +839,8 @@ module_param_named_unsafe(no_queue_eviction_on_vm_fault, amdgpu_no_queue_evictio /** * DOC: mtype_local (int) */ -int amdgpu_mtype_local; -MODULE_PARM_DESC(mtype_local, "MTYPE for local memory (0 = MTYPE_RW (default), 1 = MTYPE_NC, 2 = MTYPE_CC)"); +int amdgpu_mtype_local = -1; +MODULE_PARM_DESC(mtype_local, "MTYPE for local memory (default: ASIC dependent, 0 = MTYPE_RW, 1 = MTYPE_NC, 2 = MTYPE_CC)"); module_param_named_unsafe(mtype_local, amdgpu_mtype_local, int, 0444); /** @@ -2952,9 +2952,11 @@ static int amdgpu_drm_release(struct inode *inode, struct file *filp) int idx; if (fpriv && drm_dev_enter(dev, &idx)) { - fpriv->evf_mgr.fd_closing = true; - amdgpu_eviction_fence_destroy(&fpriv->evf_mgr); + amdgpu_evf_mgr_shutdown(&fpriv->evf_mgr); + amdgpu_userq_mgr_cancel_resume(&fpriv->userq_mgr); + amdgpu_evf_mgr_flush_suspend(&fpriv->evf_mgr); amdgpu_userq_mgr_fini(&fpriv->userq_mgr); + amdgpu_evf_mgr_fini(&fpriv->evf_mgr); drm_dev_exit(idx); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c index 3b588c7740ec..55e8b9cc2f9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c @@ -25,9 +25,6 @@ #include <drm/drm_exec.h> #include "amdgpu.h" -#define work_to_evf_mgr(w, name) container_of(w, struct amdgpu_eviction_fence_mgr, name) 
-#define evf_mgr_to_fpriv(e) container_of(e, struct amdgpu_fpriv, evf_mgr) - static const char * amdgpu_eviction_fence_get_driver_name(struct dma_fence *fence) { @@ -43,127 +40,79 @@ amdgpu_eviction_fence_get_timeline_name(struct dma_fence *f) return ef->timeline_name; } -int -amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr, - struct drm_exec *exec) +static bool amdgpu_eviction_fence_enable_signaling(struct dma_fence *f) { - struct amdgpu_eviction_fence *old_ef, *new_ef; - struct drm_gem_object *obj; - unsigned long index; - int ret; - - if (evf_mgr->ev_fence && - !dma_fence_is_signaled(&evf_mgr->ev_fence->base)) - return 0; - /* - * Steps to replace eviction fence: - * * lock all objects in exec (caller) - * * create a new eviction fence - * * update new eviction fence in evf_mgr - * * attach the new eviction fence to BOs - * * release the old fence - * * unlock the objects (caller) - */ - new_ef = amdgpu_eviction_fence_create(evf_mgr); - if (!new_ef) { - DRM_ERROR("Failed to create new eviction fence\n"); - return -ENOMEM; - } - - /* Update the eviction fence now */ - spin_lock(&evf_mgr->ev_fence_lock); - old_ef = evf_mgr->ev_fence; - evf_mgr->ev_fence = new_ef; - spin_unlock(&evf_mgr->ev_fence_lock); + struct amdgpu_eviction_fence *ev_fence = to_ev_fence(f); - /* Attach the new fence */ - drm_exec_for_each_locked_object(exec, index, obj) { - struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - - if (!bo) - continue; - ret = amdgpu_eviction_fence_attach(evf_mgr, bo); - if (ret) { - DRM_ERROR("Failed to attch new eviction fence\n"); - goto free_err; - } - } - - /* Free old fence */ - if (old_ef) - dma_fence_put(&old_ef->base); - return 0; - -free_err: - kfree(new_ef); - return ret; + schedule_work(&ev_fence->evf_mgr->suspend_work); + return true; } +static const struct dma_fence_ops amdgpu_eviction_fence_ops = { + .get_driver_name = amdgpu_eviction_fence_get_driver_name, + .get_timeline_name = amdgpu_eviction_fence_get_timeline_name, + .enable_signaling = amdgpu_eviction_fence_enable_signaling, +}; + static void amdgpu_eviction_fence_suspend_worker(struct work_struct *work) { - struct amdgpu_eviction_fence_mgr *evf_mgr = work_to_evf_mgr(work, suspend_work.work); - struct amdgpu_fpriv *fpriv = evf_mgr_to_fpriv(evf_mgr); + struct amdgpu_eviction_fence_mgr *evf_mgr = + container_of(work, struct amdgpu_eviction_fence_mgr, + suspend_work); + struct amdgpu_fpriv *fpriv = + container_of(evf_mgr, struct amdgpu_fpriv, evf_mgr); struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; - struct amdgpu_eviction_fence *ev_fence; + struct dma_fence *ev_fence; mutex_lock(&uq_mgr->userq_mutex); - spin_lock(&evf_mgr->ev_fence_lock); - ev_fence = evf_mgr->ev_fence; - if (ev_fence) - dma_fence_get(&ev_fence->base); - else - goto unlock; - spin_unlock(&evf_mgr->ev_fence_lock); - - amdgpu_userq_evict(uq_mgr, ev_fence); + ev_fence = amdgpu_evf_mgr_get_fence(evf_mgr); + amdgpu_userq_evict(uq_mgr, !evf_mgr->shutdown); - mutex_unlock(&uq_mgr->userq_mutex); - dma_fence_put(&ev_fence->base); - return; - -unlock: - spin_unlock(&evf_mgr->ev_fence_lock); + /* + * Signaling the eviction fence must be done while holding the + * userq_mutex. Otherwise we won't resume the queues before issuing the + * next fence. 
+ */ + dma_fence_signal(ev_fence); + dma_fence_put(ev_fence); mutex_unlock(&uq_mgr->userq_mutex); } -static bool amdgpu_eviction_fence_enable_signaling(struct dma_fence *f) +int amdgpu_evf_mgr_attach_fence(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo) { - struct amdgpu_eviction_fence_mgr *evf_mgr; - struct amdgpu_eviction_fence *ev_fence; - - if (!f) - return true; - - ev_fence = to_ev_fence(f); - evf_mgr = ev_fence->evf_mgr; + struct dma_fence *ev_fence = amdgpu_evf_mgr_get_fence(evf_mgr); + struct ttm_operation_ctx ctx = { false, false }; + struct dma_resv *resv = bo->tbo.base.resv; + int ret; - schedule_delayed_work(&evf_mgr->suspend_work, 0); - return true; -} + if (!dma_fence_is_signaled(ev_fence)) { -static const struct dma_fence_ops amdgpu_eviction_fence_ops = { - .get_driver_name = amdgpu_eviction_fence_get_driver_name, - .get_timeline_name = amdgpu_eviction_fence_get_timeline_name, - .enable_signaling = amdgpu_eviction_fence_enable_signaling, -}; + amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (!ret) + dma_resv_add_fence(resv, ev_fence, + DMA_RESV_USAGE_BOOKKEEP); + } else { + ret = 0; + } -void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr, - struct amdgpu_eviction_fence *ev_fence) -{ - spin_lock(&evf_mgr->ev_fence_lock); - dma_fence_signal(&ev_fence->base); - spin_unlock(&evf_mgr->ev_fence_lock); + dma_fence_put(ev_fence); + return ret; } -struct amdgpu_eviction_fence * -amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr) +int amdgpu_evf_mgr_rearm(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct drm_exec *exec) { struct amdgpu_eviction_fence *ev_fence; + struct drm_gem_object *obj; + unsigned long index; + /* Create and initialize a new eviction fence */ ev_fence = kzalloc_obj(*ev_fence); if (!ev_fence) - return NULL; + return -ENOMEM; ev_fence->evf_mgr = evf_mgr; get_task_comm(ev_fence->timeline_name, current); @@ -171,56 +120,22 @@ amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr) dma_fence_init64(&ev_fence->base, &amdgpu_eviction_fence_ops, &ev_fence->lock, evf_mgr->ev_fence_ctx, atomic_inc_return(&evf_mgr->ev_fence_seq)); - return ev_fence; -} -void amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr) -{ - struct amdgpu_eviction_fence *ev_fence; - - /* Wait for any pending work to execute */ - flush_delayed_work(&evf_mgr->suspend_work); - - spin_lock(&evf_mgr->ev_fence_lock); - ev_fence = evf_mgr->ev_fence; - spin_unlock(&evf_mgr->ev_fence_lock); - - if (!ev_fence) - return; - - dma_fence_wait(&ev_fence->base, false); + /* Remember it for newly added BOs */ + dma_fence_put(evf_mgr->ev_fence); + evf_mgr->ev_fence = &ev_fence->base; - /* Last unref of ev_fence */ - dma_fence_put(&ev_fence->base); -} - -int amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr, - struct amdgpu_bo *bo) -{ - struct amdgpu_eviction_fence *ev_fence; - struct dma_resv *resv = bo->tbo.base.resv; - int ret; - - if (!resv) - return 0; + /* And add it to all existing BOs */ + drm_exec_for_each_locked_object(exec, index, obj) { + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - ret = dma_resv_reserve_fences(resv, 1); - if (ret) { - DRM_DEBUG_DRIVER("Failed to resv fence space\n"); - return ret; + amdgpu_evf_mgr_attach_fence(evf_mgr, bo); } - - spin_lock(&evf_mgr->ev_fence_lock); - ev_fence = evf_mgr->ev_fence; - if (ev_fence) - dma_resv_add_fence(resv, &ev_fence->base, DMA_RESV_USAGE_BOOKKEEP); - 
spin_unlock(&evf_mgr->ev_fence_lock); - return 0; } -void amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr, - struct amdgpu_bo *bo) +void amdgpu_evf_mgr_detach_fence(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo) { struct dma_fence *stub = dma_fence_get_stub(); @@ -229,13 +144,31 @@ void amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr, dma_fence_put(stub); } -int amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr) +void amdgpu_evf_mgr_init(struct amdgpu_eviction_fence_mgr *evf_mgr) { - /* This needs to be done one time per open */ atomic_set(&evf_mgr->ev_fence_seq, 0); evf_mgr->ev_fence_ctx = dma_fence_context_alloc(1); - spin_lock_init(&evf_mgr->ev_fence_lock); + evf_mgr->ev_fence = dma_fence_get_stub(); - INIT_DELAYED_WORK(&evf_mgr->suspend_work, amdgpu_eviction_fence_suspend_worker); - return 0; + INIT_WORK(&evf_mgr->suspend_work, amdgpu_eviction_fence_suspend_worker); +} + +void amdgpu_evf_mgr_shutdown(struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + evf_mgr->shutdown = true; + /* Make sure that the shutdown is visible to the suspend work */ + flush_work(&evf_mgr->suspend_work); +} + +void amdgpu_evf_mgr_flush_suspend(struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + dma_fence_wait(rcu_dereference_protected(evf_mgr->ev_fence, true), + false); + /* Make sure that we are done with the last suspend work */ + flush_work(&evf_mgr->suspend_work); +} + +void amdgpu_evf_mgr_fini(struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + dma_fence_put(evf_mgr->ev_fence); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h index fcd867b7147d..2a750add4e7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h @@ -25,6 +25,8 @@ #ifndef AMDGPU_EV_FENCE_H_ #define AMDGPU_EV_FENCE_H_ +#include <linux/dma-fence.h> + struct amdgpu_eviction_fence { struct dma_fence base; spinlock_t lock; @@ -35,35 +37,36 @@ struct amdgpu_eviction_fence { struct amdgpu_eviction_fence_mgr { u64 ev_fence_ctx; atomic_t ev_fence_seq; - spinlock_t ev_fence_lock; - struct amdgpu_eviction_fence *ev_fence; - struct delayed_work suspend_work; - uint8_t fd_closing; -}; - -/* Eviction fence helper functions */ -struct amdgpu_eviction_fence * -amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr); -void -amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr); - -int -amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr, - struct amdgpu_bo *bo); + /* + * Only updated while holding the VM resv lock. + * Only signaled while holding the userq mutex. 
+ */ + struct dma_fence __rcu *ev_fence; + struct work_struct suspend_work; + bool shutdown; +}; -void -amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr, - struct amdgpu_bo *bo); +static inline struct dma_fence * +amdgpu_evf_mgr_get_fence(struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + struct dma_fence *ev_fence; -int -amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr); + rcu_read_lock(); + ev_fence = dma_fence_get_rcu_safe(&evf_mgr->ev_fence); + rcu_read_unlock(); + return ev_fence; +} -void -amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr, - struct amdgpu_eviction_fence *ev_fence); +int amdgpu_evf_mgr_attach_fence(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo); +int amdgpu_evf_mgr_rearm(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct drm_exec *exec); +void amdgpu_evf_mgr_detach_fence(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo); +void amdgpu_evf_mgr_init(struct amdgpu_eviction_fence_mgr *evf_mgr); +void amdgpu_evf_mgr_shutdown(struct amdgpu_eviction_fence_mgr *evf_mgr); +void amdgpu_evf_mgr_flush_suspend(struct amdgpu_eviction_fence_mgr *evf_mgr); +void amdgpu_evf_mgr_fini(struct amdgpu_eviction_fence_mgr *evf_mgr); -int -amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr, - struct drm_exec *exec); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c index 514bd302365f..841e1b3a017e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c @@ -106,6 +106,7 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr) *fru_addr = FRU_EEPROM_MADDR_8; return true; case IP_VERSION(13, 0, 12): + case IP_VERSION(15, 0, 8): if (fru_addr) *fru_addr = FRU_EEPROM_MADDR_INV; return true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index a6107109a2b8..01dc73309d73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -258,18 +258,15 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, amdgpu_vm_bo_update_shared(abo); bo_va = amdgpu_vm_bo_find(vm, abo); - if (!bo_va) + if (!bo_va) { bo_va = amdgpu_vm_bo_add(adev, vm, abo); - else + r = amdgpu_evf_mgr_attach_fence(&fpriv->evf_mgr, abo); + if (r) + goto out_unlock; + } else { ++bo_va->ref_count; - - /* attach gfx eviction fence */ - r = amdgpu_eviction_fence_attach(&fpriv->evf_mgr, abo); - if (r) { - DRM_DEBUG_DRIVER("Failed to attach eviction fence to BO\n"); - amdgpu_bo_unreserve(abo); - return r; } + drm_exec_fini(&exec); /* Validate and add eviction fence to DMABuf imports with dynamic @@ -337,7 +334,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj, } if (!amdgpu_vm_is_bo_always_valid(vm, bo)) - amdgpu_eviction_fence_detach(&fpriv->evf_mgr, bo); + amdgpu_evf_mgr_detach_fence(&fpriv->evf_mgr, bo); bo_va = amdgpu_vm_bo_find(vm, bo); if (!bo_va || --bo_va->ref_count) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 720ed3a2c78c..2785eda6fea5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -463,6 +463,7 @@ struct amdgpu_gfx { struct amdgpu_irq_src cp_ecc_error_irq; struct amdgpu_irq_src sq_irq; struct amdgpu_irq_src rlc_gc_fed_irq; + struct amdgpu_irq_src rlc_poison_irq; struct sq_work sq_work; /* gfx status */ diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 7f19554b9ad1..06efce38f323 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -1522,10 +1522,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) "Failed to init usermode queue manager (%d), use legacy workload submission only\n", r); - r = amdgpu_eviction_fence_init(&fpriv->evf_mgr); - if (r) - goto error_vm; - + amdgpu_evf_mgr_init(&fpriv->evf_mgr); amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev); file_priv->driver_priv = fpriv; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h index 1ca9d4ed8063..6b8214650e5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h @@ -63,14 +63,40 @@ struct amdgpu_mmhub_funcs { uint64_t page_table_base); void (*update_power_gating)(struct amdgpu_device *adev, bool enable); + int (*get_xgmi_info)(struct amdgpu_device *adev); +}; + +struct amdgpu_mmhub_client_ids { + const char * const (*names)[2]; + unsigned int size; }; struct amdgpu_mmhub { struct ras_common_if *ras_if; const struct amdgpu_mmhub_funcs *funcs; struct amdgpu_mmhub_ras *ras; + struct amdgpu_mmhub_client_ids client_ids; }; +static inline void +amdgpu_mmhub_init_client_info(struct amdgpu_mmhub *mmhub, + const char * const (*names)[2], + unsigned int size) +{ + mmhub->client_ids.names = names; + mmhub->client_ids.size = size; +} + +static inline const char * +amdgpu_mmhub_client_name(struct amdgpu_mmhub *mmhub, + u32 cid, bool is_write) +{ + if (cid < mmhub->client_ids.size) + return mmhub->client_ids.names[cid][is_write]; + + return NULL; +} + int amdgpu_mmhub_ras_sw_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 741d1919ef88..6edcb7713299 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2250,6 +2250,7 @@ static bool amdgpu_ras_aca_is_supported(struct amdgpu_device *adev) case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): + case IP_VERSION(13, 0, 15): ret = true; break; default: @@ -4000,6 +4001,7 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev) case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): + case IP_VERSION(13, 0, 15): return true; default: return false; @@ -4013,6 +4015,7 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev) case IP_VERSION(13, 0, 10): case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): + case IP_VERSION(13, 0, 15): case IP_VERSION(14, 0, 3): return true; default: @@ -4182,7 +4185,8 @@ init_ras_enabled_flag: adev->aca.is_enabled = (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) || - amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)); + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 15)); } /* bad page feature is not applicable to specific app platform */ @@ -4270,6 +4274,7 @@ static void amdgpu_ras_init_reserved_vram_size(struct amdgpu_device *adev) case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 12): + case IP_VERSION(13, 0, 15): con->reserved_pages_in_bytes = AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT; break; case IP_VERSION(13, 0, 14): diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_reg_access.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reg_access.c index bf8645390bdc..540040c76058 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reg_access.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reg_access.c @@ -314,7 +314,6 @@ uint64_t amdgpu_reg_get_smn_base64(struct amdgpu_device *adev, "SMN base address query not supported for this device\n"); return 0; } - return 0; } return adev->reg.smn.get_smn_base(adev, block, die_inst); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 82333aeb4453..6d9e96fabd58 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -1150,7 +1150,7 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev) if ((adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) && (adev->firmware.load_type != AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)) { amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE, - (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ? + (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf || adev->gmc.xgmi.connected_to_cpu) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, &adev->firmware.fw_buf, &adev->firmware.fw_buf_mc, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 4df0f9d5ad11..0238c2798de4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -398,6 +398,17 @@ int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, return 0; } +int amdgpu_umc_uniras_process_ecc_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + struct ras_ih_info ih_info = {0}; + + ih_info.block = RAS_BLOCK_ID__UMC; + amdgpu_ras_mgr_dispatch_interrupt(adev, &ih_info); + return 0; +} + int amdgpu_umc_fill_error_record(struct ras_err_data *err_data, uint64_t err_addr, uint64_t retired_page, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 28dff750c47e..8494a55ebf76 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -161,6 +161,9 @@ int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev, int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); +int amdgpu_umc_uniras_process_ecc_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry); int amdgpu_umc_fill_error_record(struct ras_err_data *err_data, uint64_t err_addr, uint64_t retired_page, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 6d964a6ee349..d94f4966fea9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -156,7 +156,7 @@ static void amdgpu_userq_hang_detect_work(struct work_struct *work) struct dma_fence *fence; struct amdgpu_userq_mgr *uq_mgr; - if (!queue || !queue->userq_mgr) + if (!queue->userq_mgr) return; uq_mgr = queue->userq_mgr; @@ -472,17 +472,16 @@ void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_eviction_fence_mgr *evf_mgr) { - struct amdgpu_eviction_fence *ev_fence; + struct dma_fence *ev_fence; retry: /* Flush any pending resume work to create ev_fence */ flush_delayed_work(&uq_mgr->resume_work); mutex_lock(&uq_mgr->userq_mutex); - spin_lock(&evf_mgr->ev_fence_lock); - ev_fence = evf_mgr->ev_fence; - spin_unlock(&evf_mgr->ev_fence_lock); - if (!ev_fence || 
dma_fence_is_signaled(&ev_fence->base)) { + ev_fence = amdgpu_evf_mgr_get_fence(evf_mgr); + if (dma_fence_is_signaled(ev_fence)) { + dma_fence_put(ev_fence); mutex_unlock(&uq_mgr->userq_mutex); /* * Looks like there was no pending resume work, @@ -491,6 +490,7 @@ retry: schedule_delayed_work(&uq_mgr->resume_work, 0); goto retry; } + dma_fence_put(ev_fence); } int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr, @@ -623,13 +623,14 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_que int r = 0; cancel_delayed_work_sync(&uq_mgr->resume_work); + + /* Cancel any pending hang detection work and cleanup */ + cancel_delayed_work_sync(&queue->hang_detect_work); + mutex_lock(&uq_mgr->userq_mutex); + queue->hang_detect_fence = NULL; amdgpu_userq_wait_for_last_fence(queue); - /* Cancel any pending hang detection work and cleanup */ - if (queue->hang_detect_fence) { - cancel_delayed_work_sync(&queue->hang_detect_work); - queue->hang_detect_fence = NULL; - } + r = amdgpu_bo_reserve(queue->db_obj.obj, true); if (!r) { amdgpu_bo_unpin(queue->db_obj.obj); @@ -1040,12 +1041,12 @@ amdgpu_userq_bo_validate(struct amdgpu_device *adev, struct drm_exec *exec, struct amdgpu_bo *bo; int ret; - spin_lock(&vm->invalidated_lock); + spin_lock(&vm->status_lock); while (!list_empty(&vm->invalidated)) { bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, base.vm_status); - spin_unlock(&vm->invalidated_lock); + spin_unlock(&vm->status_lock); bo = bo_va->base.bo; ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 2); @@ -1062,9 +1063,9 @@ amdgpu_userq_bo_validate(struct amdgpu_device *adev, struct drm_exec *exec, if (ret) return ret; - spin_lock(&vm->invalidated_lock); + spin_lock(&vm->status_lock); } - spin_unlock(&vm->invalidated_lock); + spin_unlock(&vm->status_lock); return 0; } @@ -1196,7 +1197,7 @@ retry_lock: dma_fence_wait(bo_va->last_pt_update, false); dma_fence_wait(vm->last_update, false); - ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec); + ret = amdgpu_evf_mgr_rearm(&fpriv->evf_mgr, &exec); if (ret) drm_file_err(uq_mgr->file, "Failed to replace eviction fence\n"); @@ -1216,11 +1217,13 @@ static void amdgpu_userq_restore_worker(struct work_struct *work) { struct amdgpu_userq_mgr *uq_mgr = work_to_uq_mgr(work, resume_work.work); struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); + struct dma_fence *ev_fence; int ret; - flush_delayed_work(&fpriv->evf_mgr.suspend_work); - mutex_lock(&uq_mgr->userq_mutex); + ev_fence = amdgpu_evf_mgr_get_fence(&fpriv->evf_mgr); + if (!dma_fence_is_signaled(ev_fence)) + goto unlock; ret = amdgpu_userq_vm_validate(uq_mgr); if (ret) { @@ -1236,6 +1239,7 @@ static void amdgpu_userq_restore_worker(struct work_struct *work) unlock: mutex_unlock(&uq_mgr->userq_mutex); + dma_fence_put(ev_fence); } static int @@ -1311,11 +1315,8 @@ amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr) } void -amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_eviction_fence *ev_fence) +amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr, bool schedule_resume) { - struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); - struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr; struct amdgpu_device *adev = uq_mgr->adev; int ret; @@ -1328,16 +1329,8 @@ amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr, if (ret) dev_err(adev->dev, "Failed to evict userqueue\n"); - /* Signal current eviction fence */ - amdgpu_eviction_fence_signal(evf_mgr, ev_fence); - - if (evf_mgr->fd_closing) { - 
cancel_delayed_work_sync(&uq_mgr->resume_work); - return; - } - - /* Schedule a resume work */ - schedule_delayed_work(&uq_mgr->resume_work, 0); + if (schedule_resume) + schedule_delayed_work(&uq_mgr->resume_work, 0); } int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv, @@ -1352,6 +1345,11 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *f return 0; } +void amdgpu_userq_mgr_cancel_resume(struct amdgpu_userq_mgr *userq_mgr) +{ + cancel_delayed_work_sync(&userq_mgr->resume_work); +} + void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr) { struct amdgpu_usermode_queue *queue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h index 54e1997b3cc0..f0abc16d02cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -123,6 +123,7 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv, struct amdgpu_device *adev); +void amdgpu_userq_mgr_cancel_resume(struct amdgpu_userq_mgr *userq_mgr); void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr); int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr, @@ -133,7 +134,7 @@ void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_userq_obj *userq_obj); void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_eviction_fence *ev_fence); + bool schedule_resume); void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_eviction_fence_mgr *evf_mgr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index d8ce7b3733e7..781896c9fd26 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -597,11 +597,11 @@ exec_fini: put_gobj_write: for (i = 0; i < num_write_bo_handles; i++) drm_gem_object_put(gobj_write[i]); - kfree(gobj_write); + kvfree(gobj_write); put_gobj_read: for (i = 0; i < num_read_bo_handles; i++) drm_gem_object_put(gobj_read[i]); - kfree(gobj_read); + kvfree(gobj_read); free_syncobj: while (entry-- > 0) if (syncobj[entry]) @@ -616,339 +616,399 @@ free_syncobj_handles: return r; } -int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp) +/* Count the number of expected fences so userspace can alloc a buffer */ +static int +amdgpu_userq_wait_count_fences(struct drm_file *filp, + struct drm_amdgpu_userq_wait *wait_info, + u32 *syncobj_handles, u32 *timeline_points, + u32 *timeline_handles, + struct drm_gem_object **gobj_write, + struct drm_gem_object **gobj_read) { - struct drm_amdgpu_userq_wait *wait_info = data; - const unsigned int num_write_bo_handles = wait_info->num_bo_write_handles; - const unsigned int num_read_bo_handles = wait_info->num_bo_read_handles; - struct drm_amdgpu_userq_fence_info *fence_info = NULL; - struct amdgpu_fpriv *fpriv = filp->driver_priv; - struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr; - struct drm_gem_object **gobj_write, **gobj_read; - u32 *timeline_points, *timeline_handles; - struct amdgpu_usermode_queue *waitq = NULL; - u32 *syncobj_handles, num_syncobj; - struct dma_fence **fences = NULL; - u16 num_points, num_fences = 0; + int num_read_bo_handles, num_write_bo_handles; + struct dma_fence_unwrap iter; + struct dma_fence *fence, *f; + unsigned int num_fences = 0; struct drm_exec 
exec; - int r, i, cnt; - - if (!amdgpu_userq_enabled(dev)) - return -ENOTSUPP; - - if (wait_info->num_bo_write_handles > AMDGPU_USERQ_MAX_HANDLES || - wait_info->num_bo_read_handles > AMDGPU_USERQ_MAX_HANDLES) - return -EINVAL; - - num_syncobj = wait_info->num_syncobj_handles; - syncobj_handles = memdup_array_user(u64_to_user_ptr(wait_info->syncobj_handles), - num_syncobj, sizeof(u32)); - if (IS_ERR(syncobj_handles)) - return PTR_ERR(syncobj_handles); - + int i, r; + + /* + * This needs to be outside of the lock provided by drm_exec for + * DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT to work correctly. + */ + + /* Count timeline fences */ + for (i = 0; i < wait_info->num_syncobj_timeline_handles; i++) { + r = drm_syncobj_find_fence(filp, timeline_handles[i], + timeline_points[i], + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, + &fence); + if (r) + return r; + + dma_fence_unwrap_for_each(f, &iter, fence) + num_fences++; - num_points = wait_info->num_syncobj_timeline_handles; - timeline_handles = memdup_array_user(u64_to_user_ptr(wait_info->syncobj_timeline_handles), - num_points, sizeof(u32)); - if (IS_ERR(timeline_handles)) { - r = PTR_ERR(timeline_handles); - goto free_syncobj_handles; + dma_fence_put(fence); } - timeline_points = memdup_array_user(u64_to_user_ptr(wait_info->syncobj_timeline_points), - num_points, sizeof(u32)); + /* Count boolean fences */ + for (i = 0; i < wait_info->num_syncobj_handles; i++) { + r = drm_syncobj_find_fence(filp, syncobj_handles[i], 0, + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, + &fence); + if (r) + return r; - if (IS_ERR(timeline_points)) { - r = PTR_ERR(timeline_points); - goto free_timeline_handles; + num_fences++; + dma_fence_put(fence); } - r = drm_gem_objects_lookup(filp, - u64_to_user_ptr(wait_info->bo_read_handles), - num_read_bo_handles, - &gobj_read); - if (r) - goto free_timeline_points; - - r = drm_gem_objects_lookup(filp, - u64_to_user_ptr(wait_info->bo_write_handles), - num_write_bo_handles, - &gobj_write); - if (r) - goto put_gobj_read; - + /* Lock all the GEM objects */ + /* TODO: It is actually not necessary to lock them */ + num_read_bo_handles = wait_info->num_bo_read_handles; + num_write_bo_handles = wait_info->num_bo_write_handles; drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, - (num_read_bo_handles + num_write_bo_handles)); + num_read_bo_handles + num_write_bo_handles); - /* Lock all BOs with retry handling */ drm_exec_until_all_locked(&exec) { - r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1); + r = drm_exec_prepare_array(&exec, gobj_read, + num_read_bo_handles, 1); drm_exec_retry_on_contention(&exec); - if (r) { - drm_exec_fini(&exec); - goto put_gobj_write; - } + if (r) + goto error_unlock; - r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1); + r = drm_exec_prepare_array(&exec, gobj_write, + num_write_bo_handles, 1); drm_exec_retry_on_contention(&exec); - if (r) { - drm_exec_fini(&exec); - goto put_gobj_write; - } + if (r) + goto error_unlock; } - if (!wait_info->num_fences) { - if (num_points) { - struct dma_fence_unwrap iter; - struct dma_fence *fence; - struct dma_fence *f; - - for (i = 0; i < num_points; i++) { - r = drm_syncobj_find_fence(filp, timeline_handles[i], - timeline_points[i], - DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, - &fence); - if (r) - goto exec_fini; - - dma_fence_unwrap_for_each(f, &iter, fence) - num_fences++; - - dma_fence_put(fence); - } - } + /* Count read fences */ + for (i = 0; i < num_read_bo_handles; i++) { + struct dma_resv_iter resv_cursor; + struct dma_fence 
*fence; - /* Count syncobj's fence */ - for (i = 0; i < num_syncobj; i++) { - struct dma_fence *fence; + dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv, + DMA_RESV_USAGE_READ, fence) + num_fences++; + } - r = drm_syncobj_find_fence(filp, syncobj_handles[i], - 0, - DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, - &fence); - if (r) - goto exec_fini; + /* Count write fences */ + for (i = 0; i < num_write_bo_handles; i++) { + struct dma_resv_iter resv_cursor; + struct dma_fence *fence; + dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv, + DMA_RESV_USAGE_WRITE, fence) num_fences++; - dma_fence_put(fence); - } + } - /* Count GEM objects fence */ - for (i = 0; i < num_read_bo_handles; i++) { - struct dma_resv_iter resv_cursor; - struct dma_fence *fence; + wait_info->num_fences = num_fences; + r = 0; - dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv, - DMA_RESV_USAGE_READ, fence) - num_fences++; - } +error_unlock: + /* Unlock all the GEM objects */ + drm_exec_fini(&exec); + return r; +} - for (i = 0; i < num_write_bo_handles; i++) { - struct dma_resv_iter resv_cursor; - struct dma_fence *fence; +static int +amdgpu_userq_wait_return_fence_info(struct drm_file *filp, + struct drm_amdgpu_userq_wait *wait_info, + u32 *syncobj_handles, u32 *timeline_points, + u32 *timeline_handles, + struct drm_gem_object **gobj_write, + struct drm_gem_object **gobj_read) +{ + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr; + struct drm_amdgpu_userq_fence_info *fence_info; + int num_read_bo_handles, num_write_bo_handles; + struct amdgpu_usermode_queue *waitq; + struct dma_fence **fences, *fence, *f; + struct dma_fence_unwrap iter; + int num_points, num_syncobj; + unsigned int num_fences = 0; + struct drm_exec exec; + int i, cnt, r; - dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv, - DMA_RESV_USAGE_WRITE, fence) - num_fences++; - } + fence_info = kmalloc_array(wait_info->num_fences, sizeof(*fence_info), + GFP_KERNEL); + if (!fence_info) + return -ENOMEM; - /* - * Passing num_fences = 0 means that userspace doesn't want to - * retrieve userq_fence_info. If num_fences = 0 we skip filling - * userq_fence_info and return the actual number of fences on - * args->num_fences. 
- */ - wait_info->num_fences = num_fences; - } else { - /* Array of fence info */ - fence_info = kmalloc_array(wait_info->num_fences, sizeof(*fence_info), GFP_KERNEL); - if (!fence_info) { - r = -ENOMEM; - goto exec_fini; - } + fences = kmalloc_array(wait_info->num_fences, sizeof(*fences), + GFP_KERNEL); + if (!fences) { + r = -ENOMEM; + goto free_fence_info; + } + + /* Retrieve timeline fences */ + num_points = wait_info->num_syncobj_timeline_handles; + for (i = 0; i < num_points; i++) { + r = drm_syncobj_find_fence(filp, timeline_handles[i], + timeline_points[i], + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, + &fence); + if (r) + goto free_fences; + + dma_fence_unwrap_for_each(f, &iter, fence) { + if (num_fences >= wait_info->num_fences) { + r = -EINVAL; + dma_fence_put(fence); + goto free_fences; + } - /* Array of fences */ - fences = kmalloc_array(wait_info->num_fences, sizeof(*fences), GFP_KERNEL); - if (!fences) { - r = -ENOMEM; - goto free_fence_info; + fences[num_fences++] = dma_fence_get(f); } - /* Retrieve GEM read objects fence */ - for (i = 0; i < num_read_bo_handles; i++) { - struct dma_resv_iter resv_cursor; - struct dma_fence *fence; + dma_fence_put(fence); + } - dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv, - DMA_RESV_USAGE_READ, fence) { - if (num_fences >= wait_info->num_fences) { - r = -EINVAL; - goto free_fences; - } + /* Retrieve boolean fences */ + num_syncobj = wait_info->num_syncobj_handles; + for (i = 0; i < num_syncobj; i++) { + struct dma_fence *fence; - fences[num_fences++] = fence; - dma_fence_get(fence); - } + r = drm_syncobj_find_fence(filp, syncobj_handles[i], 0, + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, + &fence); + if (r) + goto free_fences; + + if (num_fences >= wait_info->num_fences) { + dma_fence_put(fence); + r = -EINVAL; + goto free_fences; } - /* Retrieve GEM write objects fence */ - for (i = 0; i < num_write_bo_handles; i++) { - struct dma_resv_iter resv_cursor; - struct dma_fence *fence; + /* Give the reference to the fence array */ + fences[num_fences++] = fence; + } + + /* Lock all the GEM objects */ + num_read_bo_handles = wait_info->num_bo_read_handles; + num_write_bo_handles = wait_info->num_bo_write_handles; + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, + num_read_bo_handles + num_write_bo_handles); - dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv, - DMA_RESV_USAGE_WRITE, fence) { - if (num_fences >= wait_info->num_fences) { - r = -EINVAL; - goto free_fences; - } + drm_exec_until_all_locked(&exec) { + r = drm_exec_prepare_array(&exec, gobj_read, + num_read_bo_handles, 1); + drm_exec_retry_on_contention(&exec); + if (r) + goto error_unlock; - fences[num_fences++] = fence; - dma_fence_get(fence); - } - } + r = drm_exec_prepare_array(&exec, gobj_write, + num_write_bo_handles, 1); + drm_exec_retry_on_contention(&exec); + if (r) + goto error_unlock; + } - if (num_points) { - struct dma_fence_unwrap iter; - struct dma_fence *fence; - struct dma_fence *f; - - for (i = 0; i < num_points; i++) { - r = drm_syncobj_find_fence(filp, timeline_handles[i], - timeline_points[i], - DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, - &fence); - if (r) - goto free_fences; - - dma_fence_unwrap_for_each(f, &iter, fence) { - if (num_fences >= wait_info->num_fences) { - r = -EINVAL; - dma_fence_put(fence); - goto free_fences; - } - - dma_fence_get(f); - fences[num_fences++] = f; - } + /* Retrieve GEM read objects fence */ + for (i = 0; i < num_read_bo_handles; i++) { + struct dma_resv_iter resv_cursor; + struct dma_fence *fence; - 
dma_fence_put(fence); + dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv, + DMA_RESV_USAGE_READ, fence) { + if (num_fences >= wait_info->num_fences) { + r = -EINVAL; + goto error_unlock; } - } - /* Retrieve syncobj's fence */ - for (i = 0; i < num_syncobj; i++) { - struct dma_fence *fence; + fences[num_fences++] = dma_fence_get(fence); + } + } - r = drm_syncobj_find_fence(filp, syncobj_handles[i], - 0, - DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, - &fence); - if (r) - goto free_fences; + /* Retrieve GEM write objects fence */ + for (i = 0; i < num_write_bo_handles; i++) { + struct dma_resv_iter resv_cursor; + struct dma_fence *fence; + dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv, + DMA_RESV_USAGE_WRITE, fence) { if (num_fences >= wait_info->num_fences) { r = -EINVAL; - dma_fence_put(fence); - goto free_fences; + goto error_unlock; } - fences[num_fences++] = fence; + fences[num_fences++] = dma_fence_get(fence); } + } - /* - * Keep only the latest fences to reduce the number of values - * given back to userspace. - */ - num_fences = dma_fence_dedup_array(fences, num_fences); + drm_exec_fini(&exec); - waitq = amdgpu_userq_get(userq_mgr, wait_info->waitq_id); - if (!waitq) { - r = -EINVAL; - goto free_fences; - } + /* + * Keep only the latest fences to reduce the number of values + * given back to userspace. + */ + num_fences = dma_fence_dedup_array(fences, num_fences); - for (i = 0, cnt = 0; i < num_fences; i++) { - struct amdgpu_userq_fence_driver *fence_drv; - struct amdgpu_userq_fence *userq_fence; - u32 index; - - userq_fence = to_amdgpu_userq_fence(fences[i]); - if (!userq_fence) { - /* - * Just waiting on other driver fences should - * be good for now - */ - r = dma_fence_wait(fences[i], true); - if (r) { - dma_fence_put(fences[i]); - goto free_fences; - } - - dma_fence_put(fences[i]); - continue; - } + waitq = amdgpu_userq_get(userq_mgr, wait_info->waitq_id); + if (!waitq) { + r = -EINVAL; + goto free_fences; + } + + for (i = 0, cnt = 0; i < num_fences; i++) { + struct amdgpu_userq_fence_driver *fence_drv; + struct amdgpu_userq_fence *userq_fence; + u32 index; - fence_drv = userq_fence->fence_drv; + userq_fence = to_amdgpu_userq_fence(fences[i]); + if (!userq_fence) { /* - * We need to make sure the user queue release their reference - * to the fence drivers at some point before queue destruction. - * Otherwise, we would gather those references until we don't - * have any more space left and crash. + * Just waiting on other driver fences should + * be good for now */ - r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv, - xa_limit_32b, GFP_KERNEL); + r = dma_fence_wait(fences[i], true); if (r) - goto free_fences; + goto put_waitq; - amdgpu_userq_fence_driver_get(fence_drv); + continue; + } - /* Store drm syncobj's gpu va address and value */ - fence_info[cnt].va = fence_drv->va; - fence_info[cnt].value = fences[i]->seqno; + fence_drv = userq_fence->fence_drv; + /* + * We need to make sure the user queue release their reference + * to the fence drivers at some point before queue destruction. + * Otherwise, we would gather those references until we don't + * have any more space left and crash. 
+ */ + r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv, + xa_limit_32b, GFP_KERNEL); + if (r) + goto put_waitq; - dma_fence_put(fences[i]); - /* Increment the actual userq fence count */ - cnt++; - } + amdgpu_userq_fence_driver_get(fence_drv); - wait_info->num_fences = cnt; - /* Copy userq fence info to user space */ - if (copy_to_user(u64_to_user_ptr(wait_info->out_fences), - fence_info, wait_info->num_fences * sizeof(*fence_info))) { - r = -EFAULT; - goto free_fences; - } + /* Store drm syncobj's gpu va address and value */ + fence_info[cnt].va = fence_drv->va; + fence_info[cnt].value = fences[i]->seqno; + + /* Increment the actual userq fence count */ + cnt++; } + wait_info->num_fences = cnt; + + /* Copy userq fence info to user space */ + if (copy_to_user(u64_to_user_ptr(wait_info->out_fences), + fence_info, cnt * sizeof(*fence_info))) + r = -EFAULT; + else + r = 0; + +put_waitq: + amdgpu_userq_put(waitq); free_fences: - if (fences) { - while (num_fences-- > 0) - dma_fence_put(fences[num_fences]); - kfree(fences); - } + while (num_fences--) + dma_fence_put(fences[num_fences]); + kfree(fences); + free_fence_info: kfree(fence_info); -exec_fini: + return r; + +error_unlock: drm_exec_fini(&exec); -put_gobj_write: - for (i = 0; i < num_write_bo_handles; i++) - drm_gem_object_put(gobj_write[i]); - kfree(gobj_write); + goto free_fences; +} + +int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + int num_points, num_syncobj, num_read_bo_handles, num_write_bo_handles; + u32 *syncobj_handles, *timeline_points, *timeline_handles; + struct drm_amdgpu_userq_wait *wait_info = data; + struct drm_gem_object **gobj_write; + struct drm_gem_object **gobj_read; + void __user *ptr; + int r; + + if (!amdgpu_userq_enabled(dev)) + return -ENOTSUPP; + + if (wait_info->num_bo_write_handles > AMDGPU_USERQ_MAX_HANDLES || + wait_info->num_bo_read_handles > AMDGPU_USERQ_MAX_HANDLES) + return -EINVAL; + + num_syncobj = wait_info->num_syncobj_handles; + ptr = u64_to_user_ptr(wait_info->syncobj_handles); + syncobj_handles = memdup_array_user(ptr, num_syncobj, sizeof(u32)); + if (IS_ERR(syncobj_handles)) + return PTR_ERR(syncobj_handles); + + num_points = wait_info->num_syncobj_timeline_handles; + ptr = u64_to_user_ptr(wait_info->syncobj_timeline_handles); + timeline_handles = memdup_array_user(ptr, num_points, sizeof(u32)); + if (IS_ERR(timeline_handles)) { + r = PTR_ERR(timeline_handles); + goto free_syncobj_handles; + } + + ptr = u64_to_user_ptr(wait_info->syncobj_timeline_points); + timeline_points = memdup_array_user(ptr, num_points, sizeof(u32)); + if (IS_ERR(timeline_points)) { + r = PTR_ERR(timeline_points); + goto free_timeline_handles; + } + + num_read_bo_handles = wait_info->num_bo_read_handles; + ptr = u64_to_user_ptr(wait_info->bo_read_handles), + r = drm_gem_objects_lookup(filp, ptr, num_read_bo_handles, &gobj_read); + if (r) + goto free_timeline_points; + + num_write_bo_handles = wait_info->num_bo_write_handles; + ptr = u64_to_user_ptr(wait_info->bo_write_handles), + r = drm_gem_objects_lookup(filp, ptr, num_write_bo_handles, + &gobj_write); + if (r) + goto put_gobj_read; + + /* + * Passing num_fences = 0 means that userspace doesn't want to + * retrieve userq_fence_info. If num_fences = 0 we skip filling + * userq_fence_info and return the actual number of fences on + * args->num_fences. 
+ */ + if (!wait_info->num_fences) { + r = amdgpu_userq_wait_count_fences(filp, wait_info, + syncobj_handles, + timeline_points, + timeline_handles, + gobj_write, + gobj_read); + } else { + r = amdgpu_userq_wait_return_fence_info(filp, wait_info, + syncobj_handles, + timeline_points, + timeline_handles, + gobj_write, + gobj_read); + } + + while (num_write_bo_handles--) + drm_gem_object_put(gobj_write[num_write_bo_handles]); + kvfree(gobj_write); + put_gobj_read: - for (i = 0; i < num_read_bo_handles; i++) - drm_gem_object_put(gobj_read[i]); - kfree(gobj_read); + while (num_read_bo_handles--) + drm_gem_object_put(gobj_read[num_read_bo_handles]); + kvfree(gobj_read); + free_timeline_points: kfree(timeline_points); free_timeline_handles: kfree(timeline_handles); free_syncobj_handles: kfree(syncobj_handles); - - if (waitq) - amdgpu_userq_put(waitq); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 75ae9b429420..03d95dca93d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -63,6 +63,7 @@ #define FIRMWARE_VCN4_0_6_1 "amdgpu/vcn_4_0_6_1.bin" #define FIRMWARE_VCN5_0_0 "amdgpu/vcn_5_0_0.bin" #define FIRMWARE_VCN5_0_1 "amdgpu/vcn_5_0_1.bin" +#define FIRMWARE_VCN5_0_2 "amdgpu/vcn_5_0_2.bin" #define FIRMWARE_VCN5_3_0 "amdgpu/vcn_5_3_0.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); @@ -91,6 +92,7 @@ MODULE_FIRMWARE(FIRMWARE_VCN4_0_6); MODULE_FIRMWARE(FIRMWARE_VCN4_0_6_1); MODULE_FIRMWARE(FIRMWARE_VCN5_0_0); MODULE_FIRMWARE(FIRMWARE_VCN5_0_1); +MODULE_FIRMWARE(FIRMWARE_VCN5_0_2); MODULE_FIRMWARE(FIRMWARE_VCN5_3_0); static void amdgpu_vcn_idle_work_handler(struct work_struct *work); @@ -1095,7 +1097,8 @@ int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; if ((amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(4, 0, 3)) && - (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(5, 0, 1))) { + (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(5, 0, 1)) && + (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(5, 0, 2))) { r = amdgpu_vcn_enc_ring_test_ib(ring, timeout); if (r) goto error; @@ -1132,7 +1135,8 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev, int i) return; if ((amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 3) || - amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(5, 0, 1)) + amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(5, 0, 1) || + amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(5, 0, 2)) && (i > 0)) return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 275745aa5829..dba7ea16a10d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -605,10 +605,10 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev) #ifdef MODULE if (THIS_MODULE->version != NULL) - strcpy(vf2pf_info->driver_version, THIS_MODULE->version); + strscpy(vf2pf_info->driver_version, THIS_MODULE->version); else #endif - strcpy(vf2pf_info->driver_version, "N/A"); + strscpy(vf2pf_info->driver_version, "N/A"); vf2pf_info->pf2vf_version_required = 0; // no requirement, guest understands all vf2pf_info->driver_cert = 0; @@ -950,11 +950,6 @@ int amdgpu_virt_init_critical_region(struct amdgpu_device *adev) if (adev->virt.req_init_data_ver != GPU_CRIT_REGION_V2) return 0; - if (init_hdr_offset < 0) { - dev_err(adev->dev, "Invalid init header offset\n"); - return -EINVAL; - } - vram_size = RREG32(mmRCC_CONFIG_MEMSIZE); if (!vram_size || vram_size == 
U32_MAX) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 76248a0276ef..3f5712fc7216 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -139,6 +139,20 @@ static void amdgpu_vm_assert_locked(struct amdgpu_vm *vm) } /** + * amdgpu_vm_is_bo_always_valid - check if the BO is VM always valid + * + * @vm: VM to test against. + * @bo: BO to be tested. + * + * Returns true if the BO shares the dma_resv object with the root PD and is + * always guaranteed to be valid inside the VM. + */ +bool amdgpu_vm_is_bo_always_valid(struct amdgpu_vm *vm, struct amdgpu_bo *bo) +{ + return bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv; +} + +/** * amdgpu_vm_bo_evicted - vm_bo is evicted * * @vm_bo: vm_bo which is evicted @@ -153,10 +167,12 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo) vm_bo->moved = true; amdgpu_vm_assert_locked(vm); + spin_lock(&vm_bo->vm->status_lock); if (bo->tbo.type == ttm_bo_type_kernel) list_move(&vm_bo->vm_status, &vm->evicted); else list_move_tail(&vm_bo->vm_status, &vm->evicted); + spin_unlock(&vm_bo->vm->status_lock); } /** * amdgpu_vm_bo_moved - vm_bo is moved @@ -169,7 +185,9 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo) static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo) { amdgpu_vm_assert_locked(vm_bo->vm); + spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->moved); + spin_unlock(&vm_bo->vm->status_lock); } /** @@ -183,7 +201,9 @@ static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo) static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo) { amdgpu_vm_assert_locked(vm_bo->vm); + spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->idle); + spin_unlock(&vm_bo->vm->status_lock); vm_bo->moved = false; } @@ -197,9 +217,9 @@ static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo) { - spin_lock(&vm_bo->vm->invalidated_lock); + spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated); - spin_unlock(&vm_bo->vm->invalidated_lock); + spin_unlock(&vm_bo->vm->status_lock); } /** @@ -212,9 +232,10 @@ static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo) { - amdgpu_vm_assert_locked(vm_bo->vm); vm_bo->moved = true; + spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->evicted_user); + spin_unlock(&vm_bo->vm->status_lock); } /** @@ -228,10 +249,13 @@ static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo) static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo) { amdgpu_vm_assert_locked(vm_bo->vm); - if (vm_bo->bo->parent) + if (vm_bo->bo->parent) { + spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->relocated); - else + spin_unlock(&vm_bo->vm->status_lock); + } else { amdgpu_vm_bo_idle(vm_bo); + } } /** @@ -245,7 +269,9 @@ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo) static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo) { amdgpu_vm_assert_locked(vm_bo->vm); + spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->done); + spin_unlock(&vm_bo->vm->status_lock); } /** @@ -259,13 +285,13 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm) { struct amdgpu_vm_bo_base *vm_bo, *tmp; - 
spin_lock(&vm->invalidated_lock); + amdgpu_vm_assert_locked(vm); + + spin_lock(&vm->status_lock); list_splice_init(&vm->done, &vm->invalidated); list_for_each_entry(vm_bo, &vm->invalidated, vm_status) vm_bo->moved = true; - spin_unlock(&vm->invalidated_lock); - amdgpu_vm_assert_locked(vm); list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) { struct amdgpu_bo *bo = vm_bo->bo; @@ -275,13 +301,14 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm) else if (bo->parent) list_move(&vm_bo->vm_status, &vm_bo->vm->relocated); } + spin_unlock(&vm->status_lock); } /** * amdgpu_vm_update_shared - helper to update shared memory stat * @base: base structure for tracking BO usage in a VM * - * Takes the vm stats_lock and updates the shared memory stat. If the basic + * Takes the vm status_lock and updates the shared memory stat. If the basic * stat changed (e.g. buffer was moved) amdgpu_vm_update_stats needs to be called * as well. */ @@ -294,7 +321,7 @@ static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base) bool shared; dma_resv_assert_held(bo->tbo.base.resv); - spin_lock(&vm->stats_lock); + spin_lock(&vm->status_lock); shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base); if (base->shared != shared) { base->shared = shared; @@ -306,7 +333,7 @@ static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base) vm->stats[bo_memtype].drm.private += size; } } - spin_unlock(&vm->stats_lock); + spin_unlock(&vm->status_lock); } /** @@ -331,11 +358,11 @@ void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo) * be bo->tbo.resource * @sign: if we should add (+1) or subtract (-1) from the stat * - * Caller needs to have the vm stats_lock held. Useful for when multiple updates + * Caller needs to have the vm status_lock held. Useful for when multiple updates * need to happen at the same time.
*/ static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base *base, - struct ttm_resource *res, int sign) + struct ttm_resource *res, int sign) { struct amdgpu_vm *vm = base->vm; struct amdgpu_bo *bo = base->bo; @@ -359,8 +386,7 @@ static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base *base, */ if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) vm->stats[res_memtype].drm.purgeable += size; - if (!(bo->preferred_domains & - amdgpu_mem_type_to_domain(res_memtype))) + if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(res_memtype))) vm->stats[bo_memtype].evicted += size; } } @@ -379,9 +405,9 @@ void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base, { struct amdgpu_vm *vm = base->vm; - spin_lock(&vm->stats_lock); + spin_lock(&vm->status_lock); amdgpu_vm_update_stats_locked(base, res, sign); - spin_unlock(&vm->stats_lock); + spin_unlock(&vm->status_lock); } /** @@ -407,10 +433,10 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, base->next = bo->vm_bo; bo->vm_bo = base; - spin_lock(&vm->stats_lock); + spin_lock(&vm->status_lock); base->shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base); amdgpu_vm_update_stats_locked(base, bo->tbo.resource, +1); - spin_unlock(&vm->stats_lock); + spin_unlock(&vm->status_lock); if (!amdgpu_vm_is_bo_always_valid(vm, bo)) return; @@ -469,25 +495,25 @@ int amdgpu_vm_lock_done_list(struct amdgpu_vm *vm, struct drm_exec *exec, int ret; /* We can only trust prev->next while holding the lock */ - spin_lock(&vm->invalidated_lock); + spin_lock(&vm->status_lock); while (!list_is_head(prev->next, &vm->done)) { bo_va = list_entry(prev->next, typeof(*bo_va), base.vm_status); bo = bo_va->base.bo; if (bo) { amdgpu_bo_ref(bo); - spin_unlock(&vm->invalidated_lock); + spin_unlock(&vm->status_lock); ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 1); amdgpu_bo_unref(&bo); if (unlikely(ret)) return ret; - spin_lock(&vm->invalidated_lock); + spin_lock(&vm->status_lock); } prev = prev->next; } - spin_unlock(&vm->invalidated_lock); + spin_unlock(&vm->status_lock); return 0; } @@ -583,7 +609,7 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, void *param) { uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm); - struct amdgpu_vm_bo_base *bo_base, *tmp; + struct amdgpu_vm_bo_base *bo_base; struct amdgpu_bo *bo; int r; @@ -596,7 +622,13 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, return r; } - list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) { + spin_lock(&vm->status_lock); + while (!list_empty(&vm->evicted)) { + bo_base = list_first_entry(&vm->evicted, + struct amdgpu_vm_bo_base, + vm_status); + spin_unlock(&vm->status_lock); + bo = bo_base->bo; r = validate(param, bo); @@ -609,21 +641,26 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->update_funcs->map_table(to_amdgpu_bo_vm(bo)); amdgpu_vm_bo_relocated(bo_base); } + spin_lock(&vm->status_lock); } + while (ticket && !list_empty(&vm->evicted_user)) { + bo_base = list_first_entry(&vm->evicted_user, + struct amdgpu_vm_bo_base, + vm_status); + spin_unlock(&vm->status_lock); - if (ticket) { - list_for_each_entry_safe(bo_base, tmp, &vm->evicted_user, - vm_status) { - bo = bo_base->bo; - dma_resv_assert_held(bo->tbo.base.resv); + bo = bo_base->bo; + dma_resv_assert_held(bo->tbo.base.resv); - r = validate(param, bo); - if (r) - return r; + r = validate(param, bo); + if (r) + return r; - amdgpu_vm_bo_invalidated(bo_base); - } + amdgpu_vm_bo_invalidated(bo_base); + + 
spin_lock(&vm->status_lock); } + spin_unlock(&vm->status_lock); amdgpu_vm_eviction_lock(vm); vm->evicting = false; @@ -652,7 +689,9 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) ret = !vm->evicting; amdgpu_vm_eviction_unlock(vm); + spin_lock(&vm->status_lock); ret &= list_empty(&vm->evicted); + spin_unlock(&vm->status_lock); spin_lock(&vm->immediate.lock); ret &= !vm->immediate.stopped; @@ -946,13 +985,18 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, struct amdgpu_vm *vm, bool immediate) { struct amdgpu_vm_update_params params; - struct amdgpu_vm_bo_base *entry, *tmp; + struct amdgpu_vm_bo_base *entry; bool flush_tlb_needed = false; + LIST_HEAD(relocated); int r, idx; amdgpu_vm_assert_locked(vm); - if (list_empty(&vm->relocated)) + spin_lock(&vm->status_lock); + list_splice_init(&vm->relocated, &relocated); + spin_unlock(&vm->status_lock); + + if (list_empty(&relocated)) return 0; if (!drm_dev_enter(adev_to_drm(adev), &idx)) @@ -968,7 +1012,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, if (r) goto error; - list_for_each_entry(entry, &vm->relocated, vm_status) { + list_for_each_entry(entry, &relocated, vm_status) { /* vm_flush_needed after updating moved PDEs */ flush_tlb_needed |= entry->moved; @@ -984,7 +1028,9 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, if (flush_tlb_needed) atomic64_inc(&vm->tlb_seq); - list_for_each_entry_safe(entry, tmp, &vm->relocated, vm_status) { + while (!list_empty(&relocated)) { + entry = list_first_entry(&relocated, struct amdgpu_vm_bo_base, + vm_status); amdgpu_vm_bo_idle(entry); } @@ -1041,7 +1087,10 @@ amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params, } /* Prepare a TLB flush fence to be attached to PTs */ - if (!params->unlocked) { + /* The check for need_tlb_fence should be dropped once we + * sort out the issues with KIQ/MES TLB invalidation timeouts. 
+ */ + if (!params->unlocked && vm->need_tlb_fence) { amdgpu_vm_tlb_fence_create(params->adev, vm, fence); /* Makes sure no PD/PT is freed before the flush */ @@ -1211,9 +1260,9 @@ error_free: void amdgpu_vm_get_memory(struct amdgpu_vm *vm, struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM]) { - spin_lock(&vm->stats_lock); + spin_lock(&vm->status_lock); memcpy(stats, vm->stats, sizeof(*stats) * __AMDGPU_PL_NUM); - spin_unlock(&vm->stats_lock); + spin_unlock(&vm->status_lock); } /** @@ -1580,24 +1629,29 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket) { - struct amdgpu_bo_va *bo_va, *tmp; + struct amdgpu_bo_va *bo_va; struct dma_resv *resv; bool clear, unlock; int r; - list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) { + spin_lock(&vm->status_lock); + while (!list_empty(&vm->moved)) { + bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va, + base.vm_status); + spin_unlock(&vm->status_lock); + /* Per VM BOs never need to be cleared in the page tables */ r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) return r; + spin_lock(&vm->status_lock); } - spin_lock(&vm->invalidated_lock); while (!list_empty(&vm->invalidated)) { bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, base.vm_status); resv = bo_va->base.bo->tbo.base.resv; - spin_unlock(&vm->invalidated_lock); + spin_unlock(&vm->status_lock); /* Try to reserve the BO to avoid clearing its ptes */ if (!adev->debug_vm && dma_resv_trylock(resv)) { @@ -1629,9 +1683,9 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, bo_va->base.bo->tbo.resource->mem_type == TTM_PL_SYSTEM)) amdgpu_vm_bo_evicted_user(&bo_va->base); - spin_lock(&vm->invalidated_lock); + spin_lock(&vm->status_lock); } - spin_unlock(&vm->invalidated_lock); + spin_unlock(&vm->status_lock); return 0; } @@ -2174,9 +2228,9 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev, } } - spin_lock(&vm->invalidated_lock); + spin_lock(&vm->status_lock); list_del(&bo_va->base.vm_status); - spin_unlock(&vm->invalidated_lock); + spin_unlock(&vm->status_lock); list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { list_del(&mapping->list); @@ -2284,10 +2338,10 @@ void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem, for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { struct amdgpu_vm *vm = bo_base->vm; - spin_lock(&vm->stats_lock); + spin_lock(&vm->status_lock); amdgpu_vm_update_stats_locked(bo_base, bo->tbo.resource, -1); amdgpu_vm_update_stats_locked(bo_base, new_mem, +1); - spin_unlock(&vm->stats_lock); + spin_unlock(&vm->status_lock); } amdgpu_vm_bo_invalidate(bo, evicted); @@ -2556,12 +2610,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, INIT_LIST_HEAD(&vm->relocated); INIT_LIST_HEAD(&vm->moved); INIT_LIST_HEAD(&vm->idle); - spin_lock_init(&vm->invalidated_lock); INIT_LIST_HEAD(&vm->invalidated); + spin_lock_init(&vm->status_lock); INIT_LIST_HEAD(&vm->freed); INIT_LIST_HEAD(&vm->done); INIT_KFIFO(vm->faults); - spin_lock_init(&vm->stats_lock); r = amdgpu_vm_init_entities(adev, vm); if (r) @@ -2570,6 +2623,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, ttm_lru_bulk_move_init(&vm->lru_bulk_move); vm->is_compute_context = false; + vm->need_tlb_fence = amdgpu_userq_enabled(&adev->ddev); vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_GFX); @@ -2707,6 +2761,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) dma_fence_put(vm->last_update);
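The amdgpu_vm.c hunks above repeatedly convert list_for_each_entry_safe() walks into a drain loop that takes the first element under vm->status_lock, drops the lock for the (possibly sleeping) per-entry work, and retakes it before testing the list again. A minimal sketch of that drain pattern follows; my_entry and process_one() are illustrative stand-ins, not the driver's actual types or helpers:

	/*
	 * Sketch only, assuming hypothetical names: my_entry and
	 * process_one() stand in for the amdgpu originals.
	 */
	static int drain_status_list(struct amdgpu_vm *vm)
	{
		struct my_entry *e;
		int r;

		spin_lock(&vm->status_lock);
		while (!list_empty(&vm->moved)) {
			e = list_first_entry(&vm->moved, struct my_entry,
					     vm_status);

			/* The per-entry work may sleep, so run it unlocked. */
			spin_unlock(&vm->status_lock);
			r = process_one(e);	/* must move e off vm->moved */
			if (r)
				return r;
			spin_lock(&vm->status_lock);
		}
		spin_unlock(&vm->status_lock);
		return 0;
	}

list_first_entry() is re-read on every pass because any cursor held across the unlocked section could be moved or freed by a concurrent state change; forward progress relies on the per-entry work moving the entry onto another status list.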
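Where nothing needs to sleep until after the list is detached, amdgpu_vm_update_pdes() above uses the cheaper splice variant instead: move the whole list onto a stack-local head under the lock, then walk the private copy with no locking at all. A sketch of that variant, under the same naming assumptions:

	/* Sketch only: my_entry and prepare_update() are hypothetical. */
	static void drain_by_splice(struct amdgpu_vm *vm)
	{
		struct my_entry *e;
		LIST_HEAD(relocated);

		spin_lock(&vm->status_lock);
		list_splice_init(&vm->relocated, &relocated);
		spin_unlock(&vm->status_lock);

		/* Private list: no other thread can reach it now. */
		list_for_each_entry(e, &relocated, vm_status)
			prepare_update(e);
	}

The trade-off is visibility: while entries sit on the local list they are on no public status list at all, which is acceptable here because they are handed back via amdgpu_vm_bo_idle() once the page-directory update completes.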
vm->last_update = dma_fence_get_stub(); vm->is_compute_context = true; + vm->need_tlb_fence = true; unreserve_bo: amdgpu_bo_unreserve(vm->root.bo); @@ -3029,6 +3084,7 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m) amdgpu_vm_assert_locked(vm); + spin_lock(&vm->status_lock); seq_puts(m, "\tIdle BOs:\n"); list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) { if (!bo_va->base.bo) @@ -3066,13 +3122,11 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m) id = 0; seq_puts(m, "\tInvalidated BOs:\n"); - spin_lock(&vm->invalidated_lock); list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) { if (!bo_va->base.bo) continue; total_invalidated += amdgpu_bo_print_info(id++, bo_va->base.bo, m); } - spin_unlock(&vm->invalidated_lock); total_invalidated_objs = id; id = 0; @@ -3082,6 +3136,7 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m) continue; total_done += amdgpu_bo_print_info(id++, bo_va->base.bo, m); } + spin_unlock(&vm->status_lock); total_done_objs = id; seq_printf(m, "\tTotal idle size: %12lld\tobjs:\t%d\n", total_idle, @@ -3156,20 +3211,6 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev, xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); } -/** - * amdgpu_vm_is_bo_always_valid - check if the BO is VM always valid - * - * @vm: VM to test against. - * @bo: BO to be tested. - * - * Returns true if the BO shares the dma_resv object with the root PD and is - * always guaranteed to be valid inside the VM. - */ -bool amdgpu_vm_is_bo_always_valid(struct amdgpu_vm *vm, struct amdgpu_bo *bo) -{ - return bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv; -} - void amdgpu_vm_print_task_info(struct amdgpu_device *adev, struct amdgpu_task_info *task_info) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 87b0617d4661..641fe91b4f03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -205,11 +205,11 @@ struct amdgpu_vm_bo_base { /* protected by bo being reserved */ struct amdgpu_vm_bo_base *next; - /* protected by vm reservation and invalidated_lock */ + /* protected by vm status_lock */ struct list_head vm_status; /* if the bo is counted as shared in mem stats - * protected by vm BO being reserved */ + * protected by vm status_lock */ bool shared; /* protected by the BO being reserved */ @@ -345,8 +345,10 @@ struct amdgpu_vm { bool evicting; unsigned int saved_flags; - /* Memory statistics for this vm, protected by stats_lock */ - spinlock_t stats_lock; + /* Lock to protect vm_bo add/del/move on all lists of vm */ + spinlock_t status_lock; + + /* Memory statistics for this vm, protected by status_lock */ struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM]; /* @@ -354,8 +356,6 @@ struct amdgpu_vm { * PDs, PTs or per VM BOs. The state transits are: * * evicted -> relocated (PDs, PTs) or moved (per VM BOs) -> idle - * - * Lists are protected by the root PD dma_resv lock. */ /* Per-VM and PT BOs who needs a validation */ @@ -376,10 +376,7 @@ struct amdgpu_vm { * state transits are: * * evicted_user or invalidated -> done - * - * Lists are protected by the invalidated_lock. 
*/ - spinlock_t invalidated_lock; /* BOs for user mode queues that need a validation */ struct list_head evicted_user; @@ -444,6 +441,8 @@ struct amdgpu_vm { struct ttm_lru_bulk_move lru_bulk_move; /* Flag to indicate if VM is used for compute */ bool is_compute_context; + /* Flag to indicate if VM needs a TLB fence (KFD or KGD) */ + bool need_tlb_fence; /* Memory partition number, -1 means any partition */ int8_t mem_id; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index 7bdd664f0770..31a437ce9570 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -544,7 +544,9 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) entry->bo->vm_bo = NULL; ttm_bo_set_bulk_move(&entry->bo->tbo, NULL); + spin_lock(&entry->vm->status_lock); list_del(&entry->vm_status); + spin_unlock(&entry->vm->status_lock); amdgpu_bo_unref(&entry->bo); } @@ -588,6 +590,7 @@ static void amdgpu_vm_pt_add_list(struct amdgpu_vm_update_params *params, struct amdgpu_vm_pt_cursor seek; struct amdgpu_vm_bo_base *entry; + spin_lock(¶ms->vm->status_lock); for_each_amdgpu_vm_pt_dfs_safe(params->adev, params->vm, cursor, seek, entry) { if (entry && entry->bo) list_move(&entry->vm_status, ¶ms->tlb_flush_waitlist); @@ -595,6 +598,7 @@ static void amdgpu_vm_pt_add_list(struct amdgpu_vm_update_params *params, /* enter start node now */ list_move(&cursor->entry->vm_status, ¶ms->tlb_flush_waitlist); + spin_unlock(¶ms->vm->status_lock); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index e36c287b3289..e63d05c477a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -472,12 +472,12 @@ static ssize_t amdgpu_xgmi_show_num_hops(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; - int i; + int i, offset = 0; for (i = 0; i < top->num_nodes; i++) - sprintf(buf + 3 * i, "%02x ", top->nodes[i].num_hops); + offset += sysfs_emit_at(buf, offset, "%02x ", top->nodes[i].num_hops); - return sysfs_emit(buf, "%s\n", buf); + return offset + sysfs_emit_at(buf, offset, "\n"); } static ssize_t amdgpu_xgmi_show_num_links(struct device *dev, @@ -487,12 +487,12 @@ static ssize_t amdgpu_xgmi_show_num_links(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; - int i; + int i, offset = 0; for (i = 0; i < top->num_nodes; i++) - sprintf(buf + 3 * i, "%02x ", top->nodes[i].num_links); + offset += sysfs_emit_at(buf, offset, "%02x ", top->nodes[i].num_links); - return sysfs_emit(buf, "%s\n", buf); + return offset + sysfs_emit_at(buf, offset, "\n"); } static ssize_t amdgpu_xgmi_show_connected_port_num(struct device *dev, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c index 4e02b62cdbb3..db49582a211f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c @@ -45,6 +45,7 @@ #include "v12_structs.h" #include "gfx_v12_1.h" #include "mes_v12_1.h" +#include "amdgpu_ras_mgr.h" #define GFX12_MEC_HPD_SIZE 2048 #define NUM_SIMD_PER_CU_GFX12_1 4 @@ -136,7 +137,6 @@ static void gfx_v12_1_kiq_map_queues(struct amdgpu_ring *kiq_ring, PACKET3_MAP_QUEUES_PIPE(ring->pipe) | PACKET3_MAP_QUEUES_ME((me)) | 
PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ - PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) | PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); @@ -245,8 +245,7 @@ static void gfx_v12_1_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, /* memory (1) or register (0) */ (WAIT_REG_MEM_MEM_SPACE(mem_space) | WAIT_REG_MEM_OPERATION(opt) | /* wait */ - WAIT_REG_MEM_FUNCTION(3) | /* equal */ - WAIT_REG_MEM_ENGINE(eng_sel))); + WAIT_REG_MEM_FUNCTION(3))); /* equal */ if (mem_space) BUG_ON(addr0 & 0x3); /* Dword align */ @@ -1186,6 +1185,13 @@ static int gfx_v12_1_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; + /* RLC POISON Error */ + r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_RLC, + GFX_12_1_0__SRCID__RLC_POISON_INTERRUPT, + &adev->gfx.rlc_poison_irq); + if (r) + return r; + adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; r = gfx_v12_1_rlc_init(adev); @@ -2631,24 +2637,6 @@ static void gfx_v12_1_xcc_disable_gpa_mode(struct amdgpu_device *adev, WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPG_PSP_DEBUG, data); } -static void gfx_v12_1_xcc_setup_tcp_thrashing_ctrl(struct amdgpu_device *adev, - int xcc_id) -{ - uint32_t val; - - /* Set the TCP UTCL0 register to enable atomics */ - val = RREG32_SOC15(GC, GET_INST(GC, xcc_id), - regTCP_UTCL0_THRASHING_CTRL); - val = REG_SET_FIELD(val, TCP_UTCL0_THRASHING_CTRL, THRASHING_EN, 0x2); - val = REG_SET_FIELD(val, TCP_UTCL0_THRASHING_CTRL, - RETRY_FRAGMENT_THRESHOLD_UP_EN, 0x1); - val = REG_SET_FIELD(val, TCP_UTCL0_THRASHING_CTRL, - RETRY_FRAGMENT_THRESHOLD_DOWN_EN, 0x1); - - WREG32_SOC15(GC, GET_INST(GC, xcc_id), - regTCP_UTCL0_THRASHING_CTRL, val); -} - static void gfx_v12_1_xcc_enable_atomics(struct amdgpu_device *adev, int xcc_id) { @@ -2697,7 +2685,6 @@ static void gfx_v12_1_init_golden_registers(struct amdgpu_device *adev) for (i = 0; i < NUM_XCC(adev->gfx.xcc_mask); i++) { gfx_v12_1_xcc_disable_burst(adev, i); gfx_v12_1_xcc_enable_atomics(adev, i); - gfx_v12_1_xcc_setup_tcp_thrashing_ctrl(adev, i); gfx_v12_1_xcc_disable_early_write_ack(adev, i); gfx_v12_1_xcc_disable_tcp_spill_cache(adev, i); } @@ -3431,11 +3418,10 @@ static void gfx_v12_1_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, static void gfx_v12_1_ring_emit_pipeline_sync(struct amdgpu_ring *ring) { - int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); uint32_t seq = ring->fence_drv.sync_seq; uint64_t addr = ring->fence_drv.gpu_addr; - gfx_v12_1_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr), + gfx_v12_1_wait_reg_mem(ring, 0, 1, 0, lower_32_bits(addr), upper_32_bits(addr), seq, 0xffffffff, 4); } @@ -3474,8 +3460,7 @@ static void gfx_v12_1_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, /* write fence seq to the "addr" */ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | - WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); + amdgpu_ring_write(ring, (WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); @@ -3483,8 +3468,7 @@ static void gfx_v12_1_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, if (flags & AMDGPU_FENCE_FLAG_INT) { /* set register to trigger INT */ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | - 
WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); + amdgpu_ring_write(ring, (WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regCPC_INT_STATUS)); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ @@ -3543,9 +3527,7 @@ static void gfx_v12_1_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, uint32_t reg0, uint32_t reg1, uint32_t ref, uint32_t mask) { - int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); - - gfx_v12_1_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, + gfx_v12_1_wait_reg_mem(ring, 0, 0, 1, reg0, reg1, ref, mask, 0x20); } @@ -3804,6 +3786,35 @@ static int gfx_v12_1_priv_inst_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v12_1_rlc_poison_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + uint32_t rlc_fed_status = 0; + uint32_t ras_blk = RAS_BLOCK_ID__GFX; + struct ras_ih_info ih_info = {0}; + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) + rlc_fed_status |= RREG32(SOC15_REG_OFFSET(GC, + GET_INST(GC, i), regRLC_RLCS_FED_STATUS)); + + if (!rlc_fed_status) + return 0; + + if (REG_GET_FIELD(rlc_fed_status, RLC_RLCS_FED_STATUS, SDMA0_FED_ERR) || + REG_GET_FIELD(rlc_fed_status, RLC_RLCS_FED_STATUS, SDMA1_FED_ERR)) + ras_blk = RAS_BLOCK_ID__SDMA; + + dev_warn(adev->dev, "RLC %d FED IRQ\n", ras_blk); + + ih_info.block = ras_blk; + ih_info.reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; + amdgpu_ras_mgr_dispatch_interrupt(adev, &ih_info); + return 0; +} + static void gfx_v12_1_emit_mem_sync(struct amdgpu_ring *ring) { const unsigned int gcr_cntl = @@ -3928,6 +3939,10 @@ static const struct amdgpu_irq_src_funcs gfx_v12_1_priv_inst_irq_funcs = { .process = gfx_v12_1_priv_inst_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v12_1_rlc_poison_irq_funcs = { + .process = gfx_v12_1_rlc_poison_irq, +}; + static void gfx_v12_1_set_irq_funcs(struct amdgpu_device *adev) { adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; @@ -3938,6 +3953,9 @@ static void gfx_v12_1_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v12_1_priv_inst_irq_funcs; + + adev->gfx.rlc_poison_irq.num_types = 1; + adev->gfx.rlc_poison_irq.funcs = &gfx_v12_1_rlc_poison_irq_funcs; } static void gfx_v12_1_set_imu_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1_pkt.h b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1_pkt.h index 9a5c82c8db53..21a07530c64d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1_pkt.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1_pkt.h @@ -53,16 +53,10 @@ /* Packet 3 types */ #define PACKET3_NOP 0x10 -#define PACKET3_SET_BASE 0x11 -#define PACKET3_BASE_INDEX(x) ((x) << 0) -#define CE_PARTITION_BASE 3 #define PACKET3_CLEAR_STATE 0x12 #define PACKET3_INDEX_BUFFER_SIZE 0x13 #define PACKET3_DISPATCH_DIRECT 0x15 #define PACKET3_DISPATCH_INDIRECT 0x16 -#define PACKET3_INDIRECT_BUFFER_END 0x17 -#define PACKET3_INDIRECT_BUFFER_CNST_END 0x19 -#define PACKET3_ATOMIC_GDS 0x1D #define PACKET3_ATOMIC_MEM 0x1E #define PACKET3_OCCLUSION_QUERY 0x1F #define PACKET3_SET_PREDICATION 0x20 @@ -74,47 +68,42 @@ #define PACKET3_INDEX_BASE 0x26 #define PACKET3_DRAW_INDEX_2 0x27 #define PACKET3_CONTEXT_CONTROL 0x28 -#define PACKET3_INDEX_TYPE 0x2A #define PACKET3_DRAW_INDIRECT_MULTI 0x2C #define PACKET3_DRAW_INDEX_AUTO 0x2D #define PACKET3_NUM_INSTANCES 0x2F #define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30 -#define PACKET3_INDIRECT_BUFFER_PRIV 0x32 -#define 
PACKET3_INDIRECT_BUFFER_CNST 0x33 -#define PACKET3_COND_INDIRECT_BUFFER_CNST 0x33 -#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34 #define PACKET3_DRAW_INDEX_OFFSET_2 0x35 -#define PACKET3_DRAW_PREAMBLE 0x36 #define PACKET3_WRITE_DATA 0x37 -#define WRITE_DATA_DST_SEL(x) ((x) << 8) +#define WRITE_DATA_DST_SEL(x) (((x) & 0xf) << 8) /* 0 - register - * 1 - memory (sync - via GRBM) - * 2 - gl2 - * 3 - gds + * 1 - reserved + * 2 - tc_l2 + * 3 - reserved * 4 - reserved - * 5 - memory (async - direct) + * 5 - memory (same as tc_l2) + * 6 - memory_mapped_adc_persistent_state */ -#define WR_ONE_ADDR (1 << 16) +#define WRITE_DATA_SCOPE(x) (((x) & 0x3) << 12) +#define WRITE_DATA_MODE(x) (((x) & 0x3) << 14) + /* 0 - local xcd + * 1 - remote/local aid + * 2 - remote xcd + * 3 - remote mid + */ +#define WRITE_DATA_ADDR_INCR (1 << 16) +#define WRITE_DATA_MID_DIE_ID(x) (((x) & 0x3) << 18) #define WR_CONFIRM (1 << 20) -#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25) - /* 0 - LRU - * 1 - Stream - */ -#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30) - /* 0 - me - * 1 - pfp - * 2 - ce +#define WRITE_DATA_XCD_DIE_ID(x) (((x) & 0xf) << 21) +#define WRITE_DATA_TEMPORAL(x) (((x) & 0x3) << 25) + /* 0 - rt + * 1 - nt + * 2 - ht + * 3 - lu */ +#define WRITE_DATA_COOP_DISABLE (1 << 27) #define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38 -#define PACKET3_MEM_SEMAPHORE 0x39 -# define PACKET3_SEM_USE_MAILBOX (0x1 << 16) -# define PACKET3_SEM_SEL_SIGNAL_TYPE (0x1 << 20) /* 0 = increment, 1 = write 1 */ -# define PACKET3_SEM_SEL_SIGNAL (0x6 << 29) -# define PACKET3_SEM_SEL_WAIT (0x7 << 29) -#define PACKET3_DRAW_INDEX_MULTI_INST 0x3A -#define PACKET3_COPY_DW 0x3B #define PACKET3_WAIT_REG_MEM 0x3C -#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0) +#define WAIT_REG_MEM_FUNCTION(x) (((x) & 0x7) << 0) /* 0 - always * 1 - < * 2 - <= @@ -123,33 +112,66 @@ * 5 - >= * 6 - > */ -#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4) +#define WAIT_REG_MEM_MEM_SPACE(x) (((x) & 0x3) << 4) /* 0 - reg * 1 - mem */ -#define WAIT_REG_MEM_OPERATION(x) ((x) << 6) +#define WAIT_REG_MEM_OPERATION(x) (((x) & 0x3) << 6) /* 0 - wait_reg_mem * 1 - wr_wait_wr_reg */ -#define WAIT_REG_MEM_ENGINE(x) ((x) << 8) - /* 0 - me - * 1 - pfp +#define WAIT_REG_MEM_MODE(x) (((x) & 0x3) << 10) + /* 0 - local xcd + * 1 - remote/local aid + * 2 - remote xcd + * 3 - remote mid + */ +#define WAIT_REG_MEM_MID_DIE_ID(x) (((x) & 0x3) << 12) +#define WAIT_REG_MEM_XCD_DIE_ID(x) (((x) & 0xf) << 14) +#define WAIT_REG_MEM_MES_INTR_PIPE(x) (((x) & 0x3) << 22) +#define WAIT_REG_MEM_MES_ACTION(x) (((x) & 0x1) << 24) +#define WAIT_REG_MEM_TEMPORAL(x) (((x) & 0x3) << 25) + /* 0 - rt + * 1 - nt + * 2 - ht + * 3 - lu */ #define PACKET3_INDIRECT_BUFFER 0x3F #define INDIRECT_BUFFER_VALID (1 << 23) -#define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28) - /* 0 - LRU - * 1 - Stream - * 2 - Bypass +#define INDIRECT_BUFFER_TEMPORAL(x) ((x) << 28) + /* 0 - rt + * 1 - nt + * 2 - ht + * 3 - lu */ -#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21) -#define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30) #define PACKET3_COND_INDIRECT_BUFFER 0x3F #define PACKET3_COPY_DATA 0x40 -#define PACKET3_CP_DMA 0x41 +#define COPY_DATA_SRC_SEL(x) (((x) & 0xf) << 0) +#define COPY_DATA_DST_SEL(x) (((x) & 0xf) << 8) +#define COPY_DATA_SRC_SCOPE(x) (((x) & 0x3) << 4) +#define COPY_DATA_DST_SCOPE(x) (((x) & 0x3) << 27) +#define COPY_DATA_MODE(x) (((x) & 0x3) << 6) + /* 0 - local xcd + * 1 - remote/local aid + * 2 - remote xcd + * 3 - remote mid + */ +#define COPY_DATA_SRC_TEMPORAL(x) (((x) & 0x3) << 13) +#define COPY_DATA_DST_TEMPORAL(x) (((x) &
0x3) << 25) + /* 0 - rt + * 1 - nt + * 2 - ht + * 3 - lu + */ +#define COPY_DATA_COUNT_SEL (1 << 16) +#define COPY_DATA_SRC_DST_REMOTE_MODE(x) (((x)) & 0x1 << 16) + /* 0 - src remote + * 1 - dst remote + */ +#define COPY_DATA_MID_DIE_ID(x) (((x) & 0x3) << 18) +#define COPY_DATA_XCD_DIE_ID(x) (((x) & 0xf) << 21) +#define COPY_DATA_PQ_EXE_STATUS (1 << 27) #define PACKET3_PFP_SYNC_ME 0x42 -#define PACKET3_SURFACE_SYNC 0x43 -#define PACKET3_ME_INITIALIZE 0x44 #define PACKET3_COND_WRITE 0x45 #define PACKET3_EVENT_WRITE 0x46 #define EVENT_TYPE(x) ((x) << 0) @@ -160,8 +182,6 @@ * 3 - SAMPLE_STREAMOUTSTAT* * 4 - *S_PARTIAL_FLUSH */ -#define PACKET3_EVENT_WRITE_EOP 0x47 -#define PACKET3_EVENT_WRITE_EOS 0x48 #define PACKET3_RELEASE_MEM 0x49 #define PACKET3_RELEASE_MEM_EVENT_TYPE(x) ((x) << 0) #define PACKET3_RELEASE_MEM_EVENT_INDEX(x) ((x) << 8) @@ -180,27 +200,30 @@ * 2 - temporal__release_mem__ht * 3 - temporal__release_mem__lu */ -#define PACKET3_RELEASE_MEM_EXECUTE (1 << 28) +#define PACKET3_RELEASE_MEM_PQ_EXE_STATUS (1 << 28) +#define PACKET3_RELEASE_MEM_GCR_GLK_INV (1 << 30) -#define PACKET3_RELEASE_MEM_DATA_SEL(x) ((x) << 29) - /* 0 - discard - * 1 - send low 32bit data - * 2 - send 64bit data - * 3 - send 64bit GPU counter value - * 4 - send 64bit sys counter value +#define PACKET3_RELEASE_MEM_DST_SEL(x) ((x) << 16) + /* 0 - memory controller + * 1 - TC/L2 + * 2 - register */ +#define PACKET3_RELEASE_MEM_MES_INTR_PIPE(x) ((x) << 20) +#define PACKET3_RELEASE_MEM_MES_ACTION_ID(x) ((x) << 22) #define PACKET3_RELEASE_MEM_INT_SEL(x) ((x) << 24) /* 0 - none * 1 - interrupt only (DATA_SEL = 0) * 2 - interrupt when data write is confirmed */ -#define PACKET3_RELEASE_MEM_DST_SEL(x) ((x) << 16) - /* 0 - MC - * 1 - TC/L2 +#define PACKET3_RELEASE_MEM_ADD_DOOREBLL_OFFSET(x) (1 << 28) +#define PACKET3_RELEASE_MEM_DATA_SEL(x) ((x) << 29) + /* 0 - discard + * 1 - send low 32bit data + * 2 - send 64bit data + * 3 - send 64bit GPU counter value + * 4 - send 64bit sys counter value */ - - #define PACKET3_PREAMBLE_CNTL 0x4A # define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28) # define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28) @@ -218,26 +241,29 @@ /* 0 - ME * 1 - PFP */ -# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13) - /* 0 - LRU - * 1 - Stream +# define PACKET3_DMA_DATA_SRC_TEMPORAL(x) ((x) << 13) + /* 0 - rt + * 1 - nt + * 2 - ht + * 3 - lu */ -# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20) +# define PACKET3_DMA_DATA_SRC_SCOPE(x) ((x) << 15) +# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20) /* 0 - DST_ADDR using DAS * 1 - GDS * 3 - DST_ADDR using L2 */ -# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25) +# define PACKET3_DMA_DATA_DST_TEMPORAL(x) ((x) << 25) /* 0 - LRU * 1 - Stream */ -# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29) +# define PACKET3_DMA_DATA_DST_SCOPE(x) ((x) << 27) +# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29) /* 0 - SRC_ADDR using SAS * 1 - GDS * 2 - DATA * 3 - SRC_ADDR using L2 */ -# define PACKET3_DMA_DATA_CP_SYNC (1 << 31) /* COMMAND */ # define PACKET3_DMA_DATA_CMD_SAS (1 << 26) /* 0 - memory @@ -247,13 +273,11 @@ /* 0 - memory * 1 - register */ -# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28) -# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29) -# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30) +# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28) +# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29) +# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30) +# define PACKET3_DMA_DATA_CMD_DIS_WC (1 << 30) #define PACKET3_CONTEXT_REG_RMW 0x51 -#define PACKET3_GFX_CNTX_UPDATE 0x52 -#define 
PACKET3_BLK_CNTX_UPDATE 0x53 -#define PACKET3_INCR_UPDT_STATE 0x55 #define PACKET3_ACQUIRE_MEM 0x58 /* 1. HEADER * 2. COHER_CNTL [30:0] @@ -307,10 +331,7 @@ * 2: REVERSE */ #define PACKET3_ACQUIRE_MEM_GCR_RANGE_IS_PA (1 << 18) -#define PACKET3_REWIND 0x59 -#define PACKET3_INTERRUPT 0x5A #define PACKET3_GEN_PDEPTE 0x5B -#define PACKET3_INDIRECT_BUFFER_PASID 0x5C #define PACKET3_PRIME_UTCL2 0x5D #define PACKET3_LOAD_UCONFIG_REG 0x5E #define PACKET3_LOAD_SH_REG 0x5F @@ -324,12 +345,6 @@ #define PACKET3_SET_CONTEXT_REG 0x69 #define PACKET3_SET_CONTEXT_REG_START 0x0000a000 #define PACKET3_SET_CONTEXT_REG_END 0x0000a400 -#define PACKET3_SET_CONTEXT_REG_INDEX 0x6A -#define PACKET3_SET_VGPR_REG_DI_MULTI 0x71 -#define PACKET3_SET_SH_REG_DI 0x72 -#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73 -#define PACKET3_SET_SH_REG_DI_MULTI 0x74 -#define PACKET3_GFX_PIPE_LOCK 0x75 #define PACKET3_SET_SH_REG 0x76 #define PACKET3_SET_SH_REG_START 0x00002c00 #define PACKET3_SET_SH_REG_END 0x00003000 @@ -339,47 +354,19 @@ #define PACKET3_SET_UCONFIG_REG_START 0x0000c000 #define PACKET3_SET_UCONFIG_REG_END 0x0000c400 #define PACKET3_SET_UCONFIG_REG_INDEX 0x7A -#define PACKET3_FORWARD_HEADER 0x7C -#define PACKET3_SCRATCH_RAM_WRITE 0x7D -#define PACKET3_SCRATCH_RAM_READ 0x7E -#define PACKET3_LOAD_CONST_RAM 0x80 -#define PACKET3_WRITE_CONST_RAM 0x81 -#define PACKET3_DUMP_CONST_RAM 0x83 -#define PACKET3_INCREMENT_CE_COUNTER 0x84 -#define PACKET3_INCREMENT_DE_COUNTER 0x85 -#define PACKET3_WAIT_ON_CE_COUNTER 0x86 -#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88 -#define PACKET3_SWITCH_BUFFER 0x8B #define PACKET3_DISPATCH_DRAW_PREAMBLE 0x8C -#define PACKET3_DISPATCH_DRAW_PREAMBLE_ACE 0x8C #define PACKET3_DISPATCH_DRAW 0x8D -#define PACKET3_DISPATCH_DRAW_ACE 0x8D -#define PACKET3_GET_LOD_STATS 0x8E -#define PACKET3_DRAW_MULTI_PREAMBLE 0x8F -#define PACKET3_FRAME_CONTROL 0x90 -# define FRAME_TMZ (1 << 0) -# define FRAME_CMD(x) ((x) << 28) - /* - * x=0: tmz_begin - * x=1: tmz_end - */ #define PACKET3_INDEX_ATTRIBUTES_INDIRECT 0x91 #define PACKET3_WAIT_REG_MEM64 0x93 -#define PACKET3_COND_PREEMPT 0x94 #define PACKET3_HDP_FLUSH 0x95 -#define PACKET3_COPY_DATA_RB 0x96 #define PACKET3_INVALIDATE_TLBS 0x98 #define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0) #define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4) #define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5) #define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29) -#define PACKET3_AQL_PACKET 0x99 #define PACKET3_DMA_DATA_FILL_MULTI 0x9A #define PACKET3_SET_SH_REG_INDEX 0x9B -#define PACKET3_DRAW_INDIRECT_COUNT_MULTI 0x9C -#define PACKET3_DRAW_INDEX_INDIRECT_COUNT_MULTI 0x9D -#define PACKET3_DUMP_CONST_RAM_OFFSET 0x9E #define PACKET3_LOAD_CONTEXT_REG_INDEX 0x9F #define PACKET3_SET_RESOURCES 0xA0 /* 1. header @@ -394,7 +381,6 @@ # define PACKET3_SET_RESOURCES_VMID_MASK(x) ((x) << 0) # define PACKET3_SET_RESOURCES_UNMAP_LATENTY(x) ((x) << 16) # define PACKET3_SET_RESOURCES_QUEUE_TYPE(x) ((x) << 29) -#define PACKET3_MAP_PROCESS 0xA1 #define PACKET3_MAP_QUEUES 0xA2 /* 1. header * 2. 
CONTROL @@ -411,11 +397,10 @@ # define PACKET3_MAP_QUEUES_PIPE(x) ((x) << 16) # define PACKET3_MAP_QUEUES_ME(x) ((x) << 18) # define PACKET3_MAP_QUEUES_QUEUE_TYPE(x) ((x) << 21) -# define PACKET3_MAP_QUEUES_ALLOC_FORMAT(x) ((x) << 24) +# define PACKET3_MAP_QUEUES_QUEUE_GROUP(x) ((x) << 24) # define PACKET3_MAP_QUEUES_ENGINE_SEL(x) ((x) << 26) # define PACKET3_MAP_QUEUES_NUM_QUEUES(x) ((x) << 29) /* CONTROL2 */ -# define PACKET3_MAP_QUEUES_CHECK_DISABLE(x) ((x) << 1) # define PACKET3_MAP_QUEUES_DOORBELL_OFFSET(x) ((x) << 2) #define PACKET3_UNMAP_QUEUES 0xA3 /* 1. header @@ -464,12 +449,6 @@ # define PACKET3_QUERY_STATUS_PASID(x) ((x) << 0) /* CONTROL2b */ # define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2) -# define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25) -#define PACKET3_RUN_LIST 0xA5 -#define PACKET3_MAP_PROCESS_VM 0xA6 -/* GFX11 */ -#define PACKET3_SET_Q_PREEMPTION_MODE 0xF0 -# define PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(x) ((x) << 0) -# define PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM (1 << 0) +# define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 28) #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.c index 4aa004ee2c4d..3544eb42dca6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.c @@ -146,71 +146,15 @@ static void gfxhub_v12_1_xcc_init_system_aperture_regs(struct amdgpu_device *ade uint32_t tmp; int i; - for_each_inst(i, xcc_mask) { - /* Program the AGP BAR */ - WREG32_SOC15_RLC(GC, GET_INST(GC, i), - regGCMC_VM_AGP_BASE_LO32, 0); - WREG32_SOC15_RLC(GC, GET_INST(GC, i), - regGCMC_VM_AGP_BASE_HI32, 0); - WREG32_SOC15_RLC(GC, GET_INST(GC, i), - regGCMC_VM_AGP_BOT_LO32, - lower_32_bits(adev->gmc.agp_start >> 24)); - WREG32_SOC15_RLC(GC, GET_INST(GC, i), - regGCMC_VM_AGP_BOT_HI32, - upper_32_bits(adev->gmc.agp_start >> 24)); - WREG32_SOC15_RLC(GC, GET_INST(GC, i), - regGCMC_VM_AGP_TOP_LO32, - lower_32_bits(adev->gmc.agp_end >> 24)); - WREG32_SOC15_RLC(GC, GET_INST(GC, i), - regGCMC_VM_AGP_TOP_HI32, - upper_32_bits(adev->gmc.agp_end >> 24)); + /* TODO: revisit whether the SRIOV guest access to these registers + * is blocked by security policy or not */ + if (amdgpu_sriov_vf(adev)) + return; - if (!amdgpu_sriov_vf(adev)) { - /* Program the system aperture low logical page number. */ - WREG32_SOC15(GC, GET_INST(GC, i), - regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR_LO32, - lower_32_bits(min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18)); - WREG32_SOC15(GC, GET_INST(GC, i), - regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR_HI32, - upper_32_bits(min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18)); - WREG32_SOC15(GC, GET_INST(GC, i), - regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32, - lower_32_bits(max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18)); - WREG32_SOC15(GC, GET_INST(GC, i), - regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32, - upper_32_bits(max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18)); - - /* Set default page address. */ - value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr); - WREG32_SOC15(GC, GET_INST(GC, i), - regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, - (u32)(value >> 12)); - WREG32_SOC15(GC, GET_INST(GC, i), - regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, - (u32)(value >> 44)); - - /* Program "protection fault".
*/ - WREG32_SOC15(GC, GET_INST(GC, i), - regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, - (u32)(adev->dummy_page_addr >> 12)); - WREG32_SOC15(GC, GET_INST(GC, i), - regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, - (u32)((u64)adev->dummy_page_addr >> 44)); - - tmp = RREG32_SOC15(GC, GET_INST(GC, i), - regGCVM_L2_PROTECTION_FAULT_CNTL2); - tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL2, - ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); - tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL2, - ENABLE_RETRY_FAULT_INTERRUPT, 0x1); - WREG32_SOC15(GC, GET_INST(GC, i), - regGCVM_L2_PROTECTION_FAULT_CNTL2, tmp); - } - - /* In the case squeezing vram into GART aperture, we don't use - * FB aperture and AGP aperture. Disable them. - */ + for_each_inst(i, xcc_mask) { if (adev->gmc.pdb0_bo) { + /* Disable agp and system aperture + * when vmid0 page table is enabled */ WREG32_SOC15(GC, GET_INST(GC, i), regGCMC_VM_FB_LOCATION_TOP_LO32, 0); WREG32_SOC15(GC, GET_INST(GC, i), @@ -225,7 +169,8 @@ static void gfxhub_v12_1_xcc_init_system_aperture_regs(struct amdgpu_device *ade WREG32_SOC15(GC, GET_INST(GC, i), regGCMC_VM_AGP_TOP_HI32, 0); WREG32_SOC15(GC, GET_INST(GC, i), - regGCMC_VM_AGP_BOT_LO32, 0xFFFFFFFF); + regGCMC_VM_AGP_BOT_LO32, + 0xFFFFFFFF); WREG32_SOC15(GC, GET_INST(GC, i), regGCMC_VM_AGP_BOT_HI32, 1); WREG32_SOC15(GC, GET_INST(GC, i), @@ -238,7 +183,69 @@ static void gfxhub_v12_1_xcc_init_system_aperture_regs(struct amdgpu_device *ade regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32, 0); WREG32_SOC15(GC, GET_INST(GC, i), regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32, 0); + } else { + /* Program the AGP BAR */ + WREG32_SOC15_RLC(GC, GET_INST(GC, i), + regGCMC_VM_AGP_BASE_LO32, 0); + WREG32_SOC15_RLC(GC, GET_INST(GC, i), + regGCMC_VM_AGP_BASE_HI32, 0); + WREG32_SOC15_RLC(GC, GET_INST(GC, i), + regGCMC_VM_AGP_BOT_LO32, + lower_32_bits(adev->gmc.agp_start >> 24)); + WREG32_SOC15_RLC(GC, GET_INST(GC, i), + regGCMC_VM_AGP_BOT_HI32, + upper_32_bits(adev->gmc.agp_start >> 24)); + WREG32_SOC15_RLC(GC, GET_INST(GC, i), + regGCMC_VM_AGP_TOP_LO32, + lower_32_bits(adev->gmc.agp_end >> 24)); + WREG32_SOC15_RLC(GC, GET_INST(GC, i), + regGCMC_VM_AGP_TOP_HI32, + upper_32_bits(adev->gmc.agp_end >> 24)); + + /* Program the system aperture low logical page number. */ + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR_LO32, + lower_32_bits(min(adev->gmc.fb_start, + adev->gmc.agp_start) >> 18)); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR_HI32, + upper_32_bits(min(adev->gmc.fb_start, + adev->gmc.agp_start) >> 18)); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32, + lower_32_bits(max(adev->gmc.fb_end, + adev->gmc.agp_end) >> 18)); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32, + upper_32_bits(max(adev->gmc.fb_end, + adev->gmc.agp_end) >> 18)); } + + /* Set default page address. */ + value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + (u32)(value >> 12)); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + (u32)(value >> 44)); + + /* Program "protection fault". 
*/ + WREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + (u32)(adev->dummy_page_addr >> 12)); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + (u32)((u64)adev->dummy_page_addr >> 44)); + + tmp = RREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_L2_PROTECTION_FAULT_CNTL2); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL2, + ENABLE_RETRY_FAULT_INTERRUPT, 0x1); + WREG32_SOC15(GC, GET_INST(GC, i), + regGCVM_L2_PROTECTION_FAULT_CNTL2, tmp); } } @@ -325,10 +332,18 @@ static void gfxhub_v12_1_xcc_init_cache_regs(struct amdgpu_device *adev, WREG32_SOC15_RLC(GC, GET_INST(GC, i), regGCVM_L2_CNTL3, tmp); tmp = regGCVM_L2_CNTL4_DEFAULT; - tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, - VMC_TAP_PDE_REQUEST_PHYSICAL, 1); - tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, - VMC_TAP_PTE_REQUEST_PHYSICAL, 1); + if (adev->gmc.xgmi.connected_to_cpu) { + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, + VMC_TAP_PDE_REQUEST_PHYSICAL, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, + VMC_TAP_PTE_REQUEST_PHYSICAL, 1); + } else { + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, + VMC_TAP_PDE_REQUEST_PHYSICAL, 0); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, + VMC_TAP_PTE_REQUEST_PHYSICAL, 0); + } + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regGCVM_L2_CNTL4, tmp); tmp = regGCVM_L2_CNTL5_DEFAULT; @@ -807,47 +822,6 @@ static void gfxhub_v12_1_init(struct amdgpu_device *adev) gfxhub_v12_1_xcc_init(adev, xcc_mask); } -static int gfxhub_v12_1_get_xgmi_info(struct amdgpu_device *adev) -{ - u32 max_num_physical_nodes; - u32 max_physical_node_id; - u32 xgmi_lfb_cntl; - u32 max_region; - u64 seg_size; - - xgmi_lfb_cntl = RREG32_SOC15(GC, GET_INST(GC, 0), - regGCMC_VM_XGMI_LFB_CNTL); - seg_size = REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, 0), - regGCMC_VM_XGMI_LFB_SIZE), - GCMC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24; - max_region = REG_GET_FIELD(xgmi_lfb_cntl, - GCMC_VM_XGMI_LFB_CNTL, - PF_MAX_REGION); - - max_num_physical_nodes = 8; - max_physical_node_id = 7; - - /* PF_MAX_REGION=0 means xgmi is disabled */ - if (max_region || adev->gmc.xgmi.connected_to_cpu) { - adev->gmc.xgmi.num_physical_nodes = max_region + 1; - - if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes) - return -EINVAL; - - adev->gmc.xgmi.physical_node_id = - REG_GET_FIELD(xgmi_lfb_cntl, - GCMC_VM_XGMI_LFB_CNTL, - PF_LFB_REGION); - - if (adev->gmc.xgmi.physical_node_id > max_physical_node_id) - return -EINVAL; - - adev->gmc.xgmi.node_segment_size = seg_size; - } - - return 0; -} - const struct amdgpu_gfxhub_funcs gfxhub_v12_1_funcs = { .get_fb_location = gfxhub_v12_1_get_fb_location, .get_mc_fb_offset = gfxhub_v12_1_get_mc_fb_offset, @@ -856,7 +830,6 @@ const struct amdgpu_gfxhub_funcs gfxhub_v12_1_funcs = { .gart_disable = gfxhub_v12_1_gart_disable, .set_fault_enable_default = gfxhub_v12_1_set_fault_enable_default, .init = gfxhub_v12_1_init, - .get_xgmi_info = gfxhub_v12_1_get_xgmi_info, }; static int gfxhub_v12_1_xcp_resume(void *handle, uint32_t inst_mask) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index da4a0cf4aad0..f1079bd8cf00 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -636,6 +636,11 @@ static int gmc_v12_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + if (adev->smuio.funcs && + adev->smuio.funcs->is_host_gpu_xgmi_supported) + 
adev->gmc.xgmi.connected_to_cpu = + adev->smuio.funcs->is_host_gpu_xgmi_supported(adev); + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(12, 1, 0): gmc_v12_1_set_gmc_funcs(adev); @@ -691,17 +696,23 @@ static void gmc_v12_0_vram_gtt_location(struct amdgpu_device *adev, base = adev->mmhub.funcs->get_fb_location(adev); - amdgpu_gmc_set_agp_default(adev, mc); - amdgpu_gmc_vram_location(adev, &adev->gmc, base); - amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_LOW); - if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1)) - amdgpu_gmc_agp_location(adev, mc); - + if (amdgpu_gmc_is_pdb0_enabled(adev)) { + amdgpu_gmc_sysvm_location(adev, mc); + } else { + amdgpu_gmc_set_agp_default(adev, mc); + amdgpu_gmc_vram_location(adev, &adev->gmc, base); + amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_LOW); + if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1)) + amdgpu_gmc_agp_location(adev, mc); + } /* base offset of vram pages */ if (amdgpu_sriov_vf(adev)) adev->vm_manager.vram_base_offset = 0; else adev->vm_manager.vram_base_offset = adev->mmhub.funcs->get_mc_fb_offset(adev); + + adev->vm_manager.vram_base_offset += + adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; } /** @@ -717,12 +728,17 @@ static int gmc_v12_0_mc_init(struct amdgpu_device *adev) { int r; - /* size in MB on si */ - adev->gmc.mc_vram_size = - adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL; + if (adev->gmc.xgmi.connected_to_cpu) + adev->gmc.mc_vram_size = + adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes; + else + adev->gmc.mc_vram_size = + adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL; + adev->gmc.real_vram_size = adev->gmc.mc_vram_size; - if (!(adev->flags & AMD_IS_APU)) { + if (!(adev->flags & AMD_IS_APU) && + !adev->gmc.xgmi.connected_to_cpu) { r = amdgpu_device_resize_fb_bar(adev); if (r) return r; @@ -732,8 +748,12 @@ static int gmc_v12_0_mc_init(struct amdgpu_device *adev) adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); #ifdef CONFIG_X86_64 - if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) { - adev->gmc.aper_base = adev->mmhub.funcs->get_mc_fb_offset(adev); + if (((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) || + (adev->gmc.xgmi.connected_to_cpu)) { + adev->gmc.aper_base = + adev->mmhub.funcs->get_mc_fb_offset(adev) + + adev->gmc.xgmi.physical_node_id * + adev->gmc.xgmi.node_segment_size; adev->gmc.aper_size = adev->gmc.real_vram_size; } #endif @@ -762,6 +782,14 @@ static int gmc_v12_0_gart_init(struct amdgpu_device *adev) return 0; } + if (amdgpu_gmc_is_pdb0_enabled(adev)) { + adev->gmc.vmid0_page_table_depth = 1; + adev->gmc.vmid0_page_table_block_size = 12; + } else { + adev->gmc.vmid0_page_table_depth = 0; + adev->gmc.vmid0_page_table_block_size = 0; + } + /* Initialize common gart structure */ r = amdgpu_gart_init(adev); if (r) @@ -772,7 +800,14 @@ static int gmc_v12_0_gart_init(struct amdgpu_device *adev) AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_IS_PTE; - return amdgpu_gart_table_vram_alloc(adev); + r = amdgpu_gart_table_vram_alloc(adev); + if (r) + return r; + + if (amdgpu_gmc_is_pdb0_enabled(adev)) + r = amdgpu_gmc_pdb0_alloc(adev); + + return r; } static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block) @@ -858,11 +893,15 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 1, 0)) && - !amdgpu_sriov_vf(adev)) { + if (!amdgpu_sriov_vf(adev)) { /* interrupt sent to DF. 
*/ - r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_DF, 0, + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0)) + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_DF, 0, + &adev->gmc.ecc_irq); + else + r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_DF, 0, &adev->gmc.ecc_irq); + if (r) return r; } @@ -944,6 +983,7 @@ static int gmc_v12_0_sw_fini(struct amdgpu_ip_block *ip_block) amdgpu_vm_manager_fini(adev); gmc_v12_0_gart_fini(adev); amdgpu_gem_force_release(adev); + amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0); amdgpu_bo_fini(adev); return 0; @@ -963,6 +1003,9 @@ static int gmc_v12_0_gart_enable(struct amdgpu_device *adev) int r; bool value; + if (adev->gmc.xgmi.connected_to_cpu) + amdgpu_gmc_init_pdb0(adev); + if (adev->gart.bo == NULL) { dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); return -EINVAL; @@ -984,6 +1027,7 @@ static int gmc_v12_0_gart_enable(struct amdgpu_device *adev) drm_info(adev_to_drm(adev), "PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), + (adev->gmc.pdb0_bo) ? (unsigned long long)amdgpu_bo_gpu_offset(adev->gmc.pdb0_bo) : (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo)); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c index c49112d8300e..7ea7b9c30bca 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c @@ -524,20 +524,27 @@ static void gmc_v12_1_get_coherence_flags(struct amdgpu_device *adev, bool ext_coherent = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENT; uint32_t gc_ip_version = amdgpu_ip_version(adev, GC_HWIP, 0); bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED; - unsigned int mtype, mtype_local; + unsigned int mtype, mtype_local, mtype_remote; bool snoop = false; bool is_local = false; + bool is_aid_a1; switch (gc_ip_version) { case IP_VERSION(12, 1, 0): - mtype_local = MTYPE_RW; - if (amdgpu_mtype_local == 1) { + is_aid_a1 = (adev->rev_id & 0x10); + + mtype_local = is_aid_a1 ? MTYPE_RW : MTYPE_NC; + mtype_remote = is_aid_a1 ? MTYPE_NC : MTYPE_UC; + if (amdgpu_mtype_local == 0) { + DRM_INFO_ONCE("Using MTYPE_RW for local memory\n"); + mtype_local = MTYPE_RW; + } else if (amdgpu_mtype_local == 1) { DRM_INFO_ONCE("Using MTYPE_NC for local memory\n"); mtype_local = MTYPE_NC; } else if (amdgpu_mtype_local == 2) { - DRM_INFO_ONCE("MTYPE_CC not supported, using MTYPE_RW instead for local memory\n"); + DRM_INFO_ONCE("MTYPE_CC not supported, using %s for local memory\n", is_aid_a1 ? "MTYPE_RW" : "MTYPE_NC"); } else { - DRM_INFO_ONCE("Using MTYPE_RW for local memory\n"); + DRM_INFO_ONCE("Using %s for local memory\n", is_aid_a1 ? "MTYPE_RW" : "MTYPE_NC"); } is_local = (is_vram && adev == bo_adev); @@ -547,10 +554,7 @@ static void gmc_v12_1_get_coherence_flags(struct amdgpu_device *adev, } else if (ext_coherent) { mtype = is_local ? mtype_local : MTYPE_UC; } else { - if (is_local) - mtype = mtype_local; - else - mtype = MTYPE_NC; + mtype = is_local ? 
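/* plain cacheable path: local VRAM gets the per-ASIC local MTYPE, remote xGMI peers fall back to NC or UC */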
mtype_local : mtype_remote; } break; default: @@ -621,10 +625,17 @@ static const struct amdgpu_irq_src_funcs gmc_v12_1_irq_funcs = { .process = gmc_v12_1_process_interrupt, }; +static const struct amdgpu_irq_src_funcs gmc_v12_1_ecc_funcs = { + .process = amdgpu_umc_uniras_process_ecc_irq, +}; + void gmc_v12_1_set_irq_funcs(struct amdgpu_device *adev) { adev->gmc.vm_fault.num_types = 1; adev->gmc.vm_fault.funcs = &gmc_v12_1_irq_funcs; + + adev->gmc.ecc_irq.num_types = 1; + adev->gmc.ecc_irq.funcs = &gmc_v12_1_ecc_funcs; } void gmc_v12_1_init_vram_info(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index e35ed0cc2ec6..1ca0202cfdea 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -660,35 +660,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, gfxhub_client_ids[cid], cid); } else { - switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { - case IP_VERSION(9, 0, 0): - mmhub_cid = mmhub_client_ids_vega10[cid][rw]; - break; - case IP_VERSION(9, 3, 0): - mmhub_cid = mmhub_client_ids_vega12[cid][rw]; - break; - case IP_VERSION(9, 4, 0): - mmhub_cid = mmhub_client_ids_vega20[cid][rw]; - break; - case IP_VERSION(9, 4, 1): - mmhub_cid = mmhub_client_ids_arcturus[cid][rw]; - break; - case IP_VERSION(9, 1, 0): - case IP_VERSION(9, 2, 0): - mmhub_cid = mmhub_client_ids_raven[cid][rw]; - break; - case IP_VERSION(1, 5, 0): - case IP_VERSION(2, 4, 0): - mmhub_cid = mmhub_client_ids_renoir[cid][rw]; - break; - case IP_VERSION(1, 8, 0): - case IP_VERSION(9, 4, 2): - mmhub_cid = mmhub_client_ids_aldebaran[cid][rw]; - break; - default: - mmhub_cid = NULL; - break; - } + mmhub_cid = amdgpu_mmhub_client_name(&adev->mmhub, cid, rw); dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", mmhub_cid ? 
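/* the name table is registered at init time by gmc_v9_0_init_mmhub_client_info() below */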
mmhub_cid : "unknown", cid); } @@ -1428,6 +1400,52 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) } } +static void gmc_v9_0_init_mmhub_client_info(struct amdgpu_device *adev) +{ + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { + case IP_VERSION(9, 0, 0): + amdgpu_mmhub_init_client_info(&adev->mmhub, + mmhub_client_ids_vega10, + ARRAY_SIZE(mmhub_client_ids_vega10)); + break; + case IP_VERSION(9, 3, 0): + amdgpu_mmhub_init_client_info(&adev->mmhub, + mmhub_client_ids_vega12, + ARRAY_SIZE(mmhub_client_ids_vega12)); + break; + case IP_VERSION(9, 4, 0): + amdgpu_mmhub_init_client_info(&adev->mmhub, + mmhub_client_ids_vega20, + ARRAY_SIZE(mmhub_client_ids_vega20)); + break; + case IP_VERSION(9, 4, 1): + amdgpu_mmhub_init_client_info(&adev->mmhub, + mmhub_client_ids_arcturus, + ARRAY_SIZE(mmhub_client_ids_arcturus)); + break; + case IP_VERSION(9, 1, 0): + case IP_VERSION(9, 2, 0): + amdgpu_mmhub_init_client_info(&adev->mmhub, + mmhub_client_ids_raven, + ARRAY_SIZE(mmhub_client_ids_raven)); + break; + case IP_VERSION(1, 5, 0): + case IP_VERSION(2, 4, 0): + amdgpu_mmhub_init_client_info(&adev->mmhub, + mmhub_client_ids_renoir, + ARRAY_SIZE(mmhub_client_ids_renoir)); + break; + case IP_VERSION(1, 8, 0): + case IP_VERSION(9, 4, 2): + amdgpu_mmhub_init_client_info(&adev->mmhub, + mmhub_client_ids_aldebaran, + ARRAY_SIZE(mmhub_client_ids_aldebaran)); + break; + default: + break; + } +} + static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev) { switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { @@ -1445,6 +1463,8 @@ static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev) adev->mmhub.funcs = &mmhub_v1_0_funcs; break; } + + gmc_v9_0_init_mmhub_client_info(adev); } static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c index b3590b33cab9..485ecdec9618 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c @@ -129,7 +129,7 @@ static int isp_genpd_add_device(struct device *dev, void *data) if (!pdev) return -EINVAL; - if (!dev->type->name) { + if (!dev->type || !dev->type->name) { drm_dbg(&adev->ddev, "Invalid device type to add\n"); goto exit; } @@ -165,7 +165,7 @@ static int isp_genpd_remove_device(struct device *dev, void *data) if (!pdev) return -EINVAL; - if (!dev->type->name) { + if (!dev->type || !dev->type->name) { drm_dbg(&adev->ddev, "Invalid device type to remove\n"); goto exit; } diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.c new file mode 100644 index 000000000000..285c459379c4 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.c @@ -0,0 +1,840 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright 2025-2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "amdgpu.h" +#include "amdgpu_jpeg.h" +#include "amdgpu_pm.h" +#include "soc15.h" +#include "soc15d.h" +#include "jpeg_v4_0_3.h" +#include "jpeg_v5_0_2.h" +#include "mmsch_v5_0.h" + +#include "vcn/vcn_5_0_0_offset.h" +#include "vcn/vcn_5_0_0_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h" + +static void jpeg_v5_0_2_set_dec_ring_funcs(struct amdgpu_device *adev); +static void jpeg_v5_0_2_set_irq_funcs(struct amdgpu_device *adev); +static int jpeg_v5_0_2_set_powergating_state(struct amdgpu_ip_block *ip_block, + enum amd_powergating_state state); +static void jpeg_v5_0_2_dec_ring_set_wptr(struct amdgpu_ring *ring); + +static int amdgpu_ih_srcid_jpeg[] = { + VCN_5_0__SRCID__JPEG_DECODE, + VCN_5_0__SRCID__JPEG1_DECODE, + VCN_5_0__SRCID__JPEG2_DECODE, + VCN_5_0__SRCID__JPEG3_DECODE, + VCN_5_0__SRCID__JPEG4_DECODE, + VCN_5_0__SRCID__JPEG5_DECODE, + VCN_5_0__SRCID__JPEG6_DECODE, + VCN_5_0__SRCID__JPEG7_DECODE, + VCN_5_0__SRCID__JPEG8_DECODE, + VCN_5_0__SRCID__JPEG9_DECODE, +}; + +static const struct amdgpu_hwip_reg_entry jpeg_reg_list_5_0_2[] = { + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE), + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG), + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE), + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_STATUS), + 
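/* per-core JRBC registers are strided 0x40 apart but split across two register apertures; jpeg_v5_0_2_core_reg_offset() below folds that into a single offset from the JRBC0 registers */ +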
SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC8_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC8_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC8_UVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC9_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC9_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC9_UVD_JRBC_STATUS), +}; + +static int jpeg_v5_0_2_core_reg_offset(u32 pipe) +{ + if (pipe <= AMDGPU_MAX_JPEG_RINGS_4_0_3) + return ((0x40 * pipe) - 0xc80); + else + return ((0x40 * pipe) - 0x440); +} + +/** + * jpeg_v5_0_2_early_init - set function pointers + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Set ring and irq function pointers + */ +static int jpeg_v5_0_2_early_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + + if (!adev->jpeg.num_jpeg_inst || adev->jpeg.num_jpeg_inst > AMDGPU_MAX_JPEG_INSTANCES) + return -ENOENT; + + adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS; + jpeg_v5_0_2_set_dec_ring_funcs(adev); + jpeg_v5_0_2_set_irq_funcs(adev); + + return 0; +} + +/** + * jpeg_v5_0_2_sw_init - sw init for JPEG block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Load firmware and sw initialization + */ +static int jpeg_v5_0_2_sw_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_ring *ring; + int i, j, r, jpeg_inst; + + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + /* JPEG TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + amdgpu_ih_srcid_jpeg[j], &adev->jpeg.inst->irq); + if (r) + return r; + } + + r = amdgpu_jpeg_sw_init(adev); + if (r) + return r; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + jpeg_inst = GET_INST(JPEG, i); + + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + ring = &adev->jpeg.inst[i].ring_dec[j]; + ring->use_doorbell = false; + ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id); + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 1 + j + 11 * jpeg_inst; + sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j); + r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, + AMDGPU_RING_PRIO_DEFAULT, NULL); + if (r) + return r; + + adev->jpeg.internal.jpeg_pitch[j] = + regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET; + adev->jpeg.inst[i].external.jpeg_pitch[j] = + SOC15_REG_OFFSET1(JPEG, jpeg_inst, regUVD_JRBC_SCRATCH0, + (j ? jpeg_v5_0_2_core_reg_offset(j) : 0)); + } + } + + r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_5_0_2, ARRAY_SIZE(jpeg_reg_list_5_0_2)); + if (r) + return r; + + adev->jpeg.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]); + adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + r = amdgpu_jpeg_sysfs_reset_mask_init(adev); + + return r; +} + +/** + * jpeg_v5_0_2_sw_fini - sw fini for JPEG block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * JPEG suspend and free up sw allocation + */ +static int jpeg_v5_0_2_sw_fini(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int r; + + r = amdgpu_jpeg_suspend(adev); + if (r) + return r; + + amdgpu_jpeg_sysfs_reset_mask_fini(adev); + + r = amdgpu_jpeg_sw_fini(adev); + + return r; +} + +/** + * jpeg_v5_0_2_hw_init - start and test JPEG block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
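+ * + * Clear the tile anti-hang gate on each instance, set up doorbells where enabled and run a ring test on every decode ring.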
+ * + */ +static int jpeg_v5_0_2_hw_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_ring *ring; + int i, j, r, jpeg_inst, tmp; + + if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100) + adev->jpeg.caps |= AMDGPU_JPEG_CAPS(RRMT_ENABLED); + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + jpeg_inst = GET_INST(JPEG, i); + ring = adev->jpeg.inst[i].ring_dec; + + /* Remove JPEG Tile antihang mechanism */ + tmp = RREG32_SOC15(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS); + tmp &= (~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + WREG32_SOC15(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS, tmp); + + if (ring->use_doorbell) + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 11 * jpeg_inst, + adev->jpeg.inst[i].aid_id); + + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + ring = &adev->jpeg.inst[i].ring_dec[j]; + if (ring->use_doorbell) + WREG32_SOC15_OFFSET(VCN, GET_INST(VCN, i), regVCN_JPEG_DB_CTRL, + ring->pipe, + ring->doorbell_index << + VCN_JPEG_DB_CTRL__OFFSET__SHIFT | + VCN_JPEG_DB_CTRL__EN_MASK); + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } + } + + return 0; +} + +/** + * jpeg_v5_0_2_hw_fini - stop the hardware block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Stop the JPEG block, mark ring as not ready any more + */ +static int jpeg_v5_0_2_hw_fini(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int ret = 0; + + cancel_delayed_work_sync(&adev->jpeg.idle_work); + + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE) + ret = jpeg_v5_0_2_set_powergating_state(ip_block, AMD_PG_STATE_GATE); + + return ret; +} + +/** + * jpeg_v5_0_2_suspend - suspend JPEG block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * HW fini and suspend JPEG block + */ +static int jpeg_v5_0_2_suspend(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int r; + + r = jpeg_v5_0_2_hw_fini(ip_block); + if (r) + return r; + + r = amdgpu_jpeg_suspend(adev); + + return r; +} + +/** + * jpeg_v5_0_2_resume - resume JPEG block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
+ * + * Resume firmware and hw init JPEG block + */ +static int jpeg_v5_0_2_resume(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int r; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + r = jpeg_v5_0_2_hw_init(ip_block); + + return r; +} + +static void jpeg_v5_0_2_init_inst(struct amdgpu_device *adev, int i) +{ + int jpeg_inst = GET_INST(JPEG, i); + + /* disable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), 0, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + /* keep the JPEG in static PG mode */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), 0, + ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK); + + /* MJPEG global tiling registers */ + WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); +} + +static void jpeg_v5_0_2_deinit_inst(struct amdgpu_device *adev, int i) +{ + int jpeg_inst = GET_INST(JPEG, i); + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), + UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); +} + +static void jpeg_v5_0_2_init_jrbc(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 reg, data, mask; + int jpeg_inst = GET_INST(JPEG, ring->me); + int reg_offset = ring->pipe ? jpeg_v5_0_2_core_reg_offset(ring->pipe) : 0; + + /* enable System Interrupt for JRBC */ + reg = SOC15_REG_OFFSET(JPEG, jpeg_inst, regJPEG_SYS_INT_EN); + if (ring->pipe < AMDGPU_MAX_JPEG_RINGS_4_0_3) { + data = JPEG_SYS_INT_EN__DJRBC0_MASK << ring->pipe; + mask = ~(JPEG_SYS_INT_EN__DJRBC0_MASK << ring->pipe); + WREG32_P(reg, data, mask); + } else { + data = JPEG_SYS_INT_EN__DJRBC0_MASK << (ring->pipe+12); + mask = ~(JPEG_SYS_INT_EN__DJRBC0_MASK << (ring->pipe+12)); + WREG32_P(reg, data, mask); + } + + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_LMI_JRBC_RB_VMID, + reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC_RB_CNTL, + reg_offset, + (0x00000001L | 0x00000002L)); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + reg_offset, lower_32_bits(ring->gpu_addr)); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + reg_offset, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC_RB_RPTR, + reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC_RB_WPTR, + reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC_RB_CNTL, + reg_offset, 0x00000002L); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC_RB_SIZE, + reg_offset, ring->ring_size / 4); + ring->wptr = RREG32_SOC15_OFFSET(JPEG, jpeg_inst, regUVD_JRBC_RB_WPTR, + reg_offset); +} + +/** + * jpeg_v5_0_2_start - start JPEG block + * + * @adev: amdgpu_device pointer + * + * Setup and start the JPEG block + */ +static int jpeg_v5_0_2_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + int i, j; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + jpeg_v5_0_2_init_inst(adev, i); + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + ring = &adev->jpeg.inst[i].ring_dec[j]; + jpeg_v5_0_2_init_jrbc(ring); + } + } + + return 0; +} + +/** + * jpeg_v5_0_2_stop - stop 
JPEG block + * + * @adev: amdgpu_device pointer + * + * stop the JPEG block + */ +static int jpeg_v5_0_2_stop(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) + jpeg_v5_0_2_deinit_inst(adev, i); + + return 0; +} + +/** + * jpeg_v5_0_2_dec_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t jpeg_v5_0_2_dec_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC_RB_RPTR, + ring->pipe ? jpeg_v5_0_2_core_reg_offset(ring->pipe) : 0); +} + +/** + * jpeg_v5_0_2_dec_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t jpeg_v5_0_2_dec_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + + return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC_RB_WPTR, + ring->pipe ? jpeg_v5_0_2_core_reg_offset(ring->pipe) : 0); +} + +/** + * jpeg_v5_0_2_dec_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void jpeg_v5_0_2_dec_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), + regUVD_JRBC_RB_WPTR, + (ring->pipe ? jpeg_v5_0_2_core_reg_offset(ring->pipe) : 0), + lower_32_bits(ring->wptr)); + } +} + +static bool jpeg_v5_0_2_is_idle(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + bool ret = false; + int i, j; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + int reg_offset = (j ? jpeg_v5_0_2_core_reg_offset(j) : 0); + + ret &= ((RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, i), + regUVD_JRBC_STATUS, reg_offset) & + UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == + UVD_JRBC_STATUS__RB_JOB_DONE_MASK); + } + } + + return ret; +} + +static int jpeg_v5_0_2_wait_for_idle(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int ret = 0; + int i, j; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + int reg_offset = (j ? 
jpeg_v5_0_2_core_reg_offset(j) : 0); + + ret &= SOC15_WAIT_ON_RREG_OFFSET(JPEG, GET_INST(JPEG, i), + regUVD_JRBC_STATUS, reg_offset, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK); + } + } + return ret; +} + +static int jpeg_v5_0_2_set_clockgating_state(struct amdgpu_ip_block *ip_block, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = ip_block->adev; + bool enable = state == AMD_CG_STATE_GATE; + + int i; + + if (!enable) + return 0; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (!jpeg_v5_0_2_is_idle(ip_block)) + return -EBUSY; + } + + return 0; +} + +static int jpeg_v5_0_2_set_powergating_state(struct amdgpu_ip_block *ip_block, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = ip_block->adev; + int ret; + + if (state == adev->jpeg.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = jpeg_v5_0_2_stop(adev); + else + ret = jpeg_v5_0_2_start(adev); + + if (!ret) + adev->jpeg.cur_state = state; + + return ret; +} + +static int jpeg_v5_0_2_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +static int jpeg_v5_0_2_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + u32 i, inst; + + i = node_id_to_phys_map[entry->node_id]; + DRM_DEV_DEBUG(adev->dev, "IH: JPEG TRAP\n"); + + for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst) + if (adev->jpeg.inst[inst].aid_id == i) + break; + + if (inst >= adev->jpeg.num_jpeg_inst) { + dev_WARN_ONCE(adev->dev, 1, + "Interrupt received for unknown JPEG instance %d", + entry->node_id); + return 0; + } + + switch (entry->src_id) { + case VCN_5_0__SRCID__JPEG_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[0]); + break; + case VCN_5_0__SRCID__JPEG1_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[1]); + break; + case VCN_5_0__SRCID__JPEG2_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[2]); + break; + case VCN_5_0__SRCID__JPEG3_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[3]); + break; + case VCN_5_0__SRCID__JPEG4_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[4]); + break; + case VCN_5_0__SRCID__JPEG5_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[5]); + break; + case VCN_5_0__SRCID__JPEG6_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[6]); + break; + case VCN_5_0__SRCID__JPEG7_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[7]); + break; + case VCN_5_0__SRCID__JPEG8_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[8]); + break; + case VCN_5_0__SRCID__JPEG9_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[9]); + break; + default: + DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static void jpeg_v5_0_2_core_stall_reset(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + int jpeg_inst = GET_INST(JPEG, ring->me); + int reg_offset = ring->pipe ? 
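/* quiesce before reset: stall the JMI client, wait until it reports clean, drop pending LMI traffic, then pulse the per-core reset */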
jpeg_v5_0_2_core_reg_offset(ring->pipe) : 0; + + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JMI0_UVD_JMI_CLIENT_STALL, + reg_offset, 0x1F); + SOC15_WAIT_ON_RREG_OFFSET(JPEG, jpeg_inst, + regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS, + reg_offset, 0x1F, 0x1F); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JMI0_JPEG_LMI_DROP, + reg_offset, 0x1F); + WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 1 << ring->pipe); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JMI0_UVD_JMI_CLIENT_STALL, + reg_offset, 0x00); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JMI0_JPEG_LMI_DROP, + reg_offset, 0x00); + WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 0x00); +} + +static int jpeg_v5_0_2_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) +{ + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + jpeg_v5_0_2_core_stall_reset(ring); + jpeg_v5_0_2_init_jrbc(ring); + return amdgpu_ring_reset_helper_end(ring, timedout_fence); +} + +static const struct amd_ip_funcs jpeg_v5_0_2_ip_funcs = { + .name = "jpeg_v5_0_2", + .early_init = jpeg_v5_0_2_early_init, + .late_init = NULL, + .sw_init = jpeg_v5_0_2_sw_init, + .sw_fini = jpeg_v5_0_2_sw_fini, + .hw_init = jpeg_v5_0_2_hw_init, + .hw_fini = jpeg_v5_0_2_hw_fini, + .suspend = jpeg_v5_0_2_suspend, + .resume = jpeg_v5_0_2_resume, + .is_idle = jpeg_v5_0_2_is_idle, + .wait_for_idle = jpeg_v5_0_2_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = jpeg_v5_0_2_set_clockgating_state, + .set_powergating_state = jpeg_v5_0_2_set_powergating_state, + .dump_ip_state = amdgpu_jpeg_dump_ip_state, + .print_ip_state = amdgpu_jpeg_print_ip_state, +}; + +static const struct amdgpu_ring_funcs jpeg_v5_0_2_dec_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_JPEG, + .align_mask = 0xf, + .get_rptr = jpeg_v5_0_2_dec_ring_get_rptr, + .get_wptr = jpeg_v5_0_2_dec_ring_get_wptr, + .set_wptr = jpeg_v5_0_2_dec_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* jpeg_v5_0_2_dec_ring_emit_vm_flush */ + 22 + 22 + /* jpeg_v5_0_2_dec_ring_emit_fence x2 vm fence */ + 8 + 16, + .emit_ib_size = 22, /* jpeg_v5_0_2_dec_ring_emit_ib */ + .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib, + .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence, + .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush, + .emit_hdp_flush = jpeg_v4_0_3_ring_emit_hdp_flush, + .test_ring = amdgpu_jpeg_dec_ring_test_ring, + .test_ib = amdgpu_jpeg_dec_ring_test_ib, + .insert_nop = jpeg_v4_0_3_dec_ring_nop, + .insert_start = jpeg_v4_0_3_dec_ring_insert_start, + .insert_end = jpeg_v4_0_3_dec_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_jpeg_ring_begin_use, + .end_use = amdgpu_jpeg_ring_end_use, + .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg, + .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = jpeg_v5_0_2_ring_reset, +}; + +static void jpeg_v5_0_2_set_dec_ring_funcs(struct amdgpu_device *adev) +{ + int i, j, jpeg_inst; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + adev->jpeg.inst[i].ring_dec[j].funcs = &jpeg_v5_0_2_dec_ring_vm_funcs; + adev->jpeg.inst[i].ring_dec[j].me = i; + adev->jpeg.inst[i].ring_dec[j].pipe = j; + } + jpeg_inst = GET_INST(JPEG, i); + adev->jpeg.inst[i].aid_id = + jpeg_inst / adev->jpeg.num_inst_per_aid; + } +} + +static const struct 
amdgpu_irq_src_funcs jpeg_v5_0_2_irq_funcs = { + .set = jpeg_v5_0_2_set_interrupt_state, + .process = jpeg_v5_0_2_process_interrupt, +}; + +static void jpeg_v5_0_2_set_irq_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) + adev->jpeg.inst->irq.num_types += adev->jpeg.num_jpeg_rings; + + adev->jpeg.inst->irq.funcs = &jpeg_v5_0_2_irq_funcs; +} + +const struct amdgpu_ip_block_version jpeg_v5_0_2_ip_block = { + .type = AMD_IP_BLOCK_TYPE_JPEG, + .major = 5, + .minor = 0, + .rev = 2, + .funcs = &jpeg_v5_0_2_ip_funcs, +}; + +#if 0 +static int jpeg_v5_0_2_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + struct aca_bank_info info; + u64 misc0; + int ret; + + ret = aca_bank_info_decode(bank, &info); + if (ret) + return ret; + + misc0 = bank->regs[ACA_REG_IDX_MISC0]; + switch (type) { + case ACA_SMU_TYPE_UE: + bank->aca_err_type = ACA_ERROR_TYPE_UE; + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, + 1ULL); + break; + case ACA_SMU_TYPE_CE: + bank->aca_err_type = ACA_ERROR_TYPE_CE; + ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, + ACA_REG__MISC0__ERRCNT(misc0)); + break; + default: + return -EINVAL; + } + + return ret; +} + +/* reference to smu driver if header file */ +static int jpeg_v5_0_2_err_codes[] = { + 16, 17, 18, 19, 20, 21, 22, 23, /* JPEG[0-9][S|D] */ + 24, 25, 26, 27, 28, 29, 30, 31, + 48, 49, 50, 51, +}; + +static bool jpeg_v5_0_2_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + u32 instlo; + + instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); + instlo &= GENMASK(31, 1); + + if (instlo != mmSMNAID_AID0_MCA_SMU) + return false; + + if (aca_bank_check_error_codes(handle->adev, bank, + jpeg_v5_0_2_err_codes, + ARRAY_SIZE(jpeg_v5_0_2_err_codes))) + return false; + + return true; +} + +static const struct aca_bank_ops jpeg_v5_0_2_aca_bank_ops = { + .aca_bank_parser = jpeg_v5_0_2_aca_bank_parser, + .aca_bank_is_valid = jpeg_v5_0_2_aca_bank_is_valid, +}; + +static const struct aca_info jpeg_v5_0_2_aca_info = { + .hwip = ACA_HWIP_TYPE_SMU, + .mask = ACA_ERROR_UE_MASK, + .bank_ops = &jpeg_v5_0_2_aca_bank_ops, +}; +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.h new file mode 100644 index 000000000000..c201a2c42a0d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.h @@ -0,0 +1,111 @@ +/* + * Copyright 2025-2026 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __JPEG_V5_0_2_H__ +#define __JPEG_V5_0_2_H__ + +extern const struct amdgpu_ip_block_version jpeg_v5_0_2_ip_block; + +#define regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET 0x4094 +#define regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET 0x1bffe + +#define regUVD_JRBC0_UVD_JRBC_RB_WPTR 0x0640 +#define regUVD_JRBC0_UVD_JRBC_RB_WPTR_BASE_IDX 1 +#define regUVD_JRBC0_UVD_JRBC_STATUS 0x0649 +#define regUVD_JRBC0_UVD_JRBC_STATUS_BASE_IDX 1 +#define regUVD_JRBC0_UVD_JRBC_RB_RPTR 0x064a +#define regUVD_JRBC0_UVD_JRBC_RB_RPTR_BASE_IDX 1 +#define regUVD_JRBC1_UVD_JRBC_RB_WPTR 0x0000 +#define regUVD_JRBC1_UVD_JRBC_RB_WPTR_BASE_IDX 0 +#define regUVD_JRBC1_UVD_JRBC_STATUS 0x0009 +#define regUVD_JRBC1_UVD_JRBC_STATUS_BASE_IDX 0 +#define regUVD_JRBC1_UVD_JRBC_RB_RPTR 0x000a +#define regUVD_JRBC1_UVD_JRBC_RB_RPTR_BASE_IDX 0 +#define regUVD_JRBC2_UVD_JRBC_RB_WPTR 0x0040 +#define regUVD_JRBC2_UVD_JRBC_RB_WPTR_BASE_IDX 0 +#define regUVD_JRBC2_UVD_JRBC_STATUS 0x0049 +#define regUVD_JRBC2_UVD_JRBC_STATUS_BASE_IDX 0 +#define regUVD_JRBC2_UVD_JRBC_RB_RPTR 0x004a +#define regUVD_JRBC2_UVD_JRBC_RB_RPTR_BASE_IDX 0 +#define regUVD_JRBC3_UVD_JRBC_RB_WPTR 0x0080 +#define regUVD_JRBC3_UVD_JRBC_RB_WPTR_BASE_IDX 0 +#define regUVD_JRBC3_UVD_JRBC_STATUS 0x0089 +#define regUVD_JRBC3_UVD_JRBC_STATUS_BASE_IDX 0 +#define regUVD_JRBC3_UVD_JRBC_RB_RPTR 0x008a +#define regUVD_JRBC3_UVD_JRBC_RB_RPTR_BASE_IDX 0 +#define regUVD_JRBC4_UVD_JRBC_RB_WPTR 0x00c0 +#define regUVD_JRBC4_UVD_JRBC_RB_WPTR_BASE_IDX 0 +#define regUVD_JRBC4_UVD_JRBC_STATUS 0x00c9 +#define regUVD_JRBC4_UVD_JRBC_STATUS_BASE_IDX 0 +#define regUVD_JRBC4_UVD_JRBC_RB_RPTR 0x00ca +#define regUVD_JRBC4_UVD_JRBC_RB_RPTR_BASE_IDX 0 +#define regUVD_JRBC5_UVD_JRBC_RB_WPTR 0x0100 +#define regUVD_JRBC5_UVD_JRBC_RB_WPTR_BASE_IDX 0 +#define regUVD_JRBC5_UVD_JRBC_STATUS 0x0109 +#define regUVD_JRBC5_UVD_JRBC_STATUS_BASE_IDX 0 +#define regUVD_JRBC5_UVD_JRBC_RB_RPTR 0x010a +#define regUVD_JRBC5_UVD_JRBC_RB_RPTR_BASE_IDX 0 +#define regUVD_JRBC6_UVD_JRBC_RB_WPTR 0x0140 +#define regUVD_JRBC6_UVD_JRBC_RB_WPTR_BASE_IDX 0 +#define regUVD_JRBC6_UVD_JRBC_STATUS 0x0149 +#define regUVD_JRBC6_UVD_JRBC_STATUS_BASE_IDX 0 +#define regUVD_JRBC6_UVD_JRBC_RB_RPTR 0x014a +#define regUVD_JRBC6_UVD_JRBC_RB_RPTR_BASE_IDX 0 +#define regUVD_JRBC7_UVD_JRBC_RB_WPTR 0x0180 +#define regUVD_JRBC7_UVD_JRBC_RB_WPTR_BASE_IDX 0 +#define regUVD_JRBC7_UVD_JRBC_STATUS 0x0189 +#define regUVD_JRBC7_UVD_JRBC_STATUS_BASE_IDX 0 +#define regUVD_JRBC7_UVD_JRBC_RB_RPTR 0x018a +#define regUVD_JRBC7_UVD_JRBC_RB_RPTR_BASE_IDX 0 +#define regUVD_JRBC8_UVD_JRBC_RB_WPTR 0x01c0 +#define regUVD_JRBC8_UVD_JRBC_RB_WPTR_BASE_IDX 0 +#define regUVD_JRBC8_UVD_JRBC_STATUS 0x01c9 +#define regUVD_JRBC8_UVD_JRBC_STATUS_BASE_IDX 0 +#define regUVD_JRBC8_UVD_JRBC_RB_RPTR 0x01ca +#define regUVD_JRBC8_UVD_JRBC_RB_RPTR_BASE_IDX 0 +#define regUVD_JRBC9_UVD_JRBC_RB_WPTR 0x0440 +#define regUVD_JRBC9_UVD_JRBC_RB_WPTR_BASE_IDX 1 +#define regUVD_JRBC9_UVD_JRBC_STATUS 0x0449 +#define regUVD_JRBC9_UVD_JRBC_STATUS_BASE_IDX 1 +#define regUVD_JRBC9_UVD_JRBC_RB_RPTR 0x044a +#define regUVD_JRBC9_UVD_JRBC_RB_RPTR_BASE_IDX 1 +#define regUVD_JMI0_JPEG_LMI_DROP 0x0663 +#define regUVD_JMI0_JPEG_LMI_DROP_BASE_IDX 1 +#define regUVD_JMI0_UVD_JMI_CLIENT_STALL 0x067a +#define 
regUVD_JMI0_UVD_JMI_CLIENT_STALL_BASE_IDX 1 +#define regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS 0x067b +#define regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS_BASE_IDX 1 +#define regJPEG_CORE_RST_CTRL 0x072e +#define regJPEG_CORE_RST_CTRL_BASE_IDX 1 + +#define regVCN_RRMT_CNTL 0x0940 +#define regVCN_RRMT_CNTL_BASE_IDX 1 + +enum amdgpu_jpeg_v5_0_2_sub_block { + AMDGPU_JPEG_V5_0_2_JPEG0 = 0, + AMDGPU_JPEG_V5_0_2_JPEG1, + + AMDGPU_JPEG_V5_0_2_MAX_SUB_BLOCK, +}; + +#endif /* __JPEG_V5_0_2_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c index d8e4b52bdfd5..5dcc2c32644a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c @@ -31,6 +31,8 @@ #include "gc/gc_11_0_0_default.h" #include "v12_structs.h" #include "mes_v12_api_def.h" +#include "gfx_v12_1_pkt.h" +#include "sdma_v7_1_0_pkt_open.h" MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes.bin"); MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes1.bin"); @@ -41,6 +43,7 @@ static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id); static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block); static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id); static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id); +static int mes_v12_1_self_test(struct amdgpu_device *adev, int xcc_id); #define MES_EOP_SIZE 2048 @@ -491,16 +494,12 @@ static int mes_v12_1_query_sched_status(struct amdgpu_mes *mes, } static uint32_t mes_v12_1_get_xcc_from_reg(uint32_t reg_offset) { - /* Check xcc reg offset range */ - uint32_t xcc = (reg_offset & XCC_MID_MASK) ? 4 : 0; - /* Each XCC has two register ranges. - * These are represented in reg_offset[17:16] - */ - return ((reg_offset >> 16) & 0x3) + xcc; + return ((reg_offset >> 16) & 0x7); } static void mes_v12_1_get_rrmt(uint32_t reg, uint32_t xcc_id, - struct RRMT_OPTION *rrmt_opt) + struct RRMT_OPTION *rrmt_opt, + uint32_t *out_reg) { uint32_t normalized_reg = soc_v1_0_normalize_xcc_reg_offset(reg); @@ -509,8 +508,12 @@ static void mes_v12_1_get_rrmt(uint32_t reg, uint32_t xcc_id, rrmt_opt->mode = (xcc_id == rrmt_opt->xcd_die_id) ? 
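/* the register falls in an XCD range: local access when it belongs to this XCC, remote-XCD routing otherwise */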
MES_RRMT_MODE_LOCAL_XCD : MES_RRMT_MODE_REMOTE_XCD; } else { - rrmt_opt->mode = MES_RRMT_MODE_LOCAL_REMOTE_AID; + rrmt_opt->mode = MES_RRMT_MODE_REMOTE_MID; + if (soc_v1_0_mid1_reg_range(reg)) + rrmt_opt->mid_die_id = 1; } + + *out_reg = soc_v1_0_normalize_reg_offset(reg); } static int mes_v12_1_misc_op(struct amdgpu_mes *mes, @@ -534,65 +537,44 @@ static int mes_v12_1_misc_op(struct amdgpu_mes *mes, switch (input->op) { case MES_MISC_OP_READ_REG: misc_pkt.opcode = MESAPI_MISC__READ_REG; - misc_pkt.read_reg.reg_offset = input->read_reg.reg_offset; misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr; mes_v12_1_get_rrmt(input->read_reg.reg_offset, GET_INST(GC, input->xcc_id), - &misc_pkt.read_reg.rrmt_opt); - if (misc_pkt.read_reg.rrmt_opt.mode != MES_RRMT_MODE_REMOTE_MID) { - misc_pkt.read_reg.reg_offset = - soc_v1_0_normalize_xcc_reg_offset(misc_pkt.read_reg.reg_offset); - } + &misc_pkt.read_reg.rrmt_opt, + &misc_pkt.read_reg.reg_offset); break; case MES_MISC_OP_WRITE_REG: misc_pkt.opcode = MESAPI_MISC__WRITE_REG; - misc_pkt.write_reg.reg_offset = input->write_reg.reg_offset; misc_pkt.write_reg.reg_value = input->write_reg.reg_value; mes_v12_1_get_rrmt(input->write_reg.reg_offset, GET_INST(GC, input->xcc_id), - &misc_pkt.write_reg.rrmt_opt); - if (misc_pkt.write_reg.rrmt_opt.mode != MES_RRMT_MODE_REMOTE_MID) { - misc_pkt.write_reg.reg_offset = - soc_v1_0_normalize_xcc_reg_offset(misc_pkt.write_reg.reg_offset); - } + &misc_pkt.write_reg.rrmt_opt, + &misc_pkt.write_reg.reg_offset); break; case MES_MISC_OP_WRM_REG_WAIT: misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM; misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM; misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref; misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask; - misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0; misc_pkt.wait_reg_mem.reg_offset2 = 0; mes_v12_1_get_rrmt(input->wrm_reg.reg0, GET_INST(GC, input->xcc_id), - &misc_pkt.wait_reg_mem.rrmt_opt1); - if (misc_pkt.wait_reg_mem.rrmt_opt1.mode != MES_RRMT_MODE_REMOTE_MID) { - misc_pkt.wait_reg_mem.reg_offset1 = - soc_v1_0_normalize_xcc_reg_offset(misc_pkt.wait_reg_mem.reg_offset1); - } + &misc_pkt.wait_reg_mem.rrmt_opt1, + &misc_pkt.wait_reg_mem.reg_offset1); break; case MES_MISC_OP_WRM_REG_WR_WAIT: misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM; misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG; misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref; misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask; - misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0; - misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1; mes_v12_1_get_rrmt(input->wrm_reg.reg0, GET_INST(GC, input->xcc_id), - &misc_pkt.wait_reg_mem.rrmt_opt1); + &misc_pkt.wait_reg_mem.rrmt_opt1, + &misc_pkt.wait_reg_mem.reg_offset1); mes_v12_1_get_rrmt(input->wrm_reg.reg1, GET_INST(GC, input->xcc_id), - &misc_pkt.wait_reg_mem.rrmt_opt2); - - if (misc_pkt.wait_reg_mem.rrmt_opt1.mode != MES_RRMT_MODE_REMOTE_MID) { - misc_pkt.wait_reg_mem.reg_offset1 = - soc_v1_0_normalize_xcc_reg_offset(misc_pkt.wait_reg_mem.reg_offset1); - } - if (misc_pkt.wait_reg_mem.rrmt_opt2.mode != MES_RRMT_MODE_REMOTE_MID) { - misc_pkt.wait_reg_mem.reg_offset2 = - soc_v1_0_normalize_xcc_reg_offset(misc_pkt.wait_reg_mem.reg_offset2); - } + &misc_pkt.wait_reg_mem.rrmt_opt2, + &misc_pkt.wait_reg_mem.reg_offset2); break; case MES_MISC_OP_SET_SHADER_DEBUGGER: pipe = AMDGPU_MES_SCHED_PIPE; @@ -1949,10 +1931,31 @@ static int mes_v12_1_early_init(struct amdgpu_ip_block *ip_block) return 0; } +static int mes_v12_1_late_init(struct 
amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask); + + /* TODO: remove this once the issue is fixed. */ + if (adev->mes.enable_coop_mode) + return 0; + + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + /* for COOP mode, only test the master xcc. */ + if (adev->mes.enable_coop_mode && + adev->mes.master_xcc_ids[xcc_id] != xcc_id) + continue; + + mes_v12_1_self_test(adev, xcc_id); + } + + return 0; +} + static const struct amd_ip_funcs mes_v12_1_ip_funcs = { .name = "mes_v12_1", .early_init = mes_v12_1_early_init, - .late_init = NULL, + .late_init = mes_v12_1_late_init, .sw_init = mes_v12_1_sw_init, .sw_fini = mes_v12_1_sw_fini, .hw_init = mes_v12_1_hw_init, @@ -1968,3 +1971,312 @@ const struct amdgpu_ip_block_version mes_v12_1_ip_block = { .rev = 0, .funcs = &mes_v12_1_ip_funcs, }; + +static int mes_v12_1_alloc_test_buf(struct amdgpu_device *adev, + struct amdgpu_bo **bo, uint64_t *addr, + void **ptr, int size) +{ + amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, + bo, addr, ptr); + if (!*bo) { + dev_err(adev->dev, "failed to allocate test buffer bo\n"); + return -ENOMEM; + } + memset(*ptr, 0, size); + return 0; +} + +static int mes_v12_1_map_test_bo(struct amdgpu_device *adev, + struct amdgpu_bo *bo, struct amdgpu_vm *vm, + struct amdgpu_bo_va **bo_va, u64 va, int size) +{ + struct amdgpu_sync sync; + int r; + + r = amdgpu_map_static_csa(adev, vm, bo, bo_va, va, size); + if (r) + return r; + + amdgpu_sync_create(&sync); + + r = amdgpu_vm_bo_update(adev, *bo_va, false); + if (r) { + dev_err(adev->dev, "failed to do vm_bo_update on meta data\n"); + goto error; + } + amdgpu_sync_fence(&sync, (*bo_va)->last_pt_update, GFP_KERNEL); + + r = amdgpu_vm_update_pdes(adev, vm, false); + if (r) { + dev_err(adev->dev, "failed to update pdes on meta data\n"); + goto error; + } + amdgpu_sync_fence(&sync, vm->last_update, GFP_KERNEL); + amdgpu_sync_wait(&sync, false); + +error: + amdgpu_sync_free(&sync); + return r; +} + +static int mes_v12_1_test_ring(struct amdgpu_device *adev, int xcc_id, + u32 *queue_ptr, u64 fence_gpu_addr, + void *fence_cpu_ptr, void *wptr_cpu_addr, + u64 doorbell_idx, int queue_type) +{ + volatile uint32_t *cpu_ptr = fence_cpu_ptr; + int num_xcc = NUM_XCC(adev->gfx.xcc_mask); + int sdma_ring_align = 0x10, compute_ring_align = 0x100; + uint32_t tmp, xcc_offset; + int r = 0, i, wptr = 0; + + if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { + if (!adev->mes.enable_coop_mode) { + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regSCRATCH_REG0, 0xCAFEDEAD); + } else { + for (i = 0; i < num_xcc; i++) { + if (adev->mes.master_xcc_ids[i] == xcc_id) + WREG32_SOC15(GC, GET_INST(GC, i), + regSCRATCH_REG0, 0xCAFEDEAD); + } + } + + xcc_offset = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0); + queue_ptr[wptr++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); + queue_ptr[wptr++] = xcc_offset - PACKET3_SET_UCONFIG_REG_START; + queue_ptr[wptr++] = 0xDEADBEEF; + + for (i = wptr; i < compute_ring_align; i++) + queue_ptr[wptr++] = PACKET3(PACKET3_NOP, 0x3FFF); + + } else if (queue_type == AMDGPU_RING_TYPE_SDMA) { + *cpu_ptr = 0xCAFEDEAD; + + queue_ptr[wptr++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) | + SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); + queue_ptr[wptr++] = lower_32_bits(fence_gpu_addr); + queue_ptr[wptr++] = upper_32_bits(fence_gpu_addr); + queue_ptr[wptr++] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0); + queue_ptr[wptr++] = 0xDEADBEEF; + + for (i = wptr; i < sdma_ring_align; i++) + queue_ptr[wptr++] = 
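/* pad the rest of the submission with SDMA NOPs up to the ring alignment */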
SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); + + wptr <<= 2; + } + + atomic64_set((atomic64_t *)wptr_cpu_addr, wptr); + WDOORBELL64(doorbell_idx, wptr); + + for (i = 0; i < adev->usec_timeout; i++) { + if (queue_type == AMDGPU_RING_TYPE_SDMA) { + tmp = le32_to_cpu(*cpu_ptr); + } else { + if (!adev->mes.enable_coop_mode) { + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), + regSCRATCH_REG0); + } else { + for (int j = 0; j < num_xcc; j++) { + if (xcc_id != adev->mes.master_xcc_ids[j]) + continue; + + tmp = RREG32_SOC15(GC, GET_INST(GC, j), + regSCRATCH_REG0); + if (tmp != 0xDEADBEEF) + break; + } + } + } + + if (tmp == 0xDEADBEEF) + break; + + if (amdgpu_emu_mode == 1) + msleep(1); + else + udelay(1); + } + + if (i >= adev->usec_timeout) { + dev_err(adev->dev, "xcc%d: mes self test (%s) failed\n", xcc_id, + queue_type == AMDGPU_RING_TYPE_SDMA ? "sdma" : "compute"); + + while (halt_if_hws_hang) + schedule(); + + r = -ETIMEDOUT; + } else { + dev_info(adev->dev, "xcc%d: mes self test (%s) pass\n", xcc_id, + queue_type == AMDGPU_RING_TYPE_SDMA ? "sdma" : "compute"); + } + + return r; +} + +#define USER_CTX_SIZE (PAGE_SIZE * 2) +#define USER_CTX_VA AMDGPU_VA_RESERVED_BOTTOM +#define RING_OFFSET(addr) ((addr)) +#define EOP_OFFSET(addr) ((addr) + PAGE_SIZE) +#define WPTR_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64)) +#define RPTR_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64) * 2) +#define FENCE_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64) * 3) + +static int mes_v12_1_test_queue(struct amdgpu_device *adev, int xcc_id, + int pasid, struct amdgpu_vm *vm, u64 meta_gpu_addr, + u64 queue_gpu_addr, void *ctx_ptr, int queue_type) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; + struct amdgpu_mqd *mqd_mgr = &adev->mqds[queue_type]; + struct amdgpu_mqd_prop mqd_prop = {0}; + struct mes_add_queue_input add_queue = {0}; + struct mes_remove_queue_input remove_queue = {0}; + struct amdgpu_bo *mqd_bo = NULL; + int num_xcc = NUM_XCC(adev->gfx.xcc_mask); + int i, r, off, mqd_size, mqd_count = 1; + void *mqd_ptr = NULL; + u64 mqd_gpu_addr, doorbell_idx; + + /* one extra page of padding for the mes fw */ + mqd_size = mqd_mgr->mqd_size + PAGE_SIZE; + + if (queue_type == AMDGPU_RING_TYPE_SDMA) { + doorbell_idx = adev->mes.db_start_dw_offset + + adev->doorbell_index.sdma_engine[0]; + } else { + doorbell_idx = adev->mes.db_start_dw_offset + + adev->doorbell_index.userqueue_start; + } + + if (adev->mes.enable_coop_mode && + queue_type == AMDGPU_RING_TYPE_COMPUTE) { + for (i = 0, mqd_count = 0; i < num_xcc; i++) { + if (adev->mes.master_xcc_ids[i] == xcc_id) + mqd_count++; + } + } + + r = mes_v12_1_alloc_test_buf(adev, &mqd_bo, &mqd_gpu_addr, + &mqd_ptr, mqd_size * mqd_count); + if (r < 0) + return r; + + mqd_prop.mqd_gpu_addr = mqd_gpu_addr; + mqd_prop.hqd_base_gpu_addr = RING_OFFSET(USER_CTX_VA); + mqd_prop.eop_gpu_addr = EOP_OFFSET(USER_CTX_VA); + mqd_prop.wptr_gpu_addr = WPTR_OFFSET(USER_CTX_VA); + mqd_prop.rptr_gpu_addr = RPTR_OFFSET(USER_CTX_VA); + mqd_prop.doorbell_index = doorbell_idx; + mqd_prop.queue_size = PAGE_SIZE; + mqd_prop.mqd_stride_size = mqd_size; + mqd_prop.use_doorbell = true; + mqd_prop.hqd_active = false; + + mqd_mgr->init_mqd(adev, mqd_ptr, &mqd_prop); + if (mqd_count > 1) { + for (i = 1; i < mqd_count; i++) { + off = mqd_size * i; + mqd_prop.mqd_gpu_addr = mqd_gpu_addr + off; + mqd_mgr->init_mqd(adev, (char *)mqd_ptr + off, + &mqd_prop); + } + } + + add_queue.xcc_id = xcc_id; + add_queue.process_id = pasid; + add_queue.page_table_base_addr = 
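/* MES wants the MC address of the root page table, so rebase the BO's GPU offset from the VRAM aperture into MC space */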
adev->vm_manager.vram_base_offset + + amdgpu_bo_gpu_offset(vm->root.bo) - adev->gmc.vram_start; + add_queue.process_va_start = 0; + add_queue.process_va_end = adev->vm_manager.max_pfn - 1; + add_queue.process_context_addr = meta_gpu_addr; + add_queue.gang_context_addr = meta_gpu_addr + AMDGPU_MES_PROC_CTX_SIZE; + add_queue.doorbell_offset = doorbell_idx; + add_queue.mqd_addr = mqd_gpu_addr; + add_queue.wptr_addr = mqd_prop.wptr_gpu_addr; + add_queue.wptr_mc_addr = WPTR_OFFSET(queue_gpu_addr); + add_queue.queue_type = queue_type; + add_queue.vm_cntx_cntl = hub->vm_cntx_cntl; + + r = mes_v12_1_add_hw_queue(&adev->mes, &add_queue); + if (r) + goto error; + + mes_v12_1_test_ring(adev, xcc_id, (u32 *)RING_OFFSET((char *)ctx_ptr), + FENCE_OFFSET(USER_CTX_VA), + FENCE_OFFSET((char *)ctx_ptr), + WPTR_OFFSET((char *)ctx_ptr), + doorbell_idx, queue_type); + + remove_queue.xcc_id = xcc_id; + remove_queue.doorbell_offset = doorbell_idx; + remove_queue.gang_context_addr = add_queue.gang_context_addr; + r = mes_v12_1_remove_hw_queue(&adev->mes, &remove_queue); + +error: + amdgpu_bo_free_kernel(&mqd_bo, &mqd_gpu_addr, &mqd_ptr); + return r; +} + +static int mes_v12_1_self_test(struct amdgpu_device *adev, int xcc_id) +{ + int queue_types[] = { AMDGPU_RING_TYPE_COMPUTE, + /* AMDGPU_RING_TYPE_SDMA */ }; + struct amdgpu_bo_va *bo_va = NULL; + struct amdgpu_vm *vm = NULL; + struct amdgpu_bo *meta_bo = NULL, *ctx_bo = NULL; + void *meta_ptr = NULL, *ctx_ptr = NULL; + u64 meta_gpu_addr, ctx_gpu_addr; + int size, i, r, pasid; + + pasid = amdgpu_pasid_alloc(16); + if (pasid < 0) + pasid = 0; + + size = AMDGPU_MES_PROC_CTX_SIZE + AMDGPU_MES_GANG_CTX_SIZE; + r = mes_v12_1_alloc_test_buf(adev, &meta_bo, &meta_gpu_addr, + &meta_ptr, size); + if (r < 0) + goto err2; + + r = mes_v12_1_alloc_test_buf(adev, &ctx_bo, &ctx_gpu_addr, + &ctx_ptr, USER_CTX_SIZE); + if (r < 0) + goto err2; + + vm = kzalloc(sizeof(*vm), GFP_KERNEL); + if (!vm) { + r = -ENOMEM; + goto err2; + } + + r = amdgpu_vm_init(adev, vm, -1, pasid); + if (r) + goto err1; + + r = mes_v12_1_map_test_bo(adev, ctx_bo, vm, &bo_va, + USER_CTX_VA, USER_CTX_SIZE); + if (r) + goto err0; + + for (i = 0; i < ARRAY_SIZE(queue_types); i++) { + memset(ctx_ptr, 0, USER_CTX_SIZE); + + r = mes_v12_1_test_queue(adev, xcc_id, pasid, vm, meta_gpu_addr, + ctx_gpu_addr, ctx_ptr, queue_types[i]); + if (r) + break; + } + + amdgpu_unmap_static_csa(adev, vm, ctx_bo, bo_va, USER_CTX_VA); +err0: + amdgpu_vm_fini(adev, vm); +err1: + kfree(vm); +err2: + amdgpu_bo_free_kernel(&meta_bo, &meta_gpu_addr, &meta_ptr); + amdgpu_bo_free_kernel(&ctx_bo, &ctx_gpu_addr, &ctx_ptr); + amdgpu_pasid_free(pasid); + return r; +} + diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index a0cc8e218ca1..42a09a277ec3 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -141,7 +141,7 @@ mmhub_v2_0_print_l2_protection_fault_status(struct amdgpu_device *adev, uint32_t status) { uint32_t cid, rw; - const char *mmhub_cid = NULL; + const char *mmhub_cid; cid = REG_GET_FIELD(status, MMVM_L2_PROTECTION_FAULT_STATUS, CID); @@ -151,22 +151,7 @@ mmhub_v2_0_print_l2_protection_fault_status(struct amdgpu_device *adev, dev_err(adev->dev, "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status); - switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { - case IP_VERSION(2, 0, 0): - case IP_VERSION(2, 0, 2): - mmhub_cid = mmhub_client_ids_navi1x[cid][rw]; - break; - case IP_VERSION(2, 1, 0): - case IP_VERSION(2, 1, 1): - mmhub_cid = 
mmhub_client_ids_sienna_cichlid[cid][rw]; - break; - case IP_VERSION(2, 1, 2): - mmhub_cid = mmhub_client_ids_beige_goby[cid][rw]; - break; - default: - mmhub_cid = NULL; - break; - } + mmhub_cid = amdgpu_mmhub_client_name(&adev->mmhub, cid, rw); dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", mmhub_cid ? mmhub_cid : "unknown", cid); dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", @@ -518,6 +503,31 @@ static const struct amdgpu_vmhub_funcs mmhub_v2_0_vmhub_funcs = { .get_invalidate_req = mmhub_v2_0_get_invalidate_req, }; +static void mmhub_v2_0_init_client_info(struct amdgpu_device *adev) +{ + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { + case IP_VERSION(2, 0, 0): + case IP_VERSION(2, 0, 2): + amdgpu_mmhub_init_client_info(&adev->mmhub, + mmhub_client_ids_navi1x, + ARRAY_SIZE(mmhub_client_ids_navi1x)); + break; + case IP_VERSION(2, 1, 0): + case IP_VERSION(2, 1, 1): + amdgpu_mmhub_init_client_info(&adev->mmhub, + mmhub_client_ids_sienna_cichlid, + ARRAY_SIZE(mmhub_client_ids_sienna_cichlid)); + break; + case IP_VERSION(2, 1, 2): + amdgpu_mmhub_init_client_info(&adev->mmhub, + mmhub_client_ids_beige_goby, + ARRAY_SIZE(mmhub_client_ids_beige_goby)); + break; + default: + break; + } +} + static void mmhub_v2_0_init(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; @@ -558,6 +568,8 @@ static void mmhub_v2_0_init(struct amdgpu_device *adev) MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; hub->vmhub_funcs = &mmhub_v2_0_vmhub_funcs; + + mmhub_v2_0_init_client_info(adev); } static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index 5eb8122e2746..31c479d76c42 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -80,7 +80,7 @@ mmhub_v2_3_print_l2_protection_fault_status(struct amdgpu_device *adev, uint32_t status) { uint32_t cid, rw; - const char *mmhub_cid = NULL; + const char *mmhub_cid; cid = REG_GET_FIELD(status, MMVM_L2_PROTECTION_FAULT_STATUS, CID); @@ -90,16 +90,7 @@ mmhub_v2_3_print_l2_protection_fault_status(struct amdgpu_device *adev, dev_err(adev->dev, "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status); - switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { - case IP_VERSION(2, 3, 0): - case IP_VERSION(2, 4, 0): - case IP_VERSION(2, 4, 1): - mmhub_cid = mmhub_client_ids_vangogh[cid][rw]; - break; - default: - mmhub_cid = NULL; - break; - } + mmhub_cid = amdgpu_mmhub_client_name(&adev->mmhub, cid, rw); dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", mmhub_cid ? 
mmhub_cid : "unknown", cid); dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", @@ -486,6 +477,10 @@ static void mmhub_v2_3_init(struct amdgpu_device *adev) MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; hub->vmhub_funcs = &mmhub_v2_3_vmhub_funcs; + + amdgpu_mmhub_init_client_info(&adev->mmhub, + mmhub_client_ids_vangogh, + ARRAY_SIZE(mmhub_client_ids_vangogh)); } static void diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c index 7d5242df58a5..3d82cfa0f1b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c @@ -97,7 +97,7 @@ mmhub_v3_0_print_l2_protection_fault_status(struct amdgpu_device *adev, uint32_t status) { uint32_t cid, rw; - const char *mmhub_cid = NULL; + const char *mmhub_cid; cid = REG_GET_FIELD(status, MMVM_L2_PROTECTION_FAULT_STATUS, CID); @@ -107,15 +107,7 @@ mmhub_v3_0_print_l2_protection_fault_status(struct amdgpu_device *adev, dev_err(adev->dev, "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status); - switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { - case IP_VERSION(3, 0, 0): - case IP_VERSION(3, 0, 1): - mmhub_cid = mmhub_client_ids_v3_0_0[cid][rw]; - break; - default: - mmhub_cid = NULL; - break; - } + mmhub_cid = amdgpu_mmhub_client_name(&adev->mmhub, cid, rw); dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", mmhub_cid ? mmhub_cid : "unknown", cid); dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", @@ -520,6 +512,10 @@ static void mmhub_v3_0_init(struct amdgpu_device *adev) SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXTS_DISABLE); hub->vmhub_funcs = &mmhub_v3_0_vmhub_funcs; + + amdgpu_mmhub_init_client_info(&adev->mmhub, + mmhub_client_ids_v3_0_0, + ARRAY_SIZE(mmhub_client_ids_v3_0_0)); } static u64 mmhub_v3_0_get_fb_location(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c index 910337dc28d1..a1b0b7b39a42 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c @@ -104,7 +104,7 @@ mmhub_v3_0_1_print_l2_protection_fault_status(struct amdgpu_device *adev, uint32_t status) { uint32_t cid, rw; - const char *mmhub_cid = NULL; + const char *mmhub_cid; cid = REG_GET_FIELD(status, MMVM_L2_PROTECTION_FAULT_STATUS, CID); @@ -114,16 +114,7 @@ mmhub_v3_0_1_print_l2_protection_fault_status(struct amdgpu_device *adev, dev_err(adev->dev, "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status); - - switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { - case IP_VERSION(3, 0, 1): - mmhub_cid = mmhub_client_ids_v3_0_1[cid][rw]; - break; - default: - mmhub_cid = NULL; - break; - } - + mmhub_cid = amdgpu_mmhub_client_name(&adev->mmhub, cid, rw); dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", mmhub_cid ? 
mmhub_cid : "unknown", cid); dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", @@ -503,6 +494,10 @@ static void mmhub_v3_0_1_init(struct amdgpu_device *adev) MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; hub->vmhub_funcs = &mmhub_v3_0_1_vmhub_funcs; + + amdgpu_mmhub_init_client_info(&adev->mmhub, + mmhub_client_ids_v3_0_1, + ARRAY_SIZE(mmhub_client_ids_v3_0_1)); } static u64 mmhub_v3_0_1_get_fb_location(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c index f0f182f033b9..34e8dbd47c0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c @@ -97,7 +97,7 @@ mmhub_v3_0_2_print_l2_protection_fault_status(struct amdgpu_device *adev, uint32_t status) { uint32_t cid, rw; - const char *mmhub_cid = NULL; + const char *mmhub_cid; cid = REG_GET_FIELD(status, MMVM_L2_PROTECTION_FAULT_STATUS, CID); @@ -107,8 +107,7 @@ mmhub_v3_0_2_print_l2_protection_fault_status(struct amdgpu_device *adev, dev_err(adev->dev, "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status); - - mmhub_cid = mmhub_client_ids_v3_0_2[cid][rw]; + mmhub_cid = amdgpu_mmhub_client_name(&adev->mmhub, cid, rw); dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", mmhub_cid ? mmhub_cid : "unknown", cid); dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", @@ -509,6 +508,10 @@ static void mmhub_v3_0_2_init(struct amdgpu_device *adev) SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_BANK_SELECT_RESERVED_CID2); hub->vmhub_funcs = &mmhub_v3_0_2_vmhub_funcs; + + amdgpu_mmhub_init_client_info(&adev->mmhub, + mmhub_client_ids_v3_0_2, + ARRAY_SIZE(mmhub_client_ids_v3_0_2)); } static u64 mmhub_v3_0_2_get_fb_location(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c index daf1f8ad4cca..cfce7e1297d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c @@ -217,7 +217,7 @@ mmhub_v3_3_print_l2_protection_fault_status(struct amdgpu_device *adev, uint32_t status) { uint32_t cid, rw; - const char *mmhub_cid = NULL; + const char *mmhub_cid; cid = REG_GET_FIELD(status, MMVM_L2_PROTECTION_FAULT_STATUS, CID); @@ -227,29 +227,10 @@ mmhub_v3_3_print_l2_protection_fault_status(struct amdgpu_device *adev, dev_err(adev->dev, "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status); - - switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { - case IP_VERSION(3, 3, 0): - case IP_VERSION(3, 3, 2): - mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_3) ? - mmhub_client_ids_v3_3[cid][rw] : - cid == 0x140 ? "UMSCH" : NULL; - break; - case IP_VERSION(3, 3, 1): - mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_3_1) ? - mmhub_client_ids_v3_3_1[cid][rw] : - cid == 0x140 ? "UMSCH" : NULL; - break; - case IP_VERSION(3, 4, 0): - mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_4) ? - mmhub_client_ids_v3_4[cid][rw] : - cid == 0x140 ? "UMSCH" : NULL; - break; - default: - mmhub_cid = NULL; - break; - } - + if (cid == 0x140) + mmhub_cid = "UMSCH"; + else + mmhub_cid = amdgpu_mmhub_client_name(&adev->mmhub, cid, rw); dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", mmhub_cid ? 
mmhub_cid : "unknown", cid); dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", @@ -640,6 +621,30 @@ static const struct amdgpu_vmhub_funcs mmhub_v3_3_vmhub_funcs = { .get_invalidate_req = mmhub_v3_3_get_invalidate_req, }; +static void mmhub_v3_3_init_client_info(struct amdgpu_device *adev) +{ + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { + case IP_VERSION(3, 3, 0): + case IP_VERSION(3, 3, 2): + amdgpu_mmhub_init_client_info(&adev->mmhub, + mmhub_client_ids_v3_3, + ARRAY_SIZE(mmhub_client_ids_v3_3)); + break; + case IP_VERSION(3, 3, 1): + amdgpu_mmhub_init_client_info(&adev->mmhub, + mmhub_client_ids_v3_3_1, + ARRAY_SIZE(mmhub_client_ids_v3_3_1)); + break; + case IP_VERSION(3, 4, 0): + amdgpu_mmhub_init_client_info(&adev->mmhub, + mmhub_client_ids_v3_4, + ARRAY_SIZE(mmhub_client_ids_v3_4)); + break; + default: + break; + } +} + static void mmhub_v3_3_init(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; @@ -680,6 +685,8 @@ static void mmhub_v3_3_init(struct amdgpu_device *adev) MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; hub->vmhub_funcs = &mmhub_v3_3_vmhub_funcs; + + mmhub_v3_3_init_client_info(adev); } static u64 mmhub_v3_3_get_fb_location(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c index 951998454b25..bef75c4c48d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c @@ -90,7 +90,7 @@ mmhub_v4_1_0_print_l2_protection_fault_status(struct amdgpu_device *adev, uint32_t status) { uint32_t cid, rw; - const char *mmhub_cid = NULL; + const char *mmhub_cid; cid = REG_GET_FIELD(status, MMVM_L2_PROTECTION_FAULT_STATUS_LO32, CID); @@ -100,14 +100,7 @@ mmhub_v4_1_0_print_l2_protection_fault_status(struct amdgpu_device *adev, dev_err(adev->dev, "MMVM_L2_PROTECTION_FAULT_STATUS_LO32:0x%08X\n", status); - switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { - case IP_VERSION(4, 1, 0): - mmhub_cid = mmhub_client_ids_v4_1_0[cid][rw]; - break; - default: - mmhub_cid = NULL; - break; - } + mmhub_cid = amdgpu_mmhub_client_name(&adev->mmhub, cid, rw); dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", mmhub_cid ? 
mmhub_cid : "unknown", cid); dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", @@ -514,6 +507,10 @@ static void mmhub_v4_1_0_init(struct amdgpu_device *adev) SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXTS_DISABLE); hub->vmhub_funcs = &mmhub_v4_1_0_vmhub_funcs; + + amdgpu_mmhub_init_client_info(&adev->mmhub, + mmhub_client_ids_v4_1_0, + ARRAY_SIZE(mmhub_client_ids_v4_1_0)); } static u64 mmhub_v4_1_0_get_fb_location(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c index a72770e3d0e9..29f7ed466858 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c @@ -72,6 +72,45 @@ static const char *mmhub_client_ids_v4_2_0[][2] = { [23][1] = "VCN1", }; +static int mmhub_v4_2_0_get_xgmi_info(struct amdgpu_device *adev) +{ + u32 max_num_physical_nodes; + u32 max_physical_node_id; + u32 xgmi_lfb_cntl; + u32 max_region; + u64 seg_size; + + /* limit this callback to A + A configuration only */ + if (!adev->gmc.xgmi.connected_to_cpu) + return 0; + + xgmi_lfb_cntl = RREG32_SOC15(MMHUB, GET_INST(MMHUB, 0), + regMMMC_VM_XGMI_LFB_CNTL); + seg_size = REG_GET_FIELD( + RREG32_SOC15(MMHUB, GET_INST(MMHUB, 0), regMMMC_VM_XGMI_LFB_SIZE), + MMMC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24; + max_region = + REG_GET_FIELD(xgmi_lfb_cntl, MMMC_VM_XGMI_LFB_CNTL, PF_MAX_REGION); + + max_num_physical_nodes = 4; + max_physical_node_id = 3; + + adev->gmc.xgmi.num_physical_nodes = max_region + 1; + + if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes) + return -EINVAL; + + adev->gmc.xgmi.physical_node_id = + REG_GET_FIELD(xgmi_lfb_cntl, MMMC_VM_XGMI_LFB_CNTL, PF_LFB_REGION); + + if (adev->gmc.xgmi.physical_node_id > max_physical_node_id) + return -EINVAL; + + adev->gmc.xgmi.node_segment_size = seg_size; + + return 0; +} + static u64 mmhub_v4_2_0_get_fb_location(struct amdgpu_device *adev) { u64 base; @@ -131,7 +170,7 @@ static void mmhub_v4_2_0_setup_vm_pt_regs(struct amdgpu_device *adev, static void mmhub_v4_2_0_mid_init_gart_aperture_regs(struct amdgpu_device *adev, uint32_t mid_mask) { - uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + uint64_t pt_base; int i; if (adev->gmc.pdb0_bo) @@ -152,10 +191,10 @@ static void mmhub_v4_2_0_mid_init_gart_aperture_regs(struct amdgpu_device *adev, WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, - (u32)(adev->gmc.fb_end >> 12)); + (u32)(adev->gmc.gart_end >> 12)); WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, - (u32)(adev->gmc.fb_end >> 44)); + (u32)(adev->gmc.gart_end >> 44)); } else { WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, @@ -190,41 +229,74 @@ static void mmhub_v4_2_0_mid_init_system_aperture_regs(struct amdgpu_device *ade return; for_each_inst(i, mid_mask) { - /* Program the AGP BAR */ - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), - regMMMC_VM_AGP_BASE_LO32, 0); - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), - regMMMC_VM_AGP_BASE_HI32, 0); - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), - regMMMC_VM_AGP_BOT_LO32, - lower_32_bits(adev->gmc.agp_start >> 24)); - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), - regMMMC_VM_AGP_BOT_HI32, - upper_32_bits(adev->gmc.agp_start >> 24)); - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), - regMMMC_VM_AGP_TOP_LO32, - lower_32_bits(adev->gmc.agp_end >> 24)); - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), - regMMMC_VM_AGP_TOP_HI32, - upper_32_bits(adev->gmc.agp_end >> 24)); + if (adev->gmc.pdb0_bo) { + /* Disable agp and system aperture + * 
when vmid0 page table is enabled */ + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_FB_LOCATION_TOP_LO32, 0); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_FB_LOCATION_TOP_HI32, 0); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_FB_LOCATION_BASE_LO32, + 0xFFFFFFFF); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_FB_LOCATION_BASE_HI32, 1); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_AGP_TOP_LO32, 0); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_AGP_TOP_HI32, 0); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_AGP_BOT_LO32, + 0xFFFFFFFF); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_AGP_BOT_HI32, 1); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_LO32, + 0xFFFFFFFF); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_HI32, + 0x7F); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32, 0); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32, 0); + } else { + /* Program the AGP BAR */ + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_AGP_BASE_LO32, 0); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_AGP_BASE_HI32, 0); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_AGP_BOT_LO32, + lower_32_bits(adev->gmc.agp_start >> 24)); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_AGP_BOT_HI32, + upper_32_bits(adev->gmc.agp_start >> 24)); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_AGP_TOP_LO32, + lower_32_bits(adev->gmc.agp_end >> 24)); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_AGP_TOP_HI32, + upper_32_bits(adev->gmc.agp_end >> 24)); - /* Program the system aperture low logical page number. */ - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), - regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_LO32, - lower_32_bits(min(adev->gmc.fb_start, - adev->gmc.agp_start) >> 18)); - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), - regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_HI32, - upper_32_bits(min(adev->gmc.fb_start, - adev->gmc.agp_start) >> 18)); - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), - regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32, - lower_32_bits(max(adev->gmc.fb_end, - adev->gmc.agp_end) >> 18)); - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), - regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32, - upper_32_bits(max(adev->gmc.fb_end, - adev->gmc.agp_end) >> 18)); + /* Program the system aperture low logical page number. */ + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_LO32, + lower_32_bits(min(adev->gmc.fb_start, + adev->gmc.agp_start) >> 18)); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_HI32, + upper_32_bits(min(adev->gmc.fb_start, + adev->gmc.agp_start) >> 18)); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32, + lower_32_bits(max(adev->gmc.fb_end, + adev->gmc.agp_end) >> 18)); + WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), + regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32, + upper_32_bits(max(adev->gmc.fb_end, + adev->gmc.agp_end) >> 18)); + } /* Set default page address. */ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr); @@ -252,38 +324,6 @@ static void mmhub_v4_2_0_mid_init_system_aperture_regs(struct amdgpu_device *ade WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp); } - - /* In the case squeezing vram into GART aperture, we don't use - * FB aperture and AGP aperture. Disable them. 
- */ - if (adev->gmc.pdb0_bo) { - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), - regMMMC_VM_FB_LOCATION_TOP_LO32, 0); - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), - regMMMC_VM_FB_LOCATION_TOP_HI32, 0); - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), - regMMMC_VM_FB_LOCATION_BASE_LO32, 0xFFFFFFFF); - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), - regMMMC_VM_FB_LOCATION_BASE_HI32, 1); - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), - regMMMC_VM_AGP_TOP_LO32, 0); - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), - regMMMC_VM_AGP_TOP_HI32, 0); - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), - regMMMC_VM_AGP_BOT_LO32, 0xFFFFFFFF); - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), - regMMMC_VM_AGP_BOT_HI32, 1); - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), - regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_LO32, - 0xFFFFFFFF); - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), - regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_HI32, - 0x7F); - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), - regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32, 0); - WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), - regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32, 0); - } } static void mmhub_v4_2_0_mid_init_tlb_regs(struct amdgpu_device *adev, @@ -676,7 +716,7 @@ mmhub_v4_2_0_print_l2_protection_fault_status(struct amdgpu_device *adev, uint32_t status) { uint32_t cid, rw; - const char *mmhub_cid = NULL; + const char *mmhub_cid; cid = REG_GET_FIELD(status, MMVM_L2_PROTECTION_FAULT_STATUS_LO32, CID); @@ -686,14 +726,7 @@ mmhub_v4_2_0_print_l2_protection_fault_status(struct amdgpu_device *adev, dev_err(adev->dev, "MMVM_L2_PROTECTION_FAULT_STATUS_LO32:0x%08X\n", status); - switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { - case IP_VERSION(4, 2, 0): - mmhub_cid = mmhub_client_ids_v4_2_0[cid][rw]; - break; - default: - mmhub_cid = NULL; - break; - } + mmhub_cid = amdgpu_mmhub_client_name(&adev->mmhub, cid, rw); dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", mmhub_cid ? mmhub_cid : "unknown", cid); dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", @@ -784,6 +817,10 @@ static void mmhub_v4_2_0_init(struct amdgpu_device *adev) mid_mask = adev->aid_mask; mmhub_v4_2_0_mid_init(adev, mid_mask); + + amdgpu_mmhub_init_client_info(&adev->mmhub, + mmhub_client_ids_v4_2_0, + ARRAY_SIZE(mmhub_client_ids_v4_2_0)); } static void @@ -883,6 +920,7 @@ const struct amdgpu_mmhub_funcs mmhub_v4_2_0_funcs = { .set_fault_enable_default = mmhub_v4_2_0_set_fault_enable_default, .set_clockgating = mmhub_v4_2_0_set_clockgating, .get_clockgating = mmhub_v4_2_0_get_clockgating, + .get_xgmi_info = mmhub_v4_2_0_get_xgmi_info, }; static int mmhub_v4_2_0_xcp_resume(void *handle, uint32_t inst_mask) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c index 03bf1f86098f..f20e0fc3fc74 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c @@ -1129,12 +1129,21 @@ static void sdma_v7_1_vm_set_pte_pde(struct amdgpu_ib *ib, /* for physically contiguous pages (vram) */ u32 header = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_PTEPDE); - if (amdgpu_mtype_local) - header |= SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(0x3); - else - header |= (SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(0x2) | - SDMA_PKT_PTEPDE_COPY_HEADER_SNOOP(0x1) | - SDMA_PKT_PTEPDE_COPY_HEADER_SCOPE(0x3)); + /* TODO: + * When VM_L2_CNTL5.WALKER_FETCH_PDE_MTYPE_ENABLE is enabled, change below MTYPE + * to RW for AID A1 and UC for AID A0. NC needs additional GCR flush and need not + * be supported. Also, honour amdgpu_mtype_local override. RW would additionally + * require setting SCOPE bits in the header. 
+ * + * header |= (SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(0x2:RW) | + * SDMA_PKT_PTEPDE_COPY_HEADER_SNOOP(0x1) | + * SDMA_PKT_PTEPDE_COPY_HEADER_SCOPE(0x3:SYS_SCOPE)); + */ + + /* VM_L2_CNTL5.WALKER_FETCH_PDE_MTYPE_ENABLE is 0 which defaults to UC. So, + * use MTYPE_UC (0x3). For ref. MTYPE_RW=0x2 MTYPE_NC=0x0 + */ + header |= SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(0x3) | SDMA_PKT_PTEPDE_COPY_HEADER_SNOOP(0x1); ib->ptr[ib->length_dw++] = header; ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */ diff --git a/drivers/gpu/drm/amd/amdgpu/soc_v1_0.c b/drivers/gpu/drm/amd/amdgpu/soc_v1_0.c index 0c7759b82fa6..709b1669b07b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/soc_v1_0.c @@ -41,6 +41,44 @@ #define NORMALIZE_XCC_REG_OFFSET(offset) \ (offset & 0xFFFF) +#define MID1_REG_RANGE_0_LOW 0x40000 +#define MID1_REG_RANGE_0_HIGH 0x80000 +#define NORMALIZE_MID_REG_OFFSET(offset) \ + (offset & 0x3FFFF) + +static const struct amdgpu_video_codecs vcn_5_0_2_video_codecs_encode_vcn0 = { + .codec_count = 0, + .codec_array = NULL, +}; + +static const struct amdgpu_video_codec_info vcn_5_0_2_video_codecs_decode_array_vcn0[] = { + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, +}; + +static const struct amdgpu_video_codecs vcn_5_0_2_video_codecs_decode_vcn0 = { + .codec_count = ARRAY_SIZE(vcn_5_0_2_video_codecs_decode_array_vcn0), + .codec_array = vcn_5_0_2_video_codecs_decode_array_vcn0, +}; + +static int soc_v1_0_query_video_codecs(struct amdgpu_device *adev, bool encode, + const struct amdgpu_video_codecs **codecs) +{ + switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) { + case IP_VERSION(5, 0, 2): + if (encode) + *codecs = &vcn_5_0_2_video_codecs_encode_vcn0; + else + *codecs = &vcn_5_0_2_video_codecs_decode_vcn0; + return 0; + default: + return -EINVAL; + } +} + /* Initialized doorbells for amdgpu including multimedia * KFD can use all the rest in 2M doorbell bar */ static void soc_v1_0_doorbell_index_init(struct amdgpu_device *adev) @@ -257,6 +295,7 @@ static const struct amdgpu_asic_funcs soc_v1_0_asic_funcs = { .encode_ext_smn_addressing = &soc_v1_0_encode_ext_smn_addressing, .reset = soc_v1_0_asic_reset, .reset_method = &soc_v1_0_asic_reset_method, + .query_video_codecs = &soc_v1_0_query_video_codecs, }; static int soc_v1_0_common_early_init(struct amdgpu_ip_block *ip_block) @@ -283,7 +322,7 @@ static int soc_v1_0_common_early_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(12, 1, 0): adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS; - adev->pg_flags = 0; + adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG; adev->external_rev_id = adev->rev_id + 0x50; break; default: @@ -870,3 +909,31 @@ uint32_t soc_v1_0_normalize_xcc_reg_offset(uint32_t reg) else return reg; } + +bool soc_v1_0_mid1_reg_range(uint32_t reg) +{ + uint32_t normalized_reg = soc_v1_0_normalize_xcc_reg_offset(reg); + + if (soc_v1_0_normalize_xcc_reg_range(normalized_reg)) + return false; + + if ((reg >= MID1_REG_RANGE_0_LOW) && (reg < MID1_REG_RANGE_0_HIGH)) + return true; + else + return false; +} + +uint32_t soc_v1_0_normalize_reg_offset(uint32_t reg) +{ + uint32_t normalized_reg = soc_v1_0_normalize_xcc_reg_offset(reg); + + if 
(soc_v1_0_normalize_xcc_reg_range(normalized_reg)) + return soc_v1_0_normalize_xcc_reg_offset(reg); + + /* check if the reg offset is inside MID1. */ + if (soc_v1_0_mid1_reg_range(reg)) + return NORMALIZE_MID_REG_OFFSET(reg); + + return reg; +} + diff --git a/drivers/gpu/drm/amd/amdgpu/soc_v1_0.h b/drivers/gpu/drm/amd/amdgpu/soc_v1_0.h index 146996101aa0..16c220fcc4e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/soc_v1_0.h @@ -31,7 +31,9 @@ void soc_v1_0_grbm_select(struct amdgpu_device *adev, int xcc_id); int soc_v1_0_init_soc_config(struct amdgpu_device *adev); bool soc_v1_0_normalize_xcc_reg_range(uint32_t reg); +bool soc_v1_0_mid1_reg_range(uint32_t reg); uint32_t soc_v1_0_normalize_xcc_reg_offset(uint32_t reg); +uint32_t soc_v1_0_normalize_reg_offset(uint32_t reg); u64 soc_v1_0_encode_ext_smn_addressing(int ext_id); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 1f80045775f5..db505ab32fa0 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -183,50 +183,97 @@ static void umc_v12_0_get_retire_flip_bits(struct amdgpu_device *adev) if (adev->gmc.gmc_funcs->query_mem_partition_mode) nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); - /* default setting */ - flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_C2_BIT; - flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C3_BIT; - flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_C4_BIT; - flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R13_BIT; - flip_bits->flip_row_bit = 13; - flip_bits->bit_num = 4; - flip_bits->r13_in_pa = UMC_V12_0_PA_R13_BIT; - - if (nps == AMDGPU_NPS2_PARTITION_MODE) { + if (adev->gmc.num_umc == 16) { + /* default setting */ + flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_C2_BIT; + flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C3_BIT; + flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_C4_BIT; + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R13_BIT; + flip_bits->flip_row_bit = 13; + flip_bits->bit_num = 4; + flip_bits->r13_in_pa = UMC_V12_0_PA_R13_BIT; + + if (nps == AMDGPU_NPS2_PARTITION_MODE) { + flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH5_BIT; + flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C2_BIT; + flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B1_BIT; + flip_bits->r13_in_pa = UMC_V12_0_PA_R12_BIT; + } else if (nps == AMDGPU_NPS4_PARTITION_MODE) { + flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH4_BIT; + flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_CH5_BIT; + flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B0_BIT; + flip_bits->r13_in_pa = UMC_V12_0_PA_R11_BIT; + } + + switch (vram_type) { + case AMDGPU_VRAM_TYPE_HBM: + /* other nps modes are taken as nps1 */ + if (nps == AMDGPU_NPS2_PARTITION_MODE) + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT; + else if (nps == AMDGPU_NPS4_PARTITION_MODE) + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT; + + break; + case AMDGPU_VRAM_TYPE_HBM3E: + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT; + flip_bits->flip_row_bit = 12; + + if (nps == AMDGPU_NPS2_PARTITION_MODE) + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT; + else if (nps == AMDGPU_NPS4_PARTITION_MODE) + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R10_BIT; + + break; + default: + dev_warn(adev->dev, + "Unknown HBM type, set RAS retire flip bits to the value in NPS1 mode.\n"); + break; + } + } else if (adev->gmc.num_umc == 8) { + /* default setting */ flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH5_BIT; flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C2_BIT; 
flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B1_BIT; + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT; + flip_bits->flip_row_bit = 12; + flip_bits->bit_num = 4; flip_bits->r13_in_pa = UMC_V12_0_PA_R12_BIT; - } else if (nps == AMDGPU_NPS4_PARTITION_MODE) { - flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH4_BIT; - flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_CH5_BIT; - flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B0_BIT; - flip_bits->r13_in_pa = UMC_V12_0_PA_R11_BIT; - } - switch (vram_type) { - case AMDGPU_VRAM_TYPE_HBM: - /* other nps modes are taken as nps1 */ - if (nps == AMDGPU_NPS2_PARTITION_MODE) + if (nps == AMDGPU_NPS2_PARTITION_MODE) { + flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH4_BIT; + flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_CH5_BIT; + flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B0_BIT; + flip_bits->r13_in_pa = UMC_V12_0_PA_R11_BIT; + } + + switch (vram_type) { + case AMDGPU_VRAM_TYPE_HBM: flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT; - else if (nps == AMDGPU_NPS4_PARTITION_MODE) - flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT; - break; - case AMDGPU_VRAM_TYPE_HBM3E: - flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT; - flip_bits->flip_row_bit = 12; + /* other nps modes are taken as nps1 */ + if (nps == AMDGPU_NPS2_PARTITION_MODE) + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT; - if (nps == AMDGPU_NPS2_PARTITION_MODE) + break; + case AMDGPU_VRAM_TYPE_HBM3E: flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT; - else if (nps == AMDGPU_NPS4_PARTITION_MODE) - flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R10_BIT; + flip_bits->flip_row_bit = 12; - break; - default: + if (nps == AMDGPU_NPS2_PARTITION_MODE) + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R10_BIT; + + break; + default: + dev_warn(adev->dev, + "Unknown HBM type, set RAS retire flip bits to the value in NPS1 mode.\n"); + break; + } + } else { dev_warn(adev->dev, - "Unknown HBM type, set RAS retire flip bits to the value in NPS1 mode.\n"); - break; + "Unsupported UMC number(%d), failed to set RAS flip bits.\n", + adev->gmc.num_umc); + + return; } adev->umc.retire_unit = 0x1 << flip_bits->bit_num; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.c new file mode 100644 index 000000000000..c3d3cc023058 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.c @@ -0,0 +1,1219 @@ +/* + * Copyright 2025-2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include <linux/firmware.h> +#include "amdgpu.h" +#include "amdgpu_vcn.h" +#include "amdgpu_pm.h" +#include "soc15.h" +#include "soc15d.h" +#include "soc15_hw_ip.h" +#include "vcn_v2_0.h" +#include "vcn_v4_0_3.h" + +#include "vcn/vcn_5_0_0_offset.h" +#include "vcn/vcn_5_0_0_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h" +#include "vcn_v5_0_0.h" +#include "vcn_v5_0_1.h" +#include "vcn_v5_0_2.h" + +#include <drm/drm_drv.h> + +static void vcn_v5_0_2_set_unified_ring_funcs(struct amdgpu_device *adev); +static void vcn_v5_0_2_set_irq_funcs(struct amdgpu_device *adev); +static int vcn_v5_0_2_set_pg_state(struct amdgpu_vcn_inst *vinst, + enum amd_powergating_state state); +static void vcn_v5_0_2_unified_ring_set_wptr(struct amdgpu_ring *ring); + +/** + * vcn_v5_0_2_early_init - set function pointers and load microcode + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Set ring and irq function pointers + * Load microcode from filesystem + */ +static int vcn_v5_0_2_early_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, r; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) + /* re-use enc ring as unified ring */ + adev->vcn.inst[i].num_enc_rings = 1; + + vcn_v5_0_2_set_unified_ring_funcs(adev); + vcn_v5_0_2_set_irq_funcs(adev); + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + adev->vcn.inst[i].set_pg_state = vcn_v5_0_2_set_pg_state; + + r = amdgpu_vcn_early_init(adev, i); + if (r) + return r; + } + + return 0; +} + +static void vcn_v5_0_2_fw_shared_init(struct amdgpu_device *adev, int inst_idx) +{ + struct amdgpu_vcn5_fw_shared *fw_shared; + + fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + + if (fw_shared->sq.is_enabled) + return; + fw_shared->present_flag_0 = + cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); + fw_shared->sq.is_enabled = 1; + + if (amdgpu_vcnfw_log) + amdgpu_vcn_fwlog_init(&adev->vcn.inst[inst_idx]); +} + +/** + * vcn_v5_0_2_sw_init - sw init for VCN block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Load firmware and sw initialization + */ +static int vcn_v5_0_2_sw_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_ring *ring; + int i, r, vcn_inst; + + /* VCN UNIFIED TRAP */ + r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_VCN, + VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst->irq); + if (r) + return r; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + vcn_inst = GET_INST(VCN, i); + + r = amdgpu_vcn_sw_init(adev, i); + if (r) + return r; + + amdgpu_vcn_setup_ucode(adev, i); + + r = amdgpu_vcn_resume(adev, i); + if (r) + return r; + + ring = &adev->vcn.inst[i].ring_enc[0]; + ring->use_doorbell = true; + + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 32 * vcn_inst; + + ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id); + sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id); + + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, + AMDGPU_RING_PRIO_DEFAULT, &adev->vcn.inst[i].sched_score); + if (r) + return r; + + vcn_v5_0_2_fw_shared_init(adev, i); + } + + /* TODO: Add queue reset mask when FW fully supports it */ + adev->vcn.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + + return amdgpu_vcn_sysfs_reset_mask_init(adev); +} + +/** + * vcn_v5_0_2_sw_fini - sw fini for VCN block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
+ * + * VCN suspend and free up sw allocation + */ +static int vcn_v5_0_2_sw_fini(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, r, idx; + + if (drm_dev_enter(adev_to_drm(adev), &idx)) { + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + struct amdgpu_vcn5_fw_shared *fw_shared; + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->present_flag_0 = 0; + fw_shared->sq.is_enabled = 0; + } + + drm_dev_exit(idx); + } + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_suspend(adev, i); + if (r) + return r; + } + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) + amdgpu_vcn_sw_fini(adev, i); + + amdgpu_vcn_sysfs_reset_mask_fini(adev); + + kfree(adev->vcn.ip_dump); //TODO check + + return 0; +} + +/** + * vcn_v5_0_2_hw_init - start and test VCN block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Initialize the hardware, boot up the VCPU and do some testing + */ +static int vcn_v5_0_2_hw_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_ring *ring; + int i, r, vcn_inst; + uint32_t tmp; + + if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x200) + adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + vcn_inst = GET_INST(VCN, i); + ring = &adev->vcn.inst[i].ring_enc[0]; + + /* Remove Video Tiles antihang mechanism */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS); + tmp &= (~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + WREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS, tmp); + + if (ring->use_doorbell) + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 11 * vcn_inst), + adev->vcn.inst[i].aid_id); + + /* Re-init fw_shared, if required */ + vcn_v5_0_2_fw_shared_init(adev, i); + + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } + + return 0; +} + +/** + * vcn_v5_0_2_hw_fini - stop the hardware block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Stop the VCN block, mark ring as not ready any more + */ +static int vcn_v5_0_2_hw_fini(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; + + cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work); + if (vinst->cur_state != AMD_PG_STATE_GATE) + vinst->set_pg_state(vinst, AMD_PG_STATE_GATE); + } + + return 0; +} + +/** + * vcn_v5_0_2_suspend - suspend VCN block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * HW fini and suspend VCN block + */ +static int vcn_v5_0_2_suspend(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int r, i; + + r = vcn_v5_0_2_hw_fini(ip_block); + if (r) + return r; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_suspend(ip_block->adev, i); + if (r) + return r; + } + + return r; +} + +/** + * vcn_v5_0_2_resume - resume VCN block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
+ * + * Resume firmware and hw init VCN block + */ +static int vcn_v5_0_2_resume(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int r, i; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; + + if (amdgpu_in_reset(adev)) + vinst->cur_state = AMD_PG_STATE_GATE; + + r = amdgpu_vcn_resume(ip_block->adev, i); + if (r) + return r; + } + + r = vcn_v5_0_2_hw_init(ip_block); + + return r; +} + +/** + * vcn_v5_0_2_mc_resume - memory controller programming + * + * @vinst: VCN instance + * + * Let the VCN memory controller know it's offsets + */ +static void vcn_v5_0_2_mc_resume(struct amdgpu_vcn_inst *vinst) +{ + struct amdgpu_device *adev = vinst->adev; + int inst = vinst->inst; + uint32_t offset, size, vcn_inst; + const struct common_firmware_header *hdr; + + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data; + size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); + + vcn_inst = GET_INST(VCN, inst); + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi)); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, 0); + offset = 0; + } else { + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr)); + offset = size; + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE0, size); + + /* cache window 1: stack */ + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset)); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET1, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); + + /* cache window 2: context */ + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET2, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); + + /* non-cache window */ + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_OFFSET0, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_SIZE0, + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared))); +} + +/** + * vcn_v5_0_2_mc_resume_dpg_mode - memory controller programming for dpg mode + * + * @vinst: VCN instance + * @indirect: indirectly write sram + * + * Let the VCN memory controller know it's offsets with dpg mode + */ +static void vcn_v5_0_2_mc_resume_dpg_mode(struct amdgpu_vcn_inst 
*vinst, + bool indirect) +{ + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; + uint32_t offset, size; + const struct common_firmware_header *hdr; + + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data; + size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); + + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (!indirect) { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + + inst_idx].tmr_mc_addr_lo), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + + inst_idx].tmr_mc_addr_hi), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } else { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } + offset = 0; + } else { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + offset = size; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); + } + + if (!indirect) + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect); + else + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); + + /* cache window 1: stack */ + if (!indirect) { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } else { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); + + /* cache window 2: context */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE), 0, 
indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); + + /* non-cache window */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_NONCACHE_SIZE0), + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)), 0, indirect); + + /* VCN global tiling registers */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); +} + +/** + * vcn_v5_0_2_disable_clock_gating - disable VCN clock gating + * + * @vinst: VCN instance + * + * Disable clock gating for VCN block + */ +static void vcn_v5_0_2_disable_clock_gating(struct amdgpu_vcn_inst *vinst) +{ +} + +/** + * vcn_v5_0_2_enable_clock_gating - enable VCN clock gating + * + * @vinst: VCN instance + * + * Enable clock gating for VCN block + */ +static void vcn_v5_0_2_enable_clock_gating(struct amdgpu_vcn_inst *vinst) +{ +} + +/** + * vcn_v5_0_2_pause_dpg_mode - VCN pause with dpg mode + * + * @vinst: VCN instance + * @new_state: pause state + * + * Pause dpg mode for VCN block + */ +static int vcn_v5_0_2_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, + struct dpg_pause_state *new_state) +{ + struct amdgpu_device *adev = vinst->adev; + uint32_t reg_data = 0; + int vcn_inst; + + vcn_inst = GET_INST(VCN, vinst->inst); + + /* pause/unpause if state is changed */ + if (vinst->pause_state.fw_based != new_state->fw_based) { + DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d %s\n", + vinst->pause_state.fw_based, new_state->fw_based, + new_state->fw_based ? 
"VCN_DPG_STATE__PAUSE" : "VCN_DPG_STATE__UNPAUSE"); + reg_data = RREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { + /* pause DPG */ + reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data); + + /* wait for ACK */ + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_DPG_PAUSE, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + } else { + /* unpause DPG, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data); + } + vinst->pause_state.fw_based = new_state->fw_based; + } + + return 0; +} + +/** + * vcn_v5_0_2_start_dpg_mode - VCN start with dpg mode + * + * @vinst: VCN instance + * @indirect: indirectly write sram + * + * Start VCN block with dpg mode + */ +static int vcn_v5_0_2_start_dpg_mode(struct amdgpu_vcn_inst *vinst, + bool indirect) +{ + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; + struct amdgpu_vcn5_fw_shared *fw_shared = + adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + struct amdgpu_ring *ring; + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__PAUSE}; + int vcn_inst, ret; + uint32_t tmp; + + vcn_inst = GET_INST(VCN, inst_idx); + + /* disable register anti-hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 1, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + /* enable dynamic power gating mode */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS); + tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS, tmp); + + if (indirect) { + adev->vcn.inst[inst_idx].dpg_sram_curr_addr = + (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr; + /* Use dummy register 0xDEADBEEF passing AID selection to PSP FW */ + WREG32_SOC24_DPG_MODE(inst_idx, 0xDEADBEEF, + adev->vcn.inst[inst_idx].aid_id, 0, true); + } + + /* enable VCPU clock */ + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK | UVD_VCPU_CNTL__BLK_RST_MASK; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect); + + /* disable master interrupt */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_MASTINT_EN), 0, 0, indirect); + + /* setup regUVD_LMI_CTRL */ + tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__REQ_MODE_MASK | + UVD_LMI_CTRL__CRC_RESET_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + 0x00100000L); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_CTRL), tmp, 0, indirect); + + vcn_v5_0_2_mc_resume_dpg_mode(vinst, indirect); + + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect); + + /* enable LMI MC and UMC channels */ + tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_CTRL2), tmp, 0, indirect); + + /* enable master interrupt */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); + + if (indirect) { + ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM); 
+ if (ret) { + dev_err(adev->dev, "vcn sram load failed %d\n", ret); + return ret; + } + } + + /* resetting ring, fw should not check RB ring */ + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + + /* Pause dpg */ + vcn_v5_0_2_pause_dpg_mode(vinst, &state); + + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; + + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, ring->ring_size / sizeof(uint32_t)); + + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0); + + ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR); + + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + /* resetting done, fw can check RB ring */ + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + + WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + /* Read DB_CTRL to flush the write DB_CTRL command. */ + RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL); + + return 0; +} + +/** + * vcn_v5_0_2_start - VCN start + * + * @vinst: VCN instance + * + * Start VCN block + */ +static int vcn_v5_0_2_start(struct amdgpu_vcn_inst *vinst) +{ + struct amdgpu_device *adev = vinst->adev; + int i = vinst->inst; + struct amdgpu_vcn5_fw_shared *fw_shared; + struct amdgpu_ring *ring; + uint32_t tmp; + int j, k, r, vcn_inst; + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + return vcn_v5_0_2_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram); + + vcn_inst = GET_INST(VCN, i); + + /* set VCN status busy */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS) | UVD_STATUS__UVD_BUSY; + WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, tmp); + + /* enable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); + + /* disable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* enable LMI MC and UMC channels */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); + tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); + + /* setup regUVD_LMI_CTRL */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL, tmp | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + + vcn_v5_0_2_mc_resume(vinst); + + /* VCN global tiling registers */ + WREG32_SOC15(VCN, vcn_inst, regUVD_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* unblock VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), 0, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* release VCPU reset to boot */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + for (j = 0; j < 10; ++j) { + uint32_t status; + 
+ for (k = 0; k < 100; ++k) { + status = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS); + if (status & 2) + break; + mdelay(1000); + if (amdgpu_emu_mode == 1) + msleep(520); + } + + if (amdgpu_emu_mode == 1) { + r = -1; + if (status & 2) { + r = 0; + break; + } + } else { + r = 0; + if (status & 2) + break; + + dev_err(adev->dev, + "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + mdelay(10); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + mdelay(10); + r = -1; + } + } + + if (r) { + dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i); + return r; + } + + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* clear the busy bit of VCN_STATUS */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + + ring = &adev->vcn.inst[i].ring_enc[0]; + + WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + + /* Read DB_CTRL to flush the write DB_CTRL command. */ + RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL); + + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, ring->ring_size / 4); + + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0); + + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, tmp); + ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR); + + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + + return 0; +} + +/** + * vcn_v5_0_2_stop_dpg_mode - VCN stop with dpg mode + * + * @vinst: VCN instance + * + * Stop VCN block with dpg mode + */ +static void vcn_v5_0_2_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) +{ + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; + uint32_t tmp; + int vcn_inst; + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; + + vcn_inst = GET_INST(VCN, inst_idx); + + /* Unpause dpg */ + vcn_v5_0_2_pause_dpg_mode(vinst, &state); + + /* Wait for power status to be 1 */ + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + /* wait for read ptr to be equal to write ptr */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR); + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_RB_RPTR, tmp, 0xFFFFFFFF); + + /* disable dynamic power gating mode */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); +} + +/** + * vcn_v5_0_2_stop - VCN stop + * + * @vinst: VCN instance + * + * Stop VCN block + */ +static int vcn_v5_0_2_stop(struct amdgpu_vcn_inst *vinst) +{ + struct amdgpu_device *adev = vinst->adev; + int i = vinst->inst; + struct amdgpu_vcn5_fw_shared *fw_shared; + uint32_t tmp; + int r = 
0, vcn_inst; + + vcn_inst = GET_INST(VCN, i); + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + vcn_v5_0_2_stop_dpg_mode(vinst); + return 0; + } + + /* wait for vcn idle */ + r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_STATUS, UVD_STATUS__IDLE, 0x7); + if (r) + return r; + + tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | + UVD_LMI_STATUS__READ_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, tmp); + if (r) + return r; + + /* disable LMI UMC channel */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2); + tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2, tmp); + tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK | + UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, tmp); + if (r) + return r; + + /* block VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), + UVD_RB_ARB_CTRL__VCPU_DIS_MASK, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* reset VCPU */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + /* disable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0, + ~(UVD_VCPU_CNTL__CLK_EN_MASK)); + + /* apply soft reset */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); + + /* clear status */ + WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0); + + return 0; +} + +/** + * vcn_v5_0_2_unified_ring_get_rptr - get unified read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware unified read pointer + */ +static uint64_t vcn_v5_0_2_unified_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_RPTR); +} + +/** + * vcn_v5_0_2_unified_ring_get_wptr - get unified write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware unified write pointer + */ +static uint64_t vcn_v5_0_2_unified_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + if (ring->use_doorbell) + return *ring->wptr_cpu_addr; + else + return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR); +} + +/** + * vcn_v5_0_2_unified_ring_set_wptr - set enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the enc write pointer to the hardware + */ +static void vcn_v5_0_2_unified_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + if (ring->use_doorbell) { + *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR, + 
lower_32_bits(ring->wptr)); + } +} + +static const struct amdgpu_ring_funcs vcn_v5_0_2_unified_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_ENC, + .align_mask = 0x3f, + .nop = VCN_ENC_CMD_NO_OP, + .get_rptr = vcn_v5_0_2_unified_ring_get_rptr, + .get_wptr = vcn_v5_0_2_unified_ring_get_wptr, + .set_wptr = vcn_v5_0_2_unified_ring_set_wptr, + .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + + 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */ + 5 + + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */ + 1, /* vcn_v2_0_enc_ring_insert_end */ + .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ + .emit_ib = vcn_v2_0_enc_ring_emit_ib, + .emit_fence = vcn_v2_0_enc_ring_emit_fence, + .emit_vm_flush = vcn_v4_0_3_enc_ring_emit_vm_flush, + .emit_hdp_flush = vcn_v4_0_3_ring_emit_hdp_flush, + .test_ring = amdgpu_vcn_enc_ring_test_ring, + .test_ib = amdgpu_vcn_unified_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .insert_end = vcn_v2_0_enc_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg, + .emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +/** + * vcn_v5_0_2_set_unified_ring_funcs - set unified ring functions + * + * @adev: amdgpu_device pointer + * + * Set unified ring functions + */ +static void vcn_v5_0_2_set_unified_ring_funcs(struct amdgpu_device *adev) +{ + int i, vcn_inst; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v5_0_2_unified_ring_vm_funcs; + adev->vcn.inst[i].ring_enc[0].me = i; + vcn_inst = GET_INST(VCN, i); + adev->vcn.inst[i].aid_id = vcn_inst / adev->vcn.num_inst_per_aid; + } +} + +/** + * vcn_v5_0_2_is_idle - check VCN block is idle + * + * @ip_block: Pointer to the amdgpu_ip_block structure + * + * Check whether VCN block is idle + */ +static bool vcn_v5_0_2_is_idle(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, ret = 1; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) + ret &= (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) == UVD_STATUS__IDLE); + + return ret; +} + +/** + * vcn_v5_0_2_wait_for_idle - wait for VCN block idle + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Wait for VCN block idle + */ +static int vcn_v5_0_2_wait_for_idle(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, ret = 0; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + ret = SOC15_WAIT_ON_RREG(VCN, GET_INST(VCN, i), regUVD_STATUS, UVD_STATUS__IDLE, + UVD_STATUS__IDLE); + if (ret) + return ret; + } + + return ret; +} + +/** + * vcn_v5_0_2_set_clockgating_state - set VCN block clockgating state + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
+ * @state: clock gating state + * + * Set VCN block clockgating state + */ +static int vcn_v5_0_2_set_clockgating_state(struct amdgpu_ip_block *ip_block, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = ip_block->adev; + bool enable = state == AMD_CG_STATE_GATE; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; + + if (enable) { + if (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) != UVD_STATUS__IDLE) + return -EBUSY; + vcn_v5_0_2_enable_clock_gating(vinst); + } else { + vcn_v5_0_2_disable_clock_gating(vinst); + } + } + + return 0; +} + +static int vcn_v5_0_2_set_pg_state(struct amdgpu_vcn_inst *vinst, + enum amd_powergating_state state) +{ + int ret = 0; + + if (state == vinst->cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = vcn_v5_0_2_stop(vinst); + else + ret = vcn_v5_0_2_start(vinst); + + if (!ret) + vinst->cur_state = state; + + return ret; +} + +/** + * vcn_v5_0_2_process_interrupt - process VCN block interrupt + * + * @adev: amdgpu_device pointer + * @source: interrupt sources + * @entry: interrupt entry from clients and sources + * + * Process VCN block interrupt + */ +static int vcn_v5_0_2_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + uint32_t i, inst; + + i = node_id_to_phys_map[entry->node_id]; + + DRM_DEV_DEBUG(adev->dev, "IH: VCN TRAP\n"); + + for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst) + if (adev->vcn.inst[inst].aid_id == i) + break; + + if (inst >= adev->vcn.num_vcn_inst) { + dev_WARN_ONCE(adev->dev, 1, + "Interrupt received for unknown VCN instance %d", + entry->node_id); + return 0; + } + + switch (entry->src_id) { + case VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE: + amdgpu_fence_process(&adev->vcn.inst[inst].ring_enc[0]); + break; + default: + DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static const struct amdgpu_irq_src_funcs vcn_v5_0_2_irq_funcs = { + .process = vcn_v5_0_2_process_interrupt, +}; + +/** + * vcn_v5_0_2_set_irq_funcs - set VCN block interrupt irq functions + * + * @adev: amdgpu_device pointer + * + * Set VCN block interrupt irq functions + */ +static void vcn_v5_0_2_set_irq_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) + adev->vcn.inst->irq.num_types++; + adev->vcn.inst->irq.funcs = &vcn_v5_0_2_irq_funcs; +} + +static const struct amd_ip_funcs vcn_v5_0_2_ip_funcs = { + .name = "vcn_v5_0_2", + .early_init = vcn_v5_0_2_early_init, + .late_init = NULL, + .sw_init = vcn_v5_0_2_sw_init, + .sw_fini = vcn_v5_0_2_sw_fini, + .hw_init = vcn_v5_0_2_hw_init, + .hw_fini = vcn_v5_0_2_hw_fini, + .suspend = vcn_v5_0_2_suspend, + .resume = vcn_v5_0_2_resume, + .is_idle = vcn_v5_0_2_is_idle, + .wait_for_idle = vcn_v5_0_2_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = vcn_v5_0_2_set_clockgating_state, + .set_powergating_state = vcn_set_powergating_state, +}; + +const struct amdgpu_ip_block_version vcn_v5_0_2_ip_block = { + .type = AMD_IP_BLOCK_TYPE_VCN, + .major = 5, + .minor = 0, + .rev = 2, + .funcs = &vcn_v5_0_2_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.h b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.h new file mode 100644 index 000000000000..461bdda91eb6 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.h @@ -0,0 +1,29 @@ +/* + * 
Copyright 2025-2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __VCN_v5_0_2_H__ +#define __VCN_v5_0_2_H__ + +extern const struct amdgpu_ip_block_version vcn_v5_0_2_ip_block; + +#endif /* __VCN_v5_0_2_H__ */ diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index 73acbe0b7c21..b799c70f5742 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -97,11 +97,11 @@ static void cik_event_interrupt_wq(struct kfd_node *dev, return; if (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE) - kfd_signal_event_interrupt(pasid, context_id, 28); + kfd_signal_event_interrupt(pasid, context_id, 28, true); else if (ihre->source_id == CIK_INTSRC_SDMA_TRAP) - kfd_signal_event_interrupt(pasid, context_id, 28); + kfd_signal_event_interrupt(pasid, context_id, 28, true); else if (ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG) - kfd_signal_event_interrupt(pasid, context_id & 0xff, 8); + kfd_signal_event_interrupt(pasid, context_id & 0xff, 8, true); else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE) kfd_signal_hw_exception_event(pasid); else if (ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT || diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index bafc7b699dcc..44150a71ffd5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -142,6 +142,7 @@ static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id) * @p: Pointer to struct kfd_process * @id: ID to look up * @bits: Number of valid bits in @id + * @signal_mailbox_updated: flag indicates if FW updates signal mailbox entry * * Finds the first signaled event with a matching partial ID. If no * matching signaled event is found, returns NULL. In that case the @@ -155,7 +156,8 @@ static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id) * driver. */ static struct kfd_event *lookup_signaled_event_by_partial_id( - struct kfd_process *p, uint32_t id, uint32_t bits) + struct kfd_process *p, uint32_t id, uint32_t bits, + bool signal_mailbox_updated) { struct kfd_event *ev; @@ -166,7 +168,8 @@ static struct kfd_event *lookup_signaled_event_by_partial_id( * and we only need a single lookup. 
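* For instance, if KFD_SIGNAL_EVENT_LIMIT were 4096, a partial ID with 12 or
* more valid bits already names a unique slot, so that slot can be tested
* directly instead of scanning the whole signal page.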
*/ if (bits > 31 || (1U << bits) >= KFD_SIGNAL_EVENT_LIMIT) { - if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT) + if (signal_mailbox_updated && + page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT) return NULL; return idr_find(&p->event_idr, id); @@ -724,7 +727,7 @@ static void set_event_from_interrupt(struct kfd_process *p, } void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, - uint32_t valid_id_bits) + uint32_t valid_id_bits, bool signal_mailbox_updated) { struct kfd_event *ev = NULL; @@ -742,7 +745,8 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, if (valid_id_bits) ev = lookup_signaled_event_by_partial_id(p, partial_id, - valid_id_bits); + valid_id_bits, + signal_mailbox_updated); if (ev) { set_event_from_interrupt(p, ev); } else if (p->signal_page) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h index 52ccfd397c2b..1dc21c13833b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h @@ -85,6 +85,7 @@ struct kfd_event { #define KFD_EVENT_TYPE_MEMORY 8 extern void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, - uint32_t valid_id_bits); + uint32_t valid_id_bits, + bool signal_mailbox_updated); #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c index 3e1ad8974797..19406ab92c5b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c @@ -211,7 +211,7 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, client_id == SOC15_IH_CLIENTID_SE2SH || client_id == SOC15_IH_CLIENTID_SE3SH) { if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) - kfd_signal_event_interrupt(pasid, context_id0, 32); + kfd_signal_event_interrupt(pasid, context_id0, 32, true); else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG) { encoding = REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, ENCODING); @@ -324,7 +324,7 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, default: break; } - kfd_signal_event_interrupt(pasid, context_id0 & 0x7fffff, 23); + kfd_signal_event_interrupt(pasid, context_id0 & 0x7fffff, 23, true); } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) { kfd_set_dbg_ev_from_interrupt(dev, pasid, @@ -344,7 +344,7 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, client_id == SOC15_IH_CLIENTID_SDMA6 || client_id == SOC15_IH_CLIENTID_SDMA7) { if (source_id == SOC15_INTSRC_SDMA_TRAP) { - kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28); + kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28, true); } } else if (client_id == SOC15_IH_CLIENTID_VMC || client_id == SOC15_IH_CLIENTID_VMC1 || diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c index 2788a52714d1..12d81abed748 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c @@ -353,7 +353,7 @@ static void event_interrupt_wq_v11(struct kfd_node *dev, /* CP */ if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) - kfd_signal_event_interrupt(pasid, context_id0, 32); + kfd_signal_event_interrupt(pasid, context_id0, 32, true); else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0))) { u32 doorbell_id = KFD_CTXID0_DOORBELL_ID(context_id0); @@ -366,7 +366,7 @@ static void 
event_interrupt_wq_v11(struct kfd_node *dev, /* SDMA */ else if (source_id == SOC21_INTSRC_SDMA_TRAP) - kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28); + kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28, true); else if (source_id == SOC21_INTSRC_SDMA_ECC) { event_interrupt_poison_consumption_v11(dev, pasid, source_id); return; @@ -404,7 +404,7 @@ static void event_interrupt_wq_v11(struct kfd_node *dev, default: break; } - kfd_signal_event_interrupt(pasid, context_id0 & 0xffffff, 24); + kfd_signal_event_interrupt(pasid, context_id0 & 0xffffff, 24, true); } } else if (KFD_IRQ_IS_FENCE(client_id, source_id)) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v12_1.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v12_1.c index 230e57dbcec5..47947b94926b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v12_1.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v12_1.c @@ -28,6 +28,7 @@ #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" #include "kfd_smi_events.h" #include "kfd_debug.h" +#include "amdgpu_ras_mgr.h" /* * GFX12.1 SQ Interrupts @@ -185,6 +186,7 @@ static void event_interrupt_poison_consumption_v12_1(struct kfd_node *node, enum amdgpu_ras_block block = 0; int ret = -EINVAL; uint32_t reset = 0; + u64 event_id = RAS_EVENT_INVALID_ID; struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL); if (!p) @@ -220,7 +222,15 @@ static void event_interrupt_poison_consumption_v12_1(struct kfd_node *node, * resetting queue passes, do page retirement without gpu reset * resetting queue fails, fallback to gpu reset solution */ - amdgpu_amdkfd_ras_poison_consumption_handler(node->adev, block, reset); + if (amdgpu_uniras_enabled(node->adev)) + event_id = amdgpu_ras_mgr_gen_ras_event_seqno(node->adev, + RAS_SEQNO_TYPE_POISON_CONSUMPTION); + + RAS_EVENT_LOG(node->adev, event_id, + "poison is consumed by source %d, kick off gpu reset flow\n", source_id); + + amdgpu_amdkfd_ras_pasid_poison_consumption_handler(node->adev, + block, pasid, NULL, NULL, reset); } static bool event_interrupt_isr_v12_1(struct kfd_node *node, @@ -326,7 +336,7 @@ static void event_interrupt_wq_v12_1(struct kfd_node *node, /* CP */ if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) - kfd_signal_event_interrupt(pasid, context_id0, 32); + kfd_signal_event_interrupt(pasid, context_id0, 32, false); else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0))) { u32 doorbell_id = KFD_CTXID0_DOORBELL_ID(context_id0); @@ -339,7 +349,7 @@ static void event_interrupt_wq_v12_1(struct kfd_node *node, /* SDMA */ else if (source_id == SOC21_INTSRC_SDMA_TRAP) - kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28); + kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28, true); else if (source_id == SOC21_INTSRC_SDMA_ECC) { event_interrupt_poison_consumption_v12_1(node, pasid, source_id); return; @@ -377,7 +387,7 @@ static void event_interrupt_wq_v12_1(struct kfd_node *node, default: break; } - kfd_signal_event_interrupt(pasid, context_id0 & 0xffffff, 24); + kfd_signal_event_interrupt(pasid, context_id0 & 0xffffff, 24, true); } } else if (KFD_IRQ_IS_FENCE(client_id, source_id)) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index d76fb61869c7..1688d8e595f2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -379,7 +379,7 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, client_id == 
SOC15_IH_CLIENTID_SE2SH || client_id == SOC15_IH_CLIENTID_SE3SH) { if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) - kfd_signal_event_interrupt(pasid, context_id0, 32); + kfd_signal_event_interrupt(pasid, context_id0, 32, true); else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG) { sq_int_data = KFD_CONTEXT_ID_GET_SQ_INT_DATA(context_id0, context_id1); encoding = REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, ENCODING); @@ -513,7 +513,7 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, default: break; } - kfd_signal_event_interrupt(pasid, sq_int_data, 24); + kfd_signal_event_interrupt(pasid, sq_int_data, 24, true); } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) { kfd_set_dbg_ev_from_interrupt(dev, pasid, @@ -530,7 +530,7 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, client_id == SOC15_IH_CLIENTID_SDMA6 || client_id == SOC15_IH_CLIENTID_SDMA7) { if (source_id == SOC15_INTSRC_SDMA_TRAP) { - kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28); + kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28, true); } else if (source_id == SOC15_INTSRC_SDMA_ECC) { event_interrupt_poison_consumption_v9(dev, pasid, client_id); return; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index e5b56412931b..85f2bc3fbf85 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1525,7 +1525,7 @@ int kfd_wait_on_events(struct kfd_process *p, bool all, uint32_t *user_timeout_ms, uint32_t *wait_result); void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, - uint32_t valid_id_bits); + uint32_t valid_id_bits, bool signal_mailbox_updated); void kfd_signal_hw_exception_event(u32 pasid); int kfd_set_event(struct kfd_process *p, uint32_t event_id); int kfd_reset_event(struct kfd_process *p, uint32_t event_id); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index cc2621ae12f9..44e39ce222b7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -590,7 +590,8 @@ int pqm_update_queue_properties(struct process_queue_manager *pqm, return err; if (kfd_queue_buffer_get(vm, (void *)p->queue_address, &p->ring_bo, - p->queue_size)) { + p->queue_size + + pqn->q->properties.metadata_queue_size)) { pr_debug("ring buf 0x%llx size 0x%llx not mapped on GPU\n", p->queue_address, p->queue_size); amdgpu_bo_unreserve(vm->root.bo); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 080242f9981b..37fdcaf7192f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1219,7 +1219,8 @@ svm_range_get_pte_flags(struct kfd_node *node, struct amdgpu_vm *vm, bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN); bool coherent = flags & (KFD_IOCTL_SVM_FLAG_COHERENT | KFD_IOCTL_SVM_FLAG_EXT_COHERENT); bool ext_coherent = flags & KFD_IOCTL_SVM_FLAG_EXT_COHERENT; - unsigned int mtype_local; + unsigned int mtype_local, mtype_remote; + bool is_aid_a1, is_local; if (domain == SVM_RANGE_VRAM_DOMAIN) bo_node = prange->svm_bo->node; @@ -1307,20 +1308,23 @@ svm_range_get_pte_flags(struct kfd_node *node, struct amdgpu_vm *vm, mapping_flags |= AMDGPU_VM_MTYPE_NC; break; case IP_VERSION(12, 1, 0): + is_aid_a1 = (node->adev->rev_id & 0x10); + is_local = (domain == SVM_RANGE_VRAM_DOMAIN) && + (bo_node->adev == node->adev); 
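+	/*
+	 * Mapping-type summary for GC 12.1 (mirrors the ternaries below;
+	 * "A1" means the rev_id & 0x10 parts checked above):
+	 *   amdgpu_mtype_local == 0       -> local VRAM mapped RW
+	 *   amdgpu_mtype_local == 1       -> local VRAM mapped NC
+	 *   otherwise (auto)              -> RW on A1, NC elsewhere
+	 *   remote/system + ext_coherent  -> UC
+	 *   remote/system otherwise       -> NC on A1, UC on non-A1
+	 */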
+ + mtype_local = amdgpu_mtype_local == 0 ? AMDGPU_VM_MTYPE_RW : + amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC : + is_aid_a1 ? AMDGPU_VM_MTYPE_RW : AMDGPU_VM_MTYPE_NC; + mtype_remote = is_aid_a1 ? AMDGPU_VM_MTYPE_NC : AMDGPU_VM_MTYPE_UC; snoop = true; - if (domain == SVM_RANGE_VRAM_DOMAIN) { - mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC : - AMDGPU_VM_MTYPE_RW; - /* local HBM */ - if (bo_node->adev == node->adev) - mapping_flags |= mtype_local; - /* Remote GPU memory */ - else - mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : - AMDGPU_VM_MTYPE_NC; - /* system memory accessed by the dGPU */ + + if (is_local) /* local HBM */ { + mapping_flags |= mtype_local; + } else if (ext_coherent) { + mapping_flags |= AMDGPU_VM_MTYPE_UC; } else { - mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + /* system memory or remote VRAM */ + mapping_flags |= mtype_remote; } break; default: diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 65b256a7b6c4..6fb8c4f447a9 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2290,6 +2290,11 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) &adev->dm.dmub_bo_gpu_addr, &adev->dm.dmub_bo_cpu_addr); + if (adev->dm.boot_time_crc_info.bo_ptr) + amdgpu_bo_free_kernel(&adev->dm.boot_time_crc_info.bo_ptr, + &adev->dm.boot_time_crc_info.gpu_addr, + &adev->dm.boot_time_crc_info.cpu_addr); + if (adev->dm.hpd_rx_offload_wq && adev->dm.dc) { for (i = 0; i < adev->dm.dc->caps.max_links; i++) { if (adev->dm.hpd_rx_offload_wq[i].wq) { @@ -2577,7 +2582,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) fw_meta_info_params.fw_inst_const = adev->dm.dmub_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes) + PSP_HEADER_BYTES_256; - fw_meta_info_params.fw_bss_data = region_params.bss_data_size ? adev->dm.dmub_fw->data + + fw_meta_info_params.fw_bss_data = fw_meta_info_params.bss_data_size ? 
adev->dm.dmub_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes) + le32_to_cpu(hdr->inst_const_bytes) : NULL; fw_meta_info_params.custom_psp_footer_size = 0; @@ -2738,6 +2743,54 @@ static int detect_mst_link_for_all_connectors(struct drm_device *dev) return ret; } +static void amdgpu_dm_boot_time_crc_init(struct amdgpu_device *adev) +{ + struct dm_boot_time_crc_info *bootcrc_info = NULL; + struct dmub_srv *dmub = NULL; + union dmub_fw_boot_options option = {0}; + int ret = 0; + const uint32_t fb_size = 3 * 1024 * 1024; /* 3MB for DCC pattern */ + + if (!adev || !adev->dm.dc || !adev->dm.dc->ctx || + !adev->dm.dc->ctx->dmub_srv) { + return; + } + + dmub = adev->dm.dc->ctx->dmub_srv->dmub; + bootcrc_info = &adev->dm.boot_time_crc_info; + + if (!dmub || !dmub->hw_funcs.get_fw_boot_option) { + drm_dbg(adev_to_drm(adev), "failed to init boot time crc buffer\n"); + return; + } + + option = dmub->hw_funcs.get_fw_boot_option(dmub); + + /* Return if boot time CRC is not enabled */ + if (option.bits.bootcrc_en_at_S0i3 == 0) + return; + + /* Create a buffer for boot time CRC */ + ret = amdgpu_bo_create_kernel(adev, fb_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT, + &bootcrc_info->bo_ptr, + &bootcrc_info->gpu_addr, + &bootcrc_info->cpu_addr); + + if (ret) { + drm_dbg(adev_to_drm(adev), "failed to create boot time crc buffer\n"); + } else { + bootcrc_info->size = fb_size; + + drm_dbg(adev_to_drm(adev), "boot time crc buffer created addr 0x%llx, size %u\n", + bootcrc_info->gpu_addr, bootcrc_info->size); + + /* Send the buffer info to DMUB */ + dc_dmub_srv_boot_time_crc_init(adev->dm.dc, + bootcrc_info->gpu_addr, bootcrc_info->size); + } +} + static int dm_late_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -2749,6 +2802,11 @@ static int dm_late_init(struct amdgpu_ip_block *ip_block) dmcu = adev->dm.dc->res_pool->dmcu; + /* Init the boot time CRC (skip in resume) */ + if ((adev->in_suspend == 0) && + (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(3, 6, 0))) + amdgpu_dm_boot_time_crc_init(adev); + for (i = 0; i < 16; i++) linear_lut[i] = 0xFFFF * i / 15; @@ -13067,7 +13125,7 @@ static void parse_edid_displayid_vrr(struct drm_connector *connector, u16 min_vfreq; u16 max_vfreq; - if (edid == NULL || edid->extensions == 0) + if (!edid || !edid->extensions) return; /* Find DisplayID extension */ @@ -13077,7 +13135,7 @@ static void parse_edid_displayid_vrr(struct drm_connector *connector, break; } - if (edid_ext == NULL) + if (i == edid->extensions) return; while (j < EDID_LENGTH) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 800813671748..83fefd902355 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -123,6 +123,20 @@ struct dm_compressor_info { uint64_t gpu_addr; }; +/** + * struct dm_boot_time_crc_info - Buffer info used by boot time CRC + * @cpu_addr: MMIO cpu addr + * @bo_ptr: Pointer to the buffer object + * @gpu_addr: MMIO gpu addr + * @size: Size of the buffer + */ +struct dm_boot_time_crc_info { + void *cpu_addr; + struct amdgpu_bo *bo_ptr; + uint64_t gpu_addr; + uint32_t size; +}; + typedef void (*dmub_notify_interrupt_callback_t)(struct amdgpu_device *adev, struct dmub_notification *notify); /** @@ -698,6 +712,13 @@ struct amdgpu_display_manager { struct completion replied; char reply_data[0x40]; // Cannot include dmub_cmd here } fused_io[8]; + + /** + * 
@dm_boot_time_crc_info: + * + * Buffer info for the boot time crc. + */ + struct dm_boot_time_crc_info boot_time_crc_info; }; enum dsc_clock_force_state { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c index 3e05e48a8792..7ee051cb3c05 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_colorop.c @@ -37,19 +37,19 @@ const u64 amdgpu_dm_supported_degam_tfs = BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF) | BIT(DRM_COLOROP_1D_CURVE_PQ_125_EOTF) | BIT(DRM_COLOROP_1D_CURVE_BT2020_INV_OETF) | - BIT(DRM_COLOROP_1D_CURVE_GAMMA22_INV); + BIT(DRM_COLOROP_1D_CURVE_GAMMA22); const u64 amdgpu_dm_supported_shaper_tfs = BIT(DRM_COLOROP_1D_CURVE_SRGB_INV_EOTF) | BIT(DRM_COLOROP_1D_CURVE_PQ_125_INV_EOTF) | BIT(DRM_COLOROP_1D_CURVE_BT2020_OETF) | - BIT(DRM_COLOROP_1D_CURVE_GAMMA22); + BIT(DRM_COLOROP_1D_CURVE_GAMMA22_INV); const u64 amdgpu_dm_supported_blnd_tfs = BIT(DRM_COLOROP_1D_CURVE_SRGB_EOTF) | BIT(DRM_COLOROP_1D_CURVE_PQ_125_EOTF) | BIT(DRM_COLOROP_1D_CURVE_BT2020_INV_OETF) | - BIT(DRM_COLOROP_1D_CURVE_GAMMA22_INV); + BIT(DRM_COLOROP_1D_CURVE_GAMMA22); #define MAX_COLOR_PIPELINE_OPS 10 diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 24bc2a86904b..2409ac72b166 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -3305,6 +3305,52 @@ static int disallow_edp_enter_psr_set(void *data, u64 val) return 0; } +/* check whether the kernel disallows eDP from entering the replay state + * cat /sys/kernel/debug/dri/0/eDP-X/disallow_edp_enter_replay + * 0: allow eDP to enter replay; 1: disallow + */ +static int disallow_edp_enter_replay_get(void *data, u64 *val) +{ + struct amdgpu_dm_connector *aconnector = data; + + *val = (u64) aconnector->disallow_edp_enter_replay; + return 0; +} + +/* set whether the kernel disallows eDP from entering the replay state + * echo 0x0 > /sys/kernel/debug/dri/0/eDP-X/disallow_edp_enter_replay + * 0: allow eDP to enter replay; 1: disallow + * + * usage: lets a test app read crc from a PSR eDP rx. + * + * during kernel boot, the kernel writes dpcd 0x37b to + * notify the eDP rx that replay is enabled. + * rx fw then starts checking crc in its internal logic. + * the crc read count in dpcd 0x246 is not updated and + * stays 0. when the eDP tx driver wants to read the rx crc + * from dpcd 0x246, 0x270, a read count of 0 leads to a tx + * driver timeout. + * + * to avoid this, this debugfs entry lets the test app disable + * rx replay; the test app can then read a non-zero crc read count. + * + * the expected app sequence is: + * 1. disable eDP PHY and notify eDP rx with dpcd 0x600 = 2. + * 2. echo 0x1 > /sys/kernel/debug/dri/0/eDP-X/disallow_edp_enter_replay + * 3. enable eDP PHY and notify eDP rx with dpcd 0x600 = 1 but + * without programming dpcd 0x37b. + * 4. read crc from rx dpcd 0x270, 0x246, etc. + * 5. echo 0x0 > /sys/kernel/debug/dri/0/eDP-X/disallow_edp_enter_replay. + * this returns eDP to normal operation with replay set up via dpcd 0x37b. + */ +static int disallow_edp_enter_replay_set(void *data, u64 val) +{ + struct amdgpu_dm_connector *aconnector = data; + + aconnector->disallow_edp_enter_replay = val ? 
true : false; + return 0; +} + static int dmub_trace_mask_set(void *data, u64 val) { struct amdgpu_device *adev = data; @@ -3433,6 +3479,10 @@ DEFINE_DEBUGFS_ATTRIBUTE(disallow_edp_enter_psr_fops, disallow_edp_enter_psr_get, disallow_edp_enter_psr_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(disallow_edp_enter_replay_fops, + disallow_edp_enter_replay_get, + disallow_edp_enter_replay_set, "%llu\n"); + DEFINE_DEBUGFS_ATTRIBUTE(ips_residency_cntl_fops, ips_residency_cntl_get, ips_residency_cntl_set, "%llu\n"); DEFINE_SHOW_ATTRIBUTE(current_backlight); @@ -3635,6 +3685,8 @@ void connector_debugfs_init(struct amdgpu_dm_connector *connector) &allow_edp_hotplug_detection_fops); debugfs_create_file("disallow_edp_enter_psr", 0644, dir, connector, &disallow_edp_enter_psr_fops); + debugfs_create_file("disallow_edp_enter_replay", 0644, dir, connector, + &disallow_edp_enter_replay_fops); } for (i = 0; i < ARRAY_SIZE(connector_debugfs_entries); i++) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index d8c69fc94abb..54ae1c371511 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -62,9 +62,6 @@ static const uint32_t rgb_formats[] = { DRM_FORMAT_XBGR8888, DRM_FORMAT_ABGR8888, DRM_FORMAT_RGB565, - DRM_FORMAT_NV21, - DRM_FORMAT_NV12, - DRM_FORMAT_P010 }; static const uint32_t overlay_formats[] = { @@ -707,21 +704,21 @@ static void amdgpu_dm_plane_add_gfx12_modifiers(struct amdgpu_device *adev, uint8_t max_comp_block[] = {2, 1, 0}; uint64_t max_comp_block_mod[ARRAY_SIZE(max_comp_block)] = {0}; uint8_t i = 0, j = 0; - uint64_t gfx12_modifiers[] = {mod_256k, mod_64k, mod_4k, mod_256b, DRM_FORMAT_MOD_LINEAR}; + /* Note, linear (no DCC) gets added to the modifier list for all chips by the caller. */ + uint64_t gfx12_modifiers[] = {mod_256k, mod_64k, mod_4k, mod_256b}; for (i = 0; i < ARRAY_SIZE(max_comp_block); i++) max_comp_block_mod[i] = AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, max_comp_block[i]); /* With DCC: Best choice should be kept first. Hence, add all 256k modifiers of different * max compressed blocks first and then move on to the next smaller sized layouts. - * Do not add the linear modifier here, and hence the condition of size-1 for the loop */ - for (j = 0; j < ARRAY_SIZE(gfx12_modifiers) - 1; j++) + for (j = 0; j < ARRAY_SIZE(gfx12_modifiers); j++) for (i = 0; i < ARRAY_SIZE(max_comp_block); i++) amdgpu_dm_plane_add_modifier(mods, size, capacity, ver | dcc | max_comp_block_mod[i] | gfx12_modifiers[j]); - /* Without DCC. Add all modifiers including linear at the end */ + /* Without DCC. 
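Linear is appended separately by the caller, as noted above.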
*/ for (i = 0; i < ARRAY_SIZE(gfx12_modifiers); i++) amdgpu_dm_plane_add_modifier(mods, size, capacity, gfx12_modifiers[i]); diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c index e91636d033c7..5a5249f3ffbd 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c @@ -244,6 +244,10 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p BREAK_TO_DEBUGGER(); return NULL; } + if (ctx->dce_version == DCN_VERSION_2_01) { + dcn201_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); + return &clk_mgr->base; + } if (ASICREV_IS_SIENNA_CICHLID_P(asic_id.hw_internal_rev)) { dcn3_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); return &clk_mgr->base; @@ -256,10 +260,6 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p dcn3_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); return &clk_mgr->base; } - if (ctx->dce_version == DCN_VERSION_2_01) { - dcn201_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); - return &clk_mgr->base; - } dcn20_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); return &clk_mgr->base; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 31589f22aae0..8b9c686eefd2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2617,6 +2617,16 @@ void dc_post_update_surfaces_to_stream(struct dc *dc) dc->optimized_required = false; } +void dc_get_default_tiling_info(const struct dc *dc, struct dc_tiling_info *tiling_info) +{ + if (!dc || !tiling_info) + return; + if (dc->res_pool && dc->res_pool->funcs && dc->res_pool->funcs->get_default_tiling_info) { + dc->res_pool->funcs->get_default_tiling_info(tiling_info); + return; + } +} + bool dc_set_generic_gpio_for_stereo(bool enable, struct gpio_service *gpio_service) { diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index c7a09724f569..0b48feaba131 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -63,7 +63,7 @@ struct dcn_dsc_reg_state; struct dcn_optc_reg_state; struct dcn_dccg_reg_state; -#define DC_VER "3.2.373" +#define DC_VER "3.2.374" /** * MAX_SURFACES - representative of the upper bound of surfaces that can be piped to a single CRTC @@ -1970,6 +1970,15 @@ void dc_plane_cm_retain(struct dc_plane_cm *cm); void dc_post_update_surfaces_to_stream( struct dc *dc); +/* + * dc_get_default_tiling_info() - Retrieve an ASIC-appropriate default tiling + * description for (typically) linear surfaces. + * + * This is used by OS/DM paths that need a valid, fully-initialized tiling + * description without hardcoding gfx-version specifics in the caller. 
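+ *
+ * A minimal caller sketch (illustrative only, not part of this change):
+ *
+ *	struct dc_tiling_info ti = { 0 };
+ *
+ *	dc_get_default_tiling_info(dc, &ti);
+ *
+ * Pools wired to dcn10_get_default_tiling_info yield DcGfxVersion9 +
+ * DC_SW_LINEAR; DCN4.01 yields DcGfxAddr3 + DC_ADDR3_SW_LINEAR. If the
+ * pool provides no get_default_tiling_info hook, ti is left untouched,
+ * so callers should zero-initialize it.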
+ */ +void dc_get_default_tiling_info(const struct dc *dc, struct dc_tiling_info *tiling_info); + /** * struct dc_validation_set - Struct to store surface/stream associations for validation */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index b15360bcdacf..e5a222425814 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -2349,6 +2349,33 @@ bool dc_dmub_srv_is_cursor_offload_enabled(const struct dc *dc) return dc->ctx->dmub_srv && dc->ctx->dmub_srv->cursor_offload_enabled; } +void dc_dmub_srv_boot_time_crc_init(const struct dc *dc, uint64_t gpu_addr, uint32_t size) +{ + struct dc_dmub_srv *dc_dmub_srv; + struct dc_context *dc_ctx; + union dmub_rb_cmd cmd = {0}; + bool result = false; + + if (!dc || !dc->ctx || !dc->ctx->dmub_srv || size == 0) + return; + + dc_dmub_srv = dc->ctx->dmub_srv; + dc_ctx = dc_dmub_srv->ctx; + + memset(&cmd, 0, sizeof(cmd)); + cmd.boot_time_crc_init.header.type = DMUB_CMD__BOOT_TIME_CRC; + cmd.boot_time_crc_init.header.sub_type = DMUB_CMD__BOOT_TIME_CRC_INIT_MEM; + cmd.boot_time_crc_init.header.payload_bytes = + sizeof(struct dmub_rb_cmd_boot_time_crc_init); + cmd.boot_time_crc_init.data.buffer_addr.quad_part = gpu_addr; + cmd.boot_time_crc_init.data.buffer_size = size; + + result = dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); + + if (!result) + DC_ERROR("Boot time crc init failed in DMUB"); +} + void dc_dmub_srv_release_hw(const struct dc *dc) { struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index 72e0a41f39f0..6579cf9cce3c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -362,6 +362,15 @@ void dc_dmub_srv_program_cursor_now(struct dc *dc, const struct pipe_ctx *pipe); bool dc_dmub_srv_is_cursor_offload_enabled(const struct dc *dc); /** + * dc_dmub_srv_boot_time_crc_init() - Initializes DMUB boot time CRC. + * + * @dc - pointer to DC object + * @gpu_addr - address for the boot time CRC buffer + * @size - size of the boot time CRC buffer + */ +void dc_dmub_srv_boot_time_crc_init(const struct dc *dc, uint64_t gpu_addr, uint32_t size); + +/** * dc_dmub_srv_release_hw() - Notifies DMUB service that HW access is no longer required. 
* * @dc - pointer to DC object diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c index cd8cca651419..48905ca39b70 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c @@ -5304,7 +5304,7 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( double LinesInDETC; unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; unsigned int LinesInDETCRoundedDownToSwath; - double FullDETBufferingTimeY[DC__NUM_DPP__MAX]; + double FullDETBufferingTimeY[DC__NUM_DPP__MAX] = { 0 }; double FullDETBufferingTimeC; double ActiveDRAMClockChangeLatencyMarginY; double ActiveDRAMClockChangeLatencyMarginC; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 0748ef36a16a..19b142412a84 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -459,8 +459,8 @@ void dml32_CalculateSwathAndDETConfiguration( { unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX]; unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX]; - unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX]; - unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX]; + unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX] = { 0 }; + unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX] = { 0 }; unsigned int RoundedUpSwathSizeBytesY; unsigned int RoundedUpSwathSizeBytesC; double SwathWidthdoubleDPP[DC__NUM_DPP__MAX]; @@ -2988,7 +2988,7 @@ void dml32_UseMinimumDCFCLK( for (j = 0; j <= 1; ++j) { double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX]; double PrefetchPixelLinesTime[DC__NUM_DPP__MAX]; - double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX]; + double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX] = { 0 }; double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX]; double MinimumTWait = 0.0; double DPTEBandwidth; diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_soc_parameter_types.h b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_soc_parameter_types.h index ddbb8dfa9ff8..6152155d6073 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_soc_parameter_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_soc_parameter_types.h @@ -192,6 +192,7 @@ struct dml2_ip_capabilities { unsigned int max_flip_time_us; unsigned int max_flip_time_lines; unsigned int hostvm_mode; + bool dcn_mrq_present; unsigned int subvp_drr_scheduling_margin_us; unsigned int subvp_prefetch_end_to_mall_start_us; unsigned int subvp_fw_processing_delay; diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.c index 608b4a305c65..99fc2f0666e2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.c @@ -187,6 +187,7 @@ static void patch_ip_params_with_ip_caps(struct dml2_core_ip_params *ip_params, { ip_params->max_num_dpp = ip_caps->pipe_count; ip_params->max_num_otg = ip_caps->otg_count; + ip_params->max_num_opp = ip_caps->otg_count; ip_params->num_dsc = ip_caps->num_dsc; ip_params->max_num_dp2p0_streams = ip_caps->max_num_dp2p0_streams; ip_params->max_num_dp2p0_outputs = 
ip_caps->max_num_dp2p0_outputs; diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index ca5ac3c0deb5..b30d16474ceb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -12262,11 +12262,15 @@ static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs, unsigned int pixel_chunk_bytes = 0; unsigned int min_pixel_chunk_bytes = 0; + unsigned int meta_chunk_bytes = 0; + unsigned int min_meta_chunk_bytes = 0; unsigned int dpte_group_bytes = 0; unsigned int mpte_group_bytes = 0; unsigned int p1_pixel_chunk_bytes = 0; unsigned int p1_min_pixel_chunk_bytes = 0; + unsigned int p1_meta_chunk_bytes = 0; + unsigned int p1_min_meta_chunk_bytes = 0; unsigned int p1_dpte_group_bytes = 0; unsigned int p1_mpte_group_bytes = 0; @@ -12287,8 +12291,13 @@ static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs, dpte_group_bytes = (unsigned int)(dml_get_dpte_group_size_in_bytes(mode_lib, pipe_idx)); mpte_group_bytes = (unsigned int)(dml_get_vm_group_size_in_bytes(mode_lib, pipe_idx)); + meta_chunk_bytes = (unsigned int)(mode_lib->ip.meta_chunk_size_kbytes * 1024); + min_meta_chunk_bytes = (unsigned int)(mode_lib->ip.min_meta_chunk_size_bytes); + p1_pixel_chunk_bytes = pixel_chunk_bytes; p1_min_pixel_chunk_bytes = min_pixel_chunk_bytes; + p1_meta_chunk_bytes = meta_chunk_bytes; + p1_min_meta_chunk_bytes = min_meta_chunk_bytes; p1_dpte_group_bytes = dpte_group_bytes; p1_mpte_group_bytes = mpte_group_bytes; @@ -12309,6 +12318,19 @@ static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs, else rq_regs->rq_regs_c.min_chunk_size = log_and_substract_if_non_zero(p1_min_pixel_chunk_bytes, 8 - 1); + rq_regs->rq_regs_l.meta_chunk_size = log_and_substract_if_non_zero(meta_chunk_bytes, 10); + rq_regs->rq_regs_c.meta_chunk_size = log_and_substract_if_non_zero(p1_meta_chunk_bytes, 10); + + if (min_meta_chunk_bytes == 0) + rq_regs->rq_regs_l.min_meta_chunk_size = 0; + else + rq_regs->rq_regs_l.min_meta_chunk_size = log_and_substract_if_non_zero(min_meta_chunk_bytes, 6 - 1); + + if (min_meta_chunk_bytes == 0) + rq_regs->rq_regs_c.min_meta_chunk_size = 0; + else + rq_regs->rq_regs_c.min_meta_chunk_size = log_and_substract_if_non_zero(p1_min_meta_chunk_bytes, 6 - 1); + rq_regs->rq_regs_l.dpte_group_size = log_and_substract_if_non_zero(dpte_group_bytes, 6); rq_regs->rq_regs_l.mpte_group_size = log_and_substract_if_non_zero(mpte_group_bytes, 6); rq_regs->rq_regs_c.dpte_group_size = log_and_substract_if_non_zero(p1_dpte_group_bytes, 6); diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c index 4985e885952d..263e0c4d34f6 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c @@ -812,9 +812,8 @@ void hubp401_cursor_set_position( int x_pos_viewport = 0; int x_hot_viewport = 0; uint32_t cur_en = pos->enable ? 1 : 0; - + uint32_t x_hotspot_clamped = pos->x_hotspot; hubp->curs_pos = *pos; - /* Recout is zero for pipes if the entire dst_rect is contained * within preceding ODM slices. 
*/ @@ -845,6 +844,8 @@ void hubp401_cursor_set_position( ASSERT(param->h_scale_ratio.value); + if (x_hotspot_clamped > 0xFF) + x_hotspot_clamped = 0xFF; if (param->h_scale_ratio.value) dst_x_offset = dc_fixpt_floor(dc_fixpt_div( dc_fixpt_from_int(dst_x_offset), @@ -865,7 +866,7 @@ void hubp401_cursor_set_position( CURSOR_Y_POSITION, pos->y); REG_SET_2(CURSOR_HOT_SPOT, 0, - CURSOR_HOT_SPOT_X, pos->x_hotspot, + CURSOR_HOT_SPOT_X, x_hotspot_clamped, CURSOR_HOT_SPOT_Y, pos->y_hotspot); REG_SET(CURSOR_DST_OFFSET, 0, diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 43579b0e1482..e960ca9062ad 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -214,6 +214,7 @@ struct resource_funcs { unsigned int index); void (*get_panel_config_defaults)(struct dc_panel_config *panel_config); + void (*get_default_tiling_info)(struct dc_tiling_info *tiling_info); void (*build_pipe_pix_clk_params)(struct pipe_ctx *pipe_ctx); /* * Get indicator of power from a context that went through full validation diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c index bbe185e15eb6..44178e915bdc 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c @@ -1273,6 +1273,12 @@ static const struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn10_get_dcc_compression_cap }; +void dcn10_get_default_tiling_info(struct dc_tiling_info *tiling_info) +{ + tiling_info->gfxversion = DcGfxVersion9; + tiling_info->gfx9.swizzle = DC_SW_LINEAR; +} + static const struct resource_funcs dcn10_res_pool_funcs = { .destroy = dcn10_destroy_resource_pool, .link_enc_create = dcn10_link_encoder_create, @@ -1284,7 +1290,8 @@ static const struct resource_funcs dcn10_res_pool_funcs = { .add_stream_to_ctx = dcn10_add_stream_to_ctx, .patch_unknown_plane_state = dcn10_patch_unknown_plane_state, .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, - .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, + .get_default_tiling_info = dcn10_get_default_tiling_info }; static uint32_t read_pipe_fuses(struct dc_context *ctx) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h index 7bc1be53e800..c7409298caa9 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h @@ -53,5 +53,7 @@ struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link( unsigned int dcn10_get_vstartup_for_pipe(struct pipe_ctx *pipe_ctx); +void dcn10_get_default_tiling_info(struct dc_tiling_info *tiling_info); + #endif /* __DC_RESOURCE_DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c index 366576b1c617..8426d5f9f377 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c @@ -2232,7 +2232,8 @@ static const struct resource_funcs dcn20_res_pool_funcs = { .set_mcif_arb_params = dcn20_set_mcif_arb_params, .populate_dml_pipes = dcn20_populate_dml_pipes_from_context, .find_first_free_match_stream_enc_for_link = 
dcn10_find_first_free_match_stream_enc_for_link, - .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, + .get_default_tiling_info = dcn10_get_default_tiling_info }; bool dcn20_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c index 4ea76e46ab15..491d8d3b1b68 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c @@ -1081,7 +1081,8 @@ static struct resource_funcs dcn201_res_pool_funcs = { .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .set_mcif_arb_params = dcn20_set_mcif_arb_params, .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, - .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, + .get_default_tiling_info = dcn10_get_default_tiling_info }; static bool dcn201_resource_construct( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c index 0f4307f8f3dd..a1a529cabb93 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c @@ -1378,7 +1378,8 @@ static const struct resource_funcs dcn21_res_pool_funcs = { .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, .update_bw_bounding_box = dcn21_update_bw_bounding_box, .get_panel_config_defaults = dcn21_get_panel_config_defaults, - .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, + .get_default_tiling_info = dcn10_get_default_tiling_info }; static bool dcn21_resource_construct( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c index 2fa86b9587ed..566517b99a09 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c @@ -2248,7 +2248,8 @@ static const struct resource_funcs dcn30_res_pool_funcs = { .update_bw_bounding_box = dcn30_update_bw_bounding_box, .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .get_panel_config_defaults = dcn30_get_panel_config_defaults, - .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, + .get_default_tiling_info = dcn10_get_default_tiling_info }; #define CTX ctx diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c index 7842bee57e63..d21b928055e5 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c @@ -1400,7 +1400,8 @@ static struct resource_funcs dcn301_res_pool_funcs = { .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, .update_bw_bounding_box = dcn301_update_bw_bounding_box, .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, - .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, + .get_default_tiling_info = dcn10_get_default_tiling_info, }; static bool dcn301_resource_construct( diff --git 
a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c index 1874d5d6b782..d24b9b81df77 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c @@ -1155,7 +1155,8 @@ static struct resource_funcs dcn302_res_pool_funcs = { .update_bw_bounding_box = dcn302_update_bw_bounding_box, .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .get_panel_config_defaults = dcn302_get_panel_config_defaults, - .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, + .get_default_tiling_info = dcn10_get_default_tiling_info }; static struct dc_cap_funcs cap_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c index d52201cb359f..0b44a33a0d32 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c @@ -1099,7 +1099,8 @@ static struct resource_funcs dcn303_res_pool_funcs = { .update_bw_bounding_box = dcn303_update_bw_bounding_box, .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .get_panel_config_defaults = dcn303_get_panel_config_defaults, - .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, + .get_default_tiling_info = dcn10_get_default_tiling_info }; static struct dc_cap_funcs cap_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c index 2055f1f8af65..046724c86c7a 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c @@ -1851,6 +1851,7 @@ static struct resource_funcs dcn31_res_pool_funcs = { .get_det_buffer_size = dcn31_get_det_buffer_size, .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, .update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch, + .get_default_tiling_info = dcn10_get_default_tiling_info, .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params }; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c index 1939f720ba29..66bf7725aeaf 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c @@ -1782,7 +1782,8 @@ static struct resource_funcs dcn314_res_pool_funcs = { .get_det_buffer_size = dcn31_get_det_buffer_size, .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, .update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch, - .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params + .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, + .get_default_tiling_info = dcn10_get_default_tiling_info }; static struct clock_source *dcn30_clock_source_create( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c index e8377c190f63..4d5fcd7a0b00 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c @@ -1846,7 +1846,8 @@ static struct resource_funcs dcn315_res_pool_funcs = { .get_det_buffer_size = 
dcn31_get_det_buffer_size, .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, .update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch, - .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params + .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, + .get_default_tiling_info = dcn10_get_default_tiling_info }; static bool dcn315_resource_construct( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c index 045ce01bd74e..63675b53674a 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c @@ -1721,7 +1721,8 @@ static struct resource_funcs dcn316_res_pool_funcs = { .get_det_buffer_size = dcn31_get_det_buffer_size, .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, .update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch, - .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params + .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, + .get_default_tiling_info = dcn10_get_default_tiling_info }; static bool dcn316_resource_construct( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 7ebb7d1193af..2b9d8d224572 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -1785,7 +1785,10 @@ static bool dml1_validate(struct dc *dc, struct dc_state *context, enum dc_valid dc->res_pool->funcs->calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel); + DC_FP_START(); dcn32_override_min_req_memclk(dc, context); + DC_FP_END(); + dcn32_override_min_req_dcfclk(dc, context); BW_VAL_TRACE_END_WATERMARKS(); @@ -2109,6 +2112,7 @@ static struct resource_funcs dcn32_res_pool_funcs = { .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .update_soc_for_wm_a = dcn30_update_soc_for_wm_a, .add_phantom_pipes = dcn32_add_phantom_pipes, + .get_default_tiling_info = dcn10_get_default_tiling_info, .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, .calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes, .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index c1582c27ac87..e3dc4b1aacda 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -1619,6 +1619,7 @@ static struct resource_funcs dcn321_res_pool_funcs = { .calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes, .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, .get_max_hw_cursor_size = dcn32_get_max_hw_cursor_size, + .get_default_tiling_info = dcn10_get_default_tiling_info, }; static uint32_t read_pipe_fuses(struct dc_context *ctx) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 3494a40cea99..ff557c4d594e 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -1802,7 +1802,8 @@ static struct resource_funcs dcn35_res_pool_funcs = { .get_det_buffer_size = dcn31_get_det_buffer_size, .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, 
.update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch, - .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params + .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, + .get_default_tiling_info = dcn10_get_default_tiling_info }; static bool dcn35_resource_construct( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c index 080bc7f24ffa..0c39d0b17947 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c @@ -1775,7 +1775,8 @@ static struct resource_funcs dcn351_res_pool_funcs = { .get_det_buffer_size = dcn31_get_det_buffer_size, .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, .update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch, - .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params + .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, + .get_default_tiling_info = dcn10_get_default_tiling_info }; static bool dcn351_resource_construct( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c index af51ac4ea59e..1ad44fb64213 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c @@ -1781,7 +1781,8 @@ static struct resource_funcs dcn36_res_pool_funcs = { .get_preferred_eng_id_dpia = dcn36_get_preferred_eng_id_dpia, .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, .update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch, - .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params + .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, + .get_default_tiling_info = dcn10_get_default_tiling_info, }; static bool dcn36_resource_construct( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index e37aab939a41..60a0e7c95c73 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -1839,9 +1839,16 @@ static struct resource_funcs dcn401_res_pool_funcs = { .calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes, .get_power_profile = dcn401_get_power_profile, .get_vstartup_for_pipe = dcn401_get_vstartup_for_pipe, - .get_max_hw_cursor_size = dcn32_get_max_hw_cursor_size + .get_max_hw_cursor_size = dcn32_get_max_hw_cursor_size, + .get_default_tiling_info = dcn401_get_default_tiling_info }; +void dcn401_get_default_tiling_info(struct dc_tiling_info *tiling_info) +{ + tiling_info->gfxversion = DcGfxAddr3; + tiling_info->gfx_addr3.swizzle = DC_ADDR3_SW_LINEAR; +} + static uint32_t read_pipe_fuses(struct dc_context *ctx) { uint32_t value = REG_READ(CC_DC_PIPE_DIS); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h index 08bec1755617..5f3b0319cb5b 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h @@ -28,6 +28,8 @@ enum dc_status dcn401_validate_bandwidth(struct dc *dc, void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context); +void dcn401_get_default_tiling_info(struct dc_tiling_info *tiling_info); + unsigned int 
 unsigned int dcn401_get_vstartup_for_pipe(struct pipe_ctx *pipe_ctx);
 
 /* Following are definitions for run time init of reg offsets */
diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h
index 18f9642a42ee..62f7ed0b2066 100644
--- a/drivers/gpu/drm/amd/include/atomfirmware.h
+++ b/drivers/gpu/drm/amd/include/atomfirmware.h
@@ -1695,7 +1695,9 @@ struct atom_integrated_system_info_v2_3 {
 	uint8_t gpu_package_id;
 	struct edp_info_table edp1_info;
 	struct edp_info_table edp2_info;
-	uint32_t reserved2[8];
+	uint32_t cpuid;
+	uint32_t vram_bit_width;
+	uint32_t reserved2[6];
 	struct atom_external_display_connection_info extdispconninfo;
 	uint8_t UMACarveoutVersion;
 	uint8_t UMACarveoutIndexMax;
@@ -1770,6 +1772,7 @@ enum atom_dmi_t17_mem_type_def{
 	Hbm2MemType,    ///< Assign 33 to HBM2
 	Ddr5MemType,    ///< Assign 34 to DDR5
 	LpDdr5MemType,  ///< Assign 35 to LPDDR5
+	LpDdr5xMemType, ///< Assign 36 to LPDDR5x
 };
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index eca93a9d0b84..9e2f69663f96 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -1910,8 +1910,6 @@ static int ss_bias_attr_update(struct amdgpu_device *adev, struct amdgpu_device_
 static int pp_od_clk_voltage_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr,
 					 uint32_t mask, enum amdgpu_device_attr_states *states)
 {
-	uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
-
 	*states = ATTR_STATE_SUPPORTED;
 
 	if (!amdgpu_dpm_is_overdrive_supported(adev)) {
@@ -1919,10 +1917,8 @@ static int pp_od_clk_voltage_attr_update(struct amdgpu_device *adev, struct amdg
 		return 0;
 	}
 
-	/* Enable pp_od_clk_voltage node for gc 9.4.3, 9.4.4, 9.5.0 SRIOV/BM support */
-	if (gc_ver == IP_VERSION(9, 4, 3) ||
-	    gc_ver == IP_VERSION(9, 4, 4) ||
-	    gc_ver == IP_VERSION(9, 5, 0)) {
+	/* Enable pp_od_clk_voltage node for gc 9.4.3, 9.4.4, 9.5.0, 12.1.0 SRIOV/BM support */
+	if (amdgpu_is_multi_aid(adev)) {
 		if (amdgpu_sriov_multi_vf_mode(adev))
 			*states = ATTR_STATE_UNSUPPORTED;
 		return 0;
@@ -2000,9 +1996,7 @@ static int pp_dpm_clk_default_attr_update(struct amdgpu_device *adev, struct amd
 		      gc_ver == IP_VERSION(11, 5, 0) ||
 		      gc_ver == IP_VERSION(11, 0, 2) ||
 		      gc_ver == IP_VERSION(11, 0, 3) ||
-		      gc_ver == IP_VERSION(9, 4, 3) ||
-		      gc_ver == IP_VERSION(9, 4, 4) ||
-		      gc_ver == IP_VERSION(9, 5, 0)))
+		      amdgpu_is_multi_aid(adev)))
 			*states = ATTR_STATE_UNSUPPORTED;
 	} else if (DEVICE_ATTR_IS(pp_dpm_vclk1)) {
 		if (!((gc_ver == IP_VERSION(10, 3, 1) ||
@@ -2023,9 +2017,7 @@ static int pp_dpm_clk_default_attr_update(struct amdgpu_device *adev, struct amd
 		      gc_ver == IP_VERSION(11, 5, 0) ||
 		      gc_ver == IP_VERSION(11, 0, 2) ||
 		      gc_ver == IP_VERSION(11, 0, 3) ||
-		      gc_ver == IP_VERSION(9, 4, 3) ||
-		      gc_ver == IP_VERSION(9, 4, 4) ||
-		      gc_ver == IP_VERSION(9, 5, 0)))
+		      amdgpu_is_multi_aid(adev)))
 			*states = ATTR_STATE_UNSUPPORTED;
 	} else if (DEVICE_ATTR_IS(pp_dpm_dclk1)) {
 		if (!((gc_ver == IP_VERSION(10, 3, 1) ||
@@ -2035,9 +2027,7 @@ static int pp_dpm_clk_default_attr_update(struct amdgpu_device *adev, struct amd
 			*states = ATTR_STATE_UNSUPPORTED;
 	} else if (DEVICE_ATTR_IS(pp_dpm_pcie)) {
 		if (gc_ver == IP_VERSION(9, 4, 2) ||
-		    gc_ver == IP_VERSION(9, 4, 3) ||
-		    gc_ver == IP_VERSION(9, 4, 4) ||
-		    gc_ver == IP_VERSION(9, 5, 0))
+		    amdgpu_is_multi_aid(adev))
 			*states = ATTR_STATE_UNSUPPORTED;
 	}
 
@@ -2651,6 +2641,7 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_
 	case IP_VERSION(11, 0, 3):
 	case IP_VERSION(12, 0, 0):
 	case IP_VERSION(12, 0, 1):
+	case IP_VERSION(12, 1, 0):
 		*states = ATTR_STATE_SUPPORTED;
 		break;
 	default:
@@ -3732,8 +3723,7 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
 	/* Skip crit temp on APU */
 	if ((((adev->flags & AMD_IS_APU) && (adev->family >= AMDGPU_FAMILY_CZ)) ||
-	     (gc_ver == IP_VERSION(9, 4, 3) || gc_ver == IP_VERSION(9, 4, 4) ||
-	      gc_ver == IP_VERSION(9, 5, 0))) &&
+	     amdgpu_is_multi_aid(adev)) &&
 	    (attr == &sensor_dev_attr_temp1_crit.dev_attr.attr ||
 	     attr == &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr))
 		return 0;
@@ -3815,18 +3805,14 @@
 	if ((adev->family == AMDGPU_FAMILY_SI || /* not implemented yet */
 	     adev->family == AMDGPU_FAMILY_KV || /* not implemented yet */
-	     (gc_ver == IP_VERSION(9, 4, 3) ||
-	      gc_ver == IP_VERSION(9, 4, 4) ||
-	      gc_ver == IP_VERSION(9, 5, 0))) &&
+	     amdgpu_is_multi_aid(adev)) &&
 	    (attr == &sensor_dev_attr_in0_input.dev_attr.attr ||
 	     attr == &sensor_dev_attr_in0_label.dev_attr.attr))
 		return 0;
 
 	/* only APUs other than gc 9,4,3 have vddnb */
 	if ((!(adev->flags & AMD_IS_APU) ||
-	     (gc_ver == IP_VERSION(9, 4, 3) ||
-	      gc_ver == IP_VERSION(9, 4, 4) ||
-	      gc_ver == IP_VERSION(9, 5, 0))) &&
+	     amdgpu_is_multi_aid(adev)) &&
 	    (attr == &sensor_dev_attr_in1_input.dev_attr.attr ||
 	     attr == &sensor_dev_attr_in1_label.dev_attr.attr))
 		return 0;
@@ -3855,9 +3841,7 @@
 		return 0;
 
 	/* hotspot temperature for gc 9,4,3*/
-	if (gc_ver == IP_VERSION(9, 4, 3) ||
-	    gc_ver == IP_VERSION(9, 4, 4) ||
-	    gc_ver == IP_VERSION(9, 5, 0)) {
+	if (amdgpu_is_multi_aid(adev)) {
 		if (attr == &sensor_dev_attr_temp1_input.dev_attr.attr ||
 		    attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr ||
 		    attr == &sensor_dev_attr_temp1_label.dev_attr.attr)
diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
index 61b1c5aa74cb..36942467d4ad 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
@@ -3454,9 +3454,11 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev,
 	if (adev->asic_type == CHIP_HAINAN) {
 		if ((adev->pdev->revision == 0x81) ||
 		    (adev->pdev->revision == 0xC3) ||
+		    (adev->pdev->device == 0x6660) ||
 		    (adev->pdev->device == 0x6664) ||
 		    (adev->pdev->device == 0x6665) ||
-		    (adev->pdev->device == 0x6667)) {
+		    (adev->pdev->device == 0x6667) ||
+		    (adev->pdev->device == 0x666F)) {
 			max_sclk = 75000;
 		}
 		if ((adev->pdev->revision == 0xC3) ||
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 90346e70a614..609f5ab07d8a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -389,6 +389,7 @@ struct smu_table_context {
 	void *metrics_table;
 	void *clocks_table;
 	void *watermarks_table;
+	struct mutex metrics_lock;
 	void *max_sustainable_clocks;
 	struct smu_bios_boot_up_values boot_values;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index b0d6b7b0946d..89bbda0670ef 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -132,8 +132,6 @@ int smu_v13_0_setup_pptable(struct smu_context *smu);
 
 int smu_v13_0_get_vbios_bootup_values(struct smu_context *smu);
 
-int smu_v13_0_check_fw_version(struct smu_context *smu);
-
 int smu_v13_0_set_driver_table_location(struct smu_context *smu);
 
 int smu_v13_0_set_tool_table_location(struct smu_context *smu);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v15_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v15_0.h
index ab4a64f54e79..09743ccd0d15 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v15_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v15_0.h
@@ -142,8 +142,6 @@ int smu_v15_0_setup_pptable(struct smu_context *smu);
 
 int smu_v15_0_get_vbios_bootup_values(struct smu_context *smu);
 
-int smu_v15_0_check_fw_version(struct smu_context *smu);
-
 int smu_v15_0_set_driver_table_location(struct smu_context *smu);
 
 int smu_v15_0_set_tool_table_location(struct smu_context *smu);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
index 2b4faab37693..dc056f1e4b64 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
@@ -1988,7 +1988,7 @@ static const struct pptable_funcs aldebaran_ppt_funcs = {
 	/* pptable related */
 	.setup_pptable = aldebaran_setup_pptable,
 	.get_vbios_bootup_values = smu_v13_0_get_vbios_bootup_values,
-	.check_fw_version = smu_v13_0_check_fw_version,
+	.check_fw_version = smu_cmn_check_fw_version,
 	.write_pptable = smu_cmn_write_pptable,
 	.set_driver_table_location = smu_v13_0_set_driver_table_location,
 	.set_tool_table_location = smu_v13_0_set_tool_table_location,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index 4e09eda77d8b..a3b755c61b1f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -258,49 +258,6 @@ int smu_v13_0_check_fw_status(struct smu_context *smu)
 	return -EIO;
 }
 
-int smu_v13_0_check_fw_version(struct smu_context *smu)
-{
-	struct amdgpu_device *adev = smu->adev;
-	uint32_t if_version = 0xff, smu_version = 0xff;
-	uint8_t smu_program, smu_major, smu_minor, smu_debug;
-	int ret = 0;
-
-	ret = smu_cmn_get_smc_version(smu, &if_version, &smu_version);
-	if (ret)
-		return ret;
-
-	smu_program = (smu_version >> 24) & 0xff;
-	smu_major = (smu_version >> 16) & 0xff;
-	smu_minor = (smu_version >> 8) & 0xff;
-	smu_debug = (smu_version >> 0) & 0xff;
-	adev->pm.fw_version = smu_version;
-
-	/* only for dGPU w/ SMU13*/
-	if (adev->pm.fw)
-		dev_dbg(smu->adev->dev, "smu fw reported program %d, version = 0x%08x (%d.%d.%d)\n",
-			smu_program, smu_version, smu_major, smu_minor, smu_debug);
-
-	/*
-	 * 1. if_version mismatch is not critical as our fw is designed
-	 * to be backward compatible.
-	 * 2. New fw usually brings some optimizations. But that's visible
-	 * only on the paired driver.
-	 * Considering above, we just leave user a verbal message instead
-	 * of halt driver loading.
-	 */
-	dev_info_once(adev->dev, "smu driver if version = 0x%08x, smu fw if version = 0x%08x, "
-		      "smu fw program = %d, smu fw version = 0x%08x (%d.%d.%d)\n",
-		      smu->smc_driver_if_version, if_version,
-		      smu_program, smu_version, smu_major, smu_minor, smu_debug);
-
-	if (smu->smc_driver_if_version != SMU_IGNORE_IF_VERSION &&
-	    if_version != smu->smc_driver_if_version) {
-		dev_info(adev->dev, "SMU driver if version not matched\n");
-	}
-
-	return ret;
-}
-
 static int smu_v13_0_set_pptable_v2_0(struct smu_context *smu, void **table, uint32_t *size)
 {
 	struct amdgpu_device *adev = smu->adev;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index a8d63d4d1f6e..59720372fc17 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -3133,7 +3133,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
 	.fini_power = smu_v13_0_fini_power,
 	.check_fw_status = smu_v13_0_check_fw_status,
 	.setup_pptable = smu_v13_0_0_setup_pptable,
-	.check_fw_version = smu_v13_0_check_fw_version,
+	.check_fw_version = smu_cmn_check_fw_version,
 	.write_pptable = smu_cmn_write_pptable,
 	.set_driver_table_location = smu_v13_0_set_driver_table_location,
 	.system_features_control = smu_v13_0_0_system_features_control,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
index 5b1a038d6a19..ba91bf590eed 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
@@ -1098,7 +1098,7 @@ static int smu_v13_0_4_set_fine_grain_gfx_freq_parameters(struct smu_context *sm
 static const struct pptable_funcs smu_v13_0_4_ppt_funcs = {
 	.check_fw_status = smu_v13_0_check_fw_status,
-	.check_fw_version = smu_v13_0_check_fw_version,
+	.check_fw_version = smu_cmn_check_fw_version,
 	.init_smc_tables = smu_v13_0_4_init_smc_tables,
 	.fini_smc_tables = smu_v13_0_4_fini_smc_tables,
 	.get_vbios_bootup_values = smu_v13_0_get_vbios_bootup_values,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c
index d534723fef91..27372a60e83d 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c
@@ -1102,7 +1102,7 @@ static int smu_v13_0_5_set_fine_grain_gfx_freq_parameters(struct smu_context *sm
 static const struct pptable_funcs smu_v13_0_5_ppt_funcs = {
 	.check_fw_status = smu_v13_0_check_fw_status,
-	.check_fw_version = smu_v13_0_check_fw_version,
+	.check_fw_version = smu_cmn_check_fw_version,
 	.init_smc_tables = smu_v13_0_5_init_smc_tables,
 	.fini_smc_tables = smu_v13_0_5_fini_smc_tables,
 	.get_vbios_bootup_values = smu_v13_0_get_vbios_bootup_values,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 8dc8674b7ce1..bc361fee5777 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -481,7 +481,7 @@ static int smu_v13_0_6_check_fw_version(struct smu_context *smu)
 {
 	int r;
 
-	r = smu_v13_0_check_fw_version(smu);
+	r = smu_cmn_check_fw_version(smu);
 	/* Initialize caps flags once fw version is fetched */
 	if (!r)
 		smu_v13_0_x_init_caps(smu);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 5500a0f12f0e..88e0d99b8ad2 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -2788,7 +2788,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
 	.fini_power = smu_v13_0_fini_power,
 	.check_fw_status = smu_v13_0_7_check_fw_status,
 	.setup_pptable = smu_v13_0_7_setup_pptable,
-	.check_fw_version = smu_v13_0_check_fw_version,
+	.check_fw_version = smu_cmn_check_fw_version,
 	.write_pptable = smu_cmn_write_pptable,
 	.set_driver_table_location = smu_v13_0_set_driver_table_location,
 	.system_features_control = smu_v13_0_system_features_control,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
index f43a91ac6970..7bf88ffd311b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
@@ -1331,7 +1331,7 @@ static int yellow_carp_set_fine_grain_gfx_freq_parameters(struct smu_context *sm
 static const struct pptable_funcs yellow_carp_ppt_funcs = {
 	.check_fw_status = smu_v13_0_check_fw_status,
-	.check_fw_version = smu_v13_0_check_fw_version,
+	.check_fw_version = smu_cmn_check_fw_version,
 	.init_smc_tables = yellow_carp_init_smc_tables,
 	.fini_smc_tables = yellow_carp_fini_smc_tables,
 	.get_vbios_bootup_values = smu_v13_0_get_vbios_bootup_values,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0.c
index 3fd84dd85e9b..09bb3d91a001 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0.c
@@ -207,58 +207,6 @@ int smu_v15_0_check_fw_status(struct smu_context *smu)
 	return -EIO;
 }
 
-int smu_v15_0_check_fw_version(struct smu_context *smu)
-{
-	struct amdgpu_device *adev = smu->adev;
-	uint32_t if_version = 0xff, smu_version = 0xff;
-	uint8_t smu_program, smu_major, smu_minor, smu_debug;
-	int ret = 0;
-
-	ret = smu_cmn_get_smc_version(smu, &if_version, &smu_version);
-	if (ret)
-		return ret;
-
-	smu_program = (smu_version >> 24) & 0xff;
-	smu_major = (smu_version >> 16) & 0xff;
-	smu_minor = (smu_version >> 8) & 0xff;
-	smu_debug = (smu_version >> 0) & 0xff;
-	if (smu->is_apu)
-		adev->pm.fw_version = smu_version;
-
-	switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
-	case IP_VERSION(15, 0, 0):
-		smu->smc_driver_if_version = SMU15_DRIVER_IF_VERSION_SMU_V15_0;
-		break;
-	default:
-		dev_err(adev->dev, "smu unsupported IP version: 0x%x.\n",
-			amdgpu_ip_version(adev, MP1_HWIP, 0));
-		smu->smc_driver_if_version = SMU15_DRIVER_IF_VERSION_INV;
-		break;
-	}
-
-	if (adev->pm.fw)
-		dev_dbg(smu->adev->dev, "smu fw reported program %d, version = 0x%08x (%d.%d.%d)\n",
-			smu_program, smu_version, smu_major, smu_minor, smu_debug);
-
-	/*
-	 * 1. if_version mismatch is not critical as our fw is designed
-	 * to be backward compatible.
-	 * 2. New fw usually brings some optimizations. But that's visible
-	 * only on the paired driver.
-	 * Considering above, we just leave user a verbal message instead
-	 * of halt driver loading.
-	 */
-	if (if_version != smu->smc_driver_if_version) {
-		dev_info(adev->dev, "smu driver if version = 0x%08x, smu fw if version = 0x%08x, "
-			 "smu fw program = %d, smu fw version = 0x%08x (%d.%d.%d)\n",
-			 smu->smc_driver_if_version, if_version,
-			 smu_program, smu_version, smu_major, smu_minor, smu_debug);
-		dev_info(adev->dev, "SMU driver if version not matched\n");
-	}
-
-	return ret;
-}
-
 static int smu_v15_0_set_pptable_v2_0(struct smu_context *smu, void **table, uint32_t *size)
 {
 	struct amdgpu_device *adev = smu->adev;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0_0_ppt.c
index 49cf2b9d931e..8d092c347076 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0_0_ppt.c
@@ -1416,7 +1416,7 @@ static int smu_v15_0_common_get_dpm_table(struct smu_context *smu, struct dpm_cl
 static const struct pptable_funcs smu_v15_0_0_ppt_funcs = {
 	.check_fw_status = smu_v15_0_check_fw_status,
-	.check_fw_version = smu_v15_0_check_fw_version,
+	.check_fw_version = smu_cmn_check_fw_version,
 	.init_smc_tables = smu_v15_0_0_init_smc_tables,
 	.fini_smc_tables = smu_v15_0_0_fini_smc_tables,
 	.get_vbios_bootup_values = smu_v15_0_get_vbios_bootup_values,
@@ -1468,6 +1468,7 @@ void smu_v15_0_0_set_ppt_funcs(struct smu_context *smu)
 	smu->feature_map = smu_v15_0_0_feature_mask_map;
 	smu->table_map = smu_v15_0_0_table_map;
 	smu->is_apu = true;
+	smu->smc_driver_if_version = SMU15_DRIVER_IF_VERSION_SMU_V15_0;
 
 	smu_v15_0_0_init_msg_ctl(smu);
 }
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 326c86b920a2..a644579903f4 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -1035,6 +1035,35 @@ int smu_cmn_get_smc_version(struct smu_context *smu,
 	return ret;
 }
 
+int smu_cmn_check_fw_version(struct smu_context *smu)
+{
+	struct amdgpu_device *adev = smu->adev;
+	uint32_t if_version = 0xff, smu_version = 0xff;
+	uint8_t smu_program, smu_major, smu_minor, smu_debug;
+	int ret;
+
+	ret = smu_cmn_get_smc_version(smu, &if_version, &smu_version);
+	if (ret)
+		return ret;
+
+	smu_program = (smu_version >> 24) & 0xff;
+	smu_major = (smu_version >> 16) & 0xff;
+	smu_minor = (smu_version >> 8) & 0xff;
+	smu_debug = (smu_version >> 0) & 0xff;
+	adev->pm.fw_version = smu_version;
+
+	dev_info_once(adev->dev, "smu driver if version = 0x%08x, smu fw if version = 0x%08x, "
+		      "smu fw program = %d, smu fw version = 0x%08x (%d.%d.%d)\n",
+		      smu->smc_driver_if_version, if_version,
+		      smu_program, smu_version, smu_major, smu_minor, smu_debug);
+
+	if (smu->smc_driver_if_version != SMU_IGNORE_IF_VERSION &&
+	    if_version != smu->smc_driver_if_version)
+		dev_info(adev->dev, "SMU driver if version not matched\n");
+
+	return 0;
+}
+
 int smu_cmn_update_table(struct smu_context *smu,
 			 enum smu_table_id table_index,
 			 int argument,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
index b7bfddc65fb2..e4d282d8bcae 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
@@ -207,6 +207,7 @@ int smu_cmn_print_pcie_levels(struct smu_context *smu,
 int smu_cmn_dpm_pcie_gen_idx(int gen);
 int smu_cmn_dpm_pcie_width_idx(int width);
 
+int smu_cmn_check_fw_version(struct smu_context *smu);
 
 /*SMU gpu metrics */
diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c
index 4b86a58e8149..d213eea71cff 100644
--- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c
+++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c
@@ -535,6 +535,37 @@ int amdgpu_ras_mgr_handle_controller_interrupt(struct amdgpu_device *adev, void
 	return ret;
 }
 
+int amdgpu_ras_mgr_dispatch_interrupt(struct amdgpu_device *adev, struct ras_ih_info *ih_info)
+{
+	struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev);
+	uint64_t seq_no = 0;
+	int ret = 0;
+
+	if (!amdgpu_ras_mgr_is_ready(adev))
+		return -EPERM;
+
+	if (!ih_info)
+		return 0;
+
+	if (ih_info->block == RAS_BLOCK_ID__UMC) {
+		if (ras_mgr->ras_core->poison_supported) {
+			seq_no = amdgpu_ras_mgr_gen_ras_event_seqno(adev, RAS_SEQNO_TYPE_DE);
+			RAS_DEV_INFO(adev,
+				     "{%llu} RAS poison is created, no user action is needed.\n",
+				     seq_no);
+		}
+
+		ret = amdgpu_ras_process_handle_umc_interrupt(adev, ih_info);
+	} else if (ras_mgr->ras_core->poison_supported) {
+		ret = amdgpu_ras_process_handle_consumption_interrupt(adev, ih_info);
+	} else {
+		RAS_DEV_WARN(adev,
+			     "No RAS interrupt handler for non-UMC block with poison disabled.\n");
+	}
+
+	return ret;
+}
+
 int amdgpu_ras_mgr_handle_consumer_interrupt(struct amdgpu_device *adev, void *data)
 {
 	if (!amdgpu_ras_mgr_is_ready(adev))
diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h
index 23c411c98231..4f44a917d48b 100644
--- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h
+++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h
@@ -67,6 +67,7 @@ bool amdgpu_uniras_enabled(struct amdgpu_device *adev);
 int amdgpu_ras_mgr_handle_fatal_interrupt(struct amdgpu_device *adev, void *data);
 int amdgpu_ras_mgr_handle_controller_interrupt(struct amdgpu_device *adev, void *data);
 int amdgpu_ras_mgr_handle_consumer_interrupt(struct amdgpu_device *adev, void *data);
+int amdgpu_ras_mgr_dispatch_interrupt(struct amdgpu_device *adev, struct ras_ih_info *ih_info);
 int amdgpu_ras_mgr_update_ras_ecc(struct amdgpu_device *adev);
 int amdgpu_ras_mgr_reset_gpu(struct amdgpu_device *adev, uint32_t flags);
 uint64_t amdgpu_ras_mgr_gen_ras_event_seqno(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_process.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_process.c
index 5782c007de71..285d81a85a99 100644
--- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_process.c
+++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_process.c
@@ -103,6 +103,16 @@ int amdgpu_ras_process_handle_consumption_interrupt(struct amdgpu_device *adev,
 	if (!ih_info)
 		return -EINVAL;
 
+	if (amdgpu_sriov_vf(adev)) {
+		if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
+			adev->virt.ops->ras_poison_handler(adev, ih_info->block);
+		else
+			dev_warn(adev->dev,
+				 "No ras_poison_handler interface in SRIOV for block[%d]!\n",
+				 ih_info->block);
+		return 0;
+	}
+
 	memset(&req, 0, sizeof(req));
 	req.block = ih_info->block;
 	req.data = ih_info->data;
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_core.c b/drivers/gpu/drm/amd/ras/rascore/ras_core.c
index bbf13c076a94..e889baec378b 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras_core.c
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_core.c
@@ -527,8 +527,11 @@ bool ras_core_is_enabled(struct ras_core_context *ras_core)
 
 uint64_t ras_core_get_utc_second_timestamp(struct ras_core_context *ras_core)
 {
-	if (ras_core && ras_core->sys_fn &&
-	    ras_core->sys_fn->get_utc_second_timestamp)
+	if (!ras_core)
+		return 0;
+
+	if (ras_core->sys_fn &&
+	    ras_core->sys_fn->get_utc_second_timestamp)
 		return ras_core->sys_fn->get_utc_second_timestamp(ras_core);
 
 	RAS_DEV_ERR(ras_core->dev, "Failed to get system timestamp!\n");
@@ -550,7 +553,9 @@ bool ras_core_ras_interrupt_detected(struct ras_core_context *ras_core)
 	    ras_core->sys_fn->detect_ras_interrupt)
 		return ras_core->sys_fn->detect_ras_interrupt(ras_core);
 
-	RAS_DEV_ERR(ras_core->dev, "Failed to detect ras interrupt!\n");
+	if (ras_core && ras_core->dev)
+		RAS_DEV_ERR(ras_core->dev, "Failed to detect ras interrupt!\n");
+
 	return false;
 }
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index b4aa49b1ac63..4b10715f951c 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -2915,9 +2915,11 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
 	if (rdev->family == CHIP_HAINAN) {
 		if ((rdev->pdev->revision == 0x81) ||
 		    (rdev->pdev->revision == 0xC3) ||
+		    (rdev->pdev->device == 0x6660) ||
 		    (rdev->pdev->device == 0x6664) ||
 		    (rdev->pdev->device == 0x6665) ||
-		    (rdev->pdev->device == 0x6667)) {
+		    (rdev->pdev->device == 0x6667) ||
+		    (rdev->pdev->device == 0x666F)) {
 			max_sclk = 75000;
 		}
 		if ((rdev->pdev->revision == 0xC3) ||
