diff options
| author | Dave Airlie <airlied@redhat.com> | 2026-07-03 08:10:18 +1000 |
|---|---|---|
| committer | Dave Airlie <airlied@redhat.com> | 2026-07-03 08:10:55 +1000 |
| commit | 7e21dc06c7270496b020c5fd44b9fa08b568e9b4 (patch) | |
| tree | 70b7629fa5032b0daabb9e0ab03109c5d05eb5d9 /drivers/gpu | |
| parent | 46d67197521e41fc0077ce3efe459dce82622631 (diff) | |
| parent | c44af3810fc8b3adf6910a332038aa566560c8fa (diff) | |
| download | linux-next-7e21dc06c7270496b020c5fd44b9fa08b568e9b4.tar.gz linux-next-7e21dc06c7270496b020c5fd44b9fa08b568e9b4.zip | |
Merge tag 'amd-drm-fixes-7.2-2026-07-02' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes
amd-drm-fixes-7.2-2026-07-02:
amdgpu:
- Soc24 aborted suspend fix
- Drop unecessary BUG() and BUG_ON() from error paths
- SCPM fix
- Power reporting fix
- DCE HDR fix
- UVD boundary checks
- VCN boundary checks
- VCE boundary checks
- DCN 4.2 fixes
- Large stack allocation fixes
- Fix aperture mapping leak
- UserQ fixes
- Ignore_damage_clips fix
- ACP fixes
- DC boundary checks
- GPUVM fixes
- JPEG idle check fixes
- Userptr fix
- GC 11.7 updates
- Non-4K page fix
- SMU 13 fixes
- DP alt mode fix
amdkfd:
- Boundary checks
- CRIU fixes
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patch.msgid.link/20260702143138.68463-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu')
62 files changed, 541 insertions, 268 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index 4c732e0f776e..9014678d75ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -508,6 +508,7 @@ static int acp_hw_fini(struct amdgpu_ip_block *ip_block) u32 val = 0; u32 count = 0; struct amdgpu_device *adev = ip_block->adev; + int ret = 0; /* return early if no ACP */ if (!adev->acp.acp_genpd) { @@ -529,7 +530,8 @@ static int acp_hw_fini(struct amdgpu_ip_block *ip_block) break; if (--count == 0) { dev_err(&adev->pdev->dev, "Failed to reset ACP\n"); - return -ETIMEDOUT; + ret = -ETIMEDOUT; + goto out; } udelay(100); } @@ -546,21 +548,24 @@ static int acp_hw_fini(struct amdgpu_ip_block *ip_block) break; if (--count == 0) { dev_err(&adev->pdev->dev, "Failed to reset ACP\n"); - return -ETIMEDOUT; + ret = -ETIMEDOUT; + goto out; } udelay(100); } - +out: device_for_each_child(adev->acp.parent, NULL, acp_genpd_remove_device); mfd_remove_devices(adev->acp.parent); kfree(adev->acp.i2s_pdata); kfree(adev->acp.acp_res); + pm_genpd_remove(&adev->acp.acp_genpd->gpd); kfree(adev->acp.acp_genpd); + adev->acp.acp_genpd = NULL; kfree(adev->acp.acp_cell); - return 0; + return ret; } static int acp_suspend(struct amdgpu_ip_block *ip_block) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index c2e6495a28bc..e714cee2997a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1322,7 +1322,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, e->range = NULL; } - if (r || !list_empty(&vm->individual.moved)) { + if (r || !list_empty(&vm->individual.needs_update)) { r = -EAGAIN; mutex_unlock(&p->adev->notifier_lock); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 211d30f03d25..8d6502a94306 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4184,8 +4184,6 @@ static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev) iounmap(adev->rmmio); adev->rmmio = NULL; - if (adev->mman.aper_base_kaddr) - iounmap(adev->mman.aper_base_kaddr); adev->mman.aper_base_kaddr = NULL; /* Memory manager related */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index be5069642a90..853365dee2a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -2119,6 +2119,8 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 5, 3): case IP_VERSION(11, 5, 4): case IP_VERSION(11, 5, 6): + case IP_VERSION(11, 7, 0): + case IP_VERSION(11, 7, 1): amdgpu_device_ip_block_add(adev, &soc21_common_ip_block); break; case IP_VERSION(12, 0, 0): @@ -2180,6 +2182,8 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 5, 3): case IP_VERSION(11, 5, 4): case IP_VERSION(11, 5, 6): + case IP_VERSION(11, 7, 0): + case IP_VERSION(11, 7, 1): amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block); break; case IP_VERSION(12, 0, 0): @@ -2506,6 +2510,8 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 5, 3): case IP_VERSION(11, 5, 4): case IP_VERSION(11, 5, 6): + case IP_VERSION(11, 7, 0): + case IP_VERSION(11, 7, 1): amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block); break; case IP_VERSION(12, 0, 0): @@ -2719,6 +2725,8 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 5, 3): case IP_VERSION(11, 5, 4): case IP_VERSION(11, 5, 6): + case IP_VERSION(11, 7, 0): + case IP_VERSION(11, 7, 1): amdgpu_device_ip_block_add(adev, &mes_v11_0_ip_block); adev->enable_mes = true; adev->enable_mes_kiq = true; @@ -3127,6 +3135,8 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 5, 3): case IP_VERSION(11, 5, 4): case IP_VERSION(11, 5, 6): + case IP_VERSION(11, 7, 0): + case IP_VERSION(11, 7, 1): adev->family = AMDGPU_FAMILY_GC_11_5_0; break; case IP_VERSION(12, 0, 0): @@ -3156,6 +3166,8 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 5, 3): case IP_VERSION(11, 5, 4): case IP_VERSION(11, 5, 6): + case IP_VERSION(11, 7, 0): + case IP_VERSION(11, 7, 1): adev->flags |= AMD_IS_APU; break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index bf4260269681..4c0c77eafbd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -3196,6 +3196,14 @@ static void __exit amdgpu_exit(void) amdgpu_sync_fini(); mmu_notifier_synchronize(); amdgpu_xcp_drv_release(); + + /* + * Flush outstanding call_rcu() callbacks before the + * module text is freed. Otherwise a grace period elapsing after + * unload invokes a callback in already-freed module memory and + * faults in rcu_do_batch(). + */ + rcu_barrier(); } module_init(amdgpu_init); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 76da3f932f24..6a0699746fbc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -535,6 +535,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, bo = gem_to_amdgpu_bo(gobj); bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; + bo->parent = amdgpu_bo_ref(fpriv->vm.root.bo); r = amdgpu_ttm_tt_set_userptr(&bo->tbo, args->addr, args->flags); if (r) goto release_object; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 5f7745143f56..5d6149ba7ab7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -977,6 +977,8 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) case IP_VERSION(11, 5, 3): case IP_VERSION(11, 5, 4): case IP_VERSION(11, 5, 6): + case IP_VERSION(11, 7, 0): + case IP_VERSION(11, 7, 1): /* Don't enable it by default yet. */ if (amdgpu_tmz < 1) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c index 99bc9ad67d5b..a7d13e337d84 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c @@ -67,7 +67,6 @@ static bool amdgpu_hmm_invalidate_gfx(struct mmu_interval_notifier *mni, { struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - struct amdgpu_bo *vm_root = bo->vm_bo->vm->root.bo; long r; if (!mmu_notifier_range_blockable(range)) @@ -78,7 +77,7 @@ static bool amdgpu_hmm_invalidate_gfx(struct mmu_interval_notifier *mni, mmu_interval_set_seq(mni, cur_seq); amdgpu_vm_bo_invalidate(bo, false); - r = dma_resv_wait_timeout(vm_root->tbo.base.resv, + r = dma_resv_wait_timeout(bo->parent->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, false, MAX_SCHEDULE_TIMEOUT); mutex_unlock(&adev->notifier_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index fdd06a17520a..1aae49f4df49 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -302,12 +302,14 @@ struct mes_suspend_gang_input { uint64_t gang_context_addr; uint64_t suspend_fence_addr; uint32_t suspend_fence_value; + uint32_t doorbell_offset; }; struct mes_resume_gang_input { uint32_t xcc_id; bool resume_all_gangs; uint64_t gang_context_addr; + uint32_t doorbell_offset; }; struct mes_reset_queue_input { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 16c060badaee..025625e7e800 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -208,9 +208,10 @@ static int amdgpu_ttm_map_buffer(struct amdgpu_ttm_buffer_entity *entity, void *cpu_addr; uint64_t flags; int r; + const u64 GTT_MAX_PAGES = (AMDGPU_GTT_MAX_TRANSFER_SIZE >> PAGE_SHIFT); BUG_ON(adev->mman.buffer_funcs->copy_max_bytes < - AMDGPU_GTT_MAX_TRANSFER_SIZE * 8); + GTT_MAX_PAGES * AMDGPU_GPU_PAGES_IN_CPU_PAGE * 8); if (WARN_ON(mem->mem_type == AMDGPU_PL_PREEMPT)) return -EINVAL; @@ -230,7 +231,7 @@ static int amdgpu_ttm_map_buffer(struct amdgpu_ttm_buffer_entity *entity, offset = mm_cur->start & ~PAGE_MASK; num_pages = PFN_UP(*size + offset); - num_pages = min_t(uint32_t, num_pages, AMDGPU_GTT_MAX_TRANSFER_SIZE); + num_pages = min_t(uint32_t, num_pages, GTT_MAX_PAGES); *size = min(*size, (uint64_t)num_pages * PAGE_SIZE - offset); @@ -2033,6 +2034,7 @@ static int amdgpu_ttm_buffer_entity_init(struct amdgpu_gtt_mgr *mgr, u32 num_gart_windows) { int i, r, num_pages; + const u64 GTT_MAX_PAGES = (AMDGPU_GTT_MAX_TRANSFER_SIZE >> PAGE_SHIFT); r = drm_sched_entity_init(&entity->base, prio, scheds, num_schedulers, NULL); if (r) @@ -2045,7 +2047,7 @@ static int amdgpu_ttm_buffer_entity_init(struct amdgpu_gtt_mgr *mgr, if (num_gart_windows == 0) return 0; - num_pages = num_gart_windows * AMDGPU_GTT_MAX_TRANSFER_SIZE; + num_pages = num_gart_windows * GTT_MAX_PAGES; r = amdgpu_gtt_mgr_alloc_entries(mgr, &entity->gart_node, num_pages, DRM_MM_INSERT_BEST); if (r) { @@ -2056,7 +2058,7 @@ static int amdgpu_ttm_buffer_entity_init(struct amdgpu_gtt_mgr *mgr, for (i = 0; i < num_gart_windows; i++) { entity->gart_window_offs[i] = amdgpu_gtt_node_to_byte_offset(&entity->gart_node) + - i * AMDGPU_GTT_MAX_TRANSFER_SIZE * PAGE_SIZE; + i * GTT_MAX_PAGES * PAGE_SIZE; } return 0; @@ -2118,18 +2120,23 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) /* Change the size here instead of the init above so only lpfn is affected */ amdgpu_ttm_disable_buffer_funcs(adev); #ifdef CONFIG_64BIT -#ifdef CONFIG_X86 - if (adev->gmc.xgmi.connected_to_cpu) - adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base, - adev->gmc.visible_vram_size); - - else if (adev->gmc.is_app_apu) + if (adev->gmc.xgmi.connected_to_cpu) { + void *kaddr = devm_memremap(adev->dev, adev->gmc.aper_base, + adev->gmc.visible_vram_size, + MEMREMAP_WB); + if (IS_ERR(kaddr)) + return PTR_ERR(kaddr); + adev->mman.aper_base_kaddr = (__force void __iomem *)kaddr; + } else if (adev->gmc.is_app_apu) { DRM_DEBUG_DRIVER( "No need to ioremap when real vram size is 0\n"); - else -#endif - adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base, - adev->gmc.visible_vram_size); + } else { + adev->mman.aper_base_kaddr = devm_ioremap_wc(adev->dev, + adev->gmc.aper_base, + adev->gmc.visible_vram_size); + if (!adev->mman.aper_base_kaddr) + return -ENOMEM; + } #endif amdgpu_ttm_init_vram_resv_regions(adev); @@ -2246,8 +2253,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) */ void amdgpu_ttm_fini(struct amdgpu_device *adev) { - int idx; - if (!adev->mman.initialized) return; @@ -2270,14 +2275,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) amdgpu_ttm_unmark_vram_reserved(adev, AMDGPU_RESV_FW_VRAM_USAGE); amdgpu_ttm_unmark_vram_reserved(adev, AMDGPU_RESV_DRV_VRAM_USAGE); - if (drm_dev_enter(adev_to_drm(adev), &idx)) { - - if (adev->mman.aper_base_kaddr) - iounmap(adev->mman.aper_base_kaddr); - adev->mman.aper_base_kaddr = NULL; - - drm_dev_exit(idx); - } + adev->mman.aper_base_kaddr = NULL; if (!adev->gmc.is_app_apu) amdgpu_vram_mgr_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 2d72fa217274..b5d938b31383 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -39,7 +39,7 @@ #define AMDGPU_PL_MMIO_REMAP (TTM_PL_PRIV + 5) #define __AMDGPU_PL_NUM (TTM_PL_PRIV + 6) -#define AMDGPU_GTT_MAX_TRANSFER_SIZE 1024 +#define AMDGPU_GTT_MAX_TRANSFER_SIZE (1ULL << 22) extern const struct attribute_group amdgpu_vram_mgr_attr_group; extern const struct attribute_group amdgpu_gtt_mgr_attr_group; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 91554e7c092c..ef3f0213cc46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -680,8 +680,8 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) /* Update VM owner at userq submit-time for page-fault attribution. */ amdgpu_vm_set_task_info(&fpriv->vm); - r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, - GFP_KERNEL)); + r = xa_insert_irq(&adev->userq_doorbell_xa, index, queue, + GFP_KERNEL); if (r) goto clean_mqd; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 480bf88def46..23383ac5323f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -655,6 +655,14 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg, unsigned int image_size, tmp, min_dpb_size, num_dpb_buffer; unsigned int min_ctx_size = ~0; + /* Reject invalid dimensions to prevent division by zero */ + if (width < 16 || height < 16) { + dev_WARN_ONCE(adev->dev, 1, + "Invalid UVD decoding dimensions (%dx%d)!\n", + width, height); + return -EINVAL; + } + image_size = width * height; image_size += image_size / 2; image_size = ALIGN(image_size, 1024); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index efdebd9c0a1f..eef3c9853a5c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -877,9 +877,20 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, goto out; } - *size = amdgpu_ib_get_value(ib, idx + 8) * - amdgpu_ib_get_value(ib, idx + 10) * - 8 * 3 / 2; + uint32_t width, height; + width = amdgpu_ib_get_value(ib, idx + 8); + height = amdgpu_ib_get_value(ib, idx + 10); + + if (width == 0 || height == 0 || + width > 4096 || height > 2304) { + DRM_ERROR("invalid VCE image size: %ux%u\n", + width, height); + r = -EINVAL; + goto out; + } + + *size = width * height * 8 * 3 / 2; + break; case 0x04000001: /* config extension */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index fee4c94c2585..f317f888b59f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -142,7 +142,7 @@ static void amdgpu_vm_assert_locked(struct amdgpu_vm *vm) static void amdgpu_vm_bo_status_init(struct amdgpu_vm_bo_status *lists) { INIT_LIST_HEAD(&lists->evicted); - INIT_LIST_HEAD(&lists->moved); + INIT_LIST_HEAD(&lists->needs_update); INIT_LIST_HEAD(&lists->idle); } @@ -211,14 +211,14 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo) amdgpu_vm_bo_unlock_lists(vm_bo); } /** - * amdgpu_vm_bo_moved - vm_bo is moved + * amdgpu_vm_bo_needs_update - vm_bo needs pagetable update * - * @vm_bo: vm_bo which is moved + * @vm_bo: vm_bo which is out of date * - * State for vm_bo objects meaning the underlying BO was moved but the new - * location not yet reflected in the page tables. + * State for vm_bo objects meaning the underlying BO had mapping changes (move, PRT bind/unbind) + * but the new location is not yet reflected in the page tables. */ -static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo) +static void amdgpu_vm_bo_needs_update(struct amdgpu_vm_bo_base *vm_bo) { struct amdgpu_vm_bo_status *lists; struct amdgpu_bo *bo = vm_bo->bo; @@ -232,8 +232,7 @@ static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo) vm_bo->moved = false; list_move(&vm_bo->vm_status, &lists->idle); } else { - vm_bo->moved = true; - list_move(&vm_bo->vm_status, &lists->moved); + list_move(&vm_bo->vm_status, &lists->needs_update); } amdgpu_vm_bo_unlock_lists(vm_bo); } @@ -274,14 +273,14 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm) */ amdgpu_vm_assert_locked(vm); list_for_each_entry_safe(vm_bo, tmp, &vm->kernel.idle, vm_status) - amdgpu_vm_bo_moved(vm_bo); + amdgpu_vm_bo_needs_update(vm_bo); list_for_each_entry_safe(vm_bo, tmp, &vm->always_valid.idle, vm_status) - amdgpu_vm_bo_moved(vm_bo); + amdgpu_vm_bo_needs_update(vm_bo); spin_lock(&vm->individual_lock); list_for_each_entry_safe(vm_bo, tmp, &vm->individual.idle, vm_status) { vm_bo->moved = true; - list_move(&vm_bo->vm_status, &vm->individual.moved); + list_move(&vm_bo->vm_status, &vm->individual.needs_update); } spin_unlock(&vm->individual_lock); } @@ -436,7 +435,7 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, */ if (bo->preferred_domains & amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type)) - amdgpu_vm_bo_moved(base); + amdgpu_vm_bo_needs_update(base); else amdgpu_vm_bo_evicted(base); } @@ -608,7 +607,8 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, return r; vm->update_funcs->map_table(to_amdgpu_bo_vm(bo_base->bo)); - amdgpu_vm_bo_moved(bo_base); + bo_base->moved = true; + amdgpu_vm_bo_needs_update(bo_base); } /* @@ -625,7 +625,8 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) return r; - amdgpu_vm_bo_moved(bo_base); + bo_base->moved = true; + amdgpu_vm_bo_needs_update(bo_base); } if (!ticket) @@ -645,7 +646,8 @@ restart: if (r) return r; - amdgpu_vm_bo_moved(bo_base); + bo_base->moved = true; + amdgpu_vm_bo_needs_update(bo_base); /* It's a bit inefficient to always jump back to the start, but * we would need to re-structure the KFD for properly fixing @@ -979,7 +981,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, amdgpu_vm_assert_locked(vm); - if (list_empty(&vm->kernel.moved)) + if (list_empty(&vm->kernel.needs_update)) return 0; if (!drm_dev_enter(adev_to_drm(adev), &idx)) @@ -995,7 +997,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, if (r) goto error; - list_for_each_entry(entry, &vm->kernel.moved, vm_status) { + list_for_each_entry(entry, &vm->kernel.needs_update, vm_status) { /* vm_flush_needed after updating moved PDEs */ flush_tlb_needed |= entry->moved; @@ -1011,7 +1013,8 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, if (flush_tlb_needed) atomic64_inc(&vm->tlb_seq); - list_for_each_entry_safe(entry, tmp, &vm->kernel.moved, vm_status) + list_for_each_entry_safe(entry, tmp, &vm->kernel.needs_update, + vm_status) amdgpu_vm_bo_idle(entry); error: @@ -1615,7 +1618,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, bool clear, unlock; int r; - list_for_each_entry_safe(bo_va, tmp, &vm->always_valid.moved, + list_for_each_entry_safe(bo_va, tmp, &vm->always_valid.needs_update, base.vm_status) { /* Per VM BOs never need to bo cleared in the page tables */ r = amdgpu_vm_bo_update(adev, bo_va, false); @@ -1624,8 +1627,8 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, } spin_lock(&vm->individual_lock); - while (!list_empty(&vm->individual.moved)) { - bo_va = list_first_entry(&vm->individual.moved, + while (!list_empty(&vm->individual.needs_update)) { + bo_va = list_first_entry(&vm->individual.needs_update, typeof(*bo_va), base.vm_status); bo = bo_va->base.bo; resv = bo->tbo.base.resv; @@ -1786,7 +1789,7 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, amdgpu_vm_prt_get(adev); if (amdgpu_vm_is_bo_always_valid(vm, bo) && !bo_va->base.moved) - amdgpu_vm_bo_moved(&bo_va->base); + amdgpu_vm_bo_needs_update(&bo_va->base); trace_amdgpu_vm_bo_map(bo_va, mapping); } @@ -2095,7 +2098,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, if (amdgpu_vm_is_bo_always_valid(vm, bo) && !before->bo_va->base.moved) - amdgpu_vm_bo_moved(&before->bo_va->base); + amdgpu_vm_bo_needs_update(&before->bo_va->base); } else { kfree(before); } @@ -2110,7 +2113,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, if (amdgpu_vm_is_bo_always_valid(vm, bo) && !after->bo_va->base.moved) - amdgpu_vm_bo_moved(&after->bo_va->base); + amdgpu_vm_bo_needs_update(&after->bo_va->base); } else { kfree(after); } @@ -2284,7 +2287,8 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_bo *bo, bool evicted) if (bo_base->moved) continue; - amdgpu_vm_bo_moved(bo_base); + bo_base->moved = true; + amdgpu_vm_bo_needs_update(bo_base); } } @@ -3098,7 +3102,7 @@ static void amdgpu_debugfs_vm_bo_status_info(struct seq_file *m, id = 0; seq_puts(m, "\tMoved BOs:\n"); - list_for_each_entry(base, &lists->moved, vm_status) { + list_for_each_entry(base, &lists->needs_update, vm_status) { if (!base->bo) continue; @@ -3107,7 +3111,7 @@ static void amdgpu_debugfs_vm_bo_status_info(struct seq_file *m, id = 0; seq_puts(m, "\tIdle BOs:\n"); - list_for_each_entry(base, &lists->moved, vm_status) { + list_for_each_entry(base, &lists->needs_update, vm_status) { if (!base->bo) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index b32f51a78cd8..5822836fa4a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -212,7 +212,8 @@ struct amdgpu_vm_bo_base { * protected by vm BO being reserved */ bool shared; - /* protected by the BO being reserved */ + /* if the BO was moved and all mappings are invalid + * protected by the BO being reserved */ bool moved; }; @@ -220,14 +221,14 @@ struct amdgpu_vm_bo_base { * The following status lists contain amdgpu_vm_bo_base objects for * either PD/PTs, per VM BOs or BOs with individual resv object. * - * The state transits are: evicted -> moved -> idle + * The state transits are: evicted -> needs_update -> idle */ struct amdgpu_vm_bo_status { /* BOs evicted which need to move into place again */ struct list_head evicted; - /* BOs which moved but new location hasn't been updated in the PDs/PTs */ - struct list_head moved; + /* BOs whose mappings changed but PDs/PTs haven't been updated */ + struct list_head needs_update; /* BOs done with the state machine and need no further action */ struct list_head idle; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 0780c5e5de4f..b4b27e4c495d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4022,7 +4022,7 @@ static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, WAIT_REG_MEM_ENGINE(eng_sel))); if (mem_space) - BUG_ON(addr0 & 0x3); /* Dword align */ + WARN_ON(addr0 & 0x3); /* Dword align */ amdgpu_ring_write(ring, addr0); amdgpu_ring_write(ring, addr1); amdgpu_ring_write(ring, ref); @@ -8658,7 +8658,7 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, } amdgpu_ring_write(ring, header); - BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + WARN_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN (2 << 0) | @@ -8693,7 +8693,7 @@ static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring, } amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); - BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + WARN_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN (2 << 0) | @@ -8726,9 +8726,9 @@ static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, * aligned if only send 32bit data low (discard data high) */ if (write64bit) - BUG_ON(addr & 0x7); + WARN_ON(addr & 0x7); else - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); @@ -8776,9 +8776,6 @@ static void gfx_v10_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, { struct amdgpu_device *adev = ring->adev; - /* we only allocate 32bit for each seq wb address */ - BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); - /* write fence seq to the "addr" */ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index f856b0cf5bec..3b12eb27a253 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -133,6 +133,14 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_6_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_6_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_6_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_6_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_7_0_pfp.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_7_0_me.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_7_0_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_7_0_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_7_1_pfp.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_7_1_me.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_7_1_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_7_1_rlc.bin"); static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = { SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS), @@ -546,7 +554,7 @@ static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, WAIT_REG_MEM_ENGINE(eng_sel))); if (mem_space) - BUG_ON(addr0 & 0x3); /* Dword align */ + WARN_ON(addr0 & 0x3); /* Dword align */ amdgpu_ring_write(ring, addr0); amdgpu_ring_write(ring, addr1); amdgpu_ring_write(ring, ref); @@ -1128,6 +1136,8 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) case IP_VERSION(11, 5, 3): case IP_VERSION(11, 5, 4): case IP_VERSION(11, 5, 6): + case IP_VERSION(11, 7, 0): + case IP_VERSION(11, 7, 1): adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -1612,6 +1622,8 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 5, 3): case IP_VERSION(11, 5, 4): case IP_VERSION(11, 5, 6): + case IP_VERSION(11, 7, 0): + case IP_VERSION(11, 7, 1): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 2; @@ -3085,7 +3097,9 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 3) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 4) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 6)) + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 6) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 7, 0) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 7, 1)) bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1); else @@ -5758,6 +5772,8 @@ static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) case IP_VERSION(11, 5, 3): case IP_VERSION(11, 5, 4): case IP_VERSION(11, 5, 6): + case IP_VERSION(11, 7, 0): + case IP_VERSION(11, 7, 1): WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); break; default: @@ -5798,6 +5814,8 @@ static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block, case IP_VERSION(11, 5, 3): case IP_VERSION(11, 5, 4): case IP_VERSION(11, 5, 6): + case IP_VERSION(11, 7, 0): + case IP_VERSION(11, 7, 1): if (!enable) amdgpu_gfx_off_ctrl(adev, false); @@ -5834,6 +5852,8 @@ static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, case IP_VERSION(11, 5, 3): case IP_VERSION(11, 5, 4): case IP_VERSION(11, 5, 6): + case IP_VERSION(11, 7, 0): + case IP_VERSION(11, 7, 1): gfx_v11_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); break; @@ -5997,7 +6017,7 @@ static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, } amdgpu_ring_write(ring, header); - BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + WARN_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN (2 << 0) | @@ -6032,7 +6052,7 @@ static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring, } amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); - BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + WARN_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN (2 << 0) | @@ -6065,9 +6085,9 @@ static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, * aligned if only send 32bit data low (discard data high) */ if (write64bit) - BUG_ON(addr & 0x7); + WARN_ON(addr & 0x7); else - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); @@ -6121,9 +6141,6 @@ static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, { struct amdgpu_device *adev = ring->adev; - /* we only allocate 32bit for each seq wb address */ - BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); - /* write fence seq to the "addr" */ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | @@ -6510,25 +6527,33 @@ static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { u32 doorbell_offset = entry->src_data[0]; - u8 me_id, pipe_id, queue_id; - struct amdgpu_ring *ring; - int i; DRM_DEBUG("IH: CP EOP\n"); - if (adev->enable_mes && doorbell_offset) { - amdgpu_userq_process_fence_irq(adev, doorbell_offset); - } else { - me_id = (entry->ring_id & 0x0c) >> 2; - pipe_id = (entry->ring_id & 0x03) >> 0; - queue_id = (entry->ring_id & 0x70) >> 4; + if (!adev->gfx.disable_kq) { + u8 me_id = (entry->ring_id & 0x0c) >> 2; + u8 pipe_id = (entry->ring_id & 0x03) >> 0; + u8 queue_id = (entry->ring_id & 0x70) >> 4; + struct amdgpu_ring *ring; + int i; switch (me_id) { case 0: - if (pipe_id == 0) - amdgpu_fence_process(&adev->gfx.gfx_ring[0]); - else - amdgpu_fence_process(&adev->gfx.gfx_ring[1]); + /* + * MES splits gfx HQDs per (me,pipe): KGQ owns queue=0, + * userq gfx owns queue>=1 (see amdgpu_mes_get_hqd_mask). + * Require a strict (me,pipe,queue) match so userq gfx + * EOPs fall through to amdgpu_userq_process_fence_irq(). + */ + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + if ((ring->me == me_id) && + (ring->pipe == pipe_id) && + (ring->queue == queue_id)) { + amdgpu_fence_process(ring); + return 0; + } + } break; case 1: case 2: @@ -6540,13 +6565,20 @@ static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, */ if ((ring->me == me_id) && (ring->pipe == pipe_id) && - (ring->queue == queue_id)) + (ring->queue == queue_id)) { amdgpu_fence_process(ring); + return 0; + } } break; + default: + break; } } + if (adev->enable_mes && doorbell_offset) + amdgpu_userq_process_fence_irq(adev, doorbell_offset); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index f66293fc675e..da668a8d6abd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -440,7 +440,7 @@ static void gfx_v12_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, WAIT_REG_MEM_ENGINE(eng_sel))); if (mem_space) - BUG_ON(addr0 & 0x3); /* Dword align */ + WARN_ON(addr0 & 0x3); /* Dword align */ amdgpu_ring_write(ring, addr0); amdgpu_ring_write(ring, addr1); amdgpu_ring_write(ring, ref); @@ -3519,10 +3519,19 @@ static int gfx_v12_0_cp_resume(struct amdgpu_device *adev) gfx_v12_0_cp_gfx_enable(adev, true); } - if (adev->enable_mes_kiq && adev->mes.kiq_hw_init) + if (adev->enable_mes_kiq && adev->mes.kiq_hw_init) { r = amdgpu_mes_kiq_hw_init(adev, 0); - else + /* + * With MES, GFX KIQ ring is owned by the MES and is never + * initialized/used directly by the driver, so it must + * not be left flagged as ready. mes_v12_0_hw_init() clears + * but clear here if MES init fails + */ + if (r) + adev->gfx.kiq[0].ring.sched.ready = false; + } else { r = gfx_v12_0_kiq_resume(adev); + } if (r) return r; @@ -4493,7 +4502,7 @@ static void gfx_v12_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, control |= ib->length_dw | (vmid << 24); amdgpu_ring_write(ring, header); - BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + WARN_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN (2 << 0) | @@ -4512,7 +4521,7 @@ static void gfx_v12_0_ring_emit_ib_compute(struct amdgpu_ring *ring, u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); - BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + WARN_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN (2 << 0) | @@ -4543,9 +4552,9 @@ static void gfx_v12_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, * aligned if only send 32bit data low (discard data high) */ if (write64bit) - BUG_ON(addr & 0x7); + WARN_ON(addr & 0x7); else - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); @@ -4593,9 +4602,6 @@ static void gfx_v12_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, { struct amdgpu_device *adev = ring->adev; - /* we only allocate 32bit for each seq wb address */ - BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); - /* write fence seq to the "addr" */ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | @@ -4838,25 +4844,33 @@ static int gfx_v12_0_eop_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { u32 doorbell_offset = entry->src_data[0]; - u8 me_id, pipe_id, queue_id; - struct amdgpu_ring *ring; - int i; DRM_DEBUG("IH: CP EOP\n"); - if (adev->enable_mes && doorbell_offset) { - amdgpu_userq_process_fence_irq(adev, doorbell_offset); - } else { - me_id = (entry->ring_id & 0x0c) >> 2; - pipe_id = (entry->ring_id & 0x03) >> 0; - queue_id = (entry->ring_id & 0x70) >> 4; + if (!adev->gfx.disable_kq) { + u8 me_id = (entry->ring_id & 0x0c) >> 2; + u8 pipe_id = (entry->ring_id & 0x03) >> 0; + u8 queue_id = (entry->ring_id & 0x70) >> 4; + struct amdgpu_ring *ring; + int i; switch (me_id) { case 0: - if (pipe_id == 0) - amdgpu_fence_process(&adev->gfx.gfx_ring[0]); - else - amdgpu_fence_process(&adev->gfx.gfx_ring[1]); + /* + * MES splits gfx HQDs per (me,pipe): KGQ owns queue=0, + * userq gfx owns queue>=1 (see amdgpu_mes_get_hqd_mask). + * Require a strict (me,pipe,queue) match so userq gfx + * EOPs fall through to amdgpu_userq_process_fence_irq(). + */ + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + if ((ring->me == me_id) && + (ring->pipe == pipe_id) && + (ring->queue == queue_id)) { + amdgpu_fence_process(ring); + return 0; + } + } break; case 1: case 2: @@ -4868,13 +4882,20 @@ static int gfx_v12_0_eop_irq(struct amdgpu_device *adev, */ if ((ring->me == me_id) && (ring->pipe == pipe_id) && - (ring->queue == queue_id)) + (ring->queue == queue_id)) { amdgpu_fence_process(ring); + return 0; + } } break; + default: + break; } } + if (adev->enable_mes && doorbell_offset) + amdgpu_userq_process_fence_irq(adev, doorbell_offset); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c index 61c3577f829f..e7e9f11b9754 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c @@ -248,7 +248,7 @@ static void gfx_v12_1_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, PACKET3_WAIT_REG_MEM__FUNCTION(3))); /* equal */ if (mem_space) - BUG_ON(addr0 & 0x3); /* Dword align */ + WARN_ON(addr0 & 0x3); /* Dword align */ amdgpu_ring_write(ring, addr0); amdgpu_ring_write(ring, addr1); amdgpu_ring_write(ring, ref); @@ -2547,10 +2547,19 @@ static int gfx_v12_1_xcc_cp_resume(struct amdgpu_device *adev, uint16_t xcc_mask gfx_v12_1_xcc_cp_compute_enable(adev, true, xcc_id); - if (adev->enable_mes_kiq && adev->mes.kiq_hw_init) + if (adev->enable_mes_kiq && adev->mes.kiq_hw_init) { r = amdgpu_mes_kiq_hw_init(adev, xcc_id); - else + /* + * With MES, GFX KIQ ring is owned by the MES and is never + * initialized/used directly by the driver, so it must + * not be left flagged as ready. mes_v12_0_hw_init() clears + * but clear here if MES init fails + */ + if (r) + adev->gfx.kiq[xcc_id].ring.sched.ready = false; + } else { r = gfx_v12_1_xcc_kiq_resume(adev, xcc_id); + } if (r) return r; @@ -3433,7 +3442,7 @@ static void gfx_v12_1_ring_emit_ib_compute(struct amdgpu_ring *ring, } amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); - BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + WARN_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN (2 << 0) | @@ -3466,9 +3475,9 @@ static void gfx_v12_1_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, * aligned if only send 32bit data low (discard data high) */ if (write64bit) - BUG_ON(addr & 0x7); + WARN_ON(addr & 0x7); else - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); @@ -3515,9 +3524,6 @@ static void gfx_v12_1_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, { struct amdgpu_device *adev = ring->adev; - /* we only allocate 32bit for each seq wb address */ - BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); - /* write fence seq to the "addr" */ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (PACKET3_WRITE_DATA__DST_SEL(5) | PACKET3_WRITE_DATA__WR_CONFIRM(1))); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 130196859ff3..70ba81e6b4d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6256,9 +6256,6 @@ static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned int flags) { - /* we only allocate 32bit for each seq wb address */ - BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); - /* write fence seq to the "addr" */ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 81a759a98725..3370f542e990 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1183,7 +1183,7 @@ static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, WAIT_REG_MEM_ENGINE(eng_sel))); if (mem_space) - BUG_ON(addr0 & 0x3); /* Dword align */ + WARN_ON(addr0 & 0x3); /* Dword align */ amdgpu_ring_write(ring, addr0); amdgpu_ring_write(ring, addr1); amdgpu_ring_write(ring, ref); @@ -5474,7 +5474,7 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, } amdgpu_ring_write(ring, header); - BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + WARN_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN (2 << 0) | @@ -5570,7 +5570,7 @@ static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, } amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); - BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + WARN_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN (2 << 0) | @@ -5611,9 +5611,9 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, * aligned if only send 32bit data low (discard data high) */ if (write64bit) - BUG_ON(addr & 0x7); + WARN_ON(addr & 0x7); else - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 510266ba0c38..2a36647b975a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -405,7 +405,7 @@ static void gfx_v9_4_3_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, WAIT_REG_MEM_ENGINE(eng_sel))); if (mem_space) - BUG_ON(addr0 & 0x3); /* Dword align */ + WARN_ON(addr0 & 0x3); /* Dword align */ amdgpu_ring_write(ring, addr0); amdgpu_ring_write(ring, addr1); amdgpu_ring_write(ring, ref); @@ -2944,7 +2944,7 @@ static void gfx_v9_4_3_ring_emit_ib_compute(struct amdgpu_ring *ring, } amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); - BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + WARN_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN (2 << 0) | @@ -2978,9 +2978,9 @@ static void gfx_v9_4_3_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, * aligned if only send 32bit data low (discard data high) */ if (write64bit) - BUG_ON(addr & 0x7); + WARN_ON(addr & 0x7); else - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); @@ -3040,9 +3040,6 @@ static void gfx_v9_4_3_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, { struct amdgpu_device *adev = ring->adev; - /* we only allocate 32bit for each seq wb address */ - BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); - /* write fence seq to the "addr" */ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 8eb9847d9e1e..c40d9c467204 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -606,6 +606,8 @@ static void gmc_v11_0_set_gfxhub_funcs(struct amdgpu_device *adev) case IP_VERSION(11, 5, 3): case IP_VERSION(11, 5, 4): case IP_VERSION(11, 5, 6): + case IP_VERSION(11, 7, 0): + case IP_VERSION(11, 7, 1): adev->gfxhub.funcs = &gfxhub_v11_5_0_funcs; break; default: @@ -781,6 +783,8 @@ static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 5, 3): case IP_VERSION(11, 5, 4): case IP_VERSION(11, 5, 6): + case IP_VERSION(11, 7, 0): + case IP_VERSION(11, 7, 1): set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask); /* diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c index f5927c3553ce..05b164f38c97 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c @@ -43,6 +43,8 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_2_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_3_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_4_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_6_imu.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_7_0_imu.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_7_1_imu.bin"); static int imu_v11_0_init_microcode(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 0c746580de11..d8204fbc198d 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -1010,7 +1010,7 @@ void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count) static bool jpeg_v4_0_3_is_idle(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - bool ret = false; + bool ret = true; int i, j; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c index 250316704dfa..ae3afc7ab326 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c @@ -657,7 +657,7 @@ static void jpeg_v5_0_1_dec_ring_set_wptr(struct amdgpu_ring *ring) static bool jpeg_v5_0_1_is_idle(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - bool ret = false; + bool ret = true; int i, j; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index ac6d4f277336..1b071a3de173 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -60,6 +60,10 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_4_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_4_mes1.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_6_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_6_mes1.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_7_0_mes_2.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_7_0_mes1.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_7_1_mes_2.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_7_1_mes1.bin"); static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block); static int mes_v11_0_hw_fini(struct amdgpu_ip_block *ip_block); @@ -559,6 +563,7 @@ static int mes_v11_0_suspend_gang(struct amdgpu_mes *mes, mes_suspend_gang_pkt.gang_context_addr = input->gang_context_addr; mes_suspend_gang_pkt.suspend_fence_addr = input->suspend_fence_addr; mes_suspend_gang_pkt.suspend_fence_value = input->suspend_fence_value; + mes_suspend_gang_pkt.doorbell_offset = input->doorbell_offset; return mes_v11_0_submit_pkt_and_poll_completion(mes, &mes_suspend_gang_pkt, sizeof(mes_suspend_gang_pkt), @@ -578,6 +583,7 @@ static int mes_v11_0_resume_gang(struct amdgpu_mes *mes, mes_resume_gang_pkt.resume_all_gangs = input->resume_all_gangs; mes_resume_gang_pkt.gang_context_addr = input->gang_context_addr; + mes_resume_gang_pkt.doorbell_offset = input->doorbell_offset; return mes_v11_0_submit_pkt_and_poll_completion(mes, &mes_resume_gang_pkt, sizeof(mes_resume_gang_pkt), diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 7453fb11289e..b6cbc25e1ab4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -592,6 +592,7 @@ static int mes_v12_0_suspend_gang(struct amdgpu_mes *mes, mes_suspend_gang_pkt.gang_context_addr = input->gang_context_addr; mes_suspend_gang_pkt.suspend_fence_addr = input->suspend_fence_addr; mes_suspend_gang_pkt.suspend_fence_value = input->suspend_fence_value; + mes_suspend_gang_pkt.doorbell_offset = input->doorbell_offset; return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_SCHED_PIPE, &mes_suspend_gang_pkt, sizeof(mes_suspend_gang_pkt), @@ -611,6 +612,7 @@ static int mes_v12_0_resume_gang(struct amdgpu_mes *mes, mes_resume_gang_pkt.resume_all_gangs = input->resume_all_gangs; mes_resume_gang_pkt.gang_context_addr = input->gang_context_addr; + mes_resume_gang_pkt.doorbell_offset = input->doorbell_offset; return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_SCHED_PIPE, &mes_resume_gang_pkt, sizeof(mes_resume_gang_pkt), diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c index 8a90ad5a51b8..e13535d94c51 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c @@ -484,6 +484,7 @@ static int mes_v12_1_suspend_gang(struct amdgpu_mes *mes, mes_suspend_gang_pkt.gang_context_addr = input->gang_context_addr; mes_suspend_gang_pkt.suspend_fence_addr = input->suspend_fence_addr; mes_suspend_gang_pkt.suspend_fence_value = input->suspend_fence_value; + mes_suspend_gang_pkt.doorbell_offset = input->doorbell_offset; /* Suspend gang is handled by master MES */ return mes_v12_1_submit_pkt_and_poll_completion(mes, input->xcc_id, AMDGPU_MES_SCHED_PIPE, @@ -504,6 +505,7 @@ static int mes_v12_1_resume_gang(struct amdgpu_mes *mes, mes_resume_gang_pkt.resume_all_gangs = input->resume_all_gangs; mes_resume_gang_pkt.gang_context_addr = input->gang_context_addr; + mes_resume_gang_pkt.doorbell_offset = input->doorbell_offset; /* Resume gang is handled by master MES */ return mes_v12_1_submit_pkt_and_poll_completion(mes, input->xcc_id, AMDGPU_MES_SCHED_PIPE, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c index 2a8582e87f2b..2a4d91368ac6 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c @@ -33,6 +33,8 @@ MODULE_FIRMWARE("amdgpu/psp_15_0_0_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_15_0_0_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_15_0_9_toc.bin"); +MODULE_FIRMWARE("amdgpu/psp_15_0_9_ta.bin"); static int psp_v15_0_0_init_microcode(struct psp_context *psp) { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 88428b88e00f..8652928861ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -457,7 +457,7 @@ static void sdma_v4_4_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 /* write the fence */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE)); /* zero in first two bits */ - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); @@ -467,7 +467,7 @@ static void sdma_v4_4_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 addr += 4; amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE)); /* zero in first two bits */ - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(seq)); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index fa02907217e0..b809942b1eb7 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -527,7 +527,7 @@ static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) | SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Ucached(UC) */ /* zero in first two bits */ - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); @@ -538,7 +538,7 @@ static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) | SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* zero in first two bits */ - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(seq)); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index f6ecbc524c9b..87c1e29fd298 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -377,7 +377,7 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) | SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Ucached(UC) */ /* zero in first two bits */ - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); @@ -388,7 +388,7 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) | SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* zero in first two bits */ - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(seq)); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index d894b7599c18..d7537888e60c 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -361,7 +361,7 @@ static void sdma_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) | SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Ucached(UC) */ /* zero in first two bits */ - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); @@ -372,7 +372,7 @@ static void sdma_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) | SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* zero in first two bits */ - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(seq)); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index f154b68dda70..49c57a38151b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -363,7 +363,7 @@ static void sdma_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) | SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Ucached(UC) */ /* zero in first two bits */ - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); @@ -374,7 +374,7 @@ static void sdma_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) | SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* zero in first two bits */ - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(seq)); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c index cd9668605a50..b06001f6b536 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c @@ -331,7 +331,7 @@ static void sdma_v7_1_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) | SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Ucached(UC) */ /* zero in first two bits */ - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); @@ -342,7 +342,7 @@ static void sdma_v7_1_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) | SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* zero in first two bits */ - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(seq)); diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 963659deeaff..1677e88a4e36 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -838,6 +838,62 @@ static int soc21_common_early_init(struct amdgpu_ip_block *ip_block) adev->pg_flags = 0; adev->external_rev_id = adev->rev_id + 0xd0; break; + case IP_VERSION(11, 7, 0): + adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_FGCG | + AMD_CG_SUPPORT_REPEATER_FGCG | + AMD_CG_SUPPORT_GFX_PERF_CLK | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS; + adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_JPEG_DPG | + AMD_PG_SUPPORT_JPEG | + AMD_PG_SUPPORT_GFX_PG; + adev->external_rev_id = adev->rev_id + 0xF; + break; + case IP_VERSION(11, 7, 1): + adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_FGCG | + AMD_CG_SUPPORT_REPEATER_FGCG | + AMD_CG_SUPPORT_GFX_PERF_CLK | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS; + adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_JPEG_DPG | + AMD_PG_SUPPORT_JPEG | + AMD_PG_SUPPORT_GFX_PG; + adev->external_rev_id = adev->rev_id + 0x40; + break; default: /* FIXME: not supported yet */ return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c index 265db9331d0b..9dce30d2bb8d 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc24.c +++ b/drivers/gpu/drm/amd/amdgpu/soc24.c @@ -496,8 +496,36 @@ static int soc24_common_suspend(struct amdgpu_ip_block *ip_block) return soc24_common_hw_fini(ip_block); } +static bool soc24_need_reset_on_resume(struct amdgpu_device *adev) +{ + u32 sol_reg1, sol_reg2; + + /* Will reset for the following suspend abort cases. + * 1) Only reset dGPU side. + * 2) S3 suspend got aborted and TOS is active. + * As for dGPU suspend abort cases the SOL value + * will be kept as zero at this resume point. + */ + if (!(adev->flags & AMD_IS_APU) && adev->in_s3) { + sol_reg1 = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_81); + msleep(100); + sol_reg2 = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_81); + + return (sol_reg1 != sol_reg2); + } + + return false; +} + static int soc24_common_resume(struct amdgpu_ip_block *ip_block) { + struct amdgpu_device *adev = ip_block->adev; + + if (soc24_need_reset_on_resume(adev)) { + dev_info(adev->dev, "S3 suspend aborted, resetting..."); + soc24_asic_reset(adev); + } + return soc24_common_hw_init(ip_block); } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index ff7269bafae8..894780669f9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -1927,14 +1927,17 @@ out: #define RENCODE_IB_PARAM_SESSION_INIT 0x00000003 /* return the offset in ib if id is found, -1 otherwise */ -static int vcn_v4_0_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int start) +static int vcn_v4_0_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int start, uint32_t *length) { int i; uint32_t len; for (i = start; (len = amdgpu_ib_get_value(ib, i)) >= 8; i += len / 4) { - if (amdgpu_ib_get_value(ib, i + 1) == id) + if (amdgpu_ib_get_value(ib, i + 1) == id) { + if (length) + *length = len; return i; + } } return -1; } @@ -1944,14 +1947,14 @@ static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib) { struct amdgpu_ring *ring = amdgpu_job_ring(job); - uint32_t val; + uint32_t val, len; int idx = 0, sidx; /* The first instance can decode anything */ if (!ring->me) return 0; - while ((idx = vcn_v4_0_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO, idx)) >= 0) { + while ((idx = vcn_v4_0_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO, idx, &len)) >= 0) { val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */ if (val == RADEON_VCN_ENGINE_TYPE_DECODE) { uint32_t valid_buf_flag = amdgpu_ib_get_value(ib, idx + 6); @@ -1964,12 +1967,12 @@ static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, amdgpu_ib_get_value(ib, idx + 8); return vcn_v4_0_dec_msg(p, job, msg_buffer_addr); } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) { - sidx = vcn_v4_0_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT, idx); + sidx = vcn_v4_0_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT, idx, NULL); if (sidx >= 0 && amdgpu_ib_get_value(ib, sidx + 2) == RENCODE_ENCODE_STANDARD_AV1) return vcn_v4_0_limit_sched(p, job); } - idx += amdgpu_ib_get_value(ib, idx) / 4; + idx += len / 4; } return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 531e20748198..c7edebd2fd8a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1914,13 +1914,13 @@ static int criu_checkpoint_devices(struct kfd_process *p, struct kfd_criu_device_bucket *device_buckets = NULL; int ret = 0, i; - device_buckets = kvzalloc(num_devices * sizeof(*device_buckets), GFP_KERNEL); + device_buckets = kvcalloc(num_devices, sizeof(*device_buckets), GFP_KERNEL); if (!device_buckets) { ret = -ENOMEM; goto exit; } - device_priv = kvzalloc(num_devices * sizeof(*device_priv), GFP_KERNEL); + device_priv = kvcalloc(num_devices, sizeof(*device_priv), GFP_KERNEL); if (!device_priv) { ret = -ENOMEM; goto exit; @@ -2040,17 +2040,17 @@ static int criu_checkpoint_bos(struct kfd_process *p, int ret = 0, pdd_index, bo_index = 0, id; void *mem; - bo_buckets = kvzalloc(num_bos * sizeof(*bo_buckets), GFP_KERNEL); + bo_buckets = kvcalloc(num_bos, sizeof(*bo_buckets), GFP_KERNEL); if (!bo_buckets) return -ENOMEM; - bo_privs = kvzalloc(num_bos * sizeof(*bo_privs), GFP_KERNEL); + bo_privs = kvcalloc(num_bos, sizeof(*bo_privs), GFP_KERNEL); if (!bo_privs) { ret = -ENOMEM; goto exit; } - files = kvzalloc(num_bos * sizeof(struct file *), GFP_KERNEL); + files = kvcalloc(num_bos, sizeof(struct file *), GFP_KERNEL); if (!files) { ret = -ENOMEM; goto exit; @@ -2581,7 +2581,7 @@ static int criu_restore_bos(struct kfd_process *p, if (!bo_buckets) return -ENOMEM; - files = kvzalloc(args->num_bos * sizeof(struct file *), GFP_KERNEL); + files = kvcalloc(args->num_bos, sizeof(struct file *), GFP_KERNEL); if (!files) { ret = -ENOMEM; goto exit; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index f28259d13818..2a239f45fc24 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1715,6 +1715,8 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc case IP_VERSION(11, 5, 3): case IP_VERSION(11, 5, 4): case IP_VERSION(11, 5, 6): + case IP_VERSION(11, 7, 0): + case IP_VERSION(11, 7, 1): /* Cacheline size not available in IP discovery for gc11. * kfd_fill_gpu_cache_info_from_gfx_config to hard code it */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 5eb863dec8f4..008a0719fe1f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -169,6 +169,8 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd) case IP_VERSION(11, 5, 3): case IP_VERSION(11, 5, 4): case IP_VERSION(11, 5, 6): + case IP_VERSION(11, 7, 0): + case IP_VERSION(11, 7, 1): kfd->device_info.event_interrupt_class = &event_interrupt_class_v11; break; case IP_VERSION(12, 0, 0): @@ -451,6 +453,14 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) gfx_target_version = 110504; f2g = &gfx_v11_kfd2kgd; break; + case IP_VERSION(11, 7, 0): + gfx_target_version = 110700; + f2g = &gfx_v11_kfd2kgd; + break; + case IP_VERSION(11, 7, 1): + gfx_target_version = 110701; + f2g = &gfx_v11_kfd2kgd; + break; case IP_VERSION(12, 0, 0): gfx_target_version = 120000; f2g = &gfx_v12_kfd2kgd; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 226e76ae0be7..7cd236c1ff75 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -128,7 +128,7 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys, enum MIGRATION_COPY_DIR direction, struct dma_fence **mfence) { - const u64 GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE; + const u64 GTT_MAX_PAGES = (AMDGPU_GTT_MAX_TRANSFER_SIZE >> PAGE_SHIFT); struct amdgpu_ring *ring; struct amdgpu_ttm_buffer_entity *entity; u64 gart_s, gart_d; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h index 06ca6235ff1b..63ea70e5c0e6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h @@ -127,6 +127,7 @@ struct mqd_manager { struct mutex mqd_mutex; struct kfd_node *dev; uint32_t mqd_size; + uint32_t ctl_stack_size; }; struct mqd_user_context_save_area_header { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 8e8ec266ca46..e034da638c07 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -203,8 +203,8 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, * more than (EOP entry count - 1) so a queue size of 0x800 dwords * is safe, giving a maximum field value of 0xA. */ - m->cp_hqd_eop_control = min(0xA, - ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1); + m->cp_hqd_eop_control = q->eop_ring_buffer_size ? min(0xA, + ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1) : 0; m->cp_hqd_eop_base_addr_lo = lower_32_bits(q->eop_ring_buffer_address >> 8); m->cp_hqd_eop_base_addr_hi = diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index fff137e00b5e..350fcbbba4b2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -241,8 +241,8 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, * more than (EOP entry count - 1) so a queue size of 0x800 dwords * is safe, giving a maximum field value of 0xA. */ - m->cp_hqd_eop_control = min(0xA, - ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1); + m->cp_hqd_eop_control = q->eop_ring_buffer_size ? min(0xA, + ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1) : 0; m->cp_hqd_eop_base_addr_lo = lower_32_bits(q->eop_ring_buffer_address >> 8); m->cp_hqd_eop_base_addr_hi = diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c index 8c815f129614..7c387fa90076 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c @@ -216,8 +216,8 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, * more than (EOP entry count - 1) so a queue size of 0x800 dwords * is safe, giving a maximum field value of 0xA. */ - m->cp_hqd_eop_control = min(0xA, - ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1); + m->cp_hqd_eop_control = q->eop_ring_buffer_size ? min(0xA, + ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1) : 0; m->cp_hqd_eop_base_addr_lo = lower_32_bits(q->eop_ring_buffer_address >> 8); m->cp_hqd_eop_base_addr_hi = diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12_1.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12_1.c index 475589b924e9..431a940f91f3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12_1.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12_1.c @@ -294,8 +294,8 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, * more than (EOP entry count - 1) so a queue size of 0x800 dwords * is safe, giving a maximum field value of 0xA. */ - m->cp_hqd_eop_control = min(0xA, - ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1); + m->cp_hqd_eop_control = q->eop_ring_buffer_size ? min(0xA, + ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1) : 0; m->cp_hqd_eop_base_addr_lo = lower_32_bits(q->eop_ring_buffer_address >> 8); m->cp_hqd_eop_base_addr_hi = diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 17bfb419b202..be99f0d53b18 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -27,6 +27,7 @@ #include <linux/uaccess.h> #include "kfd_priv.h" #include "kfd_mqd_manager.h" +#include "kfd_topology.h" #include "v9_structs.h" #include "gc/gc_9_0_offset.h" #include "gc/gc_9_0_sh_mask.h" @@ -411,8 +412,11 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd, static int get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 *ctl_stack_size) { struct v9_mqd *m = get_mqd(mqd); + u32 per_xcc_size; - if (check_mul_overflow(m->cp_hqd_cntl_stack_size, NUM_XCC(mm->dev->xcc_mask), ctl_stack_size)) + per_xcc_size = min_t(u32, m->cp_hqd_cntl_stack_size, mm->ctl_stack_size); + + if (check_mul_overflow(per_xcc_size, NUM_XCC(mm->dev->xcc_mask), ctl_stack_size)) return -EINVAL; return 0; @@ -421,13 +425,15 @@ static int get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 *ctl_stack static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst) { struct v9_mqd *m; + u32 ctl_stack_copy_size; /* Control stack is located one page after MQD. */ void *ctl_stack = (void *)((uintptr_t)mqd + AMDGPU_GPU_PAGE_SIZE); m = get_mqd(mqd); + ctl_stack_copy_size = min_t(u32, m->cp_hqd_cntl_stack_size, mm->ctl_stack_size); memcpy(mqd_dst, m, sizeof(struct v9_mqd)); - memcpy(ctl_stack_dst, ctl_stack, m->cp_hqd_cntl_stack_size); + memcpy(ctl_stack_dst, ctl_stack, ctl_stack_copy_size); } static void checkpoint_mqd_v9_4_3(struct mqd_manager *mm, @@ -436,15 +442,19 @@ static void checkpoint_mqd_v9_4_3(struct mqd_manager *mm, void *ctl_stack_dst) { struct v9_mqd *m; + u32 ctl_stack_stride; int xcc; uint64_t size = get_mqd(mqd)->cp_mqd_stride_size; + ctl_stack_stride = min_t(u32, get_mqd(mqd)->cp_hqd_cntl_stack_size, + mm->ctl_stack_size); + for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) { m = get_mqd(mqd + size * xcc); checkpoint_mqd(mm, m, (uint8_t *)mqd_dst + sizeof(*m) * xcc, - (uint8_t *)ctl_stack_dst + m->cp_hqd_cntl_stack_size * xcc); + (uint8_t *)ctl_stack_dst + ctl_stack_stride * xcc); } } @@ -998,6 +1008,15 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->is_occupied = kfd_is_occupied_cp; mqd->get_checkpoint_info = get_checkpoint_info; mqd->mqd_size = sizeof(struct v9_mqd); + if (dev->kfd->cwsr_enabled) { + struct kfd_topology_device *topo_dev; + + topo_dev = kfd_topology_device_by_id(dev->id); + if (topo_dev) + mqd->ctl_stack_size = + ALIGN(topo_dev->node_props.ctl_stack_size, + AMDGPU_GPU_PAGE_SIZE); + } mqd->mqd_stride = mqd_stride_v9; #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index c86779af323b..60b87a500698 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -214,8 +214,8 @@ static void __update_mqd(struct mqd_manager *mm, void *mqd, * more than (EOP entry count - 1) so a queue size of 0x800 dwords * is safe, giving a maximum field value of 0xA. */ - m->cp_hqd_eop_control |= min(0xA, - order_base_2(q->eop_ring_buffer_size / 4) - 1); + m->cp_hqd_eop_control |= q->eop_ring_buffer_size ? min(0xA, + order_base_2(q->eop_ring_buffer_size / 4) - 1) : 0; m->cp_hqd_eop_base_addr_lo = lower_32_bits(q->eop_ring_buffer_address >> 8); m->cp_hqd_eop_base_addr_hi = diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d3a8d681227a..18145d78334f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6614,8 +6614,8 @@ static void fill_dc_dirty_rects(struct drm_plane *plane, { struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state); struct rect *dirty_rects = flip_addrs->dirty_rects; - u32 num_clips; - struct drm_mode_rect *clips; + u32 num_clips = 0; + struct drm_mode_rect *clips = NULL; bool bb_changed; bool fb_changed; u32 i = 0; @@ -6631,8 +6631,10 @@ static void fill_dc_dirty_rects(struct drm_plane *plane, if (new_plane_state->rotation != DRM_MODE_ROTATE_0) goto ffu; - num_clips = drm_plane_get_damage_clips_count(new_plane_state); - clips = drm_plane_get_damage_clips(new_plane_state); + if (!new_plane_state->ignore_damage_clips) { + num_clips = drm_plane_get_damage_clips_count(new_plane_state); + clips = drm_plane_get_damage_clips(new_plane_state); + } if (num_clips && (!amdgpu_damage_clips || (amdgpu_damage_clips < 0 && is_psr_su))) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index bcdbf3471039..175106cce5a4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1509,7 +1509,7 @@ static void disable_vbios_mode_if_required( struct dc *dc_create(const struct dc_init_data *init_params) { - struct dc *dc = kzalloc_obj(*dc); + struct dc *dc = kvzalloc_obj(*dc); unsigned int full_pipe_count; if (!dc) @@ -1557,7 +1557,7 @@ struct dc *dc_create(const struct dc_init_data *init_params) destruct_dc: dc_destruct(dc); - kfree(dc); + kvfree(dc); return NULL; } @@ -1606,7 +1606,7 @@ void dc_deinit_callbacks(struct dc *dc) void dc_destroy(struct dc **dc) { dc_destruct(*dc); - kfree(*dc); + kvfree(*dc); *dc = NULL; } @@ -4077,8 +4077,6 @@ static void commit_planes_do_stream_update_sequence(struct dc *dc, { int j; struct block_sequence_state seq_state = { .steps = block_sequence, .num_steps = num_steps }; - struct dsc_config dsc_cfgs[MAX_PIPES]; - struct dsc_optc_config dsc_optc_cfgs[MAX_PIPES]; unsigned int dsc_cfg_index = 0; *num_steps = 0; // Initialize to 0 @@ -4150,11 +4148,13 @@ static void commit_planes_do_stream_update_sequence(struct dc *dc, if (stream_update->dsc_config) if (dsc_cfg_index < MAX_PIPES) { + struct dsc_config dsc_cfg; + struct dsc_optc_config dsc_optc_cfg; + add_link_update_dsc_config_sequence(&seq_state, pipe_ctx, - &dsc_cfgs[dsc_cfg_index], - &dsc_optc_cfgs[dsc_cfg_index]); - dsc_cfg_index++; + &dsc_cfg, + &dsc_optc_cfg); } if (stream_update->mst_bw_update) { diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn42/dcn42_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn42/dcn42_dccg.h index 2076565b1caa..a2b17ed11bdb 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn42/dcn42_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn42/dcn42_dccg.h @@ -46,6 +46,7 @@ DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DCCG_FIFO_ERRDET_STATE, mask_sh),\ DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DCCG_FIFO_ERRDET_OVR_EN, mask_sh),\ DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, DISPCLK_CHG_FWD_CORR_DISABLE, mask_sh),\ + DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, RESYNC_FIFO_LEVEL_ADJUST_EN, mask_sh),\ DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_PHASE, mask_sh),\ DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_MODULO, mask_sh),\ DCCG_SF(HDMICHARCLK0_CLOCK_CNTL, HDMICHARCLK0_EN, mask_sh),\ @@ -56,34 +57,24 @@ DCCG_SF(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_SRC_SEL, mask_sh),\ DCCG_SF(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_EN, mask_sh),\ DCCG_SF(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_SRC_SEL, mask_sh),\ - DCCG_SF(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_EN, mask_sh),\ - DCCG_SF(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_SRC_SEL, mask_sh),\ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK0_EN, mask_sh),\ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK1_EN, mask_sh),\ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK2_EN, mask_sh),\ - DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK3_EN, mask_sh),\ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK0_SRC_SEL, mask_sh),\ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK1_SRC_SEL, mask_sh),\ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK2_SRC_SEL, mask_sh),\ - DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK3_SRC_SEL, mask_sh),\ DCCG_SF(HDMISTREAMCLK_CNTL, HDMISTREAMCLK0_EN, mask_sh),\ DCCG_SF(HDMISTREAMCLK_CNTL, HDMISTREAMCLK0_SRC_SEL, mask_sh),\ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE0_SRC_SEL, mask_sh),\ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE1_SRC_SEL, mask_sh),\ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE2_SRC_SEL, mask_sh),\ - DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE3_SRC_SEL, mask_sh),\ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE0_EN, mask_sh),\ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE1_EN, mask_sh),\ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE2_EN, mask_sh),\ - DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE3_EN, mask_sh),\ DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE0_SRC_SEL, mask_sh),\ DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE1_SRC_SEL, mask_sh),\ - DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE2_SRC_SEL, mask_sh),\ - DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE3_SRC_SEL, mask_sh),\ DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE0_EN, mask_sh),\ DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE1_EN, mask_sh),\ - DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE2_EN, mask_sh),\ - DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE3_EN, mask_sh),\ DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 0, mask_sh),\ DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 1, mask_sh),\ DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 2, mask_sh),\ @@ -121,7 +112,6 @@ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYBSYMCLK_ROOT_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYCSYMCLK_ROOT_GATE_DISABLE, mask_sh),\ - DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYDSYMCLK_ROOT_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GLOBAL_FGCG_REP_CNTL, DCCG_GLOBAL_FGCG_REP_DIS, mask_sh),\ DCCG_SFII(OTG, PIXEL_RATE_CNTL, DP_DTO, ENABLE, 0, mask_sh),\ DCCG_SFII(OTG, PIXEL_RATE_CNTL, DP_DTO, ENABLE, 1, mask_sh),\ @@ -134,7 +124,6 @@ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK0_EN, mask_sh),\ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK1_EN, mask_sh),\ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK2_EN, mask_sh),\ - DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK3_EN, mask_sh),\ DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_PHASE, mask_sh),\ DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_MODULO, mask_sh),\ DCCG_SF(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_PHASE, mask_sh),\ @@ -147,36 +136,26 @@ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, SYMCLKA_FE_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, SYMCLKB_FE_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, SYMCLKC_FE_GATE_DISABLE, mask_sh),\ - DCCG_SF(DCCG_GATE_DISABLE_CNTL2, SYMCLKD_FE_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, SYMCLKA_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, SYMCLKB_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, SYMCLKC_GATE_DISABLE, mask_sh),\ - DCCG_SF(DCCG_GATE_DISABLE_CNTL2, SYMCLKD_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYBSYMCLK_ROOT_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYCSYMCLK_ROOT_GATE_DISABLE, mask_sh),\ - DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYDSYMCLK_ROOT_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE0_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE1_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE2_GATE_DISABLE, mask_sh),\ - DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE3_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE0_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE1_GATE_DISABLE, mask_sh),\ - DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE2_GATE_DISABLE, mask_sh),\ - DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE3_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE0_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE1_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE2_GATE_DISABLE, mask_sh),\ - DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE3_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE0_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE1_GATE_DISABLE, mask_sh),\ - DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE2_GATE_DISABLE, mask_sh),\ - DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE3_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL4, HDMICHARCLK0_ROOT_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYA_REFCLK_ROOT_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYB_REFCLK_ROOT_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYC_REFCLK_ROOT_GATE_DISABLE, mask_sh),\ - DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYD_REFCLK_ROOT_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, mask_sh),\ @@ -184,19 +163,15 @@ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_FE_ROOT_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_FE_ROOT_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_FE_ROOT_GATE_DISABLE, mask_sh),\ - DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_FE_ROOT_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_ROOT_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_ROOT_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_ROOT_GATE_DISABLE, mask_sh),\ - DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_ROOT_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_ROOT_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_ROOT_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_ROOT_GATE_DISABLE, mask_sh),\ - DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_ROOT_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_GATE_DISABLE, mask_sh),\ - DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, mask_sh),\ @@ -208,26 +183,38 @@ DCCG_SF(SYMCLKA_CLOCK_ENABLE, SYMCLKA_CLOCK_ENABLE, mask_sh),\ DCCG_SF(SYMCLKB_CLOCK_ENABLE, SYMCLKB_CLOCK_ENABLE, mask_sh),\ DCCG_SF(SYMCLKC_CLOCK_ENABLE, SYMCLKC_CLOCK_ENABLE, mask_sh),\ - DCCG_SF(SYMCLKD_CLOCK_ENABLE, SYMCLKD_CLOCK_ENABLE, mask_sh),\ DCCG_SF(SYMCLKA_CLOCK_ENABLE, SYMCLKA_FE_EN, mask_sh),\ DCCG_SF(SYMCLKB_CLOCK_ENABLE, SYMCLKB_FE_EN, mask_sh),\ DCCG_SF(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_EN, mask_sh),\ - DCCG_SF(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_EN, mask_sh),\ DCCG_SF(SYMCLKA_CLOCK_ENABLE, SYMCLKA_FE_SRC_SEL, mask_sh),\ DCCG_SF(SYMCLKB_CLOCK_ENABLE, SYMCLKB_FE_SRC_SEL, mask_sh),\ - DCCG_SF(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_SRC_SEL, mask_sh),\ - DCCG_SF(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_SRC_SEL, mask_sh) + DCCG_SF(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_SRC_SEL, mask_sh) #define DCCG_MASK_SH_LIST_DCN42(mask_sh) \ DCCG_MASK_SH_LIST_DCN42_COMMON(mask_sh),\ + DCCG_SF(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_EN, mask_sh),\ + DCCG_SF(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_SRC_SEL, mask_sh),\ DCCG_SF(PHYESYMCLK_CLOCK_CNTL, PHYESYMCLK_EN, mask_sh),\ DCCG_SF(PHYESYMCLK_CLOCK_CNTL, PHYESYMCLK_SRC_SEL, mask_sh),\ DCCG_SF(HDMISTREAMCLK0_DTO_PARAM, HDMISTREAMCLK0_DTO_PHASE, mask_sh),\ DCCG_SF(HDMISTREAMCLK0_DTO_PARAM, HDMISTREAMCLK0_DTO_MODULO, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYDSYMCLK_ROOT_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYESYMCLK_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL2, SYMCLKD_FE_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL2, SYMCLKD_GATE_DISABLE, mask_sh),\ DCCG_SF(DSCCLK3_DTO_PARAM, DSCCLK3_DTO_PHASE, mask_sh),\ DCCG_SF(DSCCLK3_DTO_PARAM, DSCCLK3_DTO_MODULO, mask_sh),\ - DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYESYMCLK_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE3_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE2_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE3_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE3_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE2_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE3_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_FE_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYD_REFCLK_ROOT_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYE_REFCLK_ROOT_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_FE_ROOT_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_ROOT_GATE_DISABLE, mask_sh),\ @@ -236,11 +223,22 @@ DCCG_SF(SYMCLKB_CLOCK_ENABLE, SYMCLKB_SRC_SEL, mask_sh),\ DCCG_SF(SYMCLKC_CLOCK_ENABLE, SYMCLKC_SRC_SEL, mask_sh),\ DCCG_SF(SYMCLKD_CLOCK_ENABLE, SYMCLKD_SRC_SEL, mask_sh),\ + DCCG_SF(SYMCLKD_CLOCK_ENABLE, SYMCLKD_CLOCK_ENABLE, mask_sh),\ + DCCG_SF(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_EN, mask_sh),\ + DCCG_SF(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_SRC_SEL, mask_sh),\ DCCG_SF(SYMCLKE_CLOCK_ENABLE, SYMCLKE_SRC_SEL, mask_sh),\ DCCG_SF(SYMCLKE_CLOCK_ENABLE, SYMCLKE_CLOCK_ENABLE, mask_sh),\ DCCG_SF(SYMCLKE_CLOCK_ENABLE, SYMCLKE_FE_EN, mask_sh),\ DCCG_SF(SYMCLKE_CLOCK_ENABLE, SYMCLKE_FE_SRC_SEL, mask_sh),\ - DCCG_SF(DISPCLK_FREQ_CHANGE_CNTL, RESYNC_FIFO_LEVEL_ADJUST_EN, mask_sh) + DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE3_SRC_SEL, mask_sh),\ + DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE3_EN, mask_sh),\ + DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE2_SRC_SEL, mask_sh),\ + DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE3_SRC_SEL, mask_sh),\ + DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE2_EN, mask_sh),\ + DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE3_EN, mask_sh),\ + DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK3_EN, mask_sh),\ + DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK3_SRC_SEL, mask_sh),\ + DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK3_EN, mask_sh) void dccg42_otg_add_pixel(struct dccg *dccg, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c index ed407e779c12..2c3a20d35fe9 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c @@ -271,7 +271,6 @@ static void dce110_stream_encoder_dp_set_stream_attribute( bool use_vsc_sdp_for_colorimetry, uint32_t enable_sdp_splitting) { - (void)use_vsc_sdp_for_colorimetry; (void)enable_sdp_splitting; uint32_t h_active_start; uint32_t v_active_start; @@ -334,6 +333,16 @@ static void dce110_stream_encoder_dp_set_stream_attribute( if (REG(DP_MSA_MISC)) misc1 = REG_READ(DP_MSA_MISC); + /* For YCbCr420 and BT2020 Colorimetry Formats, VSC SDP shall be used. + * When MISC1, bit 6, is Set to 1, a Source device uses a VSC SDP to indicate the + * Pixel Encoding/Colorimetry Format and that a Sink device shall ignore MISC1, bit 7, + * and MISC0, bits 7:1 (MISC1, bit 7, and MISC0, bits 7:1, become "don't care"). + */ + if (use_vsc_sdp_for_colorimetry) + misc1 = misc1 | 0x40; + else + misc1 = misc1 & ~0x40; + /* set color depth */ switch (hw_crtc_timing.display_color_depth) { @@ -499,6 +508,10 @@ static void dce110_stream_encoder_dp_set_stream_attribute( hw_crtc_timing.h_addressable + hw_crtc_timing.h_border_right, DP_MSA_VHEIGHT, hw_crtc_timing.v_border_top + hw_crtc_timing.v_addressable + hw_crtc_timing.v_border_bottom); + } else { + /* DCE-only path */ + if (REG(DP_MSA_MISC)) + REG_WRITE(DP_MSA_MISC, misc1); /* MSA_MISC1 */ } } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.h index 342c0afe6a94..88d6044904d1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.h @@ -96,7 +96,8 @@ #define SE_COMMON_REG_LIST(id)\ SE_COMMON_REG_LIST_DCE_BASE(id), \ - SRI(AFMT_CNTL, DIG, id) + SRI(AFMT_CNTL, DIG, id), \ + SRI(DP_MSA_MISC, DP, id) #define SE_DCN_REG_LIST(id)\ SE_COMMON_REG_LIST_BASE(id),\ diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index a3212fd151d1..7d8951fecd57 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -1164,8 +1164,11 @@ static bool detect_link_and_local_sink(struct dc_link *link, link->link_enc->features.flags.bits.DP_IS_USB_C == 1) { /* if alt mode times out, return false */ - if (!wait_for_entering_dp_alt_mode(link)) + if (!wait_for_entering_dp_alt_mode(link)) { + if (prev_sink) + dc_sink_release(prev_sink); return false; + } } if (!detect_dp(link, &sink_caps, reason)) { diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c index 1164fd96b714..f0f8e280ed30 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c @@ -33,22 +33,28 @@ void mod_hdcp_dump_binary_message(uint8_t *msg, uint32_t msg_size, byte_size = 3, newline_size = 1, terminator_size = 1; - uint32_t line_count = msg_size / bytes_per_line, - trailing_bytes = msg_size % bytes_per_line; - uint32_t target_size = (byte_size * bytes_per_line + newline_size) * line_count + - byte_size * trailing_bytes + newline_size + terminator_size; uint32_t buf_pos = 0; uint32_t i = 0; - if (buf_size >= target_size) { - for (i = 0; i < msg_size; i++) { - if (i % bytes_per_line == 0) - buf[buf_pos++] = '\n'; - sprintf((char *)&buf[buf_pos], "%02X ", msg[i]); - buf_pos += byte_size; - } - buf[buf_pos++] = '\0'; + /* Need room for at least the terminator. */ + if (buf_size < terminator_size) + return; + + for (i = 0; i < msg_size; i++) { + uint32_t needed = byte_size + terminator_size; + + if (i % bytes_per_line == 0) + needed += newline_size; + + if (buf_pos + needed > buf_size) + break; + + if (i % bytes_per_line == 0) + buf[buf_pos++] = '\n'; + sprintf((char *)&buf[buf_pos], "%02X ", msg[i]); + buf_pos += byte_size; } + buf[buf_pos++] = '\0'; } void mod_hdcp_log_ddc_trace(struct mod_hdcp *hdcp) diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h index f9629d42ada2..7808147ada38 100644 --- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h @@ -427,6 +427,7 @@ union MESAPI__SUSPEND { uint32_t suspend_fence_value; struct MES_API_STATUS api_status; + uint32_t doorbell_offset; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; @@ -444,6 +445,7 @@ union MESAPI__RESUME { uint64_t gang_context_addr; struct MES_API_STATUS api_status; + uint32_t doorbell_offset; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index f43d09769320..97da01aff76c 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -41,6 +41,8 @@ #define DEVICE_ATTR_IS(_name) (attr_id == device_attr_id__##_name) +#define power_2_mwatt(power) (((power) >> 8) * 1000 + ((power) & 0xff)) + struct od_attribute { struct kobj_attribute attribute; struct list_head entry; @@ -2696,6 +2698,11 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ gc_ver != IP_VERSION(9, 4, 3)) || gc_ver < IP_VERSION(9, 0, 0)) *states = ATTR_STATE_UNSUPPORTED; + + if (adev->scpm_enabled) { + dev_attr->attr.mode &= ~S_IWUGO; + dev_attr->store = NULL; + } } else if (DEVICE_ATTR_IS(gpu_metrics)) { if (gc_ver < IP_VERSION(9, 1, 0)) *states = ATTR_STATE_UNSUPPORTED; @@ -3349,7 +3356,6 @@ static int amdgpu_hwmon_get_power(struct device *dev, enum amd_pp_sensors sensor) { struct amdgpu_device *adev = dev_get_drvdata(dev); - unsigned int uw; u32 query = 0; int r; @@ -3358,9 +3364,7 @@ static int amdgpu_hwmon_get_power(struct device *dev, return r; /* convert to microwatts */ - uw = (query >> 8) * 1000000 + (query & 0xff) * 1000; - - return uw; + return power_2_mwatt(query) * 1000; } static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev, @@ -4903,7 +4907,7 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a { uint32_t mp1_ver = amdgpu_ip_version(adev, MP1_HWIP, 0); uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0); - uint32_t value; + uint32_t value, mwatt, centiwatt; uint64_t value64 = 0; uint32_t query = 0; int size; @@ -4928,17 +4932,21 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a seq_printf(m, "\t%u mV (VDDNB)\n", value); size = sizeof(uint32_t); if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_AVG_POWER, (void *)&query, &size)) { + mwatt = power_2_mwatt(query); + centiwatt = DIV_ROUND_CLOSEST(mwatt, 10); if (adev->flags & AMD_IS_APU) - seq_printf(m, "\t%u.%02u W (average SoC including CPU)\n", query >> 8, query & 0xff); + seq_printf(m, "\t%u.%02u W (average SoC including CPU)\n", centiwatt / 100, centiwatt % 100); else - seq_printf(m, "\t%u.%02u W (average SoC)\n", query >> 8, query & 0xff); + seq_printf(m, "\t%u.%02u W (average SoC)\n", centiwatt / 100, centiwatt % 100); } size = sizeof(uint32_t); if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_INPUT_POWER, (void *)&query, &size)) { + mwatt = power_2_mwatt(query); + centiwatt = DIV_ROUND_CLOSEST(mwatt, 10); if (adev->flags & AMD_IS_APU) - seq_printf(m, "\t%u.%02u W (current SoC including CPU)\n", query >> 8, query & 0xff); + seq_printf(m, "\t%u.%02u W (current SoC including CPU)\n", centiwatt / 100, centiwatt % 100); else - seq_printf(m, "\t%u.%02u W (current SoC)\n", query >> 8, query & 0xff); + seq_printf(m, "\t%u.%02u W (current SoC)\n", centiwatt / 100, centiwatt % 100); } size = sizeof(value); seq_printf(m, "\n"); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 7f8d4bb47d02..acbd7046d8a5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2403,11 +2403,14 @@ static int smu_v13_0_0_get_power_limit(struct smu_context *smu, uint32_t pp_limit = smu->adev->pm.ac_power ? skutable->SocketPowerLimitAc[PPT_THROTTLER_PPT0] : skutable->SocketPowerLimitDc[PPT_THROTTLER_PPT0]; - uint32_t power_limit = 0, od_percent_upper = 0, od_percent_lower = 0; + uint32_t msg_limit = skutable->MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC]; + uint32_t min_limit = min_t(uint32_t, pp_limit, msg_limit); + uint32_t max_limit = max_t(uint32_t, pp_limit, msg_limit); + uint32_t od_percent_upper = 0, od_percent_lower = 0; int ret; if (current_power_limit) { - ret = smu_v13_0_get_current_power_limit(smu, &power_limit); + ret = smu_v13_0_get_current_power_limit(smu, current_power_limit); if (ret) *current_power_limit = pp_limit; } @@ -2430,12 +2433,12 @@ static int smu_v13_0_0_get_power_limit(struct smu_context *smu, od_percent_upper, od_percent_lower, pp_limit); if (max_power_limit) { - *max_power_limit = pp_limit * (100 + od_percent_upper); + *max_power_limit = max_limit * (100 + od_percent_upper); *max_power_limit /= 100; } if (min_power_limit) { - *min_power_limit = pp_limit * (100 - od_percent_lower); + *min_power_limit = min_limit * (100 - od_percent_lower); *min_power_limit /= 100; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 0f774b0920ce..42c9ceeb4f7d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2385,15 +2385,16 @@ static int smu_v13_0_7_get_power_limit(struct smu_context *smu, uint32_t pp_limit = smu->adev->pm.ac_power ? skutable->SocketPowerLimitAc[PPT_THROTTLER_PPT0] : skutable->SocketPowerLimitDc[PPT_THROTTLER_PPT0]; - uint32_t power_limit = 0, od_percent_upper = 0, od_percent_lower = 0; + uint32_t msg_limit = skutable->MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC]; + uint32_t min_limit = min_t(uint32_t, pp_limit, msg_limit); + uint32_t max_limit = max_t(uint32_t, pp_limit, msg_limit); + uint32_t od_percent_upper = 0, od_percent_lower = 0; int ret; if (current_power_limit) { - ret = smu_v13_0_get_current_power_limit(smu, &power_limit); + ret = smu_v13_0_get_current_power_limit(smu, current_power_limit); if (ret) - power_limit = pp_limit; - - *current_power_limit = power_limit; + *current_power_limit = pp_limit; } if (default_power_limit) @@ -2414,12 +2415,12 @@ static int smu_v13_0_7_get_power_limit(struct smu_context *smu, od_percent_upper, od_percent_lower, pp_limit); if (max_power_limit) { - *max_power_limit = pp_limit * (100 + od_percent_upper); + *max_power_limit = max_limit * (100 + od_percent_upper); *max_power_limit /= 100; } if (min_power_limit) { - *min_power_limit = pp_limit * (100 - od_percent_lower); + *min_power_limit = min_limit * (100 - od_percent_lower); *min_power_limit /= 100; } |
