diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_object.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 136 |
1 files changed, 98 insertions, 38 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 96f4b8904e9a..f98bfba59a2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -32,6 +32,7 @@ #include <linux/list.h> #include <linux/slab.h> #include <linux/dma-buf.h> +#include <linux/export.h> #include <drm/drm_drv.h> #include <drm/amdgpu_drm.h> @@ -62,7 +63,7 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo) amdgpu_bo_kunmap(bo); - if (bo->tbo.base.import_attach) + if (drm_gem_is_imported(&bo->tbo.base)) drm_prime_gem_destroy(&bo->tbo.base, bo->tbo.sg); drm_gem_object_release(&bo->tbo.base); amdgpu_bo_unref(&bo->parent); @@ -163,8 +164,8 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) * When GTT is just an alternative to VRAM make sure that we * only use it as fallback and still try to fill up VRAM first. */ - if (domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM && - !(adev->flags & AMD_IS_APU)) + if (abo->tbo.resource && !(adev->flags & AMD_IS_APU) && + domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) places[c].flags |= TTM_PL_FLAG_FALLBACK; c++; } @@ -351,7 +352,6 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, return 0; } -EXPORT_SYMBOL(amdgpu_bo_create_kernel); /** * amdgpu_bo_create_isp_user - create user BO for isp @@ -420,7 +420,6 @@ error_unreserve: return r; } -EXPORT_SYMBOL(amdgpu_bo_create_isp_user); /** * amdgpu_bo_create_kernel_at - create BO for kernel use at specific location @@ -524,7 +523,6 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, if (cpu_addr) *cpu_addr = NULL; } -EXPORT_SYMBOL(amdgpu_bo_free_kernel); /** * amdgpu_bo_free_isp_user - free BO for isp use @@ -547,7 +545,6 @@ void amdgpu_bo_free_isp_user(struct amdgpu_bo *bo) } amdgpu_bo_unref(&bo); } -EXPORT_SYMBOL(amdgpu_bo_free_isp_user); /* Validate bo size is bit bigger than the request domain */ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, @@ -720,13 +717,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev, bo->tbo.resource->mem_type == TTM_PL_VRAM) { struct dma_fence *fence; - r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence); + r = amdgpu_ttm_clear_buffer(amdgpu_ttm_next_clear_entity(adev), + bo, bo->tbo.base.resv, &fence, + true, AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER); if (unlikely(r)) goto fail_unreserve; - dma_resv_add_fence(bo->tbo.base.resv, fence, - DMA_RESV_USAGE_KERNEL); - dma_fence_put(fence); + if (fence) { + dma_resv_add_fence(bo->tbo.base.resv, fence, + DMA_RESV_USAGE_KERNEL); + dma_fence_put(fence); + } } if (!bp->resv) amdgpu_bo_unreserve(bo); @@ -939,7 +940,7 @@ int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain) domain = bo->preferred_domains & domain; /* A shared bo cannot be migrated to VRAM */ - if (bo->tbo.base.import_attach) { + if (drm_gem_is_imported(&bo->tbo.base)) { if (domain & AMDGPU_GEM_DOMAIN_GTT) domain = AMDGPU_GEM_DOMAIN_GTT; else @@ -967,7 +968,7 @@ int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain) */ domain = amdgpu_bo_get_preferred_domain(adev, domain); - if (bo->tbo.base.import_attach) + if (drm_gem_is_imported(&bo->tbo.base)) dma_buf_pin(bo->tbo.base.import_attach); /* force to pin into visible video ram */ @@ -1018,7 +1019,7 @@ void amdgpu_bo_unpin(struct amdgpu_bo *bo) if (bo->tbo.pin_count) return; - if (bo->tbo.base.import_attach) + if (drm_gem_is_imported(&bo->tbo.base)) dma_buf_unpin(bo->tbo.base.import_attach); if (bo->tbo.resource->mem_type == TTM_PL_VRAM) { @@ -1044,7 +1045,9 @@ static const char * const amdgpu_vram_names[] = { "GDDR6", "DDR5", "LPDDR4", - "LPDDR5" + "LPDDR5", + "HBM3E", + "HBM4" }; /** @@ -1074,10 +1077,10 @@ int amdgpu_bo_init(struct amdgpu_device *adev) adev->gmc.aper_size); } - DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n", + drm_info(adev_to_drm(adev), "Detected VRAM RAM=%lluM, BAR=%lluM\n", adev->gmc.mc_vram_size >> 20, (unsigned long long)adev->gmc.aper_size >> 20); - DRM_INFO("RAM width %dbits %s\n", + drm_info(adev_to_drm(adev), "RAM width %dbits %s\n", adev->gmc.vram_width, amdgpu_vram_names[adev->gmc.vram_type]); return amdgpu_ttm_init(adev); } @@ -1119,6 +1122,10 @@ int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags) struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct amdgpu_bo_user *ubo; + /* MMIO_REMAP is BAR I/O space; tiling should never be used here. */ + WARN_ON_ONCE(bo->tbo.resource && + bo->tbo.resource->mem_type == AMDGPU_PL_MMIO_REMAP); + BUG_ON(bo->tbo.type == ttm_bo_type_kernel); if (adev->family <= AMDGPU_FAMILY_CZ && AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT) > 6) @@ -1141,6 +1148,13 @@ void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags) { struct amdgpu_bo_user *ubo; + /* + * MMIO_REMAP BOs are not real VRAM/GTT memory but a fixed BAR I/O window. + * They should never go through GEM tiling helpers. + */ + WARN_ON_ONCE(bo->tbo.resource && + bo->tbo.resource->mem_type == AMDGPU_PL_MMIO_REMAP); + BUG_ON(bo->tbo.type == ttm_bo_type_kernel); dma_resv_assert_held(bo->tbo.base.resv); ubo = to_amdgpu_bo_user(bo); @@ -1262,9 +1276,9 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, amdgpu_bo_kunmap(abo); - if (abo->tbo.base.dma_buf && !abo->tbo.base.import_attach && + if (abo->tbo.base.dma_buf && !drm_gem_is_imported(&abo->tbo.base) && old_mem && old_mem->mem_type != TTM_PL_SYSTEM) - dma_buf_move_notify(abo->tbo.base.dma_buf); + dma_buf_invalidate_mappings(abo->tbo.base.dma_buf); /* move_notify is called before move happens */ trace_amdgpu_bo_move(abo, new_mem ? new_mem->mem_type : -1, @@ -1295,28 +1309,38 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) if (abo->kfd_bo) amdgpu_amdkfd_release_notify(abo); - /* We only remove the fence if the resv has individualized. */ - WARN_ON_ONCE(bo->type == ttm_bo_type_kernel - && bo->base.resv != &bo->base._resv); - if (bo->base.resv == &bo->base._resv) - amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo); + /* + * We lock the private dma_resv object here and since the BO is about to + * be released nobody else should have a pointer to it. + * So when this locking here fails something is wrong with the reference + * counting. + */ + if (WARN_ON_ONCE(!dma_resv_trylock(&bo->base._resv))) + return; + + amdgpu_amdkfd_remove_all_eviction_fences(abo); if (!bo->resource || bo->resource->mem_type != TTM_PL_VRAM || !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE) || adev->in_suspend || drm_dev_is_unplugged(adev_to_drm(adev))) - return; + goto out; - if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv))) - return; + r = dma_resv_reserve_fences(&bo->base._resv, 1); + if (r) + goto out; - r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true); - if (!WARN_ON(r)) { - amdgpu_vram_mgr_set_cleared(bo->resource); - amdgpu_bo_fence(abo, fence, false); - dma_fence_put(fence); - } + r = amdgpu_ttm_clear_buffer(amdgpu_ttm_next_clear_entity(adev), + abo, &bo->base._resv, &fence, + false, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE); + if (WARN_ON(r)) + goto out; - dma_resv_unlock(bo->base.resv); + amdgpu_vram_mgr_set_cleared(bo->resource); + dma_resv_add_fence(&bo->base._resv, fence, DMA_RESV_USAGE_KERNEL); + dma_fence_put(fence); + +out: + dma_resv_unlock(&bo->base._resv); } /** @@ -1464,6 +1488,26 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) } /** + * amdgpu_bo_fb_aper_addr - return FB aperture GPU offset of the VRAM bo + * @bo: amdgpu VRAM buffer object for which we query the offset + * + * Returns: + * current FB aperture GPU offset of the object. + */ +u64 amdgpu_bo_fb_aper_addr(struct amdgpu_bo *bo) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + uint64_t offset, fb_base; + + WARN_ON_ONCE(bo->tbo.resource->mem_type != TTM_PL_VRAM); + + fb_base = adev->gmc.fb_start; + fb_base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; + offset = (bo->tbo.resource->start << PAGE_SHIFT) + fb_base; + return amdgpu_gmc_sign_extend(offset); +} + +/** * amdgpu_bo_gpu_offset_no_check - return GPU offset of bo * @bo: amdgpu object for which we query the offset * @@ -1499,8 +1543,17 @@ u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo) */ uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo) { - uint32_t domain = bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK; + u32 domain; + /* + * MMIO_REMAP is internal now, so it no longer maps from a userspace + * domain bit. Keep fdinfo/mem-stats visibility by checking the actual + * TTM placement. + */ + if (bo->tbo.resource && bo->tbo.resource->mem_type == AMDGPU_PL_MMIO_REMAP) + return AMDGPU_PL_MMIO_REMAP; + + domain = bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK; if (!domain) return TTM_PL_SYSTEM; @@ -1602,6 +1655,9 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) case AMDGPU_PL_DOORBELL: placement = "DOORBELL"; break; + case AMDGPU_PL_MMIO_REMAP: + placement = "MMIO REMAP"; + break; case TTM_PL_SYSTEM: default: placement = "CPU"; @@ -1625,9 +1681,9 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) attachment = READ_ONCE(bo->tbo.base.import_attach); if (attachment) - seq_printf(m, " imported from ino:%lu", file_inode(dma_buf->file)->i_ino); + seq_printf(m, " imported from ino:%llu", file_inode(dma_buf->file)->i_ino); else if (dma_buf) - seq_printf(m, " exported as ino:%lu", file_inode(dma_buf->file)->i_ino); + seq_printf(m, " exported as ino:%llu", file_inode(dma_buf->file)->i_ino); amdgpu_bo_print_flag(m, bo, CPU_ACCESS_REQUIRED); amdgpu_bo_print_flag(m, bo, NO_CPU_ACCESS); @@ -1636,7 +1692,11 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) amdgpu_bo_print_flag(m, bo, VRAM_CONTIGUOUS); amdgpu_bo_print_flag(m, bo, VM_ALWAYS_VALID); amdgpu_bo_print_flag(m, bo, EXPLICIT_SYNC); - + /* Add the gem obj resv fence dump*/ + if (dma_resv_trylock(bo->tbo.base.resv)) { + dma_resv_describe(bo->tbo.base.resv, m); + dma_resv_unlock(bo->tbo.base.resv); + } seq_puts(m, "\n"); return size; |
