diff options
author | Dave Airlie <airlied@redhat.com> | 2023-06-15 14:11:22 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2023-06-15 14:11:22 +1000 |
commit | 901bdf5ea1a836400ee69aa32b04e9c209271ec7 (patch) | |
tree | ccb1851c8a71e776dbccf1ccae132dc9b5f093c6 /drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | |
parent | ba57b9b11f78530146f02b776854b2b6b6d344a4 (diff) | |
parent | 3b718dcaf163d17fe907ea098c8449e0cd6bc271 (diff) | |
download | lwn-901bdf5ea1a836400ee69aa32b04e9c209271ec7.tar.gz lwn-901bdf5ea1a836400ee69aa32b04e9c209271ec7.zip |
Merge tag 'amd-drm-next-6.5-2023-06-09' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.5-2023-06-02:
amdgpu:
- SR-IOV fixes
- Warning fixes
- Misc code cleanups and spelling fixes
- DCN 3.2 updates
- Improved DC FAMS support for better power management
- Improved DC SubVP support for better power management
- DCN 3.1.x fixes
- Max IB size query
- DC GPU reset fixes
- RAS updates
- DCN 3.0.x fixes
- S/G display fixes
- CP shadow buffer support
- Implement connector force callback
- Z8 power improvements
- PSP 13.0.10 vbflash support
- Mode2 reset fixes
- Store MQDs in VRAM to improve queue switch latency
- VCN 3.x fixes
- JPEG 3.x fixes
- Enable DC_FP on LoongArch
- GFXOFF fixes
- GC 9.4.3 partition support
- SDMA 4.4.2 partition support
- VCN/JPEG 4.0.3 partition support
- VCN 4.0.3 updates
- NBIO 7.9 updates
- GC 9.4.3 updates
- Take NUMA into account when allocating memory
- Handle NUMA for partitions
- SMU 13.0.6 updates
- GC 9.4.3 RAS updates
- Stop including unused swiotlb.h
- SMU 13.0.7 fixes
- Fix clock output ordering on some APUs
- Clean up DC FPGA code
- GFX9 preemption fixes
- Misc irq fixes
- S0ix fixes
- Add new DRM_AMDGPU_WERROR config parameter to help with CI
- PCIe fix for RDNA2
- kdoc fixes
- Documentation updates
amdkfd:
- Query TTM mem limit rather than hardcoding it
- GC 9.4.3 partition support
- Handle NUMA for partitions
radeon:
- Fix possible double free
- Stop including unused swiotlb.h
- Fix possible division by zero
ttm:
- Add query for TTM mem limit
- Add NUMA awareness to pools
- Export ttm_pool_fini()
UAPI:
- Add new ctx query flag to better handle GPU resets
Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22290
- Add new interface to query and set shadow buffer for RDNA3
Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21986
- Add new INFO query for max IB size
Proposed userspace: https://gitlab.freedesktop.org/bnieuwenhuizen/mesa/-/commits/ib-rejection-v3
amd-drm-next-6.5-2023-06-09:
amdgpu:
- S0ix fixes
- Initial SMU13 Overdrive support
- kdoc fixes
- Misc clode cleanups
- Flexible array fixes
- Display OTG fixes
- SMU 13.0.6 updates
- Revert some broken clock counter updates
- Misc display fixes
- GFX9 preemption fixes
- Add support for newer EEPROM bad page table format
- Add missing radeon secondary id
- Add support for new colorspace KMS API
- CSA fix
- Stable pstate fixes for APUs
- make vbl interface admin only
- Handle PCI accelerator class
amdkfd:
- Add debugger support for gdb
radeon:
- Fix possible UAF
drm:
- Add Colorspace functionality
UAPI:
- Add debugger interface for enabling gdb
Proposed userspace: https://github.com/ROCm-Developer-Tools/ROCdbgapi/tree/wip-dbgapi
- Add KMS colorspace API
Discussion: https://lists.freedesktop.org/archives/dri-devel/2023-June/408128.html
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230609174817.7764-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 322 |
1 files changed, 218 insertions, 104 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 2cd081cbf706..d2d0d27f9053 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -38,7 +38,6 @@ #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/swap.h> -#include <linux/swiotlb.h> #include <linux/dma-buf.h> #include <linux/sizes.h> #include <linux/module.h> @@ -65,7 +64,7 @@ MODULE_IMPORT_NS(DMA_BUF); -#define AMDGPU_TTM_VRAM_MAX_DW_READ (size_t)128 +#define AMDGPU_TTM_VRAM_MAX_DW_READ ((size_t)128) static int amdgpu_ttm_backend_bind(struct ttm_device *bdev, struct ttm_tt *ttm, @@ -184,11 +183,11 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, struct ttm_resource *mem, struct amdgpu_res_cursor *mm_cur, - unsigned window, struct amdgpu_ring *ring, + unsigned int window, struct amdgpu_ring *ring, bool tmz, uint64_t *size, uint64_t *addr) { struct amdgpu_device *adev = ring->adev; - unsigned offset, num_pages, num_dw, num_bytes; + unsigned int offset, num_pages, num_dw, num_bytes; uint64_t src_addr, dst_addr; struct amdgpu_job *job; void *cpu_addr; @@ -384,7 +383,8 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) { struct dma_fence *wipe_fence = NULL; - r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence); + r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence, + false); if (r) { goto error; } else if (wipe_fence) { @@ -631,6 +631,7 @@ struct amdgpu_ttm_tt { struct task_struct *usertask; uint32_t userflags; bool bound; + int32_t pool_id; }; #define ttm_to_amdgpu_ttm_tt(ptr) container_of(ptr, struct amdgpu_ttm_tt, ttm) @@ -800,6 +801,44 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev, sg_free_table(ttm->sg); } +/* + * total_pages is constructed as MQD0+CtrlStack0 + MQD1+CtrlStack1 + ... + * MQDn+CtrlStackn where n is the number of XCCs per partition. + * pages_per_xcc is the size of one MQD+CtrlStack. The first page is MQD + * and uses memory type default, UC. The rest of pages_per_xcc are + * Ctrl stack and modify their memory type to NC. + */ +static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev, + struct ttm_tt *ttm, uint64_t flags) +{ + struct amdgpu_ttm_tt *gtt = (void *)ttm; + uint64_t total_pages = ttm->num_pages; + int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp); + uint64_t page_idx, pages_per_xcc; + int i; + uint64_t ctrl_flags = (flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | + AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC); + + pages_per_xcc = total_pages; + do_div(pages_per_xcc, num_xcc); + + for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) { + /* MQD page: use default flags */ + amdgpu_gart_bind(adev, + gtt->offset + (page_idx << PAGE_SHIFT), + 1, >t->ttm.dma_address[page_idx], flags); + /* + * Ctrl pages - modify the memory type to NC (ctrl_flags) from + * the second page of the BO onward. + */ + amdgpu_gart_bind(adev, + gtt->offset + ((page_idx + 1) << PAGE_SHIFT), + pages_per_xcc - 1, + >t->ttm.dma_address[page_idx + 1], + ctrl_flags); + } +} + static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev, struct ttm_buffer_object *tbo, uint64_t flags) @@ -812,21 +851,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev, flags |= AMDGPU_PTE_TMZ; if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) { - uint64_t page_idx = 1; - - amdgpu_gart_bind(adev, gtt->offset, page_idx, - gtt->ttm.dma_address, flags); - - /* The memory type of the first page defaults to UC. Now - * modify the memory type to NC from the second page of - * the BO onward. - */ - flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK; - flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC); - - amdgpu_gart_bind(adev, gtt->offset + (page_idx << PAGE_SHIFT), - ttm->num_pages - page_idx, - &(gtt->ttm.dma_address[page_idx]), flags); + amdgpu_ttm_gart_bind_gfx9_mqd(adev, ttm, flags); } else { amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, gtt->ttm.dma_address, flags); @@ -1029,15 +1054,20 @@ static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev, static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, uint32_t page_flags) { + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); struct amdgpu_ttm_tt *gtt; enum ttm_caching caching; gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL); - if (gtt == NULL) { + if (!gtt) return NULL; - } + gtt->gobj = &bo->base; + if (adev->gmc.mem_partitions && abo->xcp_id >= 0) + gtt->pool_id = KFD_XCP_MEM_ID(adev, abo->xcp_id); + else + gtt->pool_id = abo->xcp_id; if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) caching = ttm_write_combined; @@ -1064,6 +1094,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev, { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); + struct ttm_pool *pool; pgoff_t i; int ret; @@ -1078,7 +1109,11 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev, if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) return 0; - ret = ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx); + if (adev->mman.ttm_pools && gtt->pool_id >= 0) + pool = &adev->mman.ttm_pools[gtt->pool_id]; + else + pool = &adev->mman.bdev.pool; + ret = ttm_pool_alloc(pool, ttm, ctx); if (ret) return ret; @@ -1099,6 +1134,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev, { struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); struct amdgpu_device *adev; + struct ttm_pool *pool; pgoff_t i; amdgpu_ttm_backend_unbind(bdev, ttm); @@ -1117,7 +1153,13 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev, ttm->pages[i]->mapping = NULL; adev = amdgpu_ttm_adev(bdev); - return ttm_pool_free(&adev->mman.bdev.pool, ttm); + + if (adev->mman.ttm_pools && gtt->pool_id >= 0) + pool = &adev->mman.ttm_pools[gtt->pool_id]; + else + pool = &adev->mman.bdev.pool; + + return ttm_pool_free(pool, ttm); } /** @@ -1623,14 +1665,15 @@ static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev) return 0; } -static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev) +static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev, + uint32_t reserve_size) { struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; memset(ctx, 0, sizeof(*ctx)); ctx->c2p_train_data_offset = - ALIGN((adev->gmc.mc_vram_size - adev->mman.discovery_tmr_size - SZ_1M), SZ_1M); + ALIGN((adev->gmc.mc_vram_size - reserve_size - SZ_1M), SZ_1M); ctx->p2c_train_data_offset = (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET); ctx->train_data_size = @@ -1648,11 +1691,12 @@ static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev) */ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) { - int ret; struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; bool mem_train_support = false; + uint32_t reserve_size = 0; + int ret; - if (!amdgpu_sriov_vf(adev)) { + if (adev->bios && !amdgpu_sriov_vf(adev)) { if (amdgpu_atomfirmware_mem_training_supported(adev)) mem_train_support = true; else @@ -1666,14 +1710,18 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) * Otherwise, fallback to legacy approach to check and reserve tmr block for ip * discovery data and G6 memory training data respectively */ - adev->mman.discovery_tmr_size = - amdgpu_atomfirmware_get_fw_reserved_fb_size(adev); - if (!adev->mman.discovery_tmr_size) - adev->mman.discovery_tmr_size = DISCOVERY_TMR_OFFSET; + if (adev->bios) + reserve_size = + amdgpu_atomfirmware_get_fw_reserved_fb_size(adev); + + if (!adev->bios && adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) + reserve_size = max(reserve_size, (uint32_t)280 << 20); + else if (!reserve_size) + reserve_size = DISCOVERY_TMR_OFFSET; if (mem_train_support) { /* reserve vram for mem train according to TMR location */ - amdgpu_ttm_training_data_block_init(adev); + amdgpu_ttm_training_data_block_init(adev, reserve_size); ret = amdgpu_bo_create_kernel_at(adev, ctx->c2p_train_data_offset, ctx->train_data_size, @@ -1687,20 +1735,58 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS; } - ret = amdgpu_bo_create_kernel_at(adev, - adev->gmc.real_vram_size - adev->mman.discovery_tmr_size, - adev->mman.discovery_tmr_size, - &adev->mman.discovery_memory, - NULL); - if (ret) { - DRM_ERROR("alloc tmr failed(%d)!\n", ret); - amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); - return ret; + if (!adev->gmc.is_app_apu) { + ret = amdgpu_bo_create_kernel_at( + adev, adev->gmc.real_vram_size - reserve_size, + reserve_size, &adev->mman.fw_reserved_memory, NULL); + if (ret) { + DRM_ERROR("alloc tmr failed(%d)!\n", ret); + amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, + NULL, NULL); + return ret; + } + } else { + DRM_DEBUG_DRIVER("backdoor fw loading path for PSP TMR, no reservation needed\n"); } return 0; } +static int amdgpu_ttm_pools_init(struct amdgpu_device *adev) +{ + int i; + + if (!adev->gmc.is_app_apu || !adev->gmc.num_mem_partitions) + return 0; + + adev->mman.ttm_pools = kcalloc(adev->gmc.num_mem_partitions, + sizeof(*adev->mman.ttm_pools), + GFP_KERNEL); + if (!adev->mman.ttm_pools) + return -ENOMEM; + + for (i = 0; i < adev->gmc.num_mem_partitions; i++) { + ttm_pool_init(&adev->mman.ttm_pools[i], adev->dev, + adev->gmc.mem_partitions[i].numa.node, + false, false); + } + return 0; +} + +static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev) +{ + int i; + + if (!adev->gmc.is_app_apu || !adev->mman.ttm_pools) + return; + + for (i = 0; i < adev->gmc.num_mem_partitions; i++) + ttm_pool_fini(&adev->mman.ttm_pools[i]); + + kfree(adev->mman.ttm_pools); + adev->mman.ttm_pools = NULL; +} + /* * amdgpu_ttm_init - Init the memory management (ttm) as well as various * gtt/vram related fields. @@ -1727,6 +1813,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_ERROR("failed initializing buffer object driver(%d).\n", r); return r; } + + r = amdgpu_ttm_pools_init(adev); + if (r) { + DRM_ERROR("failed to init ttm pools(%d).\n", r); + return r; + } adev->mman.initialized = true; /* Initialize VRAM pool with all of VRAM divided into pages */ @@ -1744,6 +1836,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base, adev->gmc.visible_vram_size); + else if (adev->gmc.is_app_apu) + DRM_DEBUG_DRIVER( + "No need to ioremap when real vram size is 0\n"); else #endif adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base, @@ -1755,9 +1850,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) *place on the VRAM, so reserve it early. */ r = amdgpu_ttm_fw_reserve_vram_init(adev); - if (r) { + if (r) return r; - } /* *The reserved vram for driver must be pinned to the specified @@ -1781,49 +1875,46 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) /* allocate memory as required for VGA * This is used for VGA emulation and pre-OS scanout buffers to * avoid display artifacts while transitioning between pre-OS - * and driver. */ - r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size, - &adev->mman.stolen_vga_memory, - NULL); - if (r) - return r; - r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size, - adev->mman.stolen_extended_size, - &adev->mman.stolen_extended_memory, - NULL); - if (r) - return r; - r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_reserved_offset, - adev->mman.stolen_reserved_size, - &adev->mman.stolen_reserved_memory, - NULL); - if (r) - return r; + * and driver. + */ + if (!adev->gmc.is_app_apu) { + r = amdgpu_bo_create_kernel_at(adev, 0, + adev->mman.stolen_vga_size, + &adev->mman.stolen_vga_memory, + NULL); + if (r) + return r; - DRM_INFO("amdgpu: %uM of VRAM memory ready\n", - (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); - - /* Compute GTT size, either based on 1/2 the size of RAM size - * or whatever the user passed on module init */ - if (amdgpu_gtt_size == -1) { - struct sysinfo si; - - si_meminfo(&si); - /* Certain GL unit tests for large textures can cause problems - * with the OOM killer since there is no way to link this memory - * to a process. This was originally mitigated (but not necessarily - * eliminated) by limiting the GTT size. The problem is this limit - * is often too low for many modern games so just make the limit 1/2 - * of system memory which aligns with TTM. The OOM accounting needs - * to be addressed, but we shouldn't prevent common 3D applications - * from being usable just to potentially mitigate that corner case. - */ - gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), - (u64)si.totalram * si.mem_unit / 2); + r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size, + adev->mman.stolen_extended_size, + &adev->mman.stolen_extended_memory, + NULL); + + if (r) + return r; + + r = amdgpu_bo_create_kernel_at(adev, + adev->mman.stolen_reserved_offset, + adev->mman.stolen_reserved_size, + &adev->mman.stolen_reserved_memory, + NULL); + if (r) + return r; } else { - gtt_size = (uint64_t)amdgpu_gtt_size << 20; + DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n"); } + DRM_INFO("amdgpu: %uM of VRAM memory ready\n", + (unsigned int)(adev->gmc.real_vram_size / (1024 * 1024))); + + /* Compute GTT size, either based on TTM limit + * or whatever the user passed on module init. + */ + if (amdgpu_gtt_size == -1) + gtt_size = ttm_tt_pages_limit() << PAGE_SHIFT; + else + gtt_size = (uint64_t)amdgpu_gtt_size << 20; + /* Initialize GTT memory pool */ r = amdgpu_gtt_mgr_init(adev, gtt_size); if (r) { @@ -1831,7 +1922,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return r; } DRM_INFO("amdgpu: %uM of GTT memory ready.\n", - (unsigned)(gtt_size / (1024 * 1024))); + (unsigned int)(gtt_size / (1024 * 1024))); /* Initialize preemptible memory pool */ r = amdgpu_preempt_mgr_init(adev); @@ -1858,7 +1949,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_ERROR("Failed initializing oa heap.\n"); return r; } - if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->mman.sdma_access_bo, NULL, @@ -1874,18 +1964,24 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) void amdgpu_ttm_fini(struct amdgpu_device *adev) { int idx; + if (!adev->mman.initialized) return; + amdgpu_ttm_pools_fini(adev); + amdgpu_ttm_training_reserve_vram_fini(adev); /* return the stolen vga memory back to VRAM */ - amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL); - amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL); - /* return the IP Discovery TMR memory back to VRAM */ - amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); - if (adev->mman.stolen_reserved_size) - amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory, - NULL, NULL); + if (!adev->gmc.is_app_apu) { + amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL); + amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL); + /* return the FW reserved memory back to VRAM */ + amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL, + NULL); + if (adev->mman.stolen_reserved_size) + amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory, + NULL, NULL); + } amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL, &adev->mman.sdma_access_ptr); amdgpu_ttm_fw_reserve_vram_fini(adev); @@ -1927,7 +2023,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) int r; if (!adev->mman.initialized || amdgpu_in_reset(adev) || - adev->mman.buffer_funcs_enabled == enable) + adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu) return; if (enable) { @@ -1944,8 +2040,18 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) r); return; } + + r = drm_sched_entity_init(&adev->mman.delayed, + DRM_SCHED_PRIORITY_NORMAL, &sched, + 1, NULL); + if (r) { + DRM_ERROR("Failed setting up TTM BO move entity (%d)\n", + r); + goto error_free_entity; + } } else { drm_sched_entity_destroy(&adev->mman.entity); + drm_sched_entity_destroy(&adev->mman.delayed); dma_fence_put(man->move); man->move = NULL; } @@ -1957,6 +2063,11 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) size = adev->gmc.visible_vram_size; man->size = size; adev->mman.buffer_funcs_enabled = enable; + + return; + +error_free_entity: + drm_sched_entity_destroy(&adev->mman.entity); } static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, @@ -1964,14 +2075,16 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, unsigned int num_dw, struct dma_resv *resv, bool vm_needs_flush, - struct amdgpu_job **job) + struct amdgpu_job **job, + bool delayed) { enum amdgpu_ib_pool_type pool = direct_submit ? AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_DELAYED; int r; - - r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity, + struct drm_sched_entity *entity = delayed ? &adev->mman.delayed : + &adev->mman.entity; + r = amdgpu_job_alloc_with_ib(adev, entity, AMDGPU_FENCE_OWNER_UNDEFINED, num_dw * 4, pool, job); if (r) @@ -1997,10 +2110,10 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, bool vm_needs_flush, bool tmz) { struct amdgpu_device *adev = ring->adev; - unsigned num_loops, num_dw; + unsigned int num_loops, num_dw; struct amdgpu_job *job; uint32_t max_bytes; - unsigned i; + unsigned int i; int r; if (!direct_submit && !ring->sched.ready) { @@ -2012,7 +2125,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, num_loops = DIV_ROUND_UP(byte_count, max_bytes); num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8); r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw, - resv, vm_needs_flush, &job); + resv, vm_needs_flush, &job, false); if (r) return r; @@ -2048,7 +2161,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, uint64_t dst_addr, uint32_t byte_count, struct dma_resv *resv, struct dma_fence **fence, - bool vm_needs_flush) + bool vm_needs_flush, bool delayed) { struct amdgpu_device *adev = ring->adev; unsigned int num_loops, num_dw; @@ -2061,7 +2174,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes); num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8); r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush, - &job); + &job, delayed); if (r) return r; @@ -2084,7 +2197,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t src_data, struct dma_resv *resv, - struct dma_fence **f) + struct dma_fence **f, + bool delayed) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; @@ -2113,7 +2227,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, goto error; r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv, - &next, true); + &next, true, delayed); if (r) goto error; @@ -2164,7 +2278,7 @@ int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type) static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused) { - struct amdgpu_device *adev = (struct amdgpu_device *)m->private; + struct amdgpu_device *adev = m->private; return ttm_pool_debugfs(&adev->mman.bdev.pool, m); } |