Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c	230
1 file changed, 167 insertions, 63 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 0ccb31788b20..afaaab6496de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -163,7 +163,8 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 }
 
 static struct dma_fence *
-amdgpu_ttm_job_submit(struct amdgpu_device *adev, struct amdgpu_job *job, u32 num_dw)
+amdgpu_ttm_job_submit(struct amdgpu_device *adev, struct amdgpu_ttm_buffer_entity *entity,
+		      struct amdgpu_job *job, u32 num_dw)
 {
 	struct amdgpu_ring *ring;
 
@@ -171,6 +172,8 @@ amdgpu_ttm_job_submit(struct amdgpu_device *adev, struct amdgpu_job *job, u32 nu
 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
 	WARN_ON(job->ibs[0].length_dw > num_dw);
 
+	lockdep_assert_held(&entity->lock);
+
 	return amdgpu_job_submit(job);
 }
 
@@ -228,9 +231,7 @@ static int amdgpu_ttm_map_buffer(struct amdgpu_ttm_buffer_entity *entity,
 
 	*size = min(*size, (uint64_t)num_pages * PAGE_SIZE - offset);
 
-	*addr = adev->gmc.gart_start;
-	*addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
-		AMDGPU_GPU_PAGE_SIZE;
+	*addr = amdgpu_compute_gart_address(&adev->gmc, entity, window);
 	*addr += offset;
 
 	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
@@ -248,7 +249,7 @@ static int amdgpu_ttm_map_buffer(struct amdgpu_ttm_buffer_entity *entity,
 	src_addr += job->ibs[0].gpu_addr;
 
 	dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
-	dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
+	dst_addr += (entity->gart_window_offs[window] >> AMDGPU_GPU_PAGE_SHIFT) * 8;
 	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
 				dst_addr, num_bytes, 0);
 
@@ -269,7 +270,7 @@ static int amdgpu_ttm_map_buffer(struct amdgpu_ttm_buffer_entity *entity,
 		amdgpu_gart_map_vram_range(adev, pa, 0, num_pages, flags, cpu_addr);
 	}
 
-	dma_fence_put(amdgpu_ttm_job_submit(adev, job, num_dw));
+	dma_fence_put(amdgpu_ttm_job_submit(adev, entity, job, num_dw));
 
 	return 0;
 }
@@ -313,7 +314,7 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 	amdgpu_res_first(src->mem, src->offset, size, &src_mm);
 	amdgpu_res_first(dst->mem, dst->offset, size, &dst_mm);
 
-	mutex_lock(&adev->mman.gtt_window_lock);
+	mutex_lock(&entity->lock);
 	while (src_mm.remaining) {
 		uint64_t from, to, cur_size, tiling_flags;
 		uint32_t num_type, data_format, max_com, write_compress_disable;
@@ -368,7 +369,7 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 		amdgpu_res_next(&dst_mm, cur_size);
 	}
 error:
-	mutex_unlock(&adev->mman.gtt_window_lock);
+	mutex_unlock(&entity->lock);
 	*f = fence;
 	return r;
 }
@@ -386,9 +387,11 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
+	struct amdgpu_ttm_buffer_entity *entity;
 	struct amdgpu_copy_mem src, dst;
 	struct dma_fence *fence = NULL;
 	int r;
+	u32 e;
 
 	src.bo = bo;
 	dst.bo = bo;
@@ -397,8 +400,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 	src.offset = 0;
 	dst.offset = 0;
 
+	e = atomic_inc_return(&adev->mman.next_move_entity) %
+		adev->mman.num_move_entities;
+	entity = &adev->mman.move_entities[e];
+
 	r = amdgpu_ttm_copy_mem_to_mem(adev,
-				       &adev->mman.move_entity,
+				       entity,
 				       &src, &dst, new_mem->size,
 				       amdgpu_bo_encrypted(abo),
@@ -410,9 +417,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 	if (old_mem->mem_type == TTM_PL_VRAM &&
 	    (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
 		struct dma_fence *wipe_fence = NULL;
-
-		r = amdgpu_fill_buffer(&adev->mman.move_entity,
-				       abo, 0, NULL, &wipe_fence,
+		r = amdgpu_fill_buffer(entity, abo, 0, NULL, &wipe_fence,
 				       AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
 		if (r) {
 			goto error;
@@ -1570,7 +1575,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
 	if (r)
 		goto out;
 
-	mutex_lock(&adev->mman.gtt_window_lock);
+	mutex_lock(&adev->mman.default_entity.lock);
 	amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm);
 	src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) +
 		src_mm.start;
@@ -1581,8 +1586,8 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
 	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
 				dst_addr, PAGE_SIZE, 0);
 
-	fence = amdgpu_ttm_job_submit(adev, job, num_dw);
-	mutex_unlock(&adev->mman.gtt_window_lock);
+	fence = amdgpu_ttm_job_submit(adev, &adev->mman.default_entity, job, num_dw);
+	mutex_unlock(&adev->mman.default_entity.lock);
 
 	if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
 		r = -ETIMEDOUT;
@@ -1898,7 +1903,7 @@ static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev)
 }
 
 /**
- * amdgpu_ttm_mmio_remap_bo_init - Allocate the singleton MMIO_REMAP BO
+ * amdgpu_ttm_alloc_mmio_remap_bo - Allocate the singleton MMIO_REMAP BO
  * @adev: amdgpu device
  *
  * Allocates a global BO with backing AMDGPU_PL_MMIO_REMAP when the
@@ -2003,6 +2008,50 @@ static void amdgpu_ttm_free_mmio_remap_bo(struct amdgpu_device *adev)
 	adev->rmmio_remap.bo = NULL;
 }
 
+static int amdgpu_ttm_buffer_entity_init(struct amdgpu_gtt_mgr *mgr,
+					 struct amdgpu_ttm_buffer_entity *entity,
+					 enum drm_sched_priority prio,
+					 struct drm_gpu_scheduler **scheds,
+					 int num_schedulers,
+					 u32 num_gart_windows)
+{
+	int i, r, num_pages;
+
+	r = drm_sched_entity_init(&entity->base, prio, scheds, num_schedulers, NULL);
+	if (r)
+		return r;
+
+	mutex_init(&entity->lock);
+
+	if (ARRAY_SIZE(entity->gart_window_offs) < num_gart_windows)
+		return -EINVAL;
+	if (num_gart_windows == 0)
+		return 0;
+
+	num_pages = num_gart_windows * AMDGPU_GTT_MAX_TRANSFER_SIZE;
+	r = amdgpu_gtt_mgr_alloc_entries(mgr, &entity->gart_node, num_pages,
+					 DRM_MM_INSERT_BEST);
+	if (r) {
+		drm_sched_entity_destroy(&entity->base);
+		return r;
+	}
+
+	for (i = 0; i < num_gart_windows; i++) {
+		entity->gart_window_offs[i] =
+			amdgpu_gtt_node_to_byte_offset(&entity->gart_node) +
+			i * AMDGPU_GTT_MAX_TRANSFER_SIZE * PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+static void amdgpu_ttm_buffer_entity_fini(struct amdgpu_gtt_mgr *mgr,
+					  struct amdgpu_ttm_buffer_entity *entity)
+{
+	amdgpu_gtt_mgr_free_entries(mgr, &entity->gart_node);
+	drm_sched_entity_destroy(&entity->base);
+}
+
 /*
  * amdgpu_ttm_init - Init the memory management (ttm) as well as various
  * gtt/vram related fields.
@@ -2017,8 +2066,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	uint64_t gtt_size;
 	int r;
 
-	mutex_init(&adev->mman.gtt_window_lock);
-
 	dma_set_max_seg_size(adev->dev, UINT_MAX);
 	/* No others user of address space so set it to 0 */
 	r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
@@ -2292,8 +2339,9 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
 void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
 {
 	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
+	u32 num_clear_entities, num_move_entities;
 	uint64_t size;
-	int r;
+	int r, i, j;
 
 	if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
 	    adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu)
@@ -2303,46 +2351,83 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
 		struct amdgpu_ring *ring;
 		struct drm_gpu_scheduler *sched;
 
+		if (!adev->mman.buffer_funcs_ring || !adev->mman.buffer_funcs_ring->sched.ready) {
+			dev_warn(adev->dev, "Not enabling DMA transfers for in kernel use");
+			return;
+		}
+
+		num_clear_entities = 1;
+		num_move_entities = 1;
 		ring = adev->mman.buffer_funcs_ring;
 		sched = &ring->sched;
-		r = drm_sched_entity_init(&adev->mman.default_entity.base,
-					  DRM_SCHED_PRIORITY_KERNEL, &sched,
-					  1, NULL);
-		if (r) {
+		r = amdgpu_ttm_buffer_entity_init(&adev->mman.gtt_mgr,
+						  &adev->mman.default_entity,
+						  DRM_SCHED_PRIORITY_KERNEL,
+						  &sched, 1, 0);
+		if (r < 0) {
 			dev_err(adev->dev,
-				"Failed setting up TTM BO move entity (%d)\n",
-				r);
+				"Failed setting up TTM entity (%d)\n", r);
 			return;
 		}
 
-		r = drm_sched_entity_init(&adev->mman.clear_entity.base,
-					  DRM_SCHED_PRIORITY_NORMAL, &sched,
-					  1, NULL);
-		if (r) {
-			dev_err(adev->dev,
-				"Failed setting up TTM BO clear entity (%d)\n",
-				r);
-			goto error_free_entity;
+		adev->mman.clear_entities = kcalloc(num_clear_entities,
+						    sizeof(struct amdgpu_ttm_buffer_entity),
+						    GFP_KERNEL);
+		atomic_set(&adev->mman.next_clear_entity, 0);
+		if (!adev->mman.clear_entities)
+			goto error_free_default_entity;
+
+		adev->mman.num_clear_entities = num_clear_entities;
+
+		for (i = 0; i < num_clear_entities; i++) {
+			r = amdgpu_ttm_buffer_entity_init(
+				&adev->mman.gtt_mgr, &adev->mman.clear_entities[i],
+				DRM_SCHED_PRIORITY_NORMAL, &sched, 1, 1);
+
+			if (r < 0) {
+				for (j = 0; j < i; j++)
+					amdgpu_ttm_buffer_entity_fini(
+						&adev->mman.gtt_mgr, &adev->mman.clear_entities[j]);
+				kfree(adev->mman.clear_entities);
+				adev->mman.num_clear_entities = 0;
+				adev->mman.clear_entities = NULL;
+				goto error_free_default_entity;
+			}
 		}
 
-		r = drm_sched_entity_init(&adev->mman.move_entity.base,
-					  DRM_SCHED_PRIORITY_NORMAL, &sched,
-					  1, NULL);
-		if (r) {
-			dev_err(adev->dev,
-				"Failed setting up TTM BO move entity (%d)\n",
-				r);
-			drm_sched_entity_destroy(&adev->mman.clear_entity.base);
-			goto error_free_entity;
+		adev->mman.num_move_entities = num_move_entities;
+		atomic_set(&adev->mman.next_move_entity, 0);
+		for (i = 0; i < num_move_entities; i++) {
+			r = amdgpu_ttm_buffer_entity_init(
+				&adev->mman.gtt_mgr,
+				&adev->mman.move_entities[i],
+				DRM_SCHED_PRIORITY_NORMAL, &sched, 1, 2);
+
+			if (r < 0) {
+				for (j = 0; j < i; j++)
+					amdgpu_ttm_buffer_entity_fini(
+						&adev->mman.gtt_mgr, &adev->mman.move_entities[j]);
+				adev->mman.num_move_entities = 0;
+				goto error_free_clear_entities;
+			}
 		}
 	} else {
-		drm_sched_entity_destroy(&adev->mman.default_entity.base);
-		drm_sched_entity_destroy(&adev->mman.clear_entity.base);
-		drm_sched_entity_destroy(&adev->mman.move_entity.base);
+		amdgpu_ttm_buffer_entity_fini(&adev->mman.gtt_mgr,
+					      &adev->mman.default_entity);
+		for (i = 0; i < adev->mman.num_clear_entities; i++)
+			amdgpu_ttm_buffer_entity_fini(&adev->mman.gtt_mgr,
+						      &adev->mman.clear_entities[i]);
+		for (i = 0; i < adev->mman.num_move_entities; i++)
+			amdgpu_ttm_buffer_entity_fini(&adev->mman.gtt_mgr,
						      &adev->mman.move_entities[i]);
 		/* Drop all the old fences since re-creating the scheduler entities
 		 * will allocate new contexts. */
 		ttm_resource_manager_cleanup(man);
+		kfree(adev->mman.clear_entities);
+		adev->mman.clear_entities = NULL;
+		adev->mman.num_clear_entities = 0;
+		adev->mman.num_move_entities = 0;
 	}
 
 	/* this just adjusts TTM size idea, which sets lpfn to the correct value */
@@ -2355,8 +2440,16 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
 
 	return;
 
-error_free_entity:
-	drm_sched_entity_destroy(&adev->mman.default_entity.base);
+error_free_clear_entities:
+	for (i = 0; i < adev->mman.num_clear_entities; i++)
+		amdgpu_ttm_buffer_entity_fini(&adev->mman.gtt_mgr,
+					      &adev->mman.clear_entities[i]);
+	kfree(adev->mman.clear_entities);
+	adev->mman.clear_entities = NULL;
+	adev->mman.num_clear_entities = 0;
+error_free_default_entity:
+	amdgpu_ttm_buffer_entity_fini(&adev->mman.gtt_mgr,
+				      &adev->mman.default_entity);
 }
 
 static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
@@ -2430,7 +2523,7 @@ int amdgpu_copy_buffer(struct amdgpu_device *adev,
 		byte_count -= cur_size_in_bytes;
 	}
 
-	*fence = amdgpu_ttm_job_submit(adev, job, num_dw);
+	*fence = amdgpu_ttm_job_submit(adev, entity, job, num_dw);
 
 	return 0;
 
@@ -2473,7 +2566,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_device *adev,
 		byte_count -= cur_size;
 	}
 
-	*fence = amdgpu_ttm_job_submit(adev, job, num_dw);
+	*fence = amdgpu_ttm_job_submit(adev, entity, job, num_dw);
 	return 0;
 }
 
@@ -2493,6 +2586,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
 			    struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	struct amdgpu_ttm_buffer_entity *entity;
 	struct amdgpu_res_cursor cursor;
 	u64 addr;
 	int r = 0;
@@ -2502,12 +2596,12 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
 	if (!fence)
 		return -EINVAL;
 
-
+	entity = &adev->mman.clear_entities[0];
 	*fence = dma_fence_get_stub();
 
 	amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
 
-	mutex_lock(&adev->mman.gtt_window_lock);
+	mutex_lock(&entity->lock);
 	while (cursor.remaining) {
 		struct dma_fence *next = NULL;
 		u64 size;
@@ -2520,13 +2614,12 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
 		/* Never clear more than 256MiB at once to avoid timeouts */
 		size = min(cursor.size, 256ULL << 20);
 
-		r = amdgpu_ttm_map_buffer(&adev->mman.clear_entity,
-					  &bo->tbo, bo->tbo.resource, &cursor,
-					  1, false, &size, &addr);
+		r = amdgpu_ttm_map_buffer(entity, &bo->tbo, bo->tbo.resource, &cursor,
+					  0, false, &size, &addr);
 		if (r)
 			goto err;
 
-		r = amdgpu_ttm_fill_mem(adev, &adev->mman.clear_entity, 0, addr, size, resv,
+		r = amdgpu_ttm_fill_mem(adev, entity, 0, addr, size, resv,
 					&next, true,
 					AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
 		if (r)
@@ -2538,7 +2631,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
 		amdgpu_res_next(&cursor, size);
 	}
 err:
-	mutex_unlock(&adev->mman.gtt_window_lock);
+	mutex_unlock(&entity->lock);
 
 	return r;
 }
@@ -2555,15 +2648,12 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
 	struct amdgpu_res_cursor dst;
 	int r;
 
-	if (!adev->mman.buffer_funcs_enabled) {
-		dev_err(adev->dev,
-			"Trying to clear memory with ring turned off.\n");
+	if (!entity)
 		return -EINVAL;
-	}
 
 	amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &dst);
 
-	mutex_lock(&adev->mman.gtt_window_lock);
+	mutex_lock(&entity->lock);
 	while (dst.remaining) {
 		struct dma_fence *next;
 		uint64_t cur_size, to;
@@ -2572,7 +2662,7 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
 		cur_size = min(dst.size, 256ULL << 20);
 
 		r = amdgpu_ttm_map_buffer(entity, &bo->tbo, bo->tbo.resource, &dst,
-					  1, false, &cur_size, &to);
+					  0, false, &cur_size, &to);
 		if (r)
 			goto error;
 
@@ -2588,13 +2678,27 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
 		amdgpu_res_next(&dst, cur_size);
 	}
 error:
-	mutex_unlock(&adev->mman.gtt_window_lock);
+	mutex_unlock(&entity->lock);
 	if (f)
 		*f = dma_fence_get(fence);
 	dma_fence_put(fence);
 	return r;
 }
 
+struct amdgpu_ttm_buffer_entity *
+amdgpu_ttm_next_clear_entity(struct amdgpu_device *adev)
+{
+	struct amdgpu_mman *mman = &adev->mman;
+	u32 i;
+
+	if (mman->num_clear_entities == 0)
+		return NULL;
+
+	i = atomic_inc_return(&mman->next_clear_entity) %
+		mman->num_clear_entities;
+	return &mman->clear_entities[i];
+}
+
 /**
  * amdgpu_ttm_evict_resources - evict memory buffers
  * @adev: amdgpu device object
