diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_queue.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 26 |
1 files changed, 19 insertions, 7 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index ad29634f8b44..4afff7094caf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -233,6 +233,7 @@ void kfd_queue_buffer_put(struct amdgpu_bo **bo) int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties) { struct kfd_topology_device *topo_dev; + u64 expected_queue_size; struct amdgpu_vm *vm; u32 total_cwsr_size; int err; @@ -241,6 +242,15 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope if (!topo_dev) return -EINVAL; + /* AQL queues on GFX7 and GFX8 appear twice their actual size */ + if (properties->type == KFD_QUEUE_TYPE_COMPUTE && + properties->format == KFD_QUEUE_FORMAT_AQL && + topo_dev->node_props.gfx_target_version >= 70000 && + topo_dev->node_props.gfx_target_version < 90000) + expected_queue_size = properties->queue_size / 2; + else + expected_queue_size = properties->queue_size; + vm = drm_priv_to_vm(pdd->drm_priv); err = amdgpu_bo_reserve(vm->root.bo, false); if (err) @@ -255,7 +265,7 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope goto out_err_unreserve; err = kfd_queue_buffer_get(vm, (void *)properties->queue_address, - &properties->ring_bo, properties->queue_size); + &properties->ring_bo, expected_queue_size); if (err) goto out_err_unreserve; @@ -266,8 +276,8 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope /* EOP buffer is not required for all ASICs */ if (properties->eop_ring_buffer_address) { if (properties->eop_ring_buffer_size != topo_dev->node_props.eop_buffer_size) { - pr_debug("queue eop bo size 0x%lx not equal to node eop buf size 0x%x\n", - properties->eop_buf_bo->tbo.base.size, + pr_debug("queue eop bo size 0x%x not equal to node eop buf size 0x%x\n", + properties->eop_ring_buffer_size, topo_dev->node_props.eop_buffer_size); err = -EINVAL; goto out_err_unreserve; @@ -394,7 +404,8 @@ static u32 kfd_get_vgpr_size_per_cu(u32 gfxv) if ((gfxv / 100 * 100) == 90400 || /* GFX_VERSION_AQUA_VANJARAM */ gfxv == 90010 || /* GFX_VERSION_ALDEBARAN */ - gfxv == 90008) /* GFX_VERSION_ARCTURUS */ + gfxv == 90008 || /* GFX_VERSION_ARCTURUS */ + gfxv == 90500) vgpr_size = 0x80000; else if (gfxv == 110000 || /* GFX_VERSION_PLUM_BONITO */ gfxv == 110001 || /* GFX_VERSION_WHEAT_NAS */ @@ -405,9 +416,10 @@ static u32 kfd_get_vgpr_size_per_cu(u32 gfxv) return vgpr_size; } -#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv) \ +#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props) \ (kfd_get_vgpr_size_per_cu(gfxv) + SGPR_SIZE_PER_CU +\ - LDS_SIZE_PER_CU + HWREG_SIZE_PER_CU) + (((gfxv) == 90500) ? (props->lds_size_in_kb << 10) : LDS_SIZE_PER_CU) +\ + HWREG_SIZE_PER_CU) #define CNTL_STACK_BYTES_PER_WAVE(gfxv) \ ((gfxv) >= 100100 ? 12 : 8) /* GFX_VERSION_NAVI10*/ @@ -431,7 +443,7 @@ void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev) min(cu_num * 40, props->array_count / props->simd_arrays_per_engine * 512) : cu_num * 32; - wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv), PAGE_SIZE); + wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props), PAGE_SIZE); ctl_stack_size = wave_num * CNTL_STACK_BYTES_PER_WAVE(gfxv) + 8; ctl_stack_size = ALIGN(SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER + ctl_stack_size, PAGE_SIZE); |