diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_queue.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 102 |
1 files changed, 76 insertions, 26 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index 4afff7094caf..28354a4e5dd5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -70,7 +70,7 @@ int init_queue(struct queue **q, const struct queue_properties *properties) { struct queue *tmp_q; - tmp_q = kzalloc(sizeof(*tmp_q), GFP_KERNEL); + tmp_q = kzalloc_obj(*tmp_q); if (!tmp_q) return -ENOMEM; @@ -247,9 +247,12 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope properties->format == KFD_QUEUE_FORMAT_AQL && topo_dev->node_props.gfx_target_version >= 70000 && topo_dev->node_props.gfx_target_version < 90000) - expected_queue_size = properties->queue_size / 2; + /* metadata_queue_size not supported on GFX7/GFX8 */ + expected_queue_size = + PAGE_ALIGN(properties->queue_size / 2); else - expected_queue_size = properties->queue_size; + expected_queue_size = + PAGE_ALIGN(properties->queue_size + properties->metadata_queue_size); vm = drm_priv_to_vm(pdd->drm_priv); err = amdgpu_bo_reserve(vm->root.bo, false); @@ -275,8 +278,8 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope /* EOP buffer is not required for all ASICs */ if (properties->eop_ring_buffer_address) { - if (properties->eop_ring_buffer_size != topo_dev->node_props.eop_buffer_size) { - pr_debug("queue eop bo size 0x%x not equal to node eop buf size 0x%x\n", + if (properties->eop_ring_buffer_size < topo_dev->node_props.eop_buffer_size) { + pr_debug("queue eop bo size 0x%x is less than node eop buf size 0x%x\n", properties->eop_ring_buffer_size, topo_dev->node_props.eop_buffer_size); err = -EINVAL; @@ -284,7 +287,7 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope } err = kfd_queue_buffer_get(vm, (void *)properties->eop_ring_buffer_address, &properties->eop_buf_bo, - properties->eop_ring_buffer_size); + ALIGN(properties->eop_ring_buffer_size, PAGE_SIZE)); if (err) goto out_err_unreserve; } @@ -297,16 +300,16 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope goto out_err_unreserve; } - if (properties->ctx_save_restore_area_size != topo_dev->node_props.cwsr_size) { - pr_debug("queue cwsr size 0x%x not equal to node cwsr size 0x%x\n", + if (properties->ctx_save_restore_area_size < topo_dev->node_props.cwsr_size) { + pr_debug("queue cwsr size 0x%x not sufficient for node cwsr size 0x%x\n", properties->ctx_save_restore_area_size, topo_dev->node_props.cwsr_size); err = -EINVAL; goto out_err_unreserve; } - total_cwsr_size = (topo_dev->node_props.cwsr_size + topo_dev->node_props.debug_memory_size) - * NUM_XCC(pdd->dev->xcc_mask); + total_cwsr_size = (properties->ctx_save_restore_area_size + + topo_dev->node_props.debug_memory_size) * NUM_XCC(pdd->dev->xcc_mask); total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE); err = kfd_queue_buffer_get(vm, (void *)properties->ctx_save_restore_area_address, @@ -352,8 +355,8 @@ int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_prope topo_dev = kfd_topology_device_by_id(pdd->dev->id); if (!topo_dev) return -EINVAL; - total_cwsr_size = (topo_dev->node_props.cwsr_size + topo_dev->node_props.debug_memory_size) - * NUM_XCC(pdd->dev->xcc_mask); + total_cwsr_size = (properties->ctx_save_restore_area_size + + topo_dev->node_props.debug_memory_size) * NUM_XCC(pdd->dev->xcc_mask); total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE); kfd_queue_buffer_svm_put(pdd, properties->ctx_save_restore_area_address, total_cwsr_size); @@ -392,34 +395,82 @@ int kfd_queue_unref_bo_vas(struct kfd_process_device *pdd, return 0; } -#define SGPR_SIZE_PER_CU 0x4000 -#define LDS_SIZE_PER_CU 0x10000 -#define HWREG_SIZE_PER_CU 0x1000 #define DEBUGGER_BYTES_ALIGN 64 #define DEBUGGER_BYTES_PER_WAVE 32 +static u32 kfd_get_sgpr_size_per_cu(u32 gfxv) +{ + u32 sgpr_size = 0x4000; + + if (gfxv == 120500 || + gfxv == 120501) + sgpr_size = 0x8000; + + return sgpr_size; +} + static u32 kfd_get_vgpr_size_per_cu(u32 gfxv) { u32 vgpr_size = 0x40000; - if ((gfxv / 100 * 100) == 90400 || /* GFX_VERSION_AQUA_VANJARAM */ + if (gfxv == 90402 || /* GFX_VERSION_AQUA_VANJARAM */ gfxv == 90010 || /* GFX_VERSION_ALDEBARAN */ gfxv == 90008 || /* GFX_VERSION_ARCTURUS */ gfxv == 90500) vgpr_size = 0x80000; else if (gfxv == 110000 || /* GFX_VERSION_PLUM_BONITO */ gfxv == 110001 || /* GFX_VERSION_WHEAT_NAS */ + gfxv == 110501 || /* GFX_VERSION_GFX1151 */ gfxv == 120000 || /* GFX_VERSION_GFX1200 */ gfxv == 120001) /* GFX_VERSION_GFX1201 */ vgpr_size = 0x60000; + else if (gfxv == 120500 || /* GFX_VERSION_GFX1250 */ + gfxv == 120501) /* GFX_VERSION_GFX1251 */ + vgpr_size = 0x80000; return vgpr_size; } +static u32 kfd_get_hwreg_size_per_cu(u32 gfxv) +{ + u32 hwreg_size = 0x1000; + + if (gfxv == 120500 || gfxv == 120501) + hwreg_size = 0x8000; + + return hwreg_size; +} + +static u32 kfd_get_lds_size_per_cu(u32 gfxv, struct kfd_node_properties *props) +{ + u32 lds_size = 0x10000; + + if (gfxv == 90500 || gfxv == 120500 || gfxv == 120501) + lds_size = props->lds_size_in_kb << 10; + + return lds_size; +} + +static u32 get_num_waves(struct kfd_node_properties *props, u32 gfxv, u32 cu_num) +{ + u32 wave_num = 0; + + if (gfxv < 100100) + wave_num = min(cu_num * 40, + props->array_count / props->simd_arrays_per_engine * 512); + else if (gfxv < 120500) + wave_num = cu_num * 32; + else if (gfxv <= 120501) + wave_num = cu_num * 64; + + WARN_ON(wave_num == 0); + + return wave_num; +} + #define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props) \ - (kfd_get_vgpr_size_per_cu(gfxv) + SGPR_SIZE_PER_CU +\ - (((gfxv) == 90500) ? (props->lds_size_in_kb << 10) : LDS_SIZE_PER_CU) +\ - HWREG_SIZE_PER_CU) + (kfd_get_vgpr_size_per_cu(gfxv) + kfd_get_sgpr_size_per_cu(gfxv) +\ + kfd_get_lds_size_per_cu(gfxv, props) + kfd_get_hwreg_size_per_cu(gfxv)) #define CNTL_STACK_BYTES_PER_WAVE(gfxv) \ ((gfxv) >= 100100 ? 12 : 8) /* GFX_VERSION_NAVI10*/ @@ -439,14 +490,13 @@ void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev) return; cu_num = props->simd_count / props->simd_per_cu / NUM_XCC(dev->gpu->xcc_mask); - wave_num = (gfxv < 100100) ? /* GFX_VERSION_NAVI10 */ - min(cu_num * 40, props->array_count / props->simd_arrays_per_engine * 512) - : cu_num * 32; + wave_num = get_num_waves(props, gfxv, cu_num); - wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props), PAGE_SIZE); + wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props), + AMDGPU_GPU_PAGE_SIZE); ctl_stack_size = wave_num * CNTL_STACK_BYTES_PER_WAVE(gfxv) + 8; ctl_stack_size = ALIGN(SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER + ctl_stack_size, - PAGE_SIZE); + AMDGPU_GPU_PAGE_SIZE); if ((gfxv / 10000 * 10000) == 100000) { /* HW design limits control stack size to 0x7000. @@ -458,11 +508,11 @@ void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev) props->ctl_stack_size = ctl_stack_size; props->debug_memory_size = ALIGN(wave_num * DEBUGGER_BYTES_PER_WAVE, DEBUGGER_BYTES_ALIGN); - props->cwsr_size = ctl_stack_size + wg_data_size; + props->cwsr_size = ALIGN(ctl_stack_size + wg_data_size, PAGE_SIZE); if (gfxv == 80002) /* GFX_VERSION_TONGA */ props->eop_buffer_size = 0x8000; - else if ((gfxv / 100 * 100) == 90400) /* GFX_VERSION_AQUA_VANJARAM */ + else if (gfxv == 90402) /* GFX_VERSION_AQUA_VANJARAM */ props->eop_buffer_size = 4096; else if (gfxv >= 80000) props->eop_buffer_size = 4096; |
