author | Dave Airlie <airlied@redhat.com> | 2023-06-15 14:11:22 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2023-06-15 14:11:22 +1000 |
commit | 901bdf5ea1a836400ee69aa32b04e9c209271ec7 (patch) | |
tree | ccb1851c8a71e776dbccf1ccae132dc9b5f093c6 /drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | |
parent | ba57b9b11f78530146f02b776854b2b6b6d344a4 (diff) | |
parent | 3b718dcaf163d17fe907ea098c8449e0cd6bc271 (diff) | |
download | lwn-901bdf5ea1a836400ee69aa32b04e9c209271ec7.tar.gz lwn-901bdf5ea1a836400ee69aa32b04e9c209271ec7.zip |
Merge tag 'amd-drm-next-6.5-2023-06-09' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.5-2023-06-02:
amdgpu:
- SR-IOV fixes
- Warning fixes
- Misc code cleanups and spelling fixes
- DCN 3.2 updates
- Improved DC FAMS support for better power management
- Improved DC SubVP support for better power management
- DCN 3.1.x fixes
- Max IB size query
- DC GPU reset fixes
- RAS updates
- DCN 3.0.x fixes
- S/G display fixes
- CP shadow buffer support
- Implement connector force callback
- Z8 power improvements
- PSP 13.0.10 vbflash support
- Mode2 reset fixes
- Store MQDs in VRAM to improve queue switch latency
- VCN 3.x fixes
- JPEG 3.x fixes
- Enable DC_FP on LoongArch
- GFXOFF fixes
- GC 9.4.3 partition support
- SDMA 4.4.2 partition support
- VCN/JPEG 4.0.3 partition support
- VCN 4.0.3 updates
- NBIO 7.9 updates
- GC 9.4.3 updates
- Take NUMA into account when allocating memory
- Handle NUMA for partitions
- SMU 13.0.6 updates
- GC 9.4.3 RAS updates
- Stop including unused swiotlb.h
- SMU 13.0.7 fixes
- Fix clock output ordering on some APUs
- Clean up DC FPGA code
- GFX9 preemption fixes
- Misc irq fixes
- S0ix fixes
- Add new DRM_AMDGPU_WERROR config parameter to help with CI
- PCIe fix for RDNA2
- kdoc fixes
- Documentation updates
amdkfd:
- Query TTM mem limit rather than hardcoding it
- GC 9.4.3 partition support
- Handle NUMA for partitions
radeon:
- Fix possible double free
- Stop including unused swiotlb.h
- Fix possible division by zero
ttm:
- Add query for TTM mem limit
- Add NUMA awareness to pools
- Export ttm_pool_fini()
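As an illustration of the ttm items above (NUMA-aware pools and the newly exported ttm_pool_fini()), here is a minimal kernel-side sketch of a driver keeping its own NUMA-aware page pool. The ttm_pool_init() prototype with a NUMA node id parameter is an assumption based on this series; check include/drm/ttm/ttm_pool.h in the merged tree for the authoritative signatures.

```c
/* Hedged sketch: a driver-private, NUMA-aware TTM page pool.
 * Assumes the post-series ttm_pool_init() that takes a NUMA node id. */
#include <linux/device.h>
#include <drm/ttm/ttm_pool.h>

static struct ttm_pool numa_pool;

static void example_pool_setup(struct device *dev, int nid)
{
	/* Back page allocations for this pool from the given NUMA node. */
	ttm_pool_init(&numa_pool, dev, nid,
		      true /* use_dma_alloc */, false /* use_dma32 */);
}

static void example_pool_teardown(void)
{
	/* ttm_pool_fini() is now exported, so drivers managing their own
	 * pools can tear them down symmetrically. */
	ttm_pool_fini(&numa_pool);
}
```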
UAPI:
- Add new ctx query flag to better handle GPU resets
Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22290
- Add new interface to query and set shadow buffer for RDNA3
Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21986
- Add new INFO query for max IB size
Proposed userspace: https://gitlab.freedesktop.org/bnieuwenhuizen/mesa/-/commits/ib-rejection-v3
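A hedged userspace sketch of the new max-IB-size INFO query via libdrm_amdgpu follows. The query id name AMDGPU_INFO_MAX_IBS and the per-HW-IP uint32_t array layout are assumptions inferred from the proposed userspace linked above; consult the merged amdgpu_drm.h for the real definition.

```c
/* Hypothetical sketch using libdrm_amdgpu; AMDGPU_INFO_MAX_IBS and the
 * per-IP array layout are assumptions, not confirmed uapi. */
#include <stdio.h>
#include <stdint.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>

static int print_max_ib_counts(amdgpu_device_handle dev)
{
	uint32_t max_ibs[AMDGPU_HW_IP_NUM] = { 0 };
	int r = amdgpu_query_info(dev, AMDGPU_INFO_MAX_IBS,
				  sizeof(max_ibs), max_ibs);
	if (r)
		return r;	/* older kernels reject the unknown query */

	for (unsigned int ip = 0; ip < AMDGPU_HW_IP_NUM; ip++)
		printf("HW IP %u: up to %u IBs per submission\n",
		       ip, max_ibs[ip]);
	return 0;
}
```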
amd-drm-next-6.5-2023-06-09:
amdgpu:
- S0ix fixes
- Initial SMU13 Overdrive support
- kdoc fixes
- Misc code cleanups
- Flexible array fixes
- Display OTG fixes
- SMU 13.0.6 updates
- Revert some broken clock counter updates
- Misc display fixes
- GFX9 preemption fixes
- Add support for newer EEPROM bad page table format
- Add missing radeon secondary id
- Add support for new colorspace KMS API
- CSA fix
- Stable pstate fixes for APUs
- Make vbl interface admin-only
- Handle PCI accelerator class
amdkfd:
- Add debugger support for gdb
radeon:
- Fix possible UAF
drm:
- Add Colorspace functionality
UAPI:
- Add debugger interface for enabling gdb
Proposed userspace: https://github.com/ROCm-Developer-Tools/ROCdbgapi/tree/wip-dbgapi
- Add KMS colorspace API
Discussion: https://lists.freedesktop.org/archives/dri-devel/2023-June/408128.html
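For the KMS colorspace API, the sketch below shows one way userspace might set a connector's "Colorspace" property using stock libdrm calls. The set of enum value names actually exposed depends on the connector type and on this new work, so the code enumerates them rather than assuming any particular name.

```c
/* Hedged sketch: set the connector "Colorspace" property via the legacy
 * libdrm property interface. Enum values (e.g. a BT.2020 variant) are
 * discovered at runtime instead of being hardcoded. */
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <xf86drmMode.h>

static int set_colorspace(int fd, uint32_t connector_id, const char *value_name)
{
	drmModeObjectProperties *props =
		drmModeObjectGetProperties(fd, connector_id, DRM_MODE_OBJECT_CONNECTOR);
	int ret = -1;

	if (!props)
		return -1;

	for (uint32_t i = 0; i < props->count_props; i++) {
		drmModePropertyRes *prop = drmModeGetProperty(fd, props->props[i]);

		if (!prop)
			continue;
		if (!strcmp(prop->name, "Colorspace")) {
			/* Look up the requested enum value by name. */
			for (int j = 0; j < prop->count_enums; j++) {
				if (!strcmp(prop->enums[j].name, value_name)) {
					ret = drmModeObjectSetProperty(fd, connector_id,
								       DRM_MODE_OBJECT_CONNECTOR,
								       prop->prop_id,
								       prop->enums[j].value);
					break;
				}
			}
		}
		drmModeFreeProperty(prop);
	}
	drmModeFreeObjectProperties(props);
	return ret;
}
```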
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230609174817.7764-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 1029 |
1 file changed, 872 insertions(+), 157 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 7a95698d83f7..d6b15493fffd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -36,6 +36,7 @@ #include "kfd_kernel_queue.h" #include "amdgpu_amdkfd.h" #include "mes_api_def.h" +#include "kfd_debug.h" /* Size of the per-pipe EOP queue */ #define CIK_HPD_EOP_BYTES_LOG2 11 @@ -46,10 +47,13 @@ static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, static int execute_queues_cpsch(struct device_queue_manager *dqm, enum kfd_unmap_queues_filter filter, - uint32_t filter_param); + uint32_t filter_param, + uint32_t grace_period); static int unmap_queues_cpsch(struct device_queue_manager *dqm, enum kfd_unmap_queues_filter filter, - uint32_t filter_param, bool reset); + uint32_t filter_param, + uint32_t grace_period, + bool reset); static int map_queues_cpsch(struct device_queue_manager *dqm); @@ -74,31 +78,31 @@ enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe) { int i; - int pipe_offset = (mec * dqm->dev->shared_resources.num_pipe_per_mec - + pipe) * dqm->dev->shared_resources.num_queue_per_pipe; + int pipe_offset = (mec * dqm->dev->kfd->shared_resources.num_pipe_per_mec + + pipe) * dqm->dev->kfd->shared_resources.num_queue_per_pipe; /* queue is available for KFD usage if bit is 1 */ - for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i) + for (i = 0; i < dqm->dev->kfd->shared_resources.num_queue_per_pipe; ++i) if (test_bit(pipe_offset + i, - dqm->dev->shared_resources.cp_queue_bitmap)) + dqm->dev->kfd->shared_resources.cp_queue_bitmap)) return true; return false; } unsigned int get_cp_queues_num(struct device_queue_manager *dqm) { - return bitmap_weight(dqm->dev->shared_resources.cp_queue_bitmap, + return bitmap_weight(dqm->dev->kfd->shared_resources.cp_queue_bitmap, KGD_MAX_QUEUES); } unsigned int get_queues_per_pipe(struct device_queue_manager *dqm) { - return dqm->dev->shared_resources.num_queue_per_pipe; + return dqm->dev->kfd->shared_resources.num_queue_per_pipe; } unsigned int get_pipes_per_mec(struct device_queue_manager *dqm) { - return dqm->dev->shared_resources.num_pipe_per_mec; + return dqm->dev->kfd->shared_resources.num_pipe_per_mec; } static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm) @@ -110,29 +114,40 @@ static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm) unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) { return kfd_get_num_sdma_engines(dqm->dev) * - dqm->dev->device_info.num_sdma_queues_per_engine; + dqm->dev->kfd->device_info.num_sdma_queues_per_engine; } unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm) { return kfd_get_num_xgmi_sdma_engines(dqm->dev) * - dqm->dev->device_info.num_sdma_queues_per_engine; + dqm->dev->kfd->device_info.num_sdma_queues_per_engine; } -static inline uint64_t get_reserved_sdma_queues_bitmap(struct device_queue_manager *dqm) +static void init_sdma_bitmaps(struct device_queue_manager *dqm) { - return dqm->dev->device_info.reserved_sdma_queues_bitmap; + bitmap_zero(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES); + bitmap_set(dqm->sdma_bitmap, 0, get_num_sdma_queues(dqm)); + + bitmap_zero(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES); + bitmap_set(dqm->xgmi_sdma_bitmap, 0, get_num_xgmi_sdma_queues(dqm)); + + /* Mask out the reserved queues 
*/ + bitmap_andnot(dqm->sdma_bitmap, dqm->sdma_bitmap, + dqm->dev->kfd->device_info.reserved_sdma_queues_bitmap, + KFD_MAX_SDMA_QUEUES); } void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { - return dqm->dev->kfd2kgd->program_sh_mem_settings( - dqm->dev->adev, qpd->vmid, - qpd->sh_mem_config, - qpd->sh_mem_ape1_base, - qpd->sh_mem_ape1_limit, - qpd->sh_mem_bases); + uint32_t xcc_mask = dqm->dev->xcc_mask; + int xcc_id; + + for_each_inst(xcc_id, xcc_mask) + dqm->dev->kfd2kgd->program_sh_mem_settings( + dqm->dev->adev, qpd->vmid, qpd->sh_mem_config, + qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit, + qpd->sh_mem_bases, xcc_id); } static void kfd_hws_hang(struct device_queue_manager *dqm) @@ -211,6 +226,9 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, queue_input.paging = false; queue_input.tba_addr = qpd->tba_addr; queue_input.tma_addr = qpd->tma_addr; + queue_input.trap_en = KFD_GC_VERSION(q->device) < IP_VERSION(11, 0, 0) || + KFD_GC_VERSION(q->device) >= IP_VERSION(12, 0, 0); + queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled; queue_type = convert_to_mes_queue_type(q->properties.type); if (queue_type < 0) { @@ -330,7 +348,7 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q, uint32_t const *restore_id) { - struct kfd_dev *dev = qpd->dqm->dev; + struct kfd_node *dev = qpd->dqm->dev; if (!KFD_IS_SOC15(dev)) { /* On pre-SOC15 chips we need to use the queue ID to @@ -349,8 +367,17 @@ static int allocate_doorbell(struct qcm_process_device *qpd, * for a SDMA engine is 512. */ - uint32_t *idx_offset = dev->shared_resources.sdma_doorbell_idx; - uint32_t valid_id = idx_offset[q->properties.sdma_engine_id] + uint32_t *idx_offset = dev->kfd->shared_resources.sdma_doorbell_idx; + + /* + * q->properties.sdma_engine_id corresponds to the virtual + * sdma engine number. However, for doorbell allocation, + * we need the physical sdma engine id in order to get the + * correct doorbell offset. 
+ */ + uint32_t valid_id = idx_offset[qpd->dqm->dev->node_id * + get_num_all_sdma_engines(qpd->dqm) + + q->properties.sdma_engine_id] + (q->properties.sdma_queue_id & 1) * KFD_QUEUE_DOORBELL_MIRROR_OFFSET + (q->properties.sdma_queue_id >> 1); @@ -382,7 +409,7 @@ static int allocate_doorbell(struct qcm_process_device *qpd, } q->properties.doorbell_off = - kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd), + kfd_get_doorbell_dw_offset_in_bar(dev->kfd, qpd_to_pdd(qpd), q->doorbell_id); return 0; } @@ -391,7 +418,7 @@ static void deallocate_doorbell(struct qcm_process_device *qpd, struct queue *q) { unsigned int old; - struct kfd_dev *dev = qpd->dqm->dev; + struct kfd_node *dev = qpd->dqm->dev; if (!KFD_IS_SOC15(dev) || q->properties.type == KFD_QUEUE_TYPE_SDMA || @@ -405,10 +432,14 @@ static void deallocate_doorbell(struct qcm_process_device *qpd, static void program_trap_handler_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { + uint32_t xcc_mask = dqm->dev->xcc_mask; + int xcc_id; + if (dqm->dev->kfd2kgd->program_trap_handler_settings) - dqm->dev->kfd2kgd->program_trap_handler_settings( - dqm->dev->adev, qpd->vmid, - qpd->tba_addr, qpd->tma_addr); + for_each_inst(xcc_id, xcc_mask) + dqm->dev->kfd2kgd->program_trap_handler_settings( + dqm->dev->adev, qpd->vmid, qpd->tba_addr, + qpd->tma_addr, xcc_id); } static int allocate_vmid(struct device_queue_manager *dqm, @@ -441,7 +472,7 @@ static int allocate_vmid(struct device_queue_manager *dqm, program_sh_mem_settings(dqm, qpd); - if (KFD_IS_SOC15(dqm->dev) && dqm->dev->cwsr_enabled) + if (KFD_IS_SOC15(dqm->dev) && dqm->dev->kfd->cwsr_enabled) program_trap_handler_settings(dqm, qpd); /* qpd->page_table_base is set earlier when register_process() @@ -460,7 +491,7 @@ static int allocate_vmid(struct device_queue_manager *dqm, return 0; } -static int flush_texture_cache_nocpsch(struct kfd_dev *kdev, +static int flush_texture_cache_nocpsch(struct kfd_node *kdev, struct qcm_process_device *qpd) { const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf; @@ -661,7 +692,7 @@ static inline void deallocate_hqd(struct device_queue_manager *dqm, #define SQ_IND_CMD_CMD_KILL 0x00000003 #define SQ_IND_CMD_MODE_BROADCAST 0x00000001 -static int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) +static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process *p) { int status = 0; unsigned int vmid; @@ -671,6 +702,8 @@ static int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process struct kfd_process_device *pdd; int first_vmid_to_scan = dev->vm_info.first_vmid_kfd; int last_vmid_to_scan = dev->vm_info.last_vmid_kfd; + uint32_t xcc_mask = dev->xcc_mask; + int xcc_id; reg_sq_cmd.u32All = 0; reg_gfx_index.u32All = 0; @@ -715,9 +748,10 @@ static int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL; reg_sq_cmd.bits.vm_id = vmid; - dev->kfd2kgd->wave_control_execute(dev->adev, - reg_gfx_index.u32All, - reg_sq_cmd.u32All); + for_each_inst(xcc_id, xcc_mask) + dev->kfd2kgd->wave_control_execute( + dev->adev, reg_gfx_index.u32All, + reg_sq_cmd.u32All, xcc_id); return 0; } @@ -837,9 +871,9 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q, /* Make sure the queue is unmapped before updating the MQD */ if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { - if (!dqm->dev->shared_resources.enable_mes) + if (!dqm->dev->kfd->shared_resources.enable_mes) retval = unmap_queues_cpsch(dqm, - 
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false); + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false); else if (prev_active) retval = remove_queue_mes(dqm, q, &pdd->qpd); @@ -858,7 +892,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q, } retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, - (dqm->dev->cwsr_enabled ? + (dqm->dev->kfd->cwsr_enabled ? KFD_PREEMPT_TYPE_WAVEFRONT_SAVE : KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); @@ -895,7 +929,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q, } if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { - if (!dqm->dev->shared_resources.enable_mes) + if (!dqm->dev->kfd->shared_resources.enable_mes) retval = map_queues_cpsch(dqm); else if (q->properties.is_active) retval = add_queue_mes(dqm, q, &pdd->qpd); @@ -917,6 +951,92 @@ out_unlock: return retval; } +/* suspend_single_queue does not lock the dqm like the + * evict_process_queues_cpsch or evict_process_queues_nocpsch. You should + * lock the dqm before calling, and unlock after calling. + * + * The reason we don't lock the dqm is because this function may be + * called on multiple queues in a loop, so rather than locking/unlocking + * multiple times, we will just keep the dqm locked for all of the calls. + */ +static int suspend_single_queue(struct device_queue_manager *dqm, + struct kfd_process_device *pdd, + struct queue *q) +{ + bool is_new; + + if (q->properties.is_suspended) + return 0; + + pr_debug("Suspending PASID %u queue [%i]\n", + pdd->process->pasid, + q->properties.queue_id); + + is_new = q->properties.exception_status & KFD_EC_MASK(EC_QUEUE_NEW); + + if (is_new || q->properties.is_being_destroyed) { + pr_debug("Suspend: skip %s queue id %i\n", + is_new ? "new" : "destroyed", + q->properties.queue_id); + return -EBUSY; + } + + q->properties.is_suspended = true; + if (q->properties.is_active) { + if (dqm->dev->kfd->shared_resources.enable_mes) { + int r = remove_queue_mes(dqm, q, &pdd->qpd); + + if (r) + return r; + } + + decrement_queue_count(dqm, &pdd->qpd, q); + q->properties.is_active = false; + } + + return 0; +} + +/* resume_single_queue does not lock the dqm like the functions + * restore_process_queues_cpsch or restore_process_queues_nocpsch. You should + * lock the dqm before calling, and unlock after calling. + * + * The reason we don't lock the dqm is because this function may be + * called on multiple queues in a loop, so rather than locking/unlocking + * multiple times, we will just keep the dqm locked for all of the calls. + */ +static int resume_single_queue(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + struct queue *q) +{ + struct kfd_process_device *pdd; + + if (!q->properties.is_suspended) + return 0; + + pdd = qpd_to_pdd(qpd); + + pr_debug("Restoring from suspend PASID %u queue [%i]\n", + pdd->process->pasid, + q->properties.queue_id); + + q->properties.is_suspended = false; + + if (QUEUE_IS_ACTIVE(q->properties)) { + if (dqm->dev->kfd->shared_resources.enable_mes) { + int r = add_queue_mes(dqm, q, &pdd->qpd); + + if (r) + return r; + } + + q->properties.is_active = true; + increment_queue_count(dqm, qpd, q); + } + + return 0; +} + static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { @@ -951,7 +1071,7 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, continue; retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, - (dqm->dev->cwsr_enabled ? 
+ (dqm->dev->kfd->cwsr_enabled ? KFD_PREEMPT_TYPE_WAVEFRONT_SAVE : KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); @@ -979,6 +1099,14 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, goto out; pdd = qpd_to_pdd(qpd); + + /* The debugger creates processes that temporarily have not acquired + * all VMs for all devices and has no VMs itself. + * Skip queue eviction on process eviction. + */ + if (!pdd->drm_priv) + goto out; + pr_debug_ratelimited("Evicting PASID 0x%x queues\n", pdd->process->pasid); @@ -993,7 +1121,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, q->properties.is_active = false; decrement_queue_count(dqm, qpd, q); - if (dqm->dev->shared_resources.enable_mes) { + if (dqm->dev->kfd->shared_resources.enable_mes) { retval = remove_queue_mes(dqm, q, qpd); if (retval) { pr_err("Failed to evict queue %d\n", @@ -1003,11 +1131,12 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, } } pdd->last_evict_timestamp = get_jiffies_64(); - if (!dqm->dev->shared_resources.enable_mes) + if (!dqm->dev->kfd->shared_resources.enable_mes) retval = execute_queues_cpsch(dqm, qpd->is_debug ? KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : - KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, + USE_DEFAULT_GRACE_PERIOD); out: dqm_unlock(dqm); @@ -1100,13 +1229,10 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, { struct queue *q; struct kfd_process_device *pdd; - uint64_t pd_base; uint64_t eviction_duration; int retval = 0; pdd = qpd_to_pdd(qpd); - /* Retrieve PD base */ - pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); dqm_lock(dqm); if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ @@ -1116,12 +1242,19 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, goto out; } + /* The debugger creates processes that temporarily have not acquired + * all VMs for all devices and has no VMs itself. + * Skip queue restore on process restore. 
+ */ + if (!pdd->drm_priv) + goto vm_not_acquired; + pr_debug_ratelimited("Restoring PASID 0x%x queues\n", pdd->process->pasid); /* Update PD Base in QPD */ - qpd->page_table_base = pd_base; - pr_debug("Updated PD address to 0x%llx\n", pd_base); + qpd->page_table_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); + pr_debug("Updated PD address to 0x%llx\n", qpd->page_table_base); /* activate all active queues on the qpd */ list_for_each_entry(q, &qpd->queues_list, list) { @@ -1132,7 +1265,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, q->properties.is_active = true; increment_queue_count(dqm, &pdd->qpd, q); - if (dqm->dev->shared_resources.enable_mes) { + if (dqm->dev->kfd->shared_resources.enable_mes) { retval = add_queue_mes(dqm, q, qpd); if (retval) { pr_err("Failed to restore queue %d\n", @@ -1141,12 +1274,13 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, } } } - if (!dqm->dev->shared_resources.enable_mes) + if (!dqm->dev->kfd->shared_resources.enable_mes) retval = execute_queues_cpsch(dqm, - KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); - qpd->evicted = 0; + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; atomic64_add(eviction_duration, &pdd->evict_duration_counter); +vm_not_acquired: + qpd->evicted = 0; out: dqm_unlock(dqm); return retval; @@ -1229,35 +1363,32 @@ static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid, unsigned int vmid) { - return dqm->dev->kfd2kgd->set_pasid_vmid_mapping( - dqm->dev->adev, pasid, vmid); -} + uint32_t xcc_mask = dqm->dev->xcc_mask; + int xcc_id, ret; -static void init_interrupts(struct device_queue_manager *dqm) -{ - unsigned int i; + for_each_inst(xcc_id, xcc_mask) { + ret = dqm->dev->kfd2kgd->set_pasid_vmid_mapping( + dqm->dev->adev, pasid, vmid, xcc_id); + if (ret) + break; + } - for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) - if (is_pipe_enabled(dqm, 0, i)) - dqm->dev->kfd2kgd->init_interrupts(dqm->dev->adev, i); + return ret; } -static void init_sdma_bitmaps(struct device_queue_manager *dqm) +static void init_interrupts(struct device_queue_manager *dqm) { - unsigned int num_sdma_queues = - min_t(unsigned int, sizeof(dqm->sdma_bitmap)*8, - get_num_sdma_queues(dqm)); - unsigned int num_xgmi_sdma_queues = - min_t(unsigned int, sizeof(dqm->xgmi_sdma_bitmap)*8, - get_num_xgmi_sdma_queues(dqm)); + uint32_t xcc_mask = dqm->dev->xcc_mask; + unsigned int i, xcc_id; - if (num_sdma_queues) - dqm->sdma_bitmap = GENMASK_ULL(num_sdma_queues-1, 0); - if (num_xgmi_sdma_queues) - dqm->xgmi_sdma_bitmap = GENMASK_ULL(num_xgmi_sdma_queues-1, 0); - - dqm->sdma_bitmap &= ~get_reserved_sdma_queues_bitmap(dqm); - pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap); + for_each_inst(xcc_id, xcc_mask) { + for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) { + if (is_pipe_enabled(dqm, 0, i)) { + dqm->dev->kfd2kgd->init_interrupts( + dqm->dev->adev, i, xcc_id); + } + } + } } static int initialize_nocpsch(struct device_queue_manager *dqm) @@ -1282,7 +1413,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) if (test_bit(pipe_offset + queue, - dqm->dev->shared_resources.cp_queue_bitmap)) + dqm->dev->kfd->shared_resources.cp_queue_bitmap)) dqm->allocated_queues[pipe] |= 1 << queue; } @@ -1322,9 +1453,16 @@ static int start_nocpsch(struct device_queue_manager *dqm) static int stop_nocpsch(struct device_queue_manager *dqm) { + 
dqm_lock(dqm); + if (!dqm->sched_running) { + dqm_unlock(dqm); + return 0; + } + if (dqm->dev->adev->asic_type == CHIP_HAWAII) pm_uninit(&dqm->packet_mgr, false); dqm->sched_running = false; + dqm_unlock(dqm); return 0; } @@ -1342,46 +1480,48 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, int bit; if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { - if (dqm->sdma_bitmap == 0) { + if (bitmap_empty(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES)) { pr_err("No more SDMA queue to allocate\n"); return -ENOMEM; } if (restore_sdma_id) { /* Re-use existing sdma_id */ - if (!(dqm->sdma_bitmap & (1ULL << *restore_sdma_id))) { + if (!test_bit(*restore_sdma_id, dqm->sdma_bitmap)) { pr_err("SDMA queue already in use\n"); return -EBUSY; } - dqm->sdma_bitmap &= ~(1ULL << *restore_sdma_id); + clear_bit(*restore_sdma_id, dqm->sdma_bitmap); q->sdma_id = *restore_sdma_id; } else { /* Find first available sdma_id */ - bit = __ffs64(dqm->sdma_bitmap); - dqm->sdma_bitmap &= ~(1ULL << bit); + bit = find_first_bit(dqm->sdma_bitmap, + get_num_sdma_queues(dqm)); + clear_bit(bit, dqm->sdma_bitmap); q->sdma_id = bit; } - q->properties.sdma_engine_id = q->sdma_id % - kfd_get_num_sdma_engines(dqm->dev); + q->properties.sdma_engine_id = + q->sdma_id % kfd_get_num_sdma_engines(dqm->dev); q->properties.sdma_queue_id = q->sdma_id / kfd_get_num_sdma_engines(dqm->dev); } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { - if (dqm->xgmi_sdma_bitmap == 0) { + if (bitmap_empty(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES)) { pr_err("No more XGMI SDMA queue to allocate\n"); return -ENOMEM; } if (restore_sdma_id) { /* Re-use existing sdma_id */ - if (!(dqm->xgmi_sdma_bitmap & (1ULL << *restore_sdma_id))) { + if (!test_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap)) { pr_err("SDMA queue already in use\n"); return -EBUSY; } - dqm->xgmi_sdma_bitmap &= ~(1ULL << *restore_sdma_id); + clear_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap); q->sdma_id = *restore_sdma_id; } else { - bit = __ffs64(dqm->xgmi_sdma_bitmap); - dqm->xgmi_sdma_bitmap &= ~(1ULL << bit); + bit = find_first_bit(dqm->xgmi_sdma_bitmap, + get_num_xgmi_sdma_queues(dqm)); + clear_bit(bit, dqm->xgmi_sdma_bitmap); q->sdma_id = bit; } /* sdma_engine_id is sdma id including @@ -1409,11 +1549,11 @@ static void deallocate_sdma_queue(struct device_queue_manager *dqm, if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { if (q->sdma_id >= get_num_sdma_queues(dqm)) return; - dqm->sdma_bitmap |= (1ULL << q->sdma_id); + set_bit(q->sdma_id, dqm->sdma_bitmap); } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm)) return; - dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id); + set_bit(q->sdma_id, dqm->xgmi_sdma_bitmap); } } @@ -1426,14 +1566,14 @@ static int set_sched_resources(struct device_queue_manager *dqm) int i, mec; struct scheduling_resources res; - res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap; + res.vmid_mask = dqm->dev->compute_vmid_bitmap; res.queue_mask = 0; for (i = 0; i < KGD_MAX_QUEUES; ++i) { - mec = (i / dqm->dev->shared_resources.num_queue_per_pipe) - / dqm->dev->shared_resources.num_pipe_per_mec; + mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe) + / dqm->dev->kfd->shared_resources.num_pipe_per_mec; - if (!test_bit(i, dqm->dev->shared_resources.cp_queue_bitmap)) + if (!test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap)) continue; /* only acquire queues from the first MEC */ @@ -1475,9 +1615,13 @@ static int initialize_cpsch(struct device_queue_manager *dqm) 
dqm->gws_queue_count = 0; dqm->active_runlist = false; INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); + dqm->trap_debug_vmid = 0; init_sdma_bitmaps(dqm); + if (dqm->dev->kfd2kgd->get_iq_wait_times) + dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev, + &dqm->wait_times); return 0; } @@ -1489,7 +1633,7 @@ static int start_cpsch(struct device_queue_manager *dqm) dqm_lock(dqm); - if (!dqm->dev->shared_resources.enable_mes) { + if (!dqm->dev->kfd->shared_resources.enable_mes) { retval = pm_init(&dqm->packet_mgr, dqm); if (retval) goto fail_packet_manager_init; @@ -1516,14 +1660,15 @@ static int start_cpsch(struct device_queue_manager *dqm) dqm->is_hws_hang = false; dqm->is_resetting = false; dqm->sched_running = true; - if (!dqm->dev->shared_resources.enable_mes) - execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + + if (!dqm->dev->kfd->shared_resources.enable_mes) + execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); dqm_unlock(dqm); return 0; fail_allocate_vidmem: fail_set_sched_resources: - if (!dqm->dev->shared_resources.enable_mes) + if (!dqm->dev->kfd->shared_resources.enable_mes) pm_uninit(&dqm->packet_mgr, false); fail_packet_manager_init: dqm_unlock(dqm); @@ -1541,8 +1686,8 @@ static int stop_cpsch(struct device_queue_manager *dqm) } if (!dqm->is_hws_hang) { - if (!dqm->dev->shared_resources.enable_mes) - unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false); + if (!dqm->dev->kfd->shared_resources.enable_mes) + unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false); else remove_all_queues_mes(dqm); } @@ -1550,11 +1695,11 @@ static int stop_cpsch(struct device_queue_manager *dqm) hanging = dqm->is_hws_hang || dqm->is_resetting; dqm->sched_running = false; - if (!dqm->dev->shared_resources.enable_mes) + if (!dqm->dev->kfd->shared_resources.enable_mes) pm_release_ib(&dqm->packet_mgr); kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); - if (!dqm->dev->shared_resources.enable_mes) + if (!dqm->dev->kfd->shared_resources.enable_mes) pm_uninit(&dqm->packet_mgr, hanging); dqm_unlock(dqm); @@ -1584,7 +1729,8 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, list_add(&kq->list, &qpd->priv_queue_list); increment_queue_count(dqm, qpd, kq->queue); qpd->is_debug = true; - execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, + USE_DEFAULT_GRACE_PERIOD); dqm_unlock(dqm); return 0; @@ -1598,7 +1744,8 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, list_del(&kq->list); decrement_queue_count(dqm, qpd, kq->queue); qpd->is_debug = false; - execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); + execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, + USE_DEFAULT_GRACE_PERIOD); /* * Unconditionally decrement this counter, regardless of the queue's * type. @@ -1658,6 +1805,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, * updates the is_evicted flag but is a no-op otherwise. 
*/ q->properties.is_evicted = !!qpd->evicted; + q->properties.is_dbg_wa = qpd->pqm->process->debug_trap_enabled && + KFD_GC_VERSION(q->device) >= IP_VERSION(11, 0, 0) && + KFD_GC_VERSION(q->device) < IP_VERSION(12, 0, 0); if (qd) mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, @@ -1673,9 +1823,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (q->properties.is_active) { increment_queue_count(dqm, qpd, q); - if (!dqm->dev->shared_resources.enable_mes) + if (!dqm->dev->kfd->shared_resources.enable_mes) retval = execute_queues_cpsch(dqm, - KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); else retval = add_queue_mes(dqm, q, qpd); if (retval) @@ -1764,7 +1914,9 @@ static int map_queues_cpsch(struct device_queue_manager *dqm) /* dqm->lock mutex has to be locked before calling this function */ static int unmap_queues_cpsch(struct device_queue_manager *dqm, enum kfd_unmap_queues_filter filter, - uint32_t filter_param, bool reset) + uint32_t filter_param, + uint32_t grace_period, + bool reset) { int retval = 0; struct mqd_manager *mqd_mgr; @@ -1776,6 +1928,12 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, if (!dqm->active_runlist) return retval; + if (grace_period != USE_DEFAULT_GRACE_PERIOD) { + retval = pm_update_grace_period(&dqm->packet_mgr, grace_period); + if (retval) + return retval; + } + retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset); if (retval) return retval; @@ -1808,6 +1966,13 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, return -ETIME; } + /* We need to reset the grace period value for this device */ + if (grace_period != USE_DEFAULT_GRACE_PERIOD) { + if (pm_update_grace_period(&dqm->packet_mgr, + USE_DEFAULT_GRACE_PERIOD)) + pr_err("Failed to reset grace period\n"); + } + pm_release_ib(&dqm->packet_mgr); dqm->active_runlist = false; @@ -1823,7 +1988,7 @@ static int reset_queues_cpsch(struct device_queue_manager *dqm, dqm_lock(dqm); retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID, - pasid, true); + pasid, USE_DEFAULT_GRACE_PERIOD, true); dqm_unlock(dqm); return retval; @@ -1832,19 +1997,45 @@ static int reset_queues_cpsch(struct device_queue_manager *dqm, /* dqm->lock mutex has to be locked before calling this function */ static int execute_queues_cpsch(struct device_queue_manager *dqm, enum kfd_unmap_queues_filter filter, - uint32_t filter_param) + uint32_t filter_param, + uint32_t grace_period) { int retval; if (dqm->is_hws_hang) return -EIO; - retval = unmap_queues_cpsch(dqm, filter, filter_param, false); + retval = unmap_queues_cpsch(dqm, filter, filter_param, grace_period, false); if (retval) return retval; return map_queues_cpsch(dqm); } +static int wait_on_destroy_queue(struct device_queue_manager *dqm, + struct queue *q) +{ + struct kfd_process_device *pdd = kfd_get_process_device_data(q->device, + q->process); + int ret = 0; + + if (pdd->qpd.is_debug) + return ret; + + q->properties.is_being_destroyed = true; + + if (pdd->process->debug_trap_enabled && q->properties.is_suspended) { + dqm_unlock(dqm); + mutex_unlock(&q->process->mutex); + ret = wait_event_interruptible(dqm->destroy_wait, + !q->properties.is_suspended); + + mutex_lock(&q->process->mutex); + dqm_lock(dqm); + } + + return ret; +} + static int destroy_queue_cpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q) @@ -1864,11 +2055,16 @@ static int 
destroy_queue_cpsch(struct device_queue_manager *dqm, q->properties.queue_id); } - retval = 0; - /* remove queue from list to prevent rescheduling after preemption */ dqm_lock(dqm); + retval = wait_on_destroy_queue(dqm, q); + + if (retval) { + dqm_unlock(dqm); + return retval; + } + if (qpd->is_debug) { /* * error, currently we do not allow to destroy a queue @@ -1893,10 +2089,11 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, list_del(&q->list); qpd->queue_count--; if (q->properties.is_active) { - if (!dqm->dev->shared_resources.enable_mes) { + if (!dqm->dev->kfd->shared_resources.enable_mes) { decrement_queue_count(dqm, qpd, q); retval = execute_queues_cpsch(dqm, - KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, + USE_DEFAULT_GRACE_PERIOD); if (retval == -ETIME) qpd->reset_wavefronts = true; } else { @@ -1914,7 +2111,14 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, dqm_unlock(dqm); - /* Do free_mqd after dqm_unlock(dqm) to avoid circular locking */ + /* + * Do free_mqd and raise delete event after dqm_unlock(dqm) to avoid + * circular locking + */ + kfd_dbg_ev_raise(KFD_EC_MASK(EC_DEVICE_QUEUE_DELETE), + qpd->pqm->process, q->device, + -1, false, NULL, 0); + mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); return retval; @@ -2056,7 +2260,7 @@ static int get_wave_state(struct device_queue_manager *dqm, mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP]; if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE || - q->properties.is_active || !q->device->cwsr_enabled || + q->properties.is_active || !q->device->kfd->cwsr_enabled || !mqd_mgr->get_wave_state) { dqm_unlock(dqm); return -EINVAL; @@ -2069,8 +2273,8 @@ static int get_wave_state(struct device_queue_manager *dqm, * and the queue should be protected against destruction by the process * lock. 
*/ - return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack, - ctl_stack_used_size, save_area_used_size); + return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, &q->properties, + ctl_stack, ctl_stack_used_size, save_area_used_size); } static void get_queue_checkpoint_info(struct device_queue_manager *dqm, @@ -2105,7 +2309,7 @@ static int checkpoint_mqd(struct device_queue_manager *dqm, dqm_lock(dqm); - if (q->properties.is_active || !q->device->cwsr_enabled) { + if (q->properties.is_active || !q->device->kfd->cwsr_enabled) { r = -EINVAL; goto dqm_unlock; } @@ -2158,7 +2362,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, if (q->properties.is_active) { decrement_queue_count(dqm, qpd, q); - if (dqm->dev->shared_resources.enable_mes) { + if (dqm->dev->kfd->shared_resources.enable_mes) { retval = remove_queue_mes(dqm, q, qpd); if (retval) pr_err("Failed to remove queue %d\n", @@ -2180,8 +2384,8 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, } } - if (!dqm->dev->shared_resources.enable_mes) - retval = execute_queues_cpsch(dqm, filter, 0); + if (!dqm->dev->kfd->shared_resources.enable_mes) + retval = execute_queues_cpsch(dqm, filter, 0, USE_DEFAULT_GRACE_PERIOD); if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) { pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev); @@ -2242,12 +2446,13 @@ out_free: static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm) { int retval; - struct kfd_dev *dev = dqm->dev; + struct kfd_node *dev = dqm->dev; struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd; uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size * get_num_all_sdma_engines(dqm) * - dev->device_info.num_sdma_queues_per_engine + - dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size; + dev->kfd->device_info.num_sdma_queues_per_engine + + (dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size * + NUM_XCC(dqm->dev->xcc_mask)); retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size, &(mem_obj->gtt_mem), &(mem_obj->gpu_addr), @@ -2256,7 +2461,7 @@ static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm) return retval; } -struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) +struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev) { struct device_queue_manager *dqm; @@ -2373,20 +2578,22 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) if (init_mqd_managers(dqm)) goto out_free; - if (!dev->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) { + if (!dev->kfd->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) { pr_err("Failed to allocate hiq sdma mqd trunk buffer\n"); goto out_free; } - if (!dqm->ops.initialize(dqm)) + if (!dqm->ops.initialize(dqm)) { + init_waitqueue_head(&dqm->destroy_wait); return dqm; + } out_free: kfree(dqm); return NULL; } -static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, +static void deallocate_hiq_sdma_mqd(struct kfd_node *dev, struct kfd_mem_obj *mqd) { WARN(!mqd, "No hiq sdma mqd trunk to free"); @@ -2396,8 +2603,9 @@ static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, void device_queue_manager_uninit(struct device_queue_manager *dqm) { + dqm->ops.stop(dqm); dqm->ops.uninitialize(dqm); - if (!dqm->dev->shared_resources.enable_mes) + if (!dqm->dev->kfd->shared_resources.enable_mes) deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd); kfree(dqm); } @@ -2426,6 +2634,498 @@ static void kfd_process_hw_exception(struct work_struct *work) amdgpu_amdkfd_gpu_reset(dqm->dev->adev); } +int reserve_debug_trap_vmid(struct 
device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + int r; + int updated_vmid_mask; + + if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { + pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy); + return -EINVAL; + } + + dqm_lock(dqm); + + if (dqm->trap_debug_vmid != 0) { + pr_err("Trap debug id already reserved\n"); + r = -EBUSY; + goto out_unlock; + } + + r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, + USE_DEFAULT_GRACE_PERIOD, false); + if (r) + goto out_unlock; + + updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap; + updated_vmid_mask &= ~(1 << dqm->dev->vm_info.last_vmid_kfd); + + dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask; + dqm->trap_debug_vmid = dqm->dev->vm_info.last_vmid_kfd; + r = set_sched_resources(dqm); + if (r) + goto out_unlock; + + r = map_queues_cpsch(dqm); + if (r) + goto out_unlock; + + pr_debug("Reserved VMID for trap debug: %i\n", dqm->trap_debug_vmid); + +out_unlock: + dqm_unlock(dqm); + return r; +} + +/* + * Releases vmid for the trap debugger + */ +int release_debug_trap_vmid(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + int r; + int updated_vmid_mask; + uint32_t trap_debug_vmid; + + if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { + pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy); + return -EINVAL; + } + + dqm_lock(dqm); + trap_debug_vmid = dqm->trap_debug_vmid; + if (dqm->trap_debug_vmid == 0) { + pr_err("Trap debug id is not reserved\n"); + r = -EINVAL; + goto out_unlock; + } + + r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, + USE_DEFAULT_GRACE_PERIOD, false); + if (r) + goto out_unlock; + + updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap; + updated_vmid_mask |= (1 << dqm->dev->vm_info.last_vmid_kfd); + + dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask; + dqm->trap_debug_vmid = 0; + r = set_sched_resources(dqm); + if (r) + goto out_unlock; + + r = map_queues_cpsch(dqm); + if (r) + goto out_unlock; + + pr_debug("Released VMID for trap debug: %i\n", trap_debug_vmid); + +out_unlock: + dqm_unlock(dqm); + return r; +} + +#define QUEUE_NOT_FOUND -1 +/* invalidate queue operation in array */ +static void q_array_invalidate(uint32_t num_queues, uint32_t *queue_ids) +{ + int i; + + for (i = 0; i < num_queues; i++) + queue_ids[i] |= KFD_DBG_QUEUE_INVALID_MASK; +} + +/* find queue index in array */ +static int q_array_get_index(unsigned int queue_id, + uint32_t num_queues, + uint32_t *queue_ids) +{ + int i; + + for (i = 0; i < num_queues; i++) + if (queue_id == (queue_ids[i] & ~KFD_DBG_QUEUE_INVALID_MASK)) + return i; + + return QUEUE_NOT_FOUND; +} + +struct copy_context_work_handler_workarea { + struct work_struct copy_context_work; + struct kfd_process *p; +}; + +static void copy_context_work_handler (struct work_struct *work) +{ + struct copy_context_work_handler_workarea *workarea; + struct mqd_manager *mqd_mgr; + struct queue *q; + struct mm_struct *mm; + struct kfd_process *p; + uint32_t tmp_ctl_stack_used_size, tmp_save_area_used_size; + int i; + + workarea = container_of(work, + struct copy_context_work_handler_workarea, + copy_context_work); + + p = workarea->p; + mm = get_task_mm(p->lead_thread); + + if (!mm) + return; + + kthread_use_mm(mm); + for (i = 0; i < p->n_pdds; i++) { + struct kfd_process_device *pdd = p->pdds[i]; + struct device_queue_manager *dqm = pdd->dev->dqm; + struct qcm_process_device *qpd = &pdd->qpd; + + list_for_each_entry(q, 
&qpd->queues_list, list) { + mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP]; + + /* We ignore the return value from get_wave_state + * because + * i) right now, it always returns 0, and + * ii) if we hit an error, we would continue to the + * next queue anyway. + */ + mqd_mgr->get_wave_state(mqd_mgr, + q->mqd, + &q->properties, + (void __user *) q->properties.ctx_save_restore_area_address, + &tmp_ctl_stack_used_size, + &tmp_save_area_used_size); + } + } + kthread_unuse_mm(mm); + mmput(mm); +} + +static uint32_t *get_queue_ids(uint32_t num_queues, uint32_t *usr_queue_id_array) +{ + size_t array_size = num_queues * sizeof(uint32_t); + uint32_t *queue_ids = NULL; + + if (!usr_queue_id_array) + return NULL; + + queue_ids = kzalloc(array_size, GFP_KERNEL); + if (!queue_ids) + return ERR_PTR(-ENOMEM); + + if (copy_from_user(queue_ids, usr_queue_id_array, array_size)) + return ERR_PTR(-EFAULT); + + return queue_ids; +} + +int resume_queues(struct kfd_process *p, + uint32_t num_queues, + uint32_t *usr_queue_id_array) +{ + uint32_t *queue_ids = NULL; + int total_resumed = 0; + int i; + + if (usr_queue_id_array) { + queue_ids = get_queue_ids(num_queues, usr_queue_id_array); + + if (IS_ERR(queue_ids)) + return PTR_ERR(queue_ids); + + /* mask all queues as invalid. unmask per successful request */ + q_array_invalidate(num_queues, queue_ids); + } + + for (i = 0; i < p->n_pdds; i++) { + struct kfd_process_device *pdd = p->pdds[i]; + struct device_queue_manager *dqm = pdd->dev->dqm; + struct qcm_process_device *qpd = &pdd->qpd; + struct queue *q; + int r, per_device_resumed = 0; + + dqm_lock(dqm); + + /* unmask queues that resume or already resumed as valid */ + list_for_each_entry(q, &qpd->queues_list, list) { + int q_idx = QUEUE_NOT_FOUND; + + if (queue_ids) + q_idx = q_array_get_index( + q->properties.queue_id, + num_queues, + queue_ids); + + if (!queue_ids || q_idx != QUEUE_NOT_FOUND) { + int err = resume_single_queue(dqm, &pdd->qpd, q); + + if (queue_ids) { + if (!err) { + queue_ids[q_idx] &= + ~KFD_DBG_QUEUE_INVALID_MASK; + } else { + queue_ids[q_idx] |= + KFD_DBG_QUEUE_ERROR_MASK; + break; + } + } + + if (dqm->dev->kfd->shared_resources.enable_mes) { + wake_up_all(&dqm->destroy_wait); + if (!err) + total_resumed++; + } else { + per_device_resumed++; + } + } + } + + if (!per_device_resumed) { + dqm_unlock(dqm); + continue; + } + + r = execute_queues_cpsch(dqm, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, + 0, + USE_DEFAULT_GRACE_PERIOD); + if (r) { + pr_err("Failed to resume process queues\n"); + if (queue_ids) { + list_for_each_entry(q, &qpd->queues_list, list) { + int q_idx = q_array_get_index( + q->properties.queue_id, + num_queues, + queue_ids); + + /* mask queue as error on resume fail */ + if (q_idx != QUEUE_NOT_FOUND) + queue_ids[q_idx] |= + KFD_DBG_QUEUE_ERROR_MASK; + } + } + } else { + wake_up_all(&dqm->destroy_wait); + total_resumed += per_device_resumed; + } + + dqm_unlock(dqm); + } + + if (queue_ids) { + if (copy_to_user((void __user *)usr_queue_id_array, queue_ids, + num_queues * sizeof(uint32_t))) + pr_err("copy_to_user failed on queue resume\n"); + + kfree(queue_ids); + } + + return total_resumed; +} + +int suspend_queues(struct kfd_process *p, + uint32_t num_queues, + uint32_t grace_period, + uint64_t exception_clear_mask, + uint32_t *usr_queue_id_array) +{ + uint32_t *queue_ids = get_queue_ids(num_queues, usr_queue_id_array); + int total_suspended = 0; + int i; + + if (IS_ERR(queue_ids)) + return PTR_ERR(queue_ids); + + /* mask all queues as invalid. 
umask on successful request */ + q_array_invalidate(num_queues, queue_ids); + + for (i = 0; i < p->n_pdds; i++) { + struct kfd_process_device *pdd = p->pdds[i]; + struct device_queue_manager *dqm = pdd->dev->dqm; + struct qcm_process_device *qpd = &pdd->qpd; + struct queue *q; + int r, per_device_suspended = 0; + + mutex_lock(&p->event_mutex); + dqm_lock(dqm); + + /* unmask queues that suspend or already suspended */ + list_for_each_entry(q, &qpd->queues_list, list) { + int q_idx = q_array_get_index(q->properties.queue_id, + num_queues, + queue_ids); + + if (q_idx != QUEUE_NOT_FOUND) { + int err = suspend_single_queue(dqm, pdd, q); + bool is_mes = dqm->dev->kfd->shared_resources.enable_mes; + + if (!err) { + queue_ids[q_idx] &= ~KFD_DBG_QUEUE_INVALID_MASK; + if (exception_clear_mask && is_mes) + q->properties.exception_status &= + ~exception_clear_mask; + + if (is_mes) + total_suspended++; + else + per_device_suspended++; + } else if (err != -EBUSY) { + r = err; + queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK; + break; + } + } + } + + if (!per_device_suspended) { + dqm_unlock(dqm); + mutex_unlock(&p->event_mutex); + if (total_suspended) + amdgpu_amdkfd_debug_mem_fence(dqm->dev->adev); + continue; + } + + r = execute_queues_cpsch(dqm, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, + grace_period); + + if (r) + pr_err("Failed to suspend process queues.\n"); + else + total_suspended += per_device_suspended; + + list_for_each_entry(q, &qpd->queues_list, list) { + int q_idx = q_array_get_index(q->properties.queue_id, + num_queues, queue_ids); + + if (q_idx == QUEUE_NOT_FOUND) + continue; + + /* mask queue as error on suspend fail */ + if (r) + queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK; + else if (exception_clear_mask) + q->properties.exception_status &= + ~exception_clear_mask; + } + + dqm_unlock(dqm); + mutex_unlock(&p->event_mutex); + amdgpu_device_flush_hdp(dqm->dev->adev, NULL); + } + + if (total_suspended) { + struct copy_context_work_handler_workarea copy_context_worker; + + INIT_WORK_ONSTACK( + ©_context_worker.copy_context_work, + copy_context_work_handler); + + copy_context_worker.p = p; + + schedule_work(©_context_worker.copy_context_work); + + + flush_work(©_context_worker.copy_context_work); + destroy_work_on_stack(©_context_worker.copy_context_work); + } + + if (copy_to_user((void __user *)usr_queue_id_array, queue_ids, + num_queues * sizeof(uint32_t))) + pr_err("copy_to_user failed on queue suspend\n"); + + kfree(queue_ids); + + return total_suspended; +} + +static uint32_t set_queue_type_for_user(struct queue_properties *q_props) +{ + switch (q_props->type) { + case KFD_QUEUE_TYPE_COMPUTE: + return q_props->format == KFD_QUEUE_FORMAT_PM4 + ? 
KFD_IOC_QUEUE_TYPE_COMPUTE + : KFD_IOC_QUEUE_TYPE_COMPUTE_AQL; + case KFD_QUEUE_TYPE_SDMA: + return KFD_IOC_QUEUE_TYPE_SDMA; + case KFD_QUEUE_TYPE_SDMA_XGMI: + return KFD_IOC_QUEUE_TYPE_SDMA_XGMI; + default: + WARN_ONCE(true, "queue type not recognized!"); + return 0xffffffff; + }; +} + +void set_queue_snapshot_entry(struct queue *q, + uint64_t exception_clear_mask, + struct kfd_queue_snapshot_entry *qss_entry) +{ + qss_entry->ring_base_address = q->properties.queue_address; + qss_entry->write_pointer_address = (uint64_t)q->properties.write_ptr; + qss_entry->read_pointer_address = (uint64_t)q->properties.read_ptr; + qss_entry->ctx_save_restore_address = + q->properties.ctx_save_restore_area_address; + qss_entry->ctx_save_restore_area_size = + q->properties.ctx_save_restore_area_size; + qss_entry->exception_status = q->properties.exception_status; + qss_entry->queue_id = q->properties.queue_id; + qss_entry->gpu_id = q->device->id; + qss_entry->ring_size = (uint32_t)q->properties.queue_size; + qss_entry->queue_type = set_queue_type_for_user(&q->properties); + q->properties.exception_status &= ~exception_clear_mask; +} + +int debug_lock_and_unmap(struct device_queue_manager *dqm) +{ + int r; + + if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { + pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy); + return -EINVAL; + } + + if (!kfd_dbg_is_per_vmid_supported(dqm->dev)) + return 0; + + dqm_lock(dqm); + + r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 0, false); + if (r) + dqm_unlock(dqm); + + return r; +} + +int debug_map_and_unlock(struct device_queue_manager *dqm) +{ + int r; + + if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { + pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy); + return -EINVAL; + } + + if (!kfd_dbg_is_per_vmid_supported(dqm->dev)) + return 0; + + r = map_queues_cpsch(dqm); + + dqm_unlock(dqm); + + return r; +} + +int debug_refresh_runlist(struct device_queue_manager *dqm) +{ + int r = debug_lock_and_unmap(dqm); + + if (r) + return r; + + return debug_map_and_unlock(dqm); +} + #if defined(CONFIG_DEBUG_FS) static void seq_reg_dump(struct seq_file *m, @@ -2452,52 +3152,66 @@ static void seq_reg_dump(struct seq_file *m, int dqm_debugfs_hqds(struct seq_file *m, void *data) { struct device_queue_manager *dqm = data; + uint32_t xcc_mask = dqm->dev->xcc_mask; uint32_t (*dump)[2], n_regs; int pipe, queue; - int r = 0; + int r = 0, xcc_id; + uint32_t sdma_engine_start; if (!dqm->sched_running) { seq_puts(m, " Device is stopped\n"); return 0; } - r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, - KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, - &dump, &n_regs); - if (!r) { - seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n", - KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1, - KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm), - KFD_CIK_HIQ_QUEUE); - seq_reg_dump(m, dump, n_regs); + for_each_inst(xcc_id, xcc_mask) { + r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, + KFD_CIK_HIQ_PIPE, + KFD_CIK_HIQ_QUEUE, &dump, + &n_regs, xcc_id); + if (!r) { + seq_printf( + m, + " Inst %d, HIQ on MEC %d Pipe %d Queue %d\n", + xcc_id, + KFD_CIK_HIQ_PIPE / get_pipes_per_mec(dqm) + 1, + KFD_CIK_HIQ_PIPE % get_pipes_per_mec(dqm), + KFD_CIK_HIQ_QUEUE); + seq_reg_dump(m, dump, n_regs); - kfree(dump); - } + kfree(dump); + } - for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { - int pipe_offset = pipe * get_queues_per_pipe(dqm); + for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { + int pipe_offset = pipe * get_queues_per_pipe(dqm); - for (queue = 0; queue < 
get_queues_per_pipe(dqm); queue++) { - if (!test_bit(pipe_offset + queue, - dqm->dev->shared_resources.cp_queue_bitmap)) - continue; + for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) { + if (!test_bit(pipe_offset + queue, + dqm->dev->kfd->shared_resources.cp_queue_bitmap)) + continue; - r = dqm->dev->kfd2kgd->hqd_dump( - dqm->dev->adev, pipe, queue, &dump, &n_regs); - if (r) - break; + r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, + pipe, queue, + &dump, &n_regs, + xcc_id); + if (r) + break; - seq_printf(m, " CP Pipe %d, Queue %d\n", - pipe, queue); - seq_reg_dump(m, dump, n_regs); + seq_printf(m, + " Inst %d, CP Pipe %d, Queue %d\n", + xcc_id, pipe, queue); + seq_reg_dump(m, dump, n_regs); - kfree(dump); + kfree(dump); + } } } - for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) { + sdma_engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm); + for (pipe = sdma_engine_start; + pipe < (sdma_engine_start + get_num_all_sdma_engines(dqm)); + pipe++) { for (queue = 0; - queue < dqm->dev->device_info.num_sdma_queues_per_engine; + queue < dqm->dev->kfd->device_info.num_sdma_queues_per_engine; queue++) { r = dqm->dev->kfd2kgd->hqd_sdma_dump( dqm->dev->adev, pipe, queue, &dump, &n_regs); @@ -2526,7 +3240,8 @@ int dqm_debugfs_hang_hws(struct device_queue_manager *dqm) return r; } dqm->active_runlist = true; - r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); + r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, + 0, USE_DEFAULT_GRACE_PERIOD); dqm_unlock(dqm); return r; |
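Much of the diff above converts the driver's uint64_t SDMA queue masks (sdma_bitmap, xgmi_sdma_bitmap) to the kernel bitmap API so that more than 64 queues can be tracked per node. The stand-alone sketch below shows the same allocation pattern in isolation; the names here are illustrative, not the driver's.

```c
/* Sketch of the bitmask -> bitmap conversion pattern used for
 * dqm->sdma_bitmap above; names are illustrative only. */
#include <linux/bitmap.h>
#include <linux/bitops.h>

#define MAX_QUEUES 128			/* a plain uint64_t mask tops out at 64 */

static DECLARE_BITMAP(queue_bitmap, MAX_QUEUES);

static void queues_init(unsigned int num_queues, const unsigned long *reserved)
{
	bitmap_zero(queue_bitmap, MAX_QUEUES);
	bitmap_set(queue_bitmap, 0, num_queues);	/* mark all as free */
	/* Mask out reserved queues, as init_sdma_bitmaps() does above. */
	bitmap_andnot(queue_bitmap, queue_bitmap, reserved, MAX_QUEUES);
}

static int queue_alloc(unsigned int num_queues)
{
	unsigned int bit;

	if (bitmap_empty(queue_bitmap, MAX_QUEUES))
		return -1;
	bit = find_first_bit(queue_bitmap, num_queues);
	if (bit >= num_queues)
		return -1;			/* no free queue in range */
	clear_bit(bit, queue_bitmap);		/* claim it */
	return bit;
}

static void queue_free(unsigned int id)
{
	set_bit(id, queue_bitmap);
}
```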