diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 271 |
1 files changed, 207 insertions, 64 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 98528ee94c15..71272f40feef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -45,6 +45,7 @@ #include "amdgpu_ras.h" #include "amdgpu_reset.h" #include "amd_pcie.h" +#include "amdgpu_userq.h" void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev) { @@ -82,7 +83,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev) { struct amdgpu_device *adev = drm_to_adev(dev); - if (adev == NULL) + if (adev == NULL || !adev->num_ip_blocks) return; amdgpu_unregister_gpu_instance(adev); @@ -90,8 +91,8 @@ void amdgpu_driver_unload_kms(struct drm_device *dev) if (adev->rmmio == NULL) return; - if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DRV_UNLOAD)) - DRM_WARN("smart shift update failed\n"); + if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DRV_UNLOAD)) + drm_warn(dev, "smart shift update failed\n"); amdgpu_acpi_fini(adev); amdgpu_device_fini_hw(adev); @@ -104,7 +105,7 @@ void amdgpu_register_gpu_instance(struct amdgpu_device *adev) mutex_lock(&mgpu_info.mutex); if (mgpu_info.num_gpu >= MAX_GPU_INSTANCE) { - DRM_ERROR("Cannot register more gpu instance\n"); + drm_err(adev_to_drm(adev), "Cannot register more gpu instance\n"); mutex_unlock(&mgpu_info.mutex); return; } @@ -160,8 +161,8 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) if (acpi_status) dev_dbg(dev->dev, "Error during ACPI methods call\n"); - if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DRV_LOAD)) - DRM_WARN("smart shift update failed\n"); + if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DRV_LOAD)) + drm_warn(dev, "smart shift update failed\n"); out: if (r) @@ -200,6 +201,9 @@ static enum amd_ip_block_type type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ? AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN; break; + case AMDGPU_HW_IP_VPE: + type = AMD_IP_BLOCK_TYPE_VPE; + break; default: type = AMD_IP_BLOCK_TYPE_NUM; break; @@ -370,6 +374,62 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, return 0; } +static int amdgpu_userq_metadata_info_gfx(struct amdgpu_device *adev, + struct drm_amdgpu_info *info, + struct drm_amdgpu_info_uq_metadata_gfx *meta) +{ + int ret = -EOPNOTSUPP; + + if (adev->gfx.funcs->get_gfx_shadow_info) { + struct amdgpu_gfx_shadow_info shadow = {}; + + adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow, true); + meta->shadow_size = shadow.shadow_size; + meta->shadow_alignment = shadow.shadow_alignment; + meta->csa_size = shadow.csa_size; + meta->csa_alignment = shadow.csa_alignment; + ret = 0; + } + + return ret; +} + +static int amdgpu_userq_metadata_info_compute(struct amdgpu_device *adev, + struct drm_amdgpu_info *info, + struct drm_amdgpu_info_uq_metadata_compute *meta) +{ + int ret = -EOPNOTSUPP; + + if (adev->gfx.funcs->get_gfx_shadow_info) { + struct amdgpu_gfx_shadow_info shadow = {}; + + adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow, true); + meta->eop_size = shadow.eop_size; + meta->eop_alignment = shadow.eop_alignment; + ret = 0; + } + + return ret; +} + +static int amdgpu_userq_metadata_info_sdma(struct amdgpu_device *adev, + struct drm_amdgpu_info *info, + struct drm_amdgpu_info_uq_metadata_sdma *meta) +{ + int ret = -EOPNOTSUPP; + + if (adev->sdma.get_csa_info) { + struct amdgpu_sdma_csa_info csa = {}; + + adev->sdma.get_csa_info(adev, &csa); + meta->csa_size = csa.size; + meta->csa_alignment = csa.alignment; + ret = 0; + } + + return ret; +} + static int amdgpu_hw_ip_info(struct amdgpu_device *adev, struct drm_amdgpu_info *info, struct drm_amdgpu_info_hw_ip *result) @@ -378,6 +438,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, uint32_t ib_size_alignment = 0; enum amd_ip_block_type type; unsigned int num_rings = 0; + uint32_t num_slots = 0; unsigned int i, j; if (info->query_hw_ip.ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT) @@ -387,24 +448,45 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, case AMDGPU_HW_IP_GFX: type = AMD_IP_BLOCK_TYPE_GFX; for (i = 0; i < adev->gfx.num_gfx_rings; i++) - if (adev->gfx.gfx_ring[i].sched.ready) + if (adev->gfx.gfx_ring[i].sched.ready && + !adev->gfx.gfx_ring[i].no_user_submission) ++num_rings; + + if (!adev->gfx.disable_uq) { + for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) + num_slots += hweight32(adev->mes.gfx_hqd_mask[i]); + } + ib_start_alignment = 32; ib_size_alignment = 32; break; case AMDGPU_HW_IP_COMPUTE: type = AMD_IP_BLOCK_TYPE_GFX; for (i = 0; i < adev->gfx.num_compute_rings; i++) - if (adev->gfx.compute_ring[i].sched.ready) + if (adev->gfx.compute_ring[i].sched.ready && + !adev->gfx.compute_ring[i].no_user_submission) ++num_rings; + + if (!adev->sdma.disable_uq) { + for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) + num_slots += hweight32(adev->mes.compute_hqd_mask[i]); + } + ib_start_alignment = 32; ib_size_alignment = 32; break; case AMDGPU_HW_IP_DMA: type = AMD_IP_BLOCK_TYPE_SDMA; for (i = 0; i < adev->sdma.num_instances; i++) - if (adev->sdma.instance[i].ring.sched.ready) + if (adev->sdma.instance[i].ring.sched.ready && + !adev->sdma.instance[i].ring.no_user_submission) ++num_rings; + + if (!adev->gfx.disable_uq) { + for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) + num_slots += hweight32(adev->mes.sdma_hqd_mask[i]); + } + ib_start_alignment = 256; ib_size_alignment = 4; break; @@ -414,7 +496,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->uvd.harvest_config & (1 << i)) continue; - if (adev->uvd.inst[i].ring.sched.ready) + if (adev->uvd.inst[i].ring.sched.ready && + !adev->uvd.inst[i].ring.no_user_submission) ++num_rings; } ib_start_alignment = 256; @@ -423,7 +506,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, case AMDGPU_HW_IP_VCE: type = AMD_IP_BLOCK_TYPE_VCE; for (i = 0; i < adev->vce.num_rings; i++) - if (adev->vce.ring[i].sched.ready) + if (adev->vce.ring[i].sched.ready && + !adev->vce.ring[i].no_user_submission) ++num_rings; ib_start_alignment = 256; ib_size_alignment = 4; @@ -435,7 +519,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, continue; for (j = 0; j < adev->uvd.num_enc_rings; j++) - if (adev->uvd.inst[i].ring_enc[j].sched.ready) + if (adev->uvd.inst[i].ring_enc[j].sched.ready && + !adev->uvd.inst[i].ring_enc[j].no_user_submission) ++num_rings; } ib_start_alignment = 256; @@ -447,7 +532,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->vcn.harvest_config & (1 << i)) continue; - if (adev->vcn.inst[i].ring_dec.sched.ready) + if (adev->vcn.inst[i].ring_dec.sched.ready && + !adev->vcn.inst[i].ring_dec.no_user_submission) ++num_rings; } ib_start_alignment = 256; @@ -459,8 +545,9 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->vcn.harvest_config & (1 << i)) continue; - for (j = 0; j < adev->vcn.num_enc_rings; j++) - if (adev->vcn.inst[i].ring_enc[j].sched.ready) + for (j = 0; j < adev->vcn.inst[i].num_enc_rings; j++) + if (adev->vcn.inst[i].ring_enc[j].sched.ready && + !adev->vcn.inst[i].ring_enc[j].no_user_submission) ++num_rings; } ib_start_alignment = 256; @@ -475,7 +562,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, continue; for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) - if (adev->jpeg.inst[i].ring_dec[j].sched.ready) + if (adev->jpeg.inst[i].ring_dec[j].sched.ready && + !adev->jpeg.inst[i].ring_dec[j].no_user_submission) ++num_rings; } ib_start_alignment = 256; @@ -483,7 +571,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, break; case AMDGPU_HW_IP_VPE: type = AMD_IP_BLOCK_TYPE_VPE; - if (adev->vpe.ring.sched.ready) + if (adev->vpe.ring.sched.ready && + !adev->vpe.ring.no_user_submission) ++num_rings; ib_start_alignment = 256; ib_size_alignment = 4; @@ -539,6 +628,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, } result->capabilities_flags = 0; result->available_rings = (1 << num_rings) - 1; + result->userq_num_slots = num_slots; result->ib_start_alignment = ib_start_alignment; result->ib_size_alignment = ib_size_alignment; return 0; @@ -670,6 +760,9 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) case AMD_IP_BLOCK_TYPE_UVD: count = adev->uvd.num_uvd_inst; break; + case AMD_IP_BLOCK_TYPE_VPE: + count = adev->vpe.num_instances; + break; /* For all other IP block types not listed in the switch statement * the ip status is valid here and the instance count is one. */ @@ -707,7 +800,8 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ui64 = atomic64_read(&adev->num_vram_cpu_page_faults); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_VRAM_USAGE: - ui64 = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager); + ui64 = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ? + ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) : 0; return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_VIS_VRAM_USAGE: ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr); @@ -753,8 +847,8 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) mem.vram.usable_heap_size = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size) - AMDGPU_VM_RESERVED_VRAM; - mem.vram.heap_usage = - ttm_resource_manager_usage(vram_man); + mem.vram.heap_usage = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ? + ttm_resource_manager_usage(vram_man) : 0; mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4; mem.cpu_accessible_vram.total_heap_size = @@ -779,68 +873,59 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ? -EFAULT : 0; } case AMDGPU_INFO_READ_MMR_REG: { - int ret = 0; - unsigned int n, alloc_size; - uint32_t *regs; unsigned int se_num = (info->read_mmr_reg.instance >> AMDGPU_INFO_MMR_SE_INDEX_SHIFT) & AMDGPU_INFO_MMR_SE_INDEX_MASK; unsigned int sh_num = (info->read_mmr_reg.instance >> AMDGPU_INFO_MMR_SH_INDEX_SHIFT) & AMDGPU_INFO_MMR_SH_INDEX_MASK; - - if (!down_read_trylock(&adev->reset_domain->sem)) - return -ENOENT; + unsigned int alloc_size; + uint32_t *regs; + int ret; /* set full masks if the userspace set all bits * in the bitfields */ - if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK) { + if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK) se_num = 0xffffffff; - } else if (se_num >= AMDGPU_GFX_MAX_SE) { - ret = -EINVAL; - goto out; - } + else if (se_num >= AMDGPU_GFX_MAX_SE) + return -EINVAL; - if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) { + if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) sh_num = 0xffffffff; - } else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE) { - ret = -EINVAL; - goto out; - } + else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE) + return -EINVAL; - if (info->read_mmr_reg.count > 128) { - ret = -EINVAL; - goto out; - } + if (info->read_mmr_reg.count > 128) + return -EINVAL; - regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL); - if (!regs) { - ret = -ENOMEM; - goto out; - } + regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), + GFP_KERNEL); + if (!regs) + return -ENOMEM; + down_read(&adev->reset_domain->sem); alloc_size = info->read_mmr_reg.count * sizeof(*regs); - amdgpu_gfx_off_ctrl(adev, false); + ret = 0; for (i = 0; i < info->read_mmr_reg.count; i++) { if (amdgpu_asic_read_register(adev, se_num, sh_num, info->read_mmr_reg.dword_offset + i, ®s[i])) { DRM_DEBUG_KMS("unallowed offset %#x\n", info->read_mmr_reg.dword_offset + i); - kfree(regs); - amdgpu_gfx_off_ctrl(adev, true); ret = -EFAULT; - goto out; + break; } } amdgpu_gfx_off_ctrl(adev, true); - n = copy_to_user(out, regs, min(size, alloc_size)); - kfree(regs); - ret = (n ? -EFAULT : 0); -out: up_read(&adev->reset_domain->sem); + + if (!ret) { + ret = copy_to_user(out, regs, min(size, alloc_size)) + ? -EFAULT : 0; + } + kfree(regs); return ret; } case AMDGPU_INFO_DEV_INFO: { @@ -848,7 +933,7 @@ out: uint64_t vm_size; uint32_t pcie_gen_mask, pcie_width_mask; - dev_info = kzalloc(sizeof(*dev_info), GFP_KERNEL); + dev_info = kzalloc_obj(*dev_info); if (!dev_info) return -ENOMEM; @@ -888,6 +973,19 @@ out: if (adev->gfx.config.ta_cntl2_truncate_coord_mode) dev_info->ids_flags |= AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD; + /* Gang submit is not supported under SRIOV currently */ + if (!amdgpu_sriov_vf(adev)) + dev_info->ids_flags |= AMDGPU_IDS_FLAGS_GANG_SUBMIT; + + if (amdgpu_passthrough(adev)) + dev_info->ids_flags |= (AMDGPU_IDS_FLAGS_MODE_PT << + AMDGPU_IDS_FLAGS_MODE_SHIFT) & + AMDGPU_IDS_FLAGS_MODE_MASK; + else if (amdgpu_sriov_vf(adev)) + dev_info->ids_flags |= (AMDGPU_IDS_FLAGS_MODE_VF << + AMDGPU_IDS_FLAGS_MODE_SHIFT) & + AMDGPU_IDS_FLAGS_MODE_MASK; + vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; vm_size -= AMDGPU_VA_RESERVED_TOP; @@ -969,6 +1067,8 @@ out: } } + dev_info->userq_ip_mask = amdgpu_userq_get_supported_ip_mask(adev); + ret = copy_to_user(out, dev_info, min((size_t)size, sizeof(*dev_info))) ? -EFAULT : 0; kfree(dev_info); @@ -1220,7 +1320,7 @@ out: return -EINVAL; } - caps = kzalloc(sizeof(*caps), GFP_KERNEL); + caps = kzalloc_obj(*caps); if (!caps) return -ENOMEM; @@ -1284,6 +1384,38 @@ out: return copy_to_user(out, &gpuvm_fault, min((size_t)size, sizeof(gpuvm_fault))) ? -EFAULT : 0; } + case AMDGPU_INFO_UQ_FW_AREAS: { + struct drm_amdgpu_info_uq_metadata meta_info = {}; + + switch (info->query_hw_ip.type) { + case AMDGPU_HW_IP_GFX: + ret = amdgpu_userq_metadata_info_gfx(adev, info, &meta_info.gfx); + if (ret) + return ret; + + ret = copy_to_user(out, &meta_info, + min((size_t)size, sizeof(meta_info))) ? -EFAULT : 0; + return 0; + case AMDGPU_HW_IP_COMPUTE: + ret = amdgpu_userq_metadata_info_compute(adev, info, &meta_info.compute); + if (ret) + return ret; + + ret = copy_to_user(out, &meta_info, + min((size_t)size, sizeof(meta_info))) ? -EFAULT : 0; + return 0; + case AMDGPU_HW_IP_DMA: + ret = amdgpu_userq_metadata_info_sdma(adev, info, &meta_info.sdma); + if (ret) + return ret; + + ret = copy_to_user(out, &meta_info, + min((size_t)size, sizeof(meta_info))) ? -EFAULT : 0; + return 0; + default: + return -EINVAL; + } + } default: DRM_DEBUG_KMS("Invalid request %d\n", info->query); return -EINVAL; @@ -1304,6 +1436,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) { struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_fpriv *fpriv; + struct drm_exec exec; int r, pasid; /* Ensure IB tests are run on ring */ @@ -1337,15 +1470,22 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) if (r) goto error_pasid; - r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id); + amdgpu_debugfs_vm_init(file_priv); + + r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id, pasid); if (r) goto error_pasid; - r = amdgpu_vm_set_pasid(adev, &fpriv->vm, pasid); - if (r) - goto error_vm; + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); + drm_exec_until_all_locked(&exec) { + r = amdgpu_vm_lock_pd(&fpriv->vm, &exec, 0); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto error_vm; + } fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL); + drm_exec_fini(&exec); if (!fpriv->prt_va) { r = -ENOMEM; goto error_vm; @@ -1367,6 +1507,13 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) mutex_init(&fpriv->bo_list_lock); idr_init_base(&fpriv->bo_list_handles, 1); + r = amdgpu_userq_mgr_init(&fpriv->userq_mgr, file_priv, adev); + if (r) + drm_warn(adev_to_drm(adev), + "Failed to init usermode queue manager (%d), use legacy workload submission only\n", + r); + + amdgpu_evf_mgr_init(&fpriv->evf_mgr); amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev); file_priv->driver_priv = fpriv; @@ -1376,15 +1523,12 @@ error_vm: amdgpu_vm_fini(adev, &fpriv->vm); error_pasid: - if (pasid) { + if (pasid) amdgpu_pasid_free(pasid); - amdgpu_vm_set_pasid(adev, &fpriv->vm, 0); - } kfree(fpriv); out_suspend: - pm_runtime_mark_last_busy(dev->dev); pm_put: pm_runtime_put_autosuspend(dev->dev); @@ -1452,7 +1596,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, kfree(fpriv); file_priv->driver_priv = NULL; - pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); } |
