summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c271
1 files changed, 207 insertions, 64 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 98528ee94c15..71272f40feef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -45,6 +45,7 @@
#include "amdgpu_ras.h"
#include "amdgpu_reset.h"
#include "amd_pcie.h"
+#include "amdgpu_userq.h"
void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
{
@@ -82,7 +83,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
{
struct amdgpu_device *adev = drm_to_adev(dev);
- if (adev == NULL)
+ if (adev == NULL || !adev->num_ip_blocks)
return;
amdgpu_unregister_gpu_instance(adev);
@@ -90,8 +91,8 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
if (adev->rmmio == NULL)
return;
- if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DRV_UNLOAD))
- DRM_WARN("smart shift update failed\n");
+ if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DRV_UNLOAD))
+ drm_warn(dev, "smart shift update failed\n");
amdgpu_acpi_fini(adev);
amdgpu_device_fini_hw(adev);
@@ -104,7 +105,7 @@ void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
mutex_lock(&mgpu_info.mutex);
if (mgpu_info.num_gpu >= MAX_GPU_INSTANCE) {
- DRM_ERROR("Cannot register more gpu instance\n");
+ drm_err(adev_to_drm(adev), "Cannot register more gpu instance\n");
mutex_unlock(&mgpu_info.mutex);
return;
}
@@ -160,8 +161,8 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
if (acpi_status)
dev_dbg(dev->dev, "Error during ACPI methods call\n");
- if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DRV_LOAD))
- DRM_WARN("smart shift update failed\n");
+ if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DRV_LOAD))
+ drm_warn(dev, "smart shift update failed\n");
out:
if (r)
@@ -200,6 +201,9 @@ static enum amd_ip_block_type
type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ?
AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN;
break;
+ case AMDGPU_HW_IP_VPE:
+ type = AMD_IP_BLOCK_TYPE_VPE;
+ break;
default:
type = AMD_IP_BLOCK_TYPE_NUM;
break;
@@ -370,6 +374,62 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
return 0;
}
+static int amdgpu_userq_metadata_info_gfx(struct amdgpu_device *adev,
+ struct drm_amdgpu_info *info,
+ struct drm_amdgpu_info_uq_metadata_gfx *meta)
+{
+ int ret = -EOPNOTSUPP;
+
+ if (adev->gfx.funcs->get_gfx_shadow_info) {
+ struct amdgpu_gfx_shadow_info shadow = {};
+
+ adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow, true);
+ meta->shadow_size = shadow.shadow_size;
+ meta->shadow_alignment = shadow.shadow_alignment;
+ meta->csa_size = shadow.csa_size;
+ meta->csa_alignment = shadow.csa_alignment;
+ ret = 0;
+ }
+
+ return ret;
+}
+
+static int amdgpu_userq_metadata_info_compute(struct amdgpu_device *adev,
+ struct drm_amdgpu_info *info,
+ struct drm_amdgpu_info_uq_metadata_compute *meta)
+{
+ int ret = -EOPNOTSUPP;
+
+ if (adev->gfx.funcs->get_gfx_shadow_info) {
+ struct amdgpu_gfx_shadow_info shadow = {};
+
+ adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow, true);
+ meta->eop_size = shadow.eop_size;
+ meta->eop_alignment = shadow.eop_alignment;
+ ret = 0;
+ }
+
+ return ret;
+}
+
+static int amdgpu_userq_metadata_info_sdma(struct amdgpu_device *adev,
+ struct drm_amdgpu_info *info,
+ struct drm_amdgpu_info_uq_metadata_sdma *meta)
+{
+ int ret = -EOPNOTSUPP;
+
+ if (adev->sdma.get_csa_info) {
+ struct amdgpu_sdma_csa_info csa = {};
+
+ adev->sdma.get_csa_info(adev, &csa);
+ meta->csa_size = csa.size;
+ meta->csa_alignment = csa.alignment;
+ ret = 0;
+ }
+
+ return ret;
+}
+
static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
struct drm_amdgpu_info *info,
struct drm_amdgpu_info_hw_ip *result)
@@ -378,6 +438,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
uint32_t ib_size_alignment = 0;
enum amd_ip_block_type type;
unsigned int num_rings = 0;
+ uint32_t num_slots = 0;
unsigned int i, j;
if (info->query_hw_ip.ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
@@ -387,24 +448,45 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
case AMDGPU_HW_IP_GFX:
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
- if (adev->gfx.gfx_ring[i].sched.ready)
+ if (adev->gfx.gfx_ring[i].sched.ready &&
+ !adev->gfx.gfx_ring[i].no_user_submission)
++num_rings;
+
+ if (!adev->gfx.disable_uq) {
+ for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++)
+ num_slots += hweight32(adev->mes.gfx_hqd_mask[i]);
+ }
+
ib_start_alignment = 32;
ib_size_alignment = 32;
break;
case AMDGPU_HW_IP_COMPUTE:
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_compute_rings; i++)
- if (adev->gfx.compute_ring[i].sched.ready)
+ if (adev->gfx.compute_ring[i].sched.ready &&
+ !adev->gfx.compute_ring[i].no_user_submission)
++num_rings;
+
+ if (!adev->sdma.disable_uq) {
+ for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++)
+ num_slots += hweight32(adev->mes.compute_hqd_mask[i]);
+ }
+
ib_start_alignment = 32;
ib_size_alignment = 32;
break;
case AMDGPU_HW_IP_DMA:
type = AMD_IP_BLOCK_TYPE_SDMA;
for (i = 0; i < adev->sdma.num_instances; i++)
- if (adev->sdma.instance[i].ring.sched.ready)
+ if (adev->sdma.instance[i].ring.sched.ready &&
+ !adev->sdma.instance[i].ring.no_user_submission)
++num_rings;
+
+ if (!adev->gfx.disable_uq) {
+ for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++)
+ num_slots += hweight32(adev->mes.sdma_hqd_mask[i]);
+ }
+
ib_start_alignment = 256;
ib_size_alignment = 4;
break;
@@ -414,7 +496,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->uvd.harvest_config & (1 << i))
continue;
- if (adev->uvd.inst[i].ring.sched.ready)
+ if (adev->uvd.inst[i].ring.sched.ready &&
+ !adev->uvd.inst[i].ring.no_user_submission)
++num_rings;
}
ib_start_alignment = 256;
@@ -423,7 +506,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
case AMDGPU_HW_IP_VCE:
type = AMD_IP_BLOCK_TYPE_VCE;
for (i = 0; i < adev->vce.num_rings; i++)
- if (adev->vce.ring[i].sched.ready)
+ if (adev->vce.ring[i].sched.ready &&
+ !adev->vce.ring[i].no_user_submission)
++num_rings;
ib_start_alignment = 256;
ib_size_alignment = 4;
@@ -435,7 +519,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
continue;
for (j = 0; j < adev->uvd.num_enc_rings; j++)
- if (adev->uvd.inst[i].ring_enc[j].sched.ready)
+ if (adev->uvd.inst[i].ring_enc[j].sched.ready &&
+ !adev->uvd.inst[i].ring_enc[j].no_user_submission)
++num_rings;
}
ib_start_alignment = 256;
@@ -447,7 +532,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->vcn.harvest_config & (1 << i))
continue;
- if (adev->vcn.inst[i].ring_dec.sched.ready)
+ if (adev->vcn.inst[i].ring_dec.sched.ready &&
+ !adev->vcn.inst[i].ring_dec.no_user_submission)
++num_rings;
}
ib_start_alignment = 256;
@@ -459,8 +545,9 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->vcn.harvest_config & (1 << i))
continue;
- for (j = 0; j < adev->vcn.num_enc_rings; j++)
- if (adev->vcn.inst[i].ring_enc[j].sched.ready)
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; j++)
+ if (adev->vcn.inst[i].ring_enc[j].sched.ready &&
+ !adev->vcn.inst[i].ring_enc[j].no_user_submission)
++num_rings;
}
ib_start_alignment = 256;
@@ -475,7 +562,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
continue;
for (j = 0; j < adev->jpeg.num_jpeg_rings; j++)
- if (adev->jpeg.inst[i].ring_dec[j].sched.ready)
+ if (adev->jpeg.inst[i].ring_dec[j].sched.ready &&
+ !adev->jpeg.inst[i].ring_dec[j].no_user_submission)
++num_rings;
}
ib_start_alignment = 256;
@@ -483,7 +571,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
break;
case AMDGPU_HW_IP_VPE:
type = AMD_IP_BLOCK_TYPE_VPE;
- if (adev->vpe.ring.sched.ready)
+ if (adev->vpe.ring.sched.ready &&
+ !adev->vpe.ring.no_user_submission)
++num_rings;
ib_start_alignment = 256;
ib_size_alignment = 4;
@@ -539,6 +628,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
}
result->capabilities_flags = 0;
result->available_rings = (1 << num_rings) - 1;
+ result->userq_num_slots = num_slots;
result->ib_start_alignment = ib_start_alignment;
result->ib_size_alignment = ib_size_alignment;
return 0;
@@ -670,6 +760,9 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
case AMD_IP_BLOCK_TYPE_UVD:
count = adev->uvd.num_uvd_inst;
break;
+ case AMD_IP_BLOCK_TYPE_VPE:
+ count = adev->vpe.num_instances;
+ break;
/* For all other IP block types not listed in the switch statement
* the ip status is valid here and the instance count is one.
*/
@@ -707,7 +800,8 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
ui64 = atomic64_read(&adev->num_vram_cpu_page_faults);
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_VRAM_USAGE:
- ui64 = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);
+ ui64 = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ?
+ ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) : 0;
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_VIS_VRAM_USAGE:
ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
@@ -753,8 +847,8 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
mem.vram.usable_heap_size = adev->gmc.real_vram_size -
atomic64_read(&adev->vram_pin_size) -
AMDGPU_VM_RESERVED_VRAM;
- mem.vram.heap_usage =
- ttm_resource_manager_usage(vram_man);
+ mem.vram.heap_usage = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ?
+ ttm_resource_manager_usage(vram_man) : 0;
mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
mem.cpu_accessible_vram.total_heap_size =
@@ -779,68 +873,59 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
? -EFAULT : 0;
}
case AMDGPU_INFO_READ_MMR_REG: {
- int ret = 0;
- unsigned int n, alloc_size;
- uint32_t *regs;
unsigned int se_num = (info->read_mmr_reg.instance >>
AMDGPU_INFO_MMR_SE_INDEX_SHIFT) &
AMDGPU_INFO_MMR_SE_INDEX_MASK;
unsigned int sh_num = (info->read_mmr_reg.instance >>
AMDGPU_INFO_MMR_SH_INDEX_SHIFT) &
AMDGPU_INFO_MMR_SH_INDEX_MASK;
-
- if (!down_read_trylock(&adev->reset_domain->sem))
- return -ENOENT;
+ unsigned int alloc_size;
+ uint32_t *regs;
+ int ret;
/* set full masks if the userspace set all bits
* in the bitfields
*/
- if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK) {
+ if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK)
se_num = 0xffffffff;
- } else if (se_num >= AMDGPU_GFX_MAX_SE) {
- ret = -EINVAL;
- goto out;
- }
+ else if (se_num >= AMDGPU_GFX_MAX_SE)
+ return -EINVAL;
- if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) {
+ if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK)
sh_num = 0xffffffff;
- } else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE) {
- ret = -EINVAL;
- goto out;
- }
+ else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE)
+ return -EINVAL;
- if (info->read_mmr_reg.count > 128) {
- ret = -EINVAL;
- goto out;
- }
+ if (info->read_mmr_reg.count > 128)
+ return -EINVAL;
- regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL);
- if (!regs) {
- ret = -ENOMEM;
- goto out;
- }
+ regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs),
+ GFP_KERNEL);
+ if (!regs)
+ return -ENOMEM;
+ down_read(&adev->reset_domain->sem);
alloc_size = info->read_mmr_reg.count * sizeof(*regs);
-
amdgpu_gfx_off_ctrl(adev, false);
+ ret = 0;
for (i = 0; i < info->read_mmr_reg.count; i++) {
if (amdgpu_asic_read_register(adev, se_num, sh_num,
info->read_mmr_reg.dword_offset + i,
&regs[i])) {
DRM_DEBUG_KMS("unallowed offset %#x\n",
info->read_mmr_reg.dword_offset + i);
- kfree(regs);
- amdgpu_gfx_off_ctrl(adev, true);
ret = -EFAULT;
- goto out;
+ break;
}
}
amdgpu_gfx_off_ctrl(adev, true);
- n = copy_to_user(out, regs, min(size, alloc_size));
- kfree(regs);
- ret = (n ? -EFAULT : 0);
-out:
up_read(&adev->reset_domain->sem);
+
+ if (!ret) {
+ ret = copy_to_user(out, regs, min(size, alloc_size))
+ ? -EFAULT : 0;
+ }
+ kfree(regs);
return ret;
}
case AMDGPU_INFO_DEV_INFO: {
@@ -848,7 +933,7 @@ out:
uint64_t vm_size;
uint32_t pcie_gen_mask, pcie_width_mask;
- dev_info = kzalloc(sizeof(*dev_info), GFP_KERNEL);
+ dev_info = kzalloc_obj(*dev_info);
if (!dev_info)
return -ENOMEM;
@@ -888,6 +973,19 @@ out:
if (adev->gfx.config.ta_cntl2_truncate_coord_mode)
dev_info->ids_flags |= AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD;
+ /* Gang submit is not supported under SRIOV currently */
+ if (!amdgpu_sriov_vf(adev))
+ dev_info->ids_flags |= AMDGPU_IDS_FLAGS_GANG_SUBMIT;
+
+ if (amdgpu_passthrough(adev))
+ dev_info->ids_flags |= (AMDGPU_IDS_FLAGS_MODE_PT <<
+ AMDGPU_IDS_FLAGS_MODE_SHIFT) &
+ AMDGPU_IDS_FLAGS_MODE_MASK;
+ else if (amdgpu_sriov_vf(adev))
+ dev_info->ids_flags |= (AMDGPU_IDS_FLAGS_MODE_VF <<
+ AMDGPU_IDS_FLAGS_MODE_SHIFT) &
+ AMDGPU_IDS_FLAGS_MODE_MASK;
+
vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
vm_size -= AMDGPU_VA_RESERVED_TOP;
@@ -969,6 +1067,8 @@ out:
}
}
+ dev_info->userq_ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+
ret = copy_to_user(out, dev_info,
min((size_t)size, sizeof(*dev_info))) ? -EFAULT : 0;
kfree(dev_info);
@@ -1220,7 +1320,7 @@ out:
return -EINVAL;
}
- caps = kzalloc(sizeof(*caps), GFP_KERNEL);
+ caps = kzalloc_obj(*caps);
if (!caps)
return -ENOMEM;
@@ -1284,6 +1384,38 @@ out:
return copy_to_user(out, &gpuvm_fault,
min((size_t)size, sizeof(gpuvm_fault))) ? -EFAULT : 0;
}
+ case AMDGPU_INFO_UQ_FW_AREAS: {
+ struct drm_amdgpu_info_uq_metadata meta_info = {};
+
+ switch (info->query_hw_ip.type) {
+ case AMDGPU_HW_IP_GFX:
+ ret = amdgpu_userq_metadata_info_gfx(adev, info, &meta_info.gfx);
+ if (ret)
+ return ret;
+
+ ret = copy_to_user(out, &meta_info,
+ min((size_t)size, sizeof(meta_info))) ? -EFAULT : 0;
+ return 0;
+ case AMDGPU_HW_IP_COMPUTE:
+ ret = amdgpu_userq_metadata_info_compute(adev, info, &meta_info.compute);
+ if (ret)
+ return ret;
+
+ ret = copy_to_user(out, &meta_info,
+ min((size_t)size, sizeof(meta_info))) ? -EFAULT : 0;
+ return 0;
+ case AMDGPU_HW_IP_DMA:
+ ret = amdgpu_userq_metadata_info_sdma(adev, info, &meta_info.sdma);
+ if (ret)
+ return ret;
+
+ ret = copy_to_user(out, &meta_info,
+ min((size_t)size, sizeof(meta_info))) ? -EFAULT : 0;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+ }
default:
DRM_DEBUG_KMS("Invalid request %d\n", info->query);
return -EINVAL;
@@ -1304,6 +1436,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
{
struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_fpriv *fpriv;
+ struct drm_exec exec;
int r, pasid;
/* Ensure IB tests are run on ring */
@@ -1337,15 +1470,22 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
if (r)
goto error_pasid;
- r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id);
+ amdgpu_debugfs_vm_init(file_priv);
+
+ r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id, pasid);
if (r)
goto error_pasid;
- r = amdgpu_vm_set_pasid(adev, &fpriv->vm, pasid);
- if (r)
- goto error_vm;
+ drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
+ drm_exec_until_all_locked(&exec) {
+ r = amdgpu_vm_lock_pd(&fpriv->vm, &exec, 0);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto error_vm;
+ }
fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL);
+ drm_exec_fini(&exec);
if (!fpriv->prt_va) {
r = -ENOMEM;
goto error_vm;
@@ -1367,6 +1507,13 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
mutex_init(&fpriv->bo_list_lock);
idr_init_base(&fpriv->bo_list_handles, 1);
+ r = amdgpu_userq_mgr_init(&fpriv->userq_mgr, file_priv, adev);
+ if (r)
+ drm_warn(adev_to_drm(adev),
+ "Failed to init usermode queue manager (%d), use legacy workload submission only\n",
+ r);
+
+ amdgpu_evf_mgr_init(&fpriv->evf_mgr);
amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev);
file_priv->driver_priv = fpriv;
@@ -1376,15 +1523,12 @@ error_vm:
amdgpu_vm_fini(adev, &fpriv->vm);
error_pasid:
- if (pasid) {
+ if (pasid)
amdgpu_pasid_free(pasid);
- amdgpu_vm_set_pasid(adev, &fpriv->vm, 0);
- }
kfree(fpriv);
out_suspend:
- pm_runtime_mark_last_busy(dev->dev);
pm_put:
pm_runtime_put_autosuspend(dev->dev);
@@ -1452,7 +1596,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
kfree(fpriv);
file_priv->driver_priv = NULL;
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
}