diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 573 |
1 files changed, 352 insertions, 221 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 3e94c3ba1ba2..1991dd3d1056 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -1,5 +1,5 @@ /* - * Copyright 2016 Advanced Micro Devices, Inc. + * Copyright 2016-2024 Advanced Micro Devices, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -62,6 +62,7 @@ #define FIRMWARE_VCN4_0_6 "amdgpu/vcn_4_0_6.bin" #define FIRMWARE_VCN4_0_6_1 "amdgpu/vcn_4_0_6_1.bin" #define FIRMWARE_VCN5_0_0 "amdgpu/vcn_5_0_0.bin" +#define FIRMWARE_VCN5_0_1 "amdgpu/vcn_5_0_1.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); MODULE_FIRMWARE(FIRMWARE_PICASSO); @@ -88,46 +89,57 @@ MODULE_FIRMWARE(FIRMWARE_VCN4_0_5); MODULE_FIRMWARE(FIRMWARE_VCN4_0_6); MODULE_FIRMWARE(FIRMWARE_VCN4_0_6_1); MODULE_FIRMWARE(FIRMWARE_VCN5_0_0); +MODULE_FIRMWARE(FIRMWARE_VCN5_0_1); static void amdgpu_vcn_idle_work_handler(struct work_struct *work); -int amdgpu_vcn_early_init(struct amdgpu_device *adev) +int amdgpu_vcn_early_init(struct amdgpu_device *adev, int i) { char ucode_prefix[25]; - int r, i; + int r; + adev->vcn.inst[i].adev = adev; + adev->vcn.inst[i].inst = i; amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, sizeof(ucode_prefix)); - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (i == 1 && amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 6)) - r = amdgpu_ucode_request(adev, &adev->vcn.fw[i], "amdgpu/%s_%d.bin", ucode_prefix, i); - else - r = amdgpu_ucode_request(adev, &adev->vcn.fw[i], "amdgpu/%s.bin", ucode_prefix); - if (r) { - amdgpu_ucode_release(&adev->vcn.fw[i]); - return r; + + if (i != 0 && adev->vcn.per_inst_fw) { + r = amdgpu_ucode_request(adev, &adev->vcn.inst[i].fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_%d.bin", ucode_prefix, i); + if (r) + amdgpu_ucode_release(&adev->vcn.inst[i].fw); + } else { + if (!adev->vcn.inst[0].fw) { + r = amdgpu_ucode_request(adev, &adev->vcn.inst[0].fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s.bin", ucode_prefix); + if (r) + amdgpu_ucode_release(&adev->vcn.inst[0].fw); + } else { + r = 0; } + adev->vcn.inst[i].fw = adev->vcn.inst[0].fw; } + return r; } -int amdgpu_vcn_sw_init(struct amdgpu_device *adev) +int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i) { unsigned long bo_size; const struct common_firmware_header *hdr; unsigned char fw_check; unsigned int fw_shared_size, log_offset; - int i, r; - - INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler); - mutex_init(&adev->vcn.vcn_pg_lock); - mutex_init(&adev->vcn.vcn1_jpeg1_workaround); - atomic_set(&adev->vcn.total_submission_cnt, 0); - for (i = 0; i < adev->vcn.num_vcn_inst; i++) - atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0); + int r; + mutex_init(&adev->vcn.inst[i].vcn1_jpeg1_workaround); + mutex_init(&adev->vcn.inst[i].vcn_pg_lock); + atomic_set(&adev->vcn.inst[i].total_submission_cnt, 0); + INIT_DELAYED_WORK(&adev->vcn.inst[i].idle_work, amdgpu_vcn_idle_work_handler); + atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0); if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) - adev->vcn.indirect_sram = true; + adev->vcn.inst[i].indirect_sram = true; /* * Some Steam Deck's BIOS versions are incompatible with the @@ -140,18 +152,19 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) const char *bios_ver = dmi_get_system_info(DMI_BIOS_VERSION); if (bios_ver && (!strncmp("F7A0113", bios_ver, 7) || - !strncmp("F7A0114", bios_ver, 7))) { - adev->vcn.indirect_sram = false; + !strncmp("F7A0114", bios_ver, 7))) { + adev->vcn.inst[i].indirect_sram = false; dev_info(adev->dev, - "Steam Deck quirk: indirect SRAM disabled on BIOS %s\n", bios_ver); + "Steam Deck quirk: indirect SRAM disabled on BIOS %s\n", bios_ver); } } /* from vcn4 and above, only unified queue is used */ - adev->vcn.using_unified_queue = + adev->vcn.inst[i].using_unified_queue = amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0); - hdr = (const struct common_firmware_header *)adev->vcn.fw[0]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data; + adev->vcn.inst[i].fw_version = le32_to_cpu(hdr->ucode_version); adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version); /* Bit 20-23, it is encode major and non-zero for new naming convention. @@ -169,16 +182,17 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) enc_major = fw_check; dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf; vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf; - DRM_INFO("Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n", - enc_major, enc_minor, dec_ver, vep, fw_rev); + dev_info(adev->dev, + "Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n", + enc_major, enc_minor, dec_ver, vep, fw_rev); } else { unsigned int version_major, version_minor, family_id; family_id = le32_to_cpu(hdr->ucode_version) & 0xff; version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; - DRM_INFO("Found VCN firmware Version: %u.%u Family ID: %u\n", - version_major, version_minor, family_id); + dev_info(adev->dev, "Found VCN firmware Version: %u.%u Family ID: %u\n", + version_major, version_minor, family_id); } bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE; @@ -201,80 +215,77 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) if (amdgpu_vcnfw_log) bo_size += AMDGPU_VCNFW_LOG_SIZE; - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; + r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT, + &adev->vcn.inst[i].vcpu_bo, + &adev->vcn.inst[i].gpu_addr, + &adev->vcn.inst[i].cpu_addr); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); + return r; + } - r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, + adev->vcn.inst[i].fw_shared.cpu_addr = adev->vcn.inst[i].cpu_addr + + bo_size - fw_shared_size; + adev->vcn.inst[i].fw_shared.gpu_addr = adev->vcn.inst[i].gpu_addr + + bo_size - fw_shared_size; + + adev->vcn.inst[i].fw_shared.mem_size = fw_shared_size; + + if (amdgpu_vcnfw_log) { + adev->vcn.inst[i].fw_shared.cpu_addr -= AMDGPU_VCNFW_LOG_SIZE; + adev->vcn.inst[i].fw_shared.gpu_addr -= AMDGPU_VCNFW_LOG_SIZE; + adev->vcn.inst[i].fw_shared.log_offset = log_offset; + } + + if (adev->vcn.inst[i].indirect_sram) { + r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT, - &adev->vcn.inst[i].vcpu_bo, - &adev->vcn.inst[i].gpu_addr, - &adev->vcn.inst[i].cpu_addr); + &adev->vcn.inst[i].dpg_sram_bo, + &adev->vcn.inst[i].dpg_sram_gpu_addr, + &adev->vcn.inst[i].dpg_sram_cpu_addr); if (r) { - dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); + dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r); return r; } - - adev->vcn.inst[i].fw_shared.cpu_addr = adev->vcn.inst[i].cpu_addr + - bo_size - fw_shared_size; - adev->vcn.inst[i].fw_shared.gpu_addr = adev->vcn.inst[i].gpu_addr + - bo_size - fw_shared_size; - - adev->vcn.inst[i].fw_shared.mem_size = fw_shared_size; - - if (amdgpu_vcnfw_log) { - adev->vcn.inst[i].fw_shared.cpu_addr -= AMDGPU_VCNFW_LOG_SIZE; - adev->vcn.inst[i].fw_shared.gpu_addr -= AMDGPU_VCNFW_LOG_SIZE; - adev->vcn.inst[i].fw_shared.log_offset = log_offset; - } - - if (adev->vcn.indirect_sram) { - r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM | - AMDGPU_GEM_DOMAIN_GTT, - &adev->vcn.inst[i].dpg_sram_bo, - &adev->vcn.inst[i].dpg_sram_gpu_addr, - &adev->vcn.inst[i].dpg_sram_cpu_addr); - if (r) { - dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r); - return r; - } - } } return 0; } -int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) +int amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i) { - int i, j; + int j; - for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { - if (adev->vcn.harvest_config & (1 << j)) - continue; + if (adev->vcn.harvest_config & (1 << i)) + return 0; - amdgpu_bo_free_kernel( - &adev->vcn.inst[j].dpg_sram_bo, - &adev->vcn.inst[j].dpg_sram_gpu_addr, - (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr); + amdgpu_bo_free_kernel( + &adev->vcn.inst[i].dpg_sram_bo, + &adev->vcn.inst[i].dpg_sram_gpu_addr, + (void **)&adev->vcn.inst[i].dpg_sram_cpu_addr); - kvfree(adev->vcn.inst[j].saved_bo); + kvfree(adev->vcn.inst[i].saved_bo); - amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo, - &adev->vcn.inst[j].gpu_addr, - (void **)&adev->vcn.inst[j].cpu_addr); + amdgpu_bo_free_kernel(&adev->vcn.inst[i].vcpu_bo, + &adev->vcn.inst[i].gpu_addr, + (void **)&adev->vcn.inst[i].cpu_addr); - amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec); + amdgpu_ring_fini(&adev->vcn.inst[i].ring_dec); - for (i = 0; i < adev->vcn.num_enc_rings; ++i) - amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]); + for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) + amdgpu_ring_fini(&adev->vcn.inst[i].ring_enc[j]); - amdgpu_ucode_release(&adev->vcn.fw[j]); + if (adev->vcn.per_inst_fw) { + amdgpu_ucode_release(&adev->vcn.inst[i].fw); + } else { + amdgpu_ucode_release(&adev->vcn.inst[0].fw); + adev->vcn.inst[i].fw = NULL; } - - mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround); - mutex_destroy(&adev->vcn.vcn_pg_lock); + mutex_destroy(&adev->vcn.inst[i].vcn_pg_lock); + mutex_destroy(&adev->vcn.inst[i].vcn1_jpeg1_workaround); return 0; } @@ -282,7 +293,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance) { bool ret = false; - int vcn_config = adev->vcn.vcn_config[vcn_instance]; + int vcn_config = adev->vcn.inst[vcn_instance].vcn_config; if ((type == VCN_ENCODE_RING) && (vcn_config & VCN_BLOCK_ENCODE_DISABLE_MASK)) ret = true; @@ -294,179 +305,208 @@ bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type t return ret; } -int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev) +static int amdgpu_vcn_save_vcpu_bo_inst(struct amdgpu_device *adev, int i) { unsigned int size; void *ptr; - int i, idx; + int idx; - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - if (adev->vcn.inst[i].vcpu_bo == NULL) - return 0; + if (adev->vcn.harvest_config & (1 << i)) + return 0; + if (adev->vcn.inst[i].vcpu_bo == NULL) + return 0; - size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo); - ptr = adev->vcn.inst[i].cpu_addr; + size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo); + ptr = adev->vcn.inst[i].cpu_addr; - adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL); - if (!adev->vcn.inst[i].saved_bo) - return -ENOMEM; + adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL); + if (!adev->vcn.inst[i].saved_bo) + return -ENOMEM; - if (drm_dev_enter(adev_to_drm(adev), &idx)) { - memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size); - drm_dev_exit(idx); - } + if (drm_dev_enter(adev_to_drm(adev), &idx)) { + memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size); + drm_dev_exit(idx); } return 0; } -int amdgpu_vcn_suspend(struct amdgpu_device *adev) +int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev) +{ + int ret, i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + ret = amdgpu_vcn_save_vcpu_bo_inst(adev, i); + if (ret) + return ret; + } + + return 0; +} + +int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i) { bool in_ras_intr = amdgpu_ras_intr_triggered(); - cancel_delayed_work_sync(&adev->vcn.idle_work); + if (adev->vcn.harvest_config & (1 << i)) + return 0; + + cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work); /* err_event_athub will corrupt VCPU buffer, so we need to * restore fw data and clear buffer in amdgpu_vcn_resume() */ if (in_ras_intr) return 0; - return amdgpu_vcn_save_vcpu_bo(adev); + return amdgpu_vcn_save_vcpu_bo_inst(adev, i); } -int amdgpu_vcn_resume(struct amdgpu_device *adev) +int amdgpu_vcn_resume(struct amdgpu_device *adev, int i) { unsigned int size; void *ptr; - int i, idx; + int idx; - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - if (adev->vcn.inst[i].vcpu_bo == NULL) - return -EINVAL; + if (adev->vcn.harvest_config & (1 << i)) + return 0; + if (adev->vcn.inst[i].vcpu_bo == NULL) + return -EINVAL; + + size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo); + ptr = adev->vcn.inst[i].cpu_addr; - size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo); - ptr = adev->vcn.inst[i].cpu_addr; + if (adev->vcn.inst[i].saved_bo != NULL) { + if (drm_dev_enter(adev_to_drm(adev), &idx)) { + memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size); + drm_dev_exit(idx); + } + kvfree(adev->vcn.inst[i].saved_bo); + adev->vcn.inst[i].saved_bo = NULL; + } else { + const struct common_firmware_header *hdr; + unsigned int offset; - if (adev->vcn.inst[i].saved_bo != NULL) { + hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + offset = le32_to_cpu(hdr->ucode_array_offset_bytes); if (drm_dev_enter(adev_to_drm(adev), &idx)) { - memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size); + memcpy_toio(adev->vcn.inst[i].cpu_addr, + adev->vcn.inst[i].fw->data + offset, + le32_to_cpu(hdr->ucode_size_bytes)); drm_dev_exit(idx); } - kvfree(adev->vcn.inst[i].saved_bo); - adev->vcn.inst[i].saved_bo = NULL; - } else { - const struct common_firmware_header *hdr; - unsigned int offset; - - hdr = (const struct common_firmware_header *)adev->vcn.fw[i]->data; - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - offset = le32_to_cpu(hdr->ucode_array_offset_bytes); - if (drm_dev_enter(adev_to_drm(adev), &idx)) { - memcpy_toio(adev->vcn.inst[i].cpu_addr, - adev->vcn.fw[i]->data + offset, - le32_to_cpu(hdr->ucode_size_bytes)); - drm_dev_exit(idx); - } - size -= le32_to_cpu(hdr->ucode_size_bytes); - ptr += le32_to_cpu(hdr->ucode_size_bytes); - } - memset_io(ptr, 0, size); + size -= le32_to_cpu(hdr->ucode_size_bytes); + ptr += le32_to_cpu(hdr->ucode_size_bytes); } + memset_io(ptr, 0, size); } + return 0; } static void amdgpu_vcn_idle_work_handler(struct work_struct *work) { - struct amdgpu_device *adev = - container_of(work, struct amdgpu_device, vcn.idle_work.work); + struct amdgpu_vcn_inst *vcn_inst = + container_of(work, struct amdgpu_vcn_inst, idle_work.work); + struct amdgpu_device *adev = vcn_inst->adev; unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0}; - unsigned int i, j; + unsigned int i = vcn_inst->inst, j; int r = 0; - for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { - if (adev->vcn.harvest_config & (1 << j)) - continue; + if (adev->vcn.harvest_config & (1 << i)) + return; - for (i = 0; i < adev->vcn.num_enc_rings; ++i) - fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]); + for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) + fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_enc[j]); - /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && - !adev->vcn.using_unified_queue) { - struct dpg_pause_state new_state; - - if (fence[j] || - unlikely(atomic_read(&adev->vcn.inst[j].dpg_enc_submission_cnt))) - new_state.fw_based = VCN_DPG_STATE__PAUSE; - else - new_state.fw_based = VCN_DPG_STATE__UNPAUSE; + /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && + !adev->vcn.inst[i].using_unified_queue) { + struct dpg_pause_state new_state; - adev->vcn.pause_dpg_mode(adev, j, &new_state); - } + if (fence[i] || + unlikely(atomic_read(&vcn_inst->dpg_enc_submission_cnt))) + new_state.fw_based = VCN_DPG_STATE__PAUSE; + else + new_state.fw_based = VCN_DPG_STATE__UNPAUSE; - fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec); - fences += fence[j]; + adev->vcn.inst[i].pause_dpg_mode(vcn_inst, &new_state); } - if (!fences && !atomic_read(&adev->vcn.total_submission_cnt)) { - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, - AMD_PG_STATE_GATE); - r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, - false); - if (r) - dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r); + fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_dec); + fences += fence[i]; + + if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) { + vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_GATE); + mutex_lock(&adev->vcn.workload_profile_mutex); + if (adev->vcn.workload_profile_active) { + r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, + false); + if (r) + dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r); + adev->vcn.workload_profile_active = false; + } + mutex_unlock(&adev->vcn.workload_profile_mutex); } else { - schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT); + schedule_delayed_work(&vcn_inst->idle_work, VCN_IDLE_TIMEOUT); } } void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + struct amdgpu_vcn_inst *vcn_inst = &adev->vcn.inst[ring->me]; int r = 0; - atomic_inc(&adev->vcn.total_submission_cnt); + atomic_inc(&vcn_inst->total_submission_cnt); + + cancel_delayed_work_sync(&vcn_inst->idle_work); + + /* We can safely return early here because we've cancelled the + * the delayed work so there is no one else to set it to false + * and we don't care if someone else sets it to true. + */ + if (adev->vcn.workload_profile_active) + goto pg_lock; - if (!cancel_delayed_work_sync(&adev->vcn.idle_work)) { + mutex_lock(&adev->vcn.workload_profile_mutex); + if (!adev->vcn.workload_profile_active) { r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, - true); + true); if (r) dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r); + adev->vcn.workload_profile_active = true; } + mutex_unlock(&adev->vcn.workload_profile_mutex); - mutex_lock(&adev->vcn.vcn_pg_lock); - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, - AMD_PG_STATE_UNGATE); +pg_lock: + mutex_lock(&vcn_inst->vcn_pg_lock); + vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE); /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && - !adev->vcn.using_unified_queue) { + !vcn_inst->using_unified_queue) { struct dpg_pause_state new_state; if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) { - atomic_inc(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt); + atomic_inc(&vcn_inst->dpg_enc_submission_cnt); new_state.fw_based = VCN_DPG_STATE__PAUSE; } else { unsigned int fences = 0; unsigned int i; - for (i = 0; i < adev->vcn.num_enc_rings; ++i) - fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]); + for (i = 0; i < vcn_inst->num_enc_rings; ++i) + fences += amdgpu_fence_count_emitted(&vcn_inst->ring_enc[i]); - if (fences || atomic_read(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt)) + if (fences || atomic_read(&vcn_inst->dpg_enc_submission_cnt)) new_state.fw_based = VCN_DPG_STATE__PAUSE; else new_state.fw_based = VCN_DPG_STATE__UNPAUSE; } - adev->vcn.pause_dpg_mode(adev, ring->me, &new_state); + vcn_inst->pause_dpg_mode(vcn_inst, &new_state); } - mutex_unlock(&adev->vcn.vcn_pg_lock); + mutex_unlock(&vcn_inst->vcn_pg_lock); } void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) @@ -476,12 +516,13 @@ void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC && - !adev->vcn.using_unified_queue) + !adev->vcn.inst[ring->me].using_unified_queue) atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt); - atomic_dec(&ring->adev->vcn.total_submission_cnt); + atomic_dec(&ring->adev->vcn.inst[ring->me].total_submission_cnt); - schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT); + schedule_delayed_work(&ring->adev->vcn.inst[ring->me].idle_work, + VCN_IDLE_TIMEOUT); } int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) @@ -499,7 +540,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 3); if (r) return r; - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.scratch9, 0)); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { @@ -564,14 +605,14 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, goto err; ib = &job->ibs[0]; - ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0); + ib->ptr[0] = PACKET0(adev->vcn.inst[ring->me].internal.data0, 0); ib->ptr[1] = addr; - ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0); + ib->ptr[2] = PACKET0(adev->vcn.inst[ring->me].internal.data1, 0); ib->ptr[3] = addr >> 32; - ib->ptr[4] = PACKET0(adev->vcn.internal.cmd, 0); + ib->ptr[4] = PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0); ib->ptr[5] = 0; for (i = 6; i < 16; i += 2) { - ib->ptr[i] = PACKET0(adev->vcn.internal.nop, 0); + ib->ptr[i] = PACKET0(adev->vcn.inst[ring->me].internal.nop, 0); ib->ptr[i+1] = 0; } ib->length_dw = 16; @@ -580,7 +621,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, if (r) goto err_free; - amdgpu_ib_free(adev, ib_msg, f); + amdgpu_ib_free(ib_msg, f); if (fence) *fence = dma_fence_get(f); @@ -591,7 +632,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, err_free: amdgpu_job_free(job); err: - amdgpu_ib_free(adev, ib_msg, f); + amdgpu_ib_free(ib_msg, f); return r; } @@ -734,7 +775,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, uint32_t ib_pack_in_dw; int i, r; - if (adev->vcn.using_unified_queue) + if (adev->vcn.inst[ring->me].using_unified_queue) ib_size_dw += 8; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, @@ -747,7 +788,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, ib->length_dw = 0; /* single queue headers */ - if (adev->vcn.using_unified_queue) { + if (adev->vcn.inst[ring->me].using_unified_queue) { ib_pack_in_dw = sizeof(struct amdgpu_vcn_decode_buffer) / sizeof(uint32_t) + 4 + 2; /* engine info + decoding ib in dw */ ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, ib_pack_in_dw, false); @@ -766,14 +807,14 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - if (adev->vcn.using_unified_queue) + if (adev->vcn.inst[ring->me].using_unified_queue) amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, ib_pack_in_dw); r = amdgpu_job_submit_direct(job, ring, &f); if (r) goto err_free; - amdgpu_ib_free(adev, ib_msg, f); + amdgpu_ib_free(ib_msg, f); if (fence) *fence = dma_fence_get(f); @@ -784,7 +825,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, err_free: amdgpu_job_free(job); err: - amdgpu_ib_free(adev, ib_msg, f); + amdgpu_ib_free(ib_msg, f); return r; } @@ -864,7 +905,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand uint64_t addr; int i, r; - if (adev->vcn.using_unified_queue) + if (adev->vcn.inst[ring->me].using_unified_queue) ib_size_dw += 8; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, @@ -878,7 +919,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand ib->length_dw = 0; - if (adev->vcn.using_unified_queue) + if (adev->vcn.inst[ring->me].using_unified_queue) ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true); ib->ptr[ib->length_dw++] = 0x00000018; @@ -900,7 +941,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - if (adev->vcn.using_unified_queue) + if (adev->vcn.inst[ring->me].using_unified_queue) amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11); r = amdgpu_job_submit_direct(job, ring, &f); @@ -931,7 +972,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han uint64_t addr; int i, r; - if (adev->vcn.using_unified_queue) + if (adev->vcn.inst[ring->me].using_unified_queue) ib_size_dw += 8; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, @@ -945,7 +986,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han ib->length_dw = 0; - if (adev->vcn.using_unified_queue) + if (adev->vcn.inst[ring->me].using_unified_queue) ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true); ib->ptr[ib->length_dw++] = 0x00000018; @@ -967,7 +1008,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - if (adev->vcn.using_unified_queue) + if (adev->vcn.inst[ring->me].using_unified_queue) amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11); r = amdgpu_job_submit_direct(job, ring, &f); @@ -1014,7 +1055,7 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = 0; error: - amdgpu_ib_free(adev, &ib, fence); + amdgpu_ib_free(&ib, fence); dma_fence_put(fence); return r; @@ -1025,7 +1066,8 @@ int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout) struct amdgpu_device *adev = ring->adev; long r; - if (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(4, 0, 3)) { + if ((amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(4, 0, 3)) && + (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(5, 0, 1))) { r = amdgpu_vcn_enc_ring_test_ib(ring, timeout); if (r) goto error; @@ -1051,34 +1093,32 @@ enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring) } } -void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev) +void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev, int i) { - int i; unsigned int idx; if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { const struct common_firmware_header *hdr; - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - - hdr = (const struct common_firmware_header *)adev->vcn.fw[i]->data; - /* currently only support 2 FW instances */ - if (i >= 2) { - dev_info(adev->dev, "More then 2 VCN FW instances!\n"); - break; - } - idx = AMDGPU_UCODE_ID_VCN + i; - adev->firmware.ucode[idx].ucode_id = idx; - adev->firmware.ucode[idx].fw = adev->vcn.fw[i]; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); - - if (amdgpu_ip_version(adev, UVD_HWIP, 0) == - IP_VERSION(4, 0, 3)) - break; + if (adev->vcn.harvest_config & (1 << i)) + return; + + if ((amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 3) || + amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(5, 0, 1)) + && (i > 0)) + return; + + hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data; + /* currently only support 2 FW instances */ + if (i >= 2) { + dev_info(adev->dev, "More then 2 VCN FW instances!\n"); + return; } + idx = AMDGPU_UCODE_ID_VCN + i; + adev->firmware.ucode[idx].ucode_id = idx; + adev->firmware.ucode[idx].fw = adev->vcn.inst[i].fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); } } @@ -1320,3 +1360,94 @@ void amdgpu_vcn_sysfs_reset_mask_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_vcn_reset_mask); } } + +/* + * debugfs to enable/disable vcn job submission to specific core or + * instance. It is created only if the queue type is unified. + */ +#if defined(CONFIG_DEBUG_FS) +static int amdgpu_debugfs_vcn_sched_mask_set(void *data, u64 val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i; + u64 mask; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + + mask = (1ULL << adev->vcn.num_vcn_inst) - 1; + if ((val & mask) == 0) + return -EINVAL; + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + ring = &adev->vcn.inst[i].ring_enc[0]; + if (val & (1ULL << i)) + ring->sched.ready = true; + else + ring->sched.ready = false; + } + /* publish sched.ready flag update effective immediately across smp */ + smp_rmb(); + return 0; +} + +static int amdgpu_debugfs_vcn_sched_mask_get(void *data, u64 *val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i; + u64 mask = 0; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + ring = &adev->vcn.inst[i].ring_enc[0]; + if (ring->sched.ready) + mask |= 1ULL << i; + } + *val = mask; + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_vcn_sched_mask_fops, + amdgpu_debugfs_vcn_sched_mask_get, + amdgpu_debugfs_vcn_sched_mask_set, "%llx\n"); +#endif + +void amdgpu_debugfs_vcn_sched_mask_init(struct amdgpu_device *adev) +{ +#if defined(CONFIG_DEBUG_FS) + struct drm_minor *minor = adev_to_drm(adev)->primary; + struct dentry *root = minor->debugfs_root; + char name[32]; + + if (adev->vcn.num_vcn_inst <= 1 || !adev->vcn.inst[0].using_unified_queue) + return; + sprintf(name, "amdgpu_vcn_sched_mask"); + debugfs_create_file(name, 0600, root, adev, + &amdgpu_debugfs_vcn_sched_mask_fops); +#endif +} + +/** + * vcn_set_powergating_state - set VCN block powergating state + * + * @ip_block: amdgpu_ip_block pointer + * @state: power gating state + * + * Set VCN block powergating state + */ +int vcn_set_powergating_state(struct amdgpu_ip_block *ip_block, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = ip_block->adev; + int ret = 0, i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; + + ret |= vinst->set_pg_state(vinst, state); + } + + return ret; +} |