diff options
Diffstat (limited to 'drivers/gpu/drm/amd')
219 files changed, 16960 insertions, 4607 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 466da5954a68..fdd0ca4b0f0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -23,7 +23,7 @@ # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. -FULL_AMD_PATH=$(src)/.. +FULL_AMD_PATH=$(srctree)/$(src)/.. DISPLAY_FOLDER_NAME=display FULL_AMD_DISPLAY_PATH = $(FULL_AMD_PATH)/$(DISPLAY_FOLDER_NAME) @@ -53,7 +53,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \ - amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o + amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \ + amdgpu_vm_sdma.o # add asic specific block amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8d0d7f3dd5fb..14398f55f602 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -83,6 +83,7 @@ #include "amdgpu_gem.h" #include "amdgpu_doorbell.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_smu.h" #define MAX_GPU_INSTANCE 16 @@ -156,6 +157,8 @@ extern int amdgpu_emu_mode; extern uint amdgpu_smu_memory_pool_size; extern uint amdgpu_dc_feature_mask; extern struct amdgpu_mgpu_info mgpu_info; +extern int amdgpu_ras_enable; +extern uint amdgpu_ras_mask; #ifdef CONFIG_DRM_AMDGPU_SI extern int amdgpu_si_support; @@ -433,6 +436,12 @@ struct amdgpu_cs_chunk { void *kdata; }; +struct amdgpu_cs_post_dep { + struct drm_syncobj *syncobj; + struct dma_fence_chain *chain; + u64 point; +}; + struct amdgpu_cs_parser { struct amdgpu_device *adev; struct drm_file *filp; @@ -462,8 +471,8 @@ struct amdgpu_cs_parser { /* user fence */ struct amdgpu_bo_list_entry uf_entry; - unsigned num_post_dep_syncobjs; - struct drm_syncobj **post_dep_syncobjs; + unsigned num_post_deps; + struct amdgpu_cs_post_dep *post_deps; }; static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, @@ -702,7 +711,6 @@ enum amd_hw_ip_block_type { struct amd_powerplay { void *pp_handle; const struct amd_pm_funcs *pp_funcs; - uint32_t pp_feature; }; #define AMDGPU_RESET_MAGIC_NUM 64 @@ -825,6 +833,7 @@ struct amdgpu_device { /* For pre-DCE11. DCE11 and later are in "struct amdgpu_device->dm" */ struct work_struct hotplug_work; struct amdgpu_irq_src crtc_irq; + struct amdgpu_irq_src vupdate_irq; struct amdgpu_irq_src pageflip_irq; struct amdgpu_irq_src hpd_irq; @@ -842,6 +851,9 @@ struct amdgpu_device { struct amd_powerplay powerplay; bool pp_force_state_enabled; + /* smu */ + struct smu_context smu; + /* dpm */ struct amdgpu_pm pm; u32 cg_flags; @@ -922,6 +934,8 @@ struct amdgpu_device { int asic_reset_res; struct work_struct xgmi_reset_work; + + bool in_baco_reset; }; static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index fe1d7368c1e6..aeead072fa79 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -335,6 +335,43 @@ void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) amdgpu_bo_unref(&(bo)); } +uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd, + enum kgd_engine_type type) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + switch (type) { + case KGD_ENGINE_PFP: + return adev->gfx.pfp_fw_version; + + case KGD_ENGINE_ME: + return adev->gfx.me_fw_version; + + case KGD_ENGINE_CE: + return adev->gfx.ce_fw_version; + + case KGD_ENGINE_MEC1: + return adev->gfx.mec_fw_version; + + case KGD_ENGINE_MEC2: + return adev->gfx.mec2_fw_version; + + case KGD_ENGINE_RLC: + return adev->gfx.rlc_fw_version; + + case KGD_ENGINE_SDMA1: + return adev->sdma.instance[0].fw_version; + + case KGD_ENGINE_SDMA2: + return adev->sdma.instance[1].fw_version; + + default: + return 0; + } + + return 0; +} + void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, struct kfd_local_mem_info *mem_info) { @@ -640,4 +677,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd) void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) { } + +void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) +{ +} #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 0b31a1859023..4e37fa7e85b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -81,6 +81,18 @@ struct amdgpu_kfd_dev { uint64_t vram_used; }; +enum kgd_engine_type { + KGD_ENGINE_PFP = 1, + KGD_ENGINE_ME, + KGD_ENGINE_CE, + KGD_ENGINE_MEC1, + KGD_ENGINE_MEC2, + KGD_ENGINE_RLC, + KGD_ENGINE_SDMA1, + KGD_ENGINE_SDMA2, + KGD_ENGINE_MAX +}; + struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, struct mm_struct *mm); bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); @@ -142,6 +154,8 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr, bool mqd_gfx9); void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); +uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd, + enum kgd_engine_type type); void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, struct kfd_local_mem_info *mem_info); uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd); @@ -230,5 +244,6 @@ int kgd2kfd_quiesce_mm(struct mm_struct *mm); int kgd2kfd_resume_mm(struct mm_struct *mm); int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, struct dma_fence *fence); +void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd); #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index ff7fac7df34b..fa09e11a600c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -22,14 +22,12 @@ #include <linux/fdtable.h> #include <linux/uaccess.h> -#include <linux/firmware.h> #include <linux/mmu_context.h> #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "cikd.h" #include "cik_sdma.h" -#include "amdgpu_ucode.h" #include "gfx_v7_0.h" #include "gca/gfx_7_2_d.h" #include "gca/gfx_7_2_enum.h" @@ -139,7 +137,6 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, uint8_t vmid); -static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); static void set_scratch_backing_va(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, @@ -191,7 +188,6 @@ static const struct kfd2kgd_calls kfd2kgd = { .address_watch_get_offset = kgd_address_watch_get_offset, .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, - .get_fw_version = get_fw_version, .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, .set_vm_context_page_table_base = set_vm_context_page_table_base, @@ -792,63 +788,6 @@ static void set_scratch_backing_va(struct kgd_dev *kgd, unlock_srbm(kgd); } -static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - const union amdgpu_firmware_header *hdr; - - switch (type) { - case KGD_ENGINE_PFP: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.pfp_fw->data; - break; - - case KGD_ENGINE_ME: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.me_fw->data; - break; - - case KGD_ENGINE_CE: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.ce_fw->data; - break; - - case KGD_ENGINE_MEC1: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec_fw->data; - break; - - case KGD_ENGINE_MEC2: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec2_fw->data; - break; - - case KGD_ENGINE_RLC: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.rlc_fw->data; - break; - - case KGD_ENGINE_SDMA1: - hdr = (const union amdgpu_firmware_header *) - adev->sdma.instance[0].fw->data; - break; - - case KGD_ENGINE_SDMA2: - hdr = (const union amdgpu_firmware_header *) - adev->sdma.instance[1].fw->data; - break; - - default: - return 0; - } - - if (hdr == NULL) - return 0; - - /* Only 12 bit in use*/ - return hdr->common.ucode_version; -} - static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 56ea929f524b..fec3a6aa1de6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -23,12 +23,10 @@ #include <linux/module.h> #include <linux/fdtable.h> #include <linux/uaccess.h> -#include <linux/firmware.h> #include <linux/mmu_context.h> #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_amdkfd.h" -#include "amdgpu_ucode.h" #include "gfx_v8_0.h" #include "gca/gfx_8_0_sh_mask.h" #include "gca/gfx_8_0_d.h" @@ -95,7 +93,6 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, uint8_t vmid); -static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); static void set_scratch_backing_va(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, @@ -148,7 +145,6 @@ static const struct kfd2kgd_calls kfd2kgd = { get_atc_vmid_pasid_mapping_pasid, .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, - .get_fw_version = get_fw_version, .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, .set_vm_context_page_table_base = set_vm_context_page_table_base, @@ -751,63 +747,6 @@ static void set_scratch_backing_va(struct kgd_dev *kgd, unlock_srbm(kgd); } -static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - const union amdgpu_firmware_header *hdr; - - switch (type) { - case KGD_ENGINE_PFP: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.pfp_fw->data; - break; - - case KGD_ENGINE_ME: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.me_fw->data; - break; - - case KGD_ENGINE_CE: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.ce_fw->data; - break; - - case KGD_ENGINE_MEC1: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec_fw->data; - break; - - case KGD_ENGINE_MEC2: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec2_fw->data; - break; - - case KGD_ENGINE_RLC: - hdr = (const union amdgpu_firmware_header *) - adev->gfx.rlc_fw->data; - break; - - case KGD_ENGINE_SDMA1: - hdr = (const union amdgpu_firmware_header *) - adev->sdma.instance[0].fw->data; - break; - - case KGD_ENGINE_SDMA2: - hdr = (const union amdgpu_firmware_header *) - adev->sdma.instance[1].fw->data; - break; - - default: - return 0; - } - - if (hdr == NULL) - return 0; - - /* Only 12 bit in use*/ - return hdr->common.ucode_version; -} - static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 5c51d4910650..ef3d93b995b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -25,12 +25,10 @@ #include <linux/module.h> #include <linux/fdtable.h> #include <linux/uaccess.h> -#include <linux/firmware.h> #include <linux/mmu_context.h> #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_amdkfd.h" -#include "amdgpu_ucode.h" #include "soc15_hw_ip.h" #include "gc/gc_9_0_offset.h" #include "gc/gc_9_0_sh_mask.h" @@ -111,7 +109,6 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, uint8_t vmid); static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base); -static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); static void set_scratch_backing_va(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); @@ -158,7 +155,6 @@ static const struct kfd2kgd_calls kfd2kgd = { get_atc_vmid_pasid_mapping_pasid, .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, - .get_fw_version = get_fw_version, .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = amdgpu_amdkfd_get_tile_config, .set_vm_context_page_table_base = set_vm_context_page_table_base, @@ -874,56 +870,6 @@ static void set_scratch_backing_va(struct kgd_dev *kgd, */ } -/* FIXME: Does this need to be ASIC-specific code? */ -static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - const union amdgpu_firmware_header *hdr; - - switch (type) { - case KGD_ENGINE_PFP: - hdr = (const union amdgpu_firmware_header *)adev->gfx.pfp_fw->data; - break; - - case KGD_ENGINE_ME: - hdr = (const union amdgpu_firmware_header *)adev->gfx.me_fw->data; - break; - - case KGD_ENGINE_CE: - hdr = (const union amdgpu_firmware_header *)adev->gfx.ce_fw->data; - break; - - case KGD_ENGINE_MEC1: - hdr = (const union amdgpu_firmware_header *)adev->gfx.mec_fw->data; - break; - - case KGD_ENGINE_MEC2: - hdr = (const union amdgpu_firmware_header *)adev->gfx.mec2_fw->data; - break; - - case KGD_ENGINE_RLC: - hdr = (const union amdgpu_firmware_header *)adev->gfx.rlc_fw->data; - break; - - case KGD_ENGINE_SDMA1: - hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[0].fw->data; - break; - - case KGD_ENGINE_SDMA2: - hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[1].fw->data; - break; - - default: - return 0; - } - - if (hdr == NULL) - return 0; - - /* Only 12 bit in use*/ - return hdr->common.ucode_version; -} - static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 1921dec3df7a..a6e5184d436c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -410,15 +410,7 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, if (p_bo_va_entry) *p_bo_va_entry = bo_va_entry; - /* Allocate new page tables if needed and validate - * them. - */ - ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo)); - if (ret) { - pr_err("Failed to allocate pts, err=%d\n", ret); - goto err_alloc_pts; - } - + /* Allocate validate page tables if needed */ ret = vm_validate_pt_pd_bos(vm); if (ret) { pr_err("validate_pt_pd_bos() failed\n"); @@ -741,13 +733,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev, struct amdgpu_sync *sync) { int ret; - struct amdgpu_vm *vm; - struct amdgpu_bo_va *bo_va; - struct amdgpu_bo *bo; - - bo_va = entry->bo_va; - vm = bo_va->base.vm; - bo = bo_va->base.bo; + struct amdgpu_bo_va *bo_va = entry->bo_va; /* Update the page tables */ ret = amdgpu_vm_bo_update(adev, bo_va, false); @@ -906,7 +892,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, pr_err("validate_pt_pd_bos() failed\n"); goto validate_pd_fail; } - amdgpu_bo_sync_wait(vm->root.base.bo, AMDGPU_FENCE_OWNER_KFD, false); + ret = amdgpu_bo_sync_wait(vm->root.base.bo, + AMDGPU_FENCE_OWNER_KFD, false); if (ret) goto wait_pd_fail; amdgpu_bo_fence(vm->root.base.bo, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index b61e1dc61b4c..f96d75c6e099 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -28,8 +28,6 @@ #include "atom.h" #include "atombios.h" -#define get_index_into_master_table(master_table, table_name) (offsetof(struct master_table, table_name) / sizeof(uint16_t)) - bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev) { int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, @@ -238,10 +236,71 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev) return 0; } +/* + * Return true if vbios enabled ecc by default, if umc info table is available + * or false if ecc is not enabled or umc info table is not available + */ +bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev) +{ + struct amdgpu_mode_info *mode_info = &adev->mode_info; + int index; + u16 data_offset, size; + union umc_info *umc_info; + u8 frev, crev; + bool ecc_default_enabled = false; + + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + umc_info); + + if (amdgpu_atom_parse_data_header(mode_info->atom_context, + index, &size, &frev, &crev, &data_offset)) { + /* support umc_info 3.1+ */ + if ((frev == 3 && crev >= 1) || (frev > 3)) { + umc_info = (union umc_info *) + (mode_info->atom_context->bios + data_offset); + ecc_default_enabled = + (le32_to_cpu(umc_info->v31.umc_config) & + UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false; + } + } + + return ecc_default_enabled; +} + union firmware_info { struct atom_firmware_info_v3_1 v31; }; +/* + * Return true if vbios supports sram ecc or false if not + */ +bool amdgpu_atomfirmware_sram_ecc_supported(struct amdgpu_device *adev) +{ + struct amdgpu_mode_info *mode_info = &adev->mode_info; + int index; + u16 data_offset, size; + union firmware_info *firmware_info; + u8 frev, crev; + bool sram_ecc_supported = false; + + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + firmwareinfo); + + if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, + index, &size, &frev, &crev, &data_offset)) { + /* support firmware_info 3.1 + */ + if ((frev == 3 && crev >=1) || (frev > 3)) { + firmware_info = (union firmware_info *) + (mode_info->atom_context->bios + data_offset); + sram_ecc_supported = + (le32_to_cpu(firmware_info->v31.firmware_capability) & + ATOM_FIRMWARE_CAP_SRAM_ECC) ? true : false; + } + } + + return sram_ecc_supported; +} + union smu_info { struct atom_smu_info_v3_1 v31; }; @@ -346,11 +405,11 @@ int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev) (mode_info->atom_context->bios + data_offset); switch (crev) { case 4: - adev->gfx.config.max_shader_engines = gfx_info->v24.gc_num_se; - adev->gfx.config.max_cu_per_sh = gfx_info->v24.gc_num_cu_per_sh; - adev->gfx.config.max_sh_per_se = gfx_info->v24.gc_num_sh_per_se; - adev->gfx.config.max_backends_per_se = gfx_info->v24.gc_num_rb_per_se; - adev->gfx.config.max_texture_channel_caches = gfx_info->v24.gc_num_tccs; + adev->gfx.config.max_shader_engines = gfx_info->v24.max_shader_engines; + adev->gfx.config.max_cu_per_sh = gfx_info->v24.max_cu_per_sh; + adev->gfx.config.max_sh_per_se = gfx_info->v24.max_sh_per_se; + adev->gfx.config.max_backends_per_se = gfx_info->v24.max_backends_per_se; + adev->gfx.config.max_texture_channel_caches = gfx_info->v24.max_texture_channel_caches; adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v24.gc_num_gprs); adev->gfx.config.max_gs_threads = gfx_info->v24.gc_num_max_gs_thds; adev->gfx.config.gs_vgt_table_depth = gfx_info->v24.gc_gs_table_depth; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h index 20f158fd3b76..5ec6f92f353c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h @@ -24,6 +24,8 @@ #ifndef __AMDGPU_ATOMFIRMWARE_H__ #define __AMDGPU_ATOMFIRMWARE_H__ +#define get_index_into_master_table(master_table, table_name) (offsetof(struct master_table, table_name) / sizeof(uint16_t)) + bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev); void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev); int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev); @@ -31,5 +33,7 @@ int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev); +bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev); +bool amdgpu_atomfirmware_sram_ecc_supported(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 52a5e4fdc95b..2f6239b6be6f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -215,6 +215,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs case AMDGPU_CHUNK_ID_SYNCOBJ_IN: case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: break; default: @@ -804,9 +806,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); - for (i = 0; i < parser->num_post_dep_syncobjs; i++) - drm_syncobj_put(parser->post_dep_syncobjs[i]); - kfree(parser->post_dep_syncobjs); + for (i = 0; i < parser->num_post_deps; i++) { + drm_syncobj_put(parser->post_deps[i].syncobj); + kfree(parser->post_deps[i].chain); + } + kfree(parser->post_deps); dma_fence_put(parser->fence); @@ -1117,13 +1121,18 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, } static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, - uint32_t handle) + uint32_t handle, u64 point, + u64 flags) { - int r; struct dma_fence *fence; - r = drm_syncobj_find_fence(p->filp, handle, 0, 0, &fence); - if (r) + int r; + + r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence); + if (r) { + DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n", + handle, point, r); return r; + } r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true); dma_fence_put(fence); @@ -1134,46 +1143,118 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p, struct amdgpu_cs_chunk *chunk) { + struct drm_amdgpu_cs_chunk_sem *deps; unsigned num_deps; int i, r; - struct drm_amdgpu_cs_chunk_sem *deps; deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; num_deps = chunk->length_dw * 4 / sizeof(struct drm_amdgpu_cs_chunk_sem); + for (i = 0; i < num_deps; ++i) { + r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle, + 0, 0); + if (r) + return r; + } + + return 0; +} + +static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; + unsigned num_deps; + int i, r; + + syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_syncobj); for (i = 0; i < num_deps; ++i) { - r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle); + r = amdgpu_syncobj_lookup_and_add_to_sync(p, + syncobj_deps[i].handle, + syncobj_deps[i].point, + syncobj_deps[i].flags); if (r) return r; } + return 0; } static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p, struct amdgpu_cs_chunk *chunk) { + struct drm_amdgpu_cs_chunk_sem *deps; unsigned num_deps; int i; - struct drm_amdgpu_cs_chunk_sem *deps; + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; num_deps = chunk->length_dw * 4 / sizeof(struct drm_amdgpu_cs_chunk_sem); - p->post_dep_syncobjs = kmalloc_array(num_deps, - sizeof(struct drm_syncobj *), - GFP_KERNEL); - p->num_post_dep_syncobjs = 0; + p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), + GFP_KERNEL); + p->num_post_deps = 0; - if (!p->post_dep_syncobjs) + if (!p->post_deps) return -ENOMEM; + for (i = 0; i < num_deps; ++i) { - p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle); - if (!p->post_dep_syncobjs[i]) + p->post_deps[i].syncobj = + drm_syncobj_find(p->filp, deps[i].handle); + if (!p->post_deps[i].syncobj) return -EINVAL; - p->num_post_dep_syncobjs++; + p->post_deps[i].chain = NULL; + p->post_deps[i].point = 0; + p->num_post_deps++; } + + return 0; +} + + +static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk + *chunk) +{ + struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; + unsigned num_deps; + int i; + + syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_syncobj); + + p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), + GFP_KERNEL); + p->num_post_deps = 0; + + if (!p->post_deps) + return -ENOMEM; + + for (i = 0; i < num_deps; ++i) { + struct amdgpu_cs_post_dep *dep = &p->post_deps[i]; + + dep->chain = NULL; + if (syncobj_deps[i].point) { + dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL); + if (!dep->chain) + return -ENOMEM; + } + + dep->syncobj = drm_syncobj_find(p->filp, + syncobj_deps[i].handle); + if (!dep->syncobj) { + kfree(dep->chain); + return -EINVAL; + } + dep->point = syncobj_deps[i].point; + p->num_post_deps++; + } + return 0; } @@ -1187,19 +1268,33 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, chunk = &p->chunks[i]; - if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES || - chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) { + switch (chunk->chunk_id) { + case AMDGPU_CHUNK_ID_DEPENDENCIES: + case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: r = amdgpu_cs_process_fence_dep(p, chunk); if (r) return r; - } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) { + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_IN: r = amdgpu_cs_process_syncobj_in_dep(p, chunk); if (r) return r; - } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) { + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: r = amdgpu_cs_process_syncobj_out_dep(p, chunk); if (r) return r; + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: + r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk); + if (r) + return r; + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: + r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk); + if (r) + return r; + break; } } @@ -1210,8 +1305,17 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p) { int i; - for (i = 0; i < p->num_post_dep_syncobjs; ++i) - drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence); + for (i = 0; i < p->num_post_deps; ++i) { + if (p->post_deps[i].chain && p->post_deps[i].point) { + drm_syncobj_add_point(p->post_deps[i].syncobj, + p->post_deps[i].chain, + p->fence, p->post_deps[i].point); + p->post_deps[i].chain = NULL; + } else { + drm_syncobj_replace_fence(p->post_deps[i].syncobj, + p->fence); + } + } } static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index 7e22be7ca68a..54dd02a898b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, return -ENOMEM; } - r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr, - size); - if (r) { - DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r); - amdgpu_vm_bo_rmv(adev, *bo_va); - ttm_eu_backoff_reservation(&ticket, &list); - return r; - } - r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size, AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_EXECUTABLE); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 7b526593eb77..a28a3d722ba2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -26,6 +26,7 @@ #include <drm/drm_auth.h> #include "amdgpu.h" #include "amdgpu_sched.h" +#include "amdgpu_ras.h" #define to_amdgpu_ctx_entity(e) \ container_of((e), struct amdgpu_ctx_entity, entity) @@ -344,6 +345,7 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev, { struct amdgpu_ctx *ctx; struct amdgpu_ctx_mgr *mgr; + uint32_t ras_counter; if (!fpriv) return -EINVAL; @@ -368,6 +370,21 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev, if (atomic_read(&ctx->guilty)) out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY; + /*query ue count*/ + ras_counter = amdgpu_ras_query_error_count(adev, false); + /*ras counter is monotonic increasing*/ + if (ras_counter != ctx->ras_counter_ue) { + out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE; + ctx->ras_counter_ue = ras_counter; + } + + /*query ce count*/ + ras_counter = amdgpu_ras_query_error_count(adev, true); + if (ras_counter != ctx->ras_counter_ce) { + out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE; + ctx->ras_counter_ce = ras_counter; + } + mutex_unlock(&mgr->lock); return 0; } @@ -541,32 +558,26 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr) idr_init(&mgr->ctx_handles); } -void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr) +long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout) { unsigned num_entities = amdgput_ctx_total_num_entities(); struct amdgpu_ctx *ctx; struct idr *idp; uint32_t id, i; - long max_wait = MAX_WAIT_SCHED_ENTITY_Q_EMPTY; idp = &mgr->ctx_handles; mutex_lock(&mgr->lock); idr_for_each_entry(idp, ctx, id) { - - if (!ctx->adev) { - mutex_unlock(&mgr->lock); - return; - } - for (i = 0; i < num_entities; i++) { struct drm_sched_entity *entity; entity = &ctx->entities[0][i].entity; - max_wait = drm_sched_entity_flush(entity, max_wait); + timeout = drm_sched_entity_flush(entity, timeout); } } mutex_unlock(&mgr->lock); + return timeout; } void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) @@ -579,10 +590,6 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) idp = &mgr->ctx_handles; idr_for_each_entry(idp, ctx, id) { - - if (!ctx->adev) - return; - if (kref_read(&ctx->refcount) != 1) { DRM_ERROR("ctx %p is still alive\n", ctx); continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index b3b012c0a7da..5f1b54c9bcdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -49,6 +49,8 @@ struct amdgpu_ctx { enum drm_sched_priority override_priority; struct mutex lock; atomic_t guilty; + uint32_t ras_counter_ce; + uint32_t ras_counter_ue; }; struct amdgpu_ctx_mgr { @@ -82,7 +84,7 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr); void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr); -void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr); +long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout); void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 4ae3ff9a1d4c..8930d66f2204 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -568,10 +568,9 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, idx = *pos >> 2; valuesize = sizeof(values); - if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->read_sensor) - r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize); - else - return -EINVAL; + r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize); + if (r) + return r; if (size > valuesize) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 79fb302fb954..cc8ad3831982 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -60,6 +60,7 @@ #include "amdgpu_pm.h" #include "amdgpu_xgmi.h" +#include "amdgpu_ras.h" MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin"); @@ -1506,7 +1507,9 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) return -EAGAIN; } - adev->powerplay.pp_feature = amdgpu_pp_feature_mask; + adev->pm.pp_feature = amdgpu_pp_feature_mask; + if (amdgpu_sriov_vf(adev)) + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; for (i = 0; i < adev->num_ip_blocks; i++) { if ((amdgpu_ip_block_mask & (1 << i)) == 0) { @@ -1638,6 +1641,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) { int i, r; + r = amdgpu_ras_init(adev); + if (r) + return r; + for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; @@ -1681,6 +1688,13 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) } } + r = amdgpu_ib_pool_init(adev); + if (r) { + dev_err(adev->dev, "IB initialization failed (%d).\n", r); + amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r); + goto init_failed; + } + r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/ if (r) goto init_failed; @@ -1869,6 +1883,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) { int i, r; + amdgpu_ras_pre_fini(adev); + if (adev->gmc.xgmi.num_physical_nodes > 1) amdgpu_xgmi_remove_device(adev); @@ -1917,6 +1933,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) amdgpu_free_static_csa(&adev->virt.csa_obj); amdgpu_device_wb_fini(adev); amdgpu_device_vram_scratch_fini(adev); + amdgpu_ib_pool_fini(adev); } r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev); @@ -1937,6 +1954,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) adev->ip_blocks[i].status.late_initialized = false; } + amdgpu_ras_fini(adev); + if (amdgpu_sriov_vf(adev)) if (amdgpu_virt_release_full_gpu(adev, false)) DRM_ERROR("failed to release exclusive mode on fini\n"); @@ -1999,6 +2018,10 @@ static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work) r = amdgpu_device_enable_mgpu_fan_boost(); if (r) DRM_ERROR("enable mgpu fan boost failed (%d).\n", r); + + /*set to low pstate by default */ + amdgpu_xgmi_set_pstate(adev, 0); + } static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work) @@ -2369,7 +2392,7 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) adev->asic_reset_res = amdgpu_asic_reset(adev); if (adev->asic_reset_res) - DRM_WARN("ASIC reset failed with err r, %d for drm dev, %s", + DRM_WARN("ASIC reset failed with error, %d for drm dev, %s", adev->asic_reset_res, adev->ddev->unique); } @@ -2448,6 +2471,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->virt.vf_errors.lock); hash_init(adev->mn_hash); mutex_init(&adev->lock_reset); + mutex_init(&adev->virt.dpm_mutex); amdgpu_device_check_arguments(adev); @@ -2642,13 +2666,6 @@ fence_driver_init: /* Get a log2 for easy divisions. */ adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps)); - r = amdgpu_ib_pool_init(adev); - if (r) { - dev_err(adev->dev, "IB initialization failed (%d).\n", r); - amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r); - goto failed; - } - amdgpu_fbdev_init(adev); r = amdgpu_pm_sysfs_init(adev); @@ -2694,6 +2711,9 @@ fence_driver_init: goto failed; } + /* must succeed. */ + amdgpu_ras_post_init(adev); + return 0; failed: @@ -2726,7 +2746,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev) else drm_atomic_helper_shutdown(adev->ddev); } - amdgpu_ib_pool_fini(adev); amdgpu_fence_driver_fini(adev); amdgpu_pm_sysfs_fini(adev); amdgpu_fbdev_fini(adev); @@ -3225,6 +3244,8 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, if (r) return r; + amdgpu_amdkfd_pre_reset(adev); + /* Resume IP prior to SMC */ r = amdgpu_device_ip_reinit_early_sriov(adev); if (r) @@ -3244,6 +3265,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, amdgpu_irq_gpu_reset_resume_helper(adev); r = amdgpu_ib_ring_tests(adev); + amdgpu_amdkfd_post_reset(adev); error: amdgpu_virt_init_data_exchange(adev); @@ -3376,7 +3398,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, r = amdgpu_asic_reset(tmp_adev); if (r) { - DRM_ERROR("ASIC reset failed with err r, %d for drm dev, %s", + DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s", r, tmp_adev->ddev->unique); break; } @@ -3393,6 +3415,11 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, break; } } + + list_for_each_entry(tmp_adev, device_list_handle, + gmc.xgmi.head) { + amdgpu_ras_reserve_bad_pages(tmp_adev); + } } } @@ -3411,7 +3438,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, vram_lost = amdgpu_device_check_vram_lost(tmp_adev); if (vram_lost) { - DRM_ERROR("VRAM is lost!\n"); + DRM_INFO("VRAM is lost due to GPU reset!\n"); atomic_inc(&tmp_adev->vram_lost_counter); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index 344967df3137..523b8ab6b04e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c @@ -904,3 +904,19 @@ amdgpu_get_vce_clock_state(void *handle, u32 idx) return NULL; } + +int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low) +{ + if (is_support_sw_smu(adev)) + return smu_get_sclk(&adev->smu, low); + else + return (adev)->powerplay.pp_funcs->get_sclk((adev)->powerplay.pp_handle, (low)); +} + +int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low) +{ + if (is_support_sw_smu(adev)) + return smu_get_mclk(&adev->smu, low); + else + return (adev)->powerplay.pp_funcs->get_mclk((adev)->powerplay.pp_handle, (low)); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index e871e022c129..dca35407879d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -260,9 +260,6 @@ enum amdgpu_pcie_gen { #define amdgpu_dpm_enable_bapm(adev, e) \ ((adev)->powerplay.pp_funcs->enable_bapm((adev)->powerplay.pp_handle, (e))) -#define amdgpu_dpm_read_sensor(adev, idx, value, size) \ - ((adev)->powerplay.pp_funcs->read_sensor((adev)->powerplay.pp_handle, (idx), (value), (size))) - #define amdgpu_dpm_set_fan_control_mode(adev, m) \ ((adev)->powerplay.pp_funcs->set_fan_control_mode((adev)->powerplay.pp_handle, (m))) @@ -281,18 +278,18 @@ enum amdgpu_pcie_gen { #define amdgpu_dpm_set_fan_speed_rpm(adev, s) \ ((adev)->powerplay.pp_funcs->set_fan_speed_rpm)((adev)->powerplay.pp_handle, (s)) -#define amdgpu_dpm_get_sclk(adev, l) \ - ((adev)->powerplay.pp_funcs->get_sclk((adev)->powerplay.pp_handle, (l))) - -#define amdgpu_dpm_get_mclk(adev, l) \ - ((adev)->powerplay.pp_funcs->get_mclk((adev)->powerplay.pp_handle, (l))) - #define amdgpu_dpm_force_performance_level(adev, l) \ ((adev)->powerplay.pp_funcs->force_performance_level((adev)->powerplay.pp_handle, (l))) #define amdgpu_dpm_get_current_power_state(adev) \ ((adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle)) +#define amdgpu_smu_get_current_power_state(adev) \ + ((adev)->smu.ppt_funcs->get_current_power_state(&((adev)->smu))) + +#define amdgpu_smu_set_power_state(adev) \ + ((adev)->smu.ppt_funcs->set_power_state(&((adev)->smu))) + #define amdgpu_dpm_get_pp_num_states(adev, data) \ ((adev)->powerplay.pp_funcs->get_pp_num_states((adev)->powerplay.pp_handle, data)) @@ -448,6 +445,9 @@ struct amdgpu_pm { uint32_t smu_prv_buffer_size; struct amdgpu_bo *smu_prv_buffer; bool ac_power; + /* powerplay feature */ + uint32_t pp_feature; + }; #define R600_SSTU_DFLT 0 @@ -486,6 +486,8 @@ void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev, u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev); u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev); void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev); +int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors sensor, + void *data, uint32_t *size); bool amdgpu_is_internal_thermal_sensor(enum amdgpu_int_thermal_type sensor); @@ -504,4 +506,8 @@ enum amdgpu_pcie_gen amdgpu_get_pcie_gen_support(struct amdgpu_device *adev, struct amd_vce_state* amdgpu_get_vce_clock_state(void *handle, u32 idx); +extern int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low); + +extern int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 7419ea8a388b..1e2cc9d68a05 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -74,9 +74,11 @@ * - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES * - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID * - 3.30.0 - Add AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE. + * - 3.31.0 - Add support for per-flip tiling attribute changes with DC + * - 3.32.0 - Add syncobj timeline support to AMDGPU_CS. */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 30 +#define KMS_DRIVER_MINOR 32 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; @@ -117,8 +119,8 @@ uint amdgpu_pg_mask = 0xffffffff; uint amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu = NULL; char *amdgpu_virtual_display = NULL; -/* OverDrive(bit 14),gfxoff(bit 15),stutter mode(bit 17) disabled by default*/ -uint amdgpu_pp_feature_mask = 0xfffd3fff; +/* OverDrive(bit 14) disabled by default*/ +uint amdgpu_pp_feature_mask = 0xffffbfff; int amdgpu_ngg = 0; int amdgpu_prim_buf_per_se = 0; int amdgpu_pos_buf_per_se = 0; @@ -136,6 +138,8 @@ uint amdgpu_dc_feature_mask = 0; struct amdgpu_mgpu_info mgpu_info = { .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), }; +int amdgpu_ras_enable = -1; +uint amdgpu_ras_mask = 0xffffffff; /** * DOC: vramlimit (int) @@ -495,6 +499,21 @@ MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable)"); module_param_named(emu_mode, amdgpu_emu_mode, int, 0444); /** + * DOC: ras_enable (int) + * Enable RAS features on the GPU (0 = disable, 1 = enable, -1 = auto (default)) + */ +MODULE_PARM_DESC(ras_enable, "Enable RAS features on the GPU (0 = disable, 1 = enable, -1 = auto (default))"); +module_param_named(ras_enable, amdgpu_ras_enable, int, 0444); + +/** + * DOC: ras_mask (uint) + * Mask of RAS features to enable (default 0xffffffff), only valid when ras_enable == 1 + * See the flags in drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h + */ +MODULE_PARM_DESC(ras_mask, "Mask of RAS features to enable (default 0xffffffff), only valid when ras_enable == 1"); +module_param_named(ras_mask, amdgpu_ras_mask, uint, 0444); + +/** * DOC: si_support (int) * Set SI support driver. This parameter works after set config CONFIG_DRM_AMDGPU_SI. For SI asic, when radeon driver is enabled, * set value 0 to use radeon driver, while set value 1 to use amdgpu driver. The default is using radeon driver when it available, @@ -974,6 +993,7 @@ amdgpu_pci_remove(struct pci_dev *pdev) DRM_ERROR("Device removal is currently not supported outside of fbcon\n"); drm_dev_unplug(dev); + drm_dev_put(dev); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); } @@ -1158,13 +1178,14 @@ static int amdgpu_flush(struct file *f, fl_owner_t id) { struct drm_file *file_priv = f->private_data; struct amdgpu_fpriv *fpriv = file_priv->driver_priv; + long timeout = MAX_WAIT_SCHED_ENTITY_Q_EMPTY; - amdgpu_ctx_mgr_entity_flush(&fpriv->ctx_mgr); + timeout = amdgpu_ctx_mgr_entity_flush(&fpriv->ctx_mgr, timeout); + timeout = amdgpu_vm_wait_idle(&fpriv->vm, timeout); - return 0; + return timeout >= 0 ? 0 : timeout; } - static const struct file_operations amdgpu_driver_kms_fops = { .owner = THIS_MODULE, .open = drm_open, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 5cbde74b97dd..e47609218839 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -49,12 +49,11 @@ static int amdgpufb_open(struct fb_info *info, int user) { - struct amdgpu_fbdev *rfbdev = info->par; - struct amdgpu_device *adev = rfbdev->adev; - int ret = pm_runtime_get_sync(adev->ddev->dev); + struct drm_fb_helper *fb_helper = info->par; + int ret = pm_runtime_get_sync(fb_helper->dev->dev); if (ret < 0 && ret != -EACCES) { - pm_runtime_mark_last_busy(adev->ddev->dev); - pm_runtime_put_autosuspend(adev->ddev->dev); + pm_runtime_mark_last_busy(fb_helper->dev->dev); + pm_runtime_put_autosuspend(fb_helper->dev->dev); return ret; } return 0; @@ -63,11 +62,10 @@ amdgpufb_open(struct fb_info *info, int user) static int amdgpufb_release(struct fb_info *info, int user) { - struct amdgpu_fbdev *rfbdev = info->par; - struct amdgpu_device *adev = rfbdev->adev; + struct drm_fb_helper *fb_helper = info->par; - pm_runtime_mark_last_busy(adev->ddev->dev); - pm_runtime_put_autosuspend(adev->ddev->dev); + pm_runtime_mark_last_busy(fb_helper->dev->dev); + pm_runtime_put_autosuspend(fb_helper->dev->dev); return 0; } @@ -233,9 +231,6 @@ static int amdgpufb_create(struct drm_fb_helper *helper, goto out; } - info->par = rfbdev; - info->skip_vt_switch = true; - ret = amdgpu_display_framebuffer_init(adev->ddev, &rfbdev->rfb, &mode_cmd, gobj); if (ret) { @@ -248,10 +243,6 @@ static int amdgpufb_create(struct drm_fb_helper *helper, /* setup helper */ rfbdev->helper.fb = fb; - strcpy(info->fix.id, "amdgpudrmfb"); - - drm_fb_helper_fill_fix(info, fb->pitches[0], fb->format->depth); - info->fbops = &amdgpufb_ops; tmp = amdgpu_bo_gpu_offset(abo) - adev->gmc.vram_start; @@ -260,7 +251,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper, info->screen_base = amdgpu_bo_kptr(abo); info->screen_size = amdgpu_bo_size(abo); - drm_fb_helper_fill_var(info, &rfbdev->helper, sizes->fb_width, sizes->fb_height); + drm_fb_helper_fill_info(info, &rfbdev->helper, sizes); /* setup aperture base/size for vesafb takeover */ info->apertures->ranges[0].base = adev->ddev->mode_config.fb_base; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index ee47c11e92ce..4dee2326b29c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -136,8 +136,9 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, { struct amdgpu_device *adev = ring->adev; struct amdgpu_fence *fence; - struct dma_fence *old, **ptr; + struct dma_fence __rcu **ptr; uint32_t seq; + int r; fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL); if (fence == NULL) @@ -153,15 +154,24 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, seq, flags | AMDGPU_FENCE_FLAG_INT); ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; + if (unlikely(rcu_dereference_protected(*ptr, 1))) { + struct dma_fence *old; + + rcu_read_lock(); + old = dma_fence_get_rcu_safe(ptr); + rcu_read_unlock(); + + if (old) { + r = dma_fence_wait(old, false); + dma_fence_put(old); + if (r) + return r; + } + } + /* This function can't be called concurrently anyway, otherwise * emitting the fence would mess up the hardware ring buffer. */ - old = rcu_dereference_protected(*ptr, 1); - if (old && !dma_fence_is_signaled(old)) { - DRM_INFO("rcu slot is busy\n"); - dma_fence_wait(old, false); - } - rcu_assign_pointer(*ptr, dma_fence_get(&fence->base)); *f = &fence->base; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index d21dd2f369da..d4fcf5475464 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -31,6 +31,7 @@ #include <drm/amdgpu_drm.h> #include "amdgpu.h" #include "amdgpu_display.h" +#include "amdgpu_xgmi.h" void amdgpu_gem_object_free(struct drm_gem_object *gobj) { @@ -627,11 +628,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, switch (args->operation) { case AMDGPU_VA_OP_MAP: - r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address, - args->map_size); - if (r) - goto error_backoff; - va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags); r = amdgpu_vm_bo_map(adev, bo_va, args->va_address, args->offset_in_bo, args->map_size, @@ -647,11 +643,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, args->map_size); break; case AMDGPU_VA_OP_REPLACE: - r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address, - args->map_size); - if (r) - goto error_backoff; - va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags); r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address, args->offset_in_bo, args->map_size, @@ -678,6 +669,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, struct amdgpu_device *adev = dev->dev_private; struct drm_amdgpu_gem_op *args = data; struct drm_gem_object *gobj; + struct amdgpu_vm_bo_base *base; struct amdgpu_bo *robj; int r; @@ -716,6 +708,15 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, amdgpu_bo_unreserve(robj); break; } + for (base = robj->vm_bo; base; base = base->next) + if (amdgpu_xgmi_same_hive(amdgpu_ttm_adev(robj->tbo.bdev), + amdgpu_ttm_adev(base->vm->root.base.bo->tbo.bdev))) { + r = -EINVAL; + amdgpu_bo_unreserve(robj); + goto out; + } + + robj->preferred_domains = args->value & (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_CPU); @@ -745,17 +746,25 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, struct amdgpu_device *adev = dev->dev_private; struct drm_gem_object *gobj; uint32_t handle; + u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; u32 domain; int r; + /* + * The buffer returned from this function should be cleared, but + * it can only be done if the ring is enabled or we'll fail to + * create the buffer. + */ + if (adev->mman.buffer_funcs_enabled) + flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED; + args->pitch = amdgpu_align_pitch(adev, args->width, DIV_ROUND_UP(args->bpp, 8), 0); args->size = (u64)args->pitch * args->height; args->size = ALIGN(args->size, PAGE_SIZE); domain = amdgpu_bo_get_preferred_pin_domain(adev, amdgpu_display_supported_domains(adev)); - r = amdgpu_gem_object_create(adev, args->size, 0, domain, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, + r = amdgpu_gem_object_create(adev, args->size, 0, domain, flags, ttm_bo_type_device, NULL, &gobj); if (r) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 97a60da62004..997932ebbb83 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -390,7 +390,7 @@ void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev) void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) { - if (!(adev->powerplay.pp_feature & PP_GFXOFF_MASK)) + if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) return; if (!adev->powerplay.pp_funcs || !adev->powerplay.pp_funcs->set_powergating_by_smu) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index f790e15bcd08..09fc53af3d35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -258,6 +258,9 @@ struct amdgpu_gfx { /* pipe reservation */ struct mutex pipe_reserve_mutex; DECLARE_BITMAP (pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); + + /*ras */ + struct ras_common_if *ras_if; }; #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index d73367cab4f3..250d9212cc38 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -80,6 +80,33 @@ uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo) } /** + * amdgpu_gmc_set_pte_pde - update the page tables using CPU + * + * @adev: amdgpu_device pointer + * @cpu_pt_addr: cpu address of the page table + * @gpu_page_idx: entry in the page table to update + * @addr: dst addr to write into pte/pde + * @flags: access flags + * + * Update the page tables using CPU. + */ +int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, + uint32_t gpu_page_idx, uint64_t addr, + uint64_t flags) +{ + void __iomem *ptr = (void *)cpu_pt_addr; + uint64_t value; + + /* + * The following is for PTE only. GART does not have PDEs. + */ + value = addr & 0x0000FFFFFFFFF000ULL; + value |= flags; + writeq(value, ptr + (gpu_page_idx * 8)); + return 0; +} + +/** * amdgpu_gmc_agp_addr - return the address in the AGP address space * * @tbo: TTM BO which needs the address, must be in GTT domain @@ -213,3 +240,58 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc) dev_info(adev->dev, "AGP: %lluM 0x%016llX - 0x%016llX\n", mc->agp_size >> 20, mc->agp_start, mc->agp_end); } + +/** + * amdgpu_gmc_filter_faults - filter VM faults + * + * @adev: amdgpu device structure + * @addr: address of the VM fault + * @pasid: PASID of the process causing the fault + * @timestamp: timestamp of the fault + * + * Returns: + * True if the fault was filtered and should not be processed further. + * False if the fault is a new one and needs to be handled. + */ +bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, + uint16_t pasid, uint64_t timestamp) +{ + struct amdgpu_gmc *gmc = &adev->gmc; + + uint64_t stamp, key = addr << 4 | pasid; + struct amdgpu_gmc_fault *fault; + uint32_t hash; + + /* If we don't have space left in the ring buffer return immediately */ + stamp = max(timestamp, AMDGPU_GMC_FAULT_TIMEOUT + 1) - + AMDGPU_GMC_FAULT_TIMEOUT; + if (gmc->fault_ring[gmc->last_fault].timestamp >= stamp) + return true; + + /* Try to find the fault in the hash */ + hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER); + fault = &gmc->fault_ring[gmc->fault_hash[hash].idx]; + while (fault->timestamp >= stamp) { + uint64_t tmp; + + if (fault->key == key) + return true; + + tmp = fault->timestamp; + fault = &gmc->fault_ring[fault->next]; + + /* Check if the entry was reused */ + if (fault->timestamp >= tmp) + break; + } + + /* Add the fault to the ring */ + fault = &gmc->fault_ring[gmc->last_fault]; + fault->key = key; + fault->timestamp = timestamp; + + /* And update the hash */ + fault->next = gmc->fault_hash[hash].idx; + gmc->fault_hash[hash].idx = gmc->last_fault++; + return false; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 81e6070d255b..071145ac67b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -43,9 +43,35 @@ */ #define AMDGPU_GMC_HOLE_MASK 0x0000ffffffffffffULL +/* + * Ring size as power of two for the log of recent faults. + */ +#define AMDGPU_GMC_FAULT_RING_ORDER 8 +#define AMDGPU_GMC_FAULT_RING_SIZE (1 << AMDGPU_GMC_FAULT_RING_ORDER) + +/* + * Hash size as power of two for the log of recent faults + */ +#define AMDGPU_GMC_FAULT_HASH_ORDER 8 +#define AMDGPU_GMC_FAULT_HASH_SIZE (1 << AMDGPU_GMC_FAULT_HASH_ORDER) + +/* + * Number of IH timestamp ticks until a fault is considered handled + */ +#define AMDGPU_GMC_FAULT_TIMEOUT 5000ULL + struct firmware; /* + * GMC page fault information + */ +struct amdgpu_gmc_fault { + uint64_t timestamp; + uint64_t next:AMDGPU_GMC_FAULT_RING_ORDER; + uint64_t key:52; +}; + +/* * VMHUB structures, functions & helpers */ struct amdgpu_vmhub { @@ -71,12 +97,6 @@ struct amdgpu_gmc_funcs { /* Change the VMID -> PASID mapping */ void (*emit_pasid_mapping)(struct amdgpu_ring *ring, unsigned vmid, unsigned pasid); - /* write pte/pde updates using the cpu */ - int (*set_pte_pde)(struct amdgpu_device *adev, - void *cpu_pt_addr, /* cpu addr of page table */ - uint32_t gpu_page_idx, /* pte/pde to update */ - uint64_t addr, /* addr to write into pte/pde */ - uint64_t flags); /* access flags */ /* enable/disable PRT support */ void (*set_prt)(struct amdgpu_device *adev, bool enable); /* set pte flags based per asic */ @@ -147,15 +167,22 @@ struct amdgpu_gmc { struct kfd_vm_fault_info *vm_fault_info; atomic_t vm_fault_info_updated; + struct amdgpu_gmc_fault fault_ring[AMDGPU_GMC_FAULT_RING_SIZE]; + struct { + uint64_t idx:AMDGPU_GMC_FAULT_RING_ORDER; + } fault_hash[AMDGPU_GMC_FAULT_HASH_SIZE]; + uint64_t last_fault:AMDGPU_GMC_FAULT_RING_ORDER; + const struct amdgpu_gmc_funcs *gmc_funcs; struct amdgpu_xgmi xgmi; + struct amdgpu_irq_src ecc_irq; + struct ras_common_if *ras_if; }; #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type)) #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) -#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)) #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags)) #define amdgpu_gmc_get_pte_flags(adev, flags) (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags)) @@ -189,6 +216,9 @@ static inline uint64_t amdgpu_gmc_sign_extend(uint64_t addr) void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level, uint64_t *addr, uint64_t *flags); +int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, + uint32_t gpu_page_idx, uint64_t addr, + uint64_t flags); uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo); uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo); void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc, @@ -197,5 +227,7 @@ void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc); void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc); +bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, + uint16_t pasid, uint64_t timestamp); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index da7b1b92d9cf..62591d081856 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -37,6 +37,47 @@ struct amdgpu_gtt_node { }; /** + * DOC: mem_info_gtt_total + * + * The amdgpu driver provides a sysfs API for reporting current total size of + * the GTT. + * The file mem_info_gtt_total is used for this, and returns the total size of + * the GTT block, in bytes + */ +static ssize_t amdgpu_mem_info_gtt_total_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + return snprintf(buf, PAGE_SIZE, "%llu\n", + (adev->mman.bdev.man[TTM_PL_TT].size) * PAGE_SIZE); +} + +/** + * DOC: mem_info_gtt_used + * + * The amdgpu driver provides a sysfs API for reporting current total amount of + * used GTT. + * The file mem_info_gtt_used is used for this, and returns the current used + * size of the GTT block, in bytes + */ +static ssize_t amdgpu_mem_info_gtt_used_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + return snprintf(buf, PAGE_SIZE, "%llu\n", + amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT])); +} + +static DEVICE_ATTR(mem_info_gtt_total, S_IRUGO, + amdgpu_mem_info_gtt_total_show, NULL); +static DEVICE_ATTR(mem_info_gtt_used, S_IRUGO, + amdgpu_mem_info_gtt_used_show, NULL); + +/** * amdgpu_gtt_mgr_init - init GTT manager and DRM MM * * @man: TTM memory type manager @@ -50,6 +91,7 @@ static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man, struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); struct amdgpu_gtt_mgr *mgr; uint64_t start, size; + int ret; mgr = kzalloc(sizeof(*mgr), GFP_KERNEL); if (!mgr) @@ -61,6 +103,18 @@ static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man, spin_lock_init(&mgr->lock); atomic64_set(&mgr->available, p_size); man->priv = mgr; + + ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_total); + if (ret) { + DRM_ERROR("Failed to create device file mem_info_gtt_total\n"); + return ret; + } + ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_used); + if (ret) { + DRM_ERROR("Failed to create device file mem_info_gtt_used\n"); + return ret; + } + return 0; } @@ -74,12 +128,17 @@ static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man, */ static int amdgpu_gtt_mgr_fini(struct ttm_mem_type_manager *man) { + struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); struct amdgpu_gtt_mgr *mgr = man->priv; spin_lock(&mgr->lock); drm_mm_takedown(&mgr->mm); spin_unlock(&mgr->lock); kfree(mgr); man->priv = NULL; + + device_remove_file(adev->dev, &dev_attr_mem_info_gtt_total); + device_remove_file(adev->dev, &dev_attr_mem_info_gtt_used); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index 1c50be3ab8a9..934dfdcb4e73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -142,6 +142,7 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) */ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) { + unsigned int count = AMDGPU_IH_MAX_NUM_IVS; u32 wptr; if (!ih->enabled || adev->shutdown) @@ -159,7 +160,7 @@ restart_ih: /* Order reading of wptr vs. reading of IH ring data */ rmb(); - while (ih->rptr != wptr) { + while (ih->rptr != wptr && --count) { amdgpu_irq_dispatch(adev, ih); ih->rptr &= ih->ptr_mask; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index 113a1ba13d4a..4e0bb645176d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -24,6 +24,9 @@ #ifndef __AMDGPU_IH_H__ #define __AMDGPU_IH_H__ +/* Maximum number of IVs processed at once */ +#define AMDGPU_IH_MAX_NUM_IVS 32 + struct amdgpu_device; struct amdgpu_iv_entry; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index e860412043bb..b17d0545728e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -39,6 +39,7 @@ #include "amdgpu_amdkfd.h" #include "amdgpu_gem.h" #include "amdgpu_display.h" +#include "amdgpu_ras.h" static void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev) { @@ -296,6 +297,17 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->ver = adev->pm.fw_version; fw_info->feature = 0; break; + case AMDGPU_INFO_FW_TA: + if (query_fw->index > 1) + return -EINVAL; + if (query_fw->index == 0) { + fw_info->ver = adev->psp.ta_fw_version; + fw_info->feature = adev->psp.ta_xgmi_ucode_version; + } else { + fw_info->ver = adev->psp.ta_fw_version; + fw_info->feature = adev->psp.ta_ras_ucode_version; + } + break; case AMDGPU_INFO_FW_SDMA: if (query_fw->index >= adev->sdma.num_instances) return -EINVAL; @@ -684,6 +696,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file if (adev->pm.dpm_enabled) { dev_info.max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 10; dev_info.max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 10; + } else if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev) && + adev->virt.ops->get_pp_clk) { + dev_info.max_engine_clock = amdgpu_virt_get_sclk(adev, false) * 10; + dev_info.max_memory_clock = amdgpu_virt_get_mclk(adev, false) * 10; } else { dev_info.max_engine_clock = adev->clock.default_sclk * 10; dev_info.max_memory_clock = adev->clock.default_mclk * 10; @@ -909,6 +925,18 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file case AMDGPU_INFO_VRAM_LOST_COUNTER: ui32 = atomic_read(&adev->vram_lost_counter); return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0; + case AMDGPU_INFO_RAS_ENABLED_FEATURES: { + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + uint64_t ras_mask; + + if (!ras) + return -EINVAL; + ras_mask = (uint64_t)ras->supported << 32 | ras->features; + + return copy_to_user(out, &ras_mask, + min_t(u64, size, sizeof(ras_mask))) ? + -EFAULT : 0; + } default: DRM_DEBUG_KMS("Invalid request %d\n", info->query); return -EINVAL; @@ -1328,6 +1356,16 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data) seq_printf(m, "ASD feature version: %u, firmware version: 0x%08x\n", fw_info.feature, fw_info.ver); + query_fw.fw_type = AMDGPU_INFO_FW_TA; + for (i = 0; i < 2; i++) { + query_fw.index = i; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + continue; + seq_printf(m, "TA %s feature version: %u, firmware version: 0x%08x\n", + i ? "RAS" : "XGMI", fw_info.feature, fw_info.ver); + } + /* SMC */ query_fw.fw_type = AMDGPU_INFO_FW_SMC; ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 889e443eeee7..2e9e3db778c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -58,7 +58,7 @@ struct amdgpu_hpd; #define to_amdgpu_encoder(x) container_of(x, struct amdgpu_encoder, base) #define to_amdgpu_framebuffer(x) container_of(x, struct amdgpu_framebuffer, base) -#define to_dm_plane_state(x) container_of(x, struct dm_plane_state, base); +#define to_dm_plane_state(x) container_of(x, struct dm_plane_state, base) #define AMDGPU_MAX_HPD_PINS 6 #define AMDGPU_MAX_CRTCS 6 @@ -406,7 +406,7 @@ struct amdgpu_crtc { struct amdgpu_flip_work *pflip_works; enum amdgpu_flip_status pflip_status; int deferred_flip_completion; - u64 last_flip_vblank; + u32 last_flip_vblank; /* pll sharing */ struct amdgpu_atom_ss ss; bool ss_enabled; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index ec9e45004bff..93b2c5a48a71 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -88,12 +88,14 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo) if (bo->gem_base.import_attach) drm_prime_gem_destroy(&bo->gem_base, bo->tbo.sg); drm_gem_object_release(&bo->gem_base); - amdgpu_bo_unref(&bo->parent); + /* in case amdgpu_device_recover_vram got NULL of bo->parent */ if (!list_empty(&bo->shadow_list)) { mutex_lock(&adev->shadow_list_lock); list_del_init(&bo->shadow_list); mutex_unlock(&adev->shadow_list_lock); } + amdgpu_bo_unref(&bo->parent); + kfree(bo->metadata); kfree(bo); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 220a6a7b1bc1..c430e8259038 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -72,6 +72,8 @@ struct amdgpu_bo_va { /* If the mappings are cleared or filled */ bool cleared; + + bool is_xgmi; }; struct amdgpu_bo { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index a7adb7b6bd98..95144e49c7f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -28,6 +28,7 @@ #include "amdgpu_pm.h" #include "amdgpu_dpm.h" #include "amdgpu_display.h" +#include "amdgpu_smu.h" #include "atom.h" #include <linux/power_supply.h> #include <linux/hwmon.h> @@ -80,6 +81,27 @@ void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev) } } +int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors sensor, + void *data, uint32_t *size) +{ + int ret = 0; + + if (!data || !size) + return -EINVAL; + + if (is_support_sw_smu(adev)) + ret = smu_read_sensor(&adev->smu, sensor, data, size); + else { + if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->read_sensor) + ret = adev->powerplay.pp_funcs->read_sensor((adev)->powerplay.pp_handle, + sensor, data, size); + else + ret = -EINVAL; + } + + return ret; +} + /** * DOC: power_dpm_state * @@ -122,7 +144,9 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; enum amd_pm_state_type pm; - if (adev->powerplay.pp_funcs->get_current_power_state) + if (is_support_sw_smu(adev) && adev->smu.ppt_funcs->get_current_power_state) + pm = amdgpu_smu_get_current_power_state(adev); + else if (adev->powerplay.pp_funcs->get_current_power_state) pm = amdgpu_dpm_get_current_power_state(adev); else pm = adev->pm.dpm.user_state; @@ -240,7 +264,9 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev, (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return snprintf(buf, PAGE_SIZE, "off\n"); - if (adev->powerplay.pp_funcs->get_performance_level) + if (is_support_sw_smu(adev)) + level = smu_get_performance_level(&adev->smu); + else if (adev->powerplay.pp_funcs->get_performance_level) level = amdgpu_dpm_get_performance_level(adev); else level = adev->pm.dpm.forced_level; @@ -273,7 +299,9 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - if (adev->powerplay.pp_funcs->get_performance_level) + if (is_support_sw_smu(adev)) + current_level = smu_get_performance_level(&adev->smu); + else if (adev->powerplay.pp_funcs->get_performance_level) current_level = amdgpu_dpm_get_performance_level(adev); if (strncmp("low", buf, strlen("low")) == 0) { @@ -299,10 +327,35 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, goto fail; } + if (amdgpu_sriov_vf(adev)) { + if (amdgim_is_hwperf(adev) && + adev->virt.ops->force_dpm_level) { + mutex_lock(&adev->pm.mutex); + adev->virt.ops->force_dpm_level(adev, level); + mutex_unlock(&adev->pm.mutex); + return count; + } else { + return -EINVAL; + } + } + if (current_level == level) return count; - if (adev->powerplay.pp_funcs->force_performance_level) { + if (is_support_sw_smu(adev)) { + mutex_lock(&adev->pm.mutex); + if (adev->pm.dpm.thermal_active) { + count = -EINVAL; + mutex_unlock(&adev->pm.mutex); + goto fail; + } + ret = smu_force_performance_level(&adev->smu, level); + if (ret) + count = -EINVAL; + else + adev->pm.dpm.forced_level = level; + mutex_unlock(&adev->pm.mutex); + } else if (adev->powerplay.pp_funcs->force_performance_level) { mutex_lock(&adev->pm.mutex); if (adev->pm.dpm.thermal_active) { count = -EINVAL; @@ -328,9 +381,13 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; struct pp_states_info data; - int i, buf_len; + int i, buf_len, ret; - if (adev->powerplay.pp_funcs->get_pp_num_states) + if (is_support_sw_smu(adev)) { + ret = smu_get_power_num_states(&adev->smu, &data); + if (ret) + return ret; + } else if (adev->powerplay.pp_funcs->get_pp_num_states) amdgpu_dpm_get_pp_num_states(adev, &data); buf_len = snprintf(buf, PAGE_SIZE, "states: %d\n", data.nums); @@ -351,23 +408,29 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; struct pp_states_info data; + struct smu_context *smu = &adev->smu; enum amd_pm_state_type pm = 0; - int i = 0; + int i = 0, ret = 0; - if (adev->powerplay.pp_funcs->get_current_power_state + if (is_support_sw_smu(adev)) { + pm = smu_get_current_power_state(smu); + ret = smu_get_power_num_states(smu, &data); + if (ret) + return ret; + } else if (adev->powerplay.pp_funcs->get_current_power_state && adev->powerplay.pp_funcs->get_pp_num_states) { pm = amdgpu_dpm_get_current_power_state(adev); amdgpu_dpm_get_pp_num_states(adev, &data); + } - for (i = 0; i < data.nums; i++) { - if (pm == data.states[i]) - break; - } - - if (i == data.nums) - i = -EINVAL; + for (i = 0; i < data.nums; i++) { + if (pm == data.states[i]) + break; } + if (i == data.nums) + i = -EINVAL; + return snprintf(buf, PAGE_SIZE, "%d\n", i); } @@ -397,6 +460,8 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, if (strlen(buf) == 1) adev->pp_force_state_enabled = false; + else if (is_support_sw_smu(adev)) + adev->pp_force_state_enabled = false; else if (adev->powerplay.pp_funcs->dispatch_tasks && adev->powerplay.pp_funcs->get_pp_num_states) { struct pp_states_info data; @@ -442,7 +507,12 @@ static ssize_t amdgpu_get_pp_table(struct device *dev, char *table = NULL; int size; - if (adev->powerplay.pp_funcs->get_pp_table) + if (is_support_sw_smu(adev)) { + size = smu_sys_get_pp_table(&adev->smu, (void **)&table); + if (size < 0) + return size; + } + else if (adev->powerplay.pp_funcs->get_pp_table) size = amdgpu_dpm_get_pp_table(adev, &table); else return 0; @@ -462,8 +532,13 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + int ret = 0; - if (adev->powerplay.pp_funcs->set_pp_table) + if (is_support_sw_smu(adev)) { + ret = smu_sys_set_pp_table(&adev->smu, (void *)buf, count); + if (ret) + return ret; + } else if (adev->powerplay.pp_funcs->set_pp_table) amdgpu_dpm_set_pp_table(adev, buf, count); return count; @@ -586,19 +661,29 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, tmp_str++; } - if (adev->powerplay.pp_funcs->odn_edit_dpm_table) - ret = amdgpu_dpm_odn_edit_dpm_table(adev, type, - parameter, parameter_size); + if (is_support_sw_smu(adev)) { + ret = smu_od_edit_dpm_table(&adev->smu, type, + parameter, parameter_size); - if (ret) - return -EINVAL; + if (ret) + return -EINVAL; + } else { + if (adev->powerplay.pp_funcs->odn_edit_dpm_table) + ret = amdgpu_dpm_odn_edit_dpm_table(adev, type, + parameter, parameter_size); - if (type == PP_OD_COMMIT_DPM_TABLE) { - if (adev->powerplay.pp_funcs->dispatch_tasks) { - amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL); - return count; - } else { + if (ret) return -EINVAL; + + if (type == PP_OD_COMMIT_DPM_TABLE) { + if (adev->powerplay.pp_funcs->dispatch_tasks) { + amdgpu_dpm_dispatch_task(adev, + AMD_PP_TASK_READJUST_POWER_STATE, + NULL); + return count; + } else { + return -EINVAL; + } } } @@ -613,7 +698,13 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; uint32_t size = 0; - if (adev->powerplay.pp_funcs->print_clock_levels) { + if (is_support_sw_smu(adev)) { + size = smu_print_clk_levels(&adev->smu, OD_SCLK, buf); + size += smu_print_clk_levels(&adev->smu, OD_MCLK, buf+size); + size += smu_print_clk_levels(&adev->smu, OD_VDDC_CURVE, buf+size); + size += smu_print_clk_levels(&adev->smu, OD_RANGE, buf+size); + return size; + } else if (adev->powerplay.pp_funcs->print_clock_levels) { size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf); size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size); size += amdgpu_dpm_print_clock_levels(adev, OD_VDDC_CURVE, buf+size); @@ -711,7 +802,13 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - if (adev->powerplay.pp_funcs->print_clock_levels) + if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev) && + adev->virt.ops->get_pp_clk) + return adev->virt.ops->get_pp_clk(adev, PP_SCLK, buf); + + if (is_support_sw_smu(adev)) + return smu_print_clk_levels(&adev->smu, PP_SCLK, buf); + else if (adev->powerplay.pp_funcs->print_clock_levels) return amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf); else return snprintf(buf, PAGE_SIZE, "\n"); @@ -767,7 +864,9 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, if (ret) return ret; - if (adev->powerplay.pp_funcs->force_clock_level) + if (is_support_sw_smu(adev)) + ret = smu_force_clk_levels(&adev->smu, PP_SCLK, mask); + else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); if (ret) @@ -783,7 +882,9 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - if (adev->powerplay.pp_funcs->print_clock_levels) + if (is_support_sw_smu(adev)) + return smu_print_clk_levels(&adev->smu, PP_MCLK, buf); + else if (adev->powerplay.pp_funcs->print_clock_levels) return amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf); else return snprintf(buf, PAGE_SIZE, "\n"); @@ -803,7 +904,9 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, if (ret) return ret; - if (adev->powerplay.pp_funcs->force_clock_level) + if (is_support_sw_smu(adev)) + ret = smu_force_clk_levels(&adev->smu, PP_MCLK, mask); + else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); if (ret) @@ -819,7 +922,9 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - if (adev->powerplay.pp_funcs->print_clock_levels) + if (is_support_sw_smu(adev)) + return smu_print_clk_levels(&adev->smu, PP_SOCCLK, buf); + else if (adev->powerplay.pp_funcs->print_clock_levels) return amdgpu_dpm_print_clock_levels(adev, PP_SOCCLK, buf); else return snprintf(buf, PAGE_SIZE, "\n"); @@ -839,7 +944,9 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev, if (ret) return ret; - if (adev->powerplay.pp_funcs->force_clock_level) + if (is_support_sw_smu(adev)) + ret = smu_force_clk_levels(&adev->smu, PP_SOCCLK, mask); + else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_SOCCLK, mask); if (ret) @@ -855,7 +962,9 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - if (adev->powerplay.pp_funcs->print_clock_levels) + if (is_support_sw_smu(adev)) + return smu_print_clk_levels(&adev->smu, PP_FCLK, buf); + else if (adev->powerplay.pp_funcs->print_clock_levels) return amdgpu_dpm_print_clock_levels(adev, PP_FCLK, buf); else return snprintf(buf, PAGE_SIZE, "\n"); @@ -875,7 +984,9 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev, if (ret) return ret; - if (adev->powerplay.pp_funcs->force_clock_level) + if (is_support_sw_smu(adev)) + ret = smu_force_clk_levels(&adev->smu, PP_FCLK, mask); + else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_FCLK, mask); if (ret) @@ -891,7 +1002,9 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - if (adev->powerplay.pp_funcs->print_clock_levels) + if (is_support_sw_smu(adev)) + return smu_print_clk_levels(&adev->smu, PP_DCEFCLK, buf); + else if (adev->powerplay.pp_funcs->print_clock_levels) return amdgpu_dpm_print_clock_levels(adev, PP_DCEFCLK, buf); else return snprintf(buf, PAGE_SIZE, "\n"); @@ -911,7 +1024,9 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev, if (ret) return ret; - if (adev->powerplay.pp_funcs->force_clock_level) + if (is_support_sw_smu(adev)) + ret = smu_force_clk_levels(&adev->smu, PP_DCEFCLK, mask); + else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_DCEFCLK, mask); if (ret) @@ -927,7 +1042,9 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - if (adev->powerplay.pp_funcs->print_clock_levels) + if (is_support_sw_smu(adev)) + return smu_print_clk_levels(&adev->smu, PP_PCIE, buf); + else if (adev->powerplay.pp_funcs->print_clock_levels) return amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf); else return snprintf(buf, PAGE_SIZE, "\n"); @@ -947,7 +1064,9 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, if (ret) return ret; - if (adev->powerplay.pp_funcs->force_clock_level) + if (is_support_sw_smu(adev)) + ret = smu_force_clk_levels(&adev->smu, PP_PCIE, mask); + else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask); if (ret) @@ -964,7 +1083,9 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; uint32_t value = 0; - if (adev->powerplay.pp_funcs->get_sclk_od) + if (is_support_sw_smu(adev)) + value = smu_get_od_percentage(&(adev->smu), OD_SCLK); + else if (adev->powerplay.pp_funcs->get_sclk_od) value = amdgpu_dpm_get_sclk_od(adev); return snprintf(buf, PAGE_SIZE, "%d\n", value); @@ -986,14 +1107,19 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev, count = -EINVAL; goto fail; } - if (adev->powerplay.pp_funcs->set_sclk_od) - amdgpu_dpm_set_sclk_od(adev, (uint32_t)value); - if (adev->powerplay.pp_funcs->dispatch_tasks) { - amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL); + if (is_support_sw_smu(adev)) { + value = smu_set_od_percentage(&(adev->smu), OD_SCLK, (uint32_t)value); } else { - adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps; - amdgpu_pm_compute_clocks(adev); + if (adev->powerplay.pp_funcs->set_sclk_od) + amdgpu_dpm_set_sclk_od(adev, (uint32_t)value); + + if (adev->powerplay.pp_funcs->dispatch_tasks) { + amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL); + } else { + adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps; + amdgpu_pm_compute_clocks(adev); + } } fail: @@ -1008,7 +1134,9 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; uint32_t value = 0; - if (adev->powerplay.pp_funcs->get_mclk_od) + if (is_support_sw_smu(adev)) + value = smu_get_od_percentage(&(adev->smu), OD_MCLK); + else if (adev->powerplay.pp_funcs->get_mclk_od) value = amdgpu_dpm_get_mclk_od(adev); return snprintf(buf, PAGE_SIZE, "%d\n", value); @@ -1030,14 +1158,19 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev, count = -EINVAL; goto fail; } - if (adev->powerplay.pp_funcs->set_mclk_od) - amdgpu_dpm_set_mclk_od(adev, (uint32_t)value); - if (adev->powerplay.pp_funcs->dispatch_tasks) { - amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL); + if (is_support_sw_smu(adev)) { + value = smu_set_od_percentage(&(adev->smu), OD_MCLK, (uint32_t)value); } else { - adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps; - amdgpu_pm_compute_clocks(adev); + if (adev->powerplay.pp_funcs->set_mclk_od) + amdgpu_dpm_set_mclk_od(adev, (uint32_t)value); + + if (adev->powerplay.pp_funcs->dispatch_tasks) { + amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL); + } else { + adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps; + amdgpu_pm_compute_clocks(adev); + } } fail: @@ -1071,7 +1204,9 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - if (adev->powerplay.pp_funcs->get_power_profile_mode) + if (is_support_sw_smu(adev)) + return smu_get_power_profile_mode(&adev->smu, buf); + else if (adev->powerplay.pp_funcs->get_power_profile_mode) return amdgpu_dpm_get_power_profile_mode(adev, buf); return snprintf(buf, PAGE_SIZE, "\n"); @@ -1121,9 +1256,10 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, } } parameter[parameter_size] = profile_mode; - if (adev->powerplay.pp_funcs->set_power_profile_mode) + if (is_support_sw_smu(adev)) + ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size); + else if (adev->powerplay.pp_funcs->set_power_profile_mode) ret = amdgpu_dpm_set_power_profile_mode(adev, parameter, parameter_size); - if (!ret) return count; fail: @@ -1146,14 +1282,10 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; int r, value, size = sizeof(value); - /* sanity check PP is enabled */ - if (!(adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->read_sensor)) - return -EINVAL; - /* read the IP busy sensor */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size); + if (r) return r; @@ -1247,11 +1379,6 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - /* sanity check PP is enabled */ - if (!(adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->read_sensor)) - return -EINVAL; - /* get the temperature */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP, (void *)&temp, &size); @@ -1283,11 +1410,14 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); u32 pwm_mode = 0; + if (is_support_sw_smu(adev)) { + pwm_mode = smu_get_fan_control_mode(&adev->smu); + } else { + if (!adev->powerplay.pp_funcs->get_fan_control_mode) + return -EINVAL; - if (!adev->powerplay.pp_funcs->get_fan_control_mode) - return -EINVAL; - - pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); + pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); + } return sprintf(buf, "%i\n", pwm_mode); } @@ -1306,14 +1436,22 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - if (!adev->powerplay.pp_funcs->set_fan_control_mode) - return -EINVAL; + if (is_support_sw_smu(adev)) { + err = kstrtoint(buf, 10, &value); + if (err) + return err; - err = kstrtoint(buf, 10, &value); - if (err) - return err; + smu_set_fan_control_mode(&adev->smu, value); + } else { + if (!adev->powerplay.pp_funcs->set_fan_control_mode) + return -EINVAL; + + err = kstrtoint(buf, 10, &value); + if (err) + return err; - amdgpu_dpm_set_fan_control_mode(adev, value); + amdgpu_dpm_set_fan_control_mode(adev, value); + } return count; } @@ -1345,8 +1483,10 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev, if ((adev->flags & AMD_IS_PX) && (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - - pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); + if (is_support_sw_smu(adev)) + pwm_mode = smu_get_fan_control_mode(&adev->smu); + else + pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); if (pwm_mode != AMD_FAN_CTRL_MANUAL) { pr_info("manual fan speed control should be enabled first\n"); return -EINVAL; @@ -1358,7 +1498,11 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev, value = (value * 100) / 255; - if (adev->powerplay.pp_funcs->set_fan_speed_percent) { + if (is_support_sw_smu(adev)) { + err = smu_set_fan_speed_percent(&adev->smu, value); + if (err) + return err; + } else if (adev->powerplay.pp_funcs->set_fan_speed_percent) { err = amdgpu_dpm_set_fan_speed_percent(adev, value); if (err) return err; @@ -1380,7 +1524,11 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev, (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - if (adev->powerplay.pp_funcs->get_fan_speed_percent) { + if (is_support_sw_smu(adev)) { + err = smu_get_fan_speed_percent(&adev->smu, &speed); + if (err) + return err; + } else if (adev->powerplay.pp_funcs->get_fan_speed_percent) { err = amdgpu_dpm_get_fan_speed_percent(adev, &speed); if (err) return err; @@ -1404,7 +1552,11 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev, (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - if (adev->powerplay.pp_funcs->get_fan_speed_rpm) { + if (is_support_sw_smu(adev)) { + err = smu_get_current_rpm(&adev->smu, &speed); + if (err) + return err; + } else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) { err = amdgpu_dpm_get_fan_speed_rpm(adev, &speed); if (err) return err; @@ -1422,9 +1574,6 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev, u32 size = sizeof(min_rpm); int r; - if (!adev->powerplay.pp_funcs->read_sensor) - return -EINVAL; - r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM, (void *)&min_rpm, &size); if (r) @@ -1442,9 +1591,6 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev, u32 size = sizeof(max_rpm); int r; - if (!adev->powerplay.pp_funcs->read_sensor) - return -EINVAL; - r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM, (void *)&max_rpm, &size); if (r) @@ -1466,7 +1612,11 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev, (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - if (adev->powerplay.pp_funcs->get_fan_speed_rpm) { + if (is_support_sw_smu(adev)) { + err = smu_get_current_rpm(&adev->smu, &rpm); + if (err) + return err; + } else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) { err = amdgpu_dpm_get_fan_speed_rpm(adev, &rpm); if (err) return err; @@ -1484,7 +1634,11 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev, u32 value; u32 pwm_mode; - pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); + if (is_support_sw_smu(adev)) + pwm_mode = smu_get_fan_control_mode(&adev->smu); + else + pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); + if (pwm_mode != AMD_FAN_CTRL_MANUAL) return -ENODATA; @@ -1497,7 +1651,11 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev, if (err) return err; - if (adev->powerplay.pp_funcs->set_fan_speed_rpm) { + if (is_support_sw_smu(adev)) { + err = smu_set_fan_speed_rpm(&adev->smu, value); + if (err) + return err; + } else if (adev->powerplay.pp_funcs->set_fan_speed_rpm) { err = amdgpu_dpm_set_fan_speed_rpm(adev, value); if (err) return err; @@ -1513,11 +1671,14 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev, struct amdgpu_device *adev = dev_get_drvdata(dev); u32 pwm_mode = 0; - if (!adev->powerplay.pp_funcs->get_fan_control_mode) - return -EINVAL; - - pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); + if (is_support_sw_smu(adev)) { + pwm_mode = smu_get_fan_control_mode(&adev->smu); + } else { + if (!adev->powerplay.pp_funcs->get_fan_control_mode) + return -EINVAL; + pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); + } return sprintf(buf, "%i\n", pwm_mode == AMD_FAN_CTRL_AUTO ? 0 : 1); } @@ -1536,8 +1697,6 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev, (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - if (!adev->powerplay.pp_funcs->set_fan_control_mode) - return -EINVAL; err = kstrtoint(buf, 10, &value); if (err) @@ -1550,7 +1709,13 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev, else return -EINVAL; - amdgpu_dpm_set_fan_control_mode(adev, pwm_mode); + if (is_support_sw_smu(adev)) { + smu_set_fan_control_mode(&adev->smu, pwm_mode); + } else { + if (!adev->powerplay.pp_funcs->set_fan_control_mode) + return -EINVAL; + amdgpu_dpm_set_fan_control_mode(adev, pwm_mode); + } return count; } @@ -1569,11 +1734,6 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev, (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - /* sanity check PP is enabled */ - if (!(adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->read_sensor)) - return -EINVAL; - /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX, (void *)&vddgfx, &size); @@ -1608,11 +1768,6 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev, (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - /* sanity check PP is enabled */ - if (!(adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->read_sensor)) - return -EINVAL; - /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&vddnb, &size); @@ -1644,11 +1799,6 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev, (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - /* sanity check PP is enabled */ - if (!(adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->read_sensor)) - return -EINVAL; - /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size); @@ -1675,7 +1825,10 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev, struct amdgpu_device *adev = dev_get_drvdata(dev); uint32_t limit = 0; - if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) { + if (is_support_sw_smu(adev)) { + smu_get_power_limit(&adev->smu, &limit, true); + return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); + } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) { adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, true); return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); } else { @@ -1690,7 +1843,10 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev, struct amdgpu_device *adev = dev_get_drvdata(dev); uint32_t limit = 0; - if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) { + if (is_support_sw_smu(adev)) { + smu_get_power_limit(&adev->smu, &limit, false); + return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); + } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) { adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, false); return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); } else { @@ -1713,7 +1869,9 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, return err; value = value / 1000000; /* convert to Watt */ - if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) { + if (is_support_sw_smu(adev)) { + adev->smu.funcs->set_power_limit(&adev->smu, value); + } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) { err = adev->powerplay.pp_funcs->set_power_limit(adev->powerplay.pp_handle, value); if (err) return err; @@ -1967,18 +2125,20 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_fan1_enable.dev_attr.attr)) return 0; - /* mask fan attributes if we have no bindings for this asic to expose */ - if ((!adev->powerplay.pp_funcs->get_fan_speed_percent && - attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't query fan */ - (!adev->powerplay.pp_funcs->get_fan_control_mode && - attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't query state */ - effective_mode &= ~S_IRUGO; + if (!is_support_sw_smu(adev)) { + /* mask fan attributes if we have no bindings for this asic to expose */ + if ((!adev->powerplay.pp_funcs->get_fan_speed_percent && + attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't query fan */ + (!adev->powerplay.pp_funcs->get_fan_control_mode && + attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't query state */ + effective_mode &= ~S_IRUGO; - if ((!adev->powerplay.pp_funcs->set_fan_speed_percent && - attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't manage fan */ - (!adev->powerplay.pp_funcs->set_fan_control_mode && - attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't manage state */ - effective_mode &= ~S_IWUSR; + if ((!adev->powerplay.pp_funcs->set_fan_speed_percent && + attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't manage fan */ + (!adev->powerplay.pp_funcs->set_fan_control_mode && + attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't manage state */ + effective_mode &= ~S_IWUSR; + } if ((adev->flags & AMD_IS_APU) && (attr == &sensor_dev_attr_power1_average.dev_attr.attr || @@ -1987,20 +2147,22 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_power1_cap.dev_attr.attr)) return 0; - /* hide max/min values if we can't both query and manage the fan */ - if ((!adev->powerplay.pp_funcs->set_fan_speed_percent && - !adev->powerplay.pp_funcs->get_fan_speed_percent) && - (!adev->powerplay.pp_funcs->set_fan_speed_rpm && - !adev->powerplay.pp_funcs->get_fan_speed_rpm) && - (attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || - attr == &sensor_dev_attr_pwm1_min.dev_attr.attr)) - return 0; + if (!is_support_sw_smu(adev)) { + /* hide max/min values if we can't both query and manage the fan */ + if ((!adev->powerplay.pp_funcs->set_fan_speed_percent && + !adev->powerplay.pp_funcs->get_fan_speed_percent) && + (!adev->powerplay.pp_funcs->set_fan_speed_rpm && + !adev->powerplay.pp_funcs->get_fan_speed_rpm) && + (attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_min.dev_attr.attr)) + return 0; - if ((!adev->powerplay.pp_funcs->set_fan_speed_rpm && - !adev->powerplay.pp_funcs->get_fan_speed_rpm) && - (attr == &sensor_dev_attr_fan1_max.dev_attr.attr || - attr == &sensor_dev_attr_fan1_min.dev_attr.attr)) - return 0; + if ((!adev->powerplay.pp_funcs->set_fan_speed_rpm && + !adev->powerplay.pp_funcs->get_fan_speed_rpm) && + (attr == &sensor_dev_attr_fan1_max.dev_attr.attr || + attr == &sensor_dev_attr_fan1_min.dev_attr.attr)) + return 0; + } /* only APUs have vddnb */ if (!(adev->flags & AMD_IS_APU) && @@ -2039,9 +2201,7 @@ void amdgpu_dpm_thermal_work_handler(struct work_struct *work) if (!adev->pm.dpm_enabled) return; - if (adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->read_sensor && - !amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP, + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP, (void *)&temp, &size)) { if (temp < adev->pm.dpm.thermal.min_temp) /* switch back the user state */ @@ -2267,7 +2427,13 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev) void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) { - if (adev->powerplay.pp_funcs->set_powergating_by_smu) { + int ret = 0; + if (is_support_sw_smu(adev)) { + ret = smu_dpm_set_power_gate(&adev->smu, AMD_IP_BLOCK_TYPE_UVD, enable); + if (ret) + DRM_ERROR("[SW SMU]: dpm enable uvd failed, state = %s, ret = %d. \n", + enable ? "true" : "false", ret); + } else if (adev->powerplay.pp_funcs->set_powergating_by_smu) { /* enable/disable UVD */ mutex_lock(&adev->pm.mutex); amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable); @@ -2288,7 +2454,13 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable) { - if (adev->powerplay.pp_funcs->set_powergating_by_smu) { + int ret = 0; + if (is_support_sw_smu(adev)) { + ret = smu_dpm_set_power_gate(&adev->smu, AMD_IP_BLOCK_TYPE_VCE, enable); + if (ret) + DRM_ERROR("[SW SMU]: dpm enable vce failed, state = %s, ret = %d. \n", + enable ? "true" : "false", ret); + } else if (adev->powerplay.pp_funcs->set_powergating_by_smu) { /* enable/disable VCE */ mutex_lock(&adev->pm.mutex); amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable); @@ -2413,7 +2585,8 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) "pp_power_profile_mode\n"); return ret; } - if (hwmgr->od_enabled) { + if ((is_support_sw_smu(adev) && adev->smu.od_enabled) || + (!is_support_sw_smu(adev) && hwmgr->od_enabled)) { ret = device_create_file(adev->dev, &dev_attr_pp_od_clk_voltage); if (ret) { @@ -2489,7 +2662,8 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_pp_mclk_od); device_remove_file(adev->dev, &dev_attr_pp_power_profile_mode); - if (hwmgr->od_enabled) + if ((is_support_sw_smu(adev) && adev->smu.od_enabled) || + (!is_support_sw_smu(adev) && hwmgr->od_enabled)) device_remove_file(adev->dev, &dev_attr_pp_od_clk_voltage); device_remove_file(adev->dev, &dev_attr_gpu_busy_percent); @@ -2516,28 +2690,38 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) amdgpu_fence_wait_empty(ring); } - if (adev->powerplay.pp_funcs->dispatch_tasks) { - if (!amdgpu_device_has_dc_support(adev)) { + if (is_support_sw_smu(adev)) { + struct smu_context *smu = &adev->smu; + struct smu_dpm_context *smu_dpm = &adev->smu.smu_dpm; + mutex_lock(&(smu->mutex)); + smu_handle_task(&adev->smu, + smu_dpm->dpm_level, + AMD_PP_TASK_DISPLAY_CONFIG_CHANGE); + mutex_unlock(&(smu->mutex)); + } else { + if (adev->powerplay.pp_funcs->dispatch_tasks) { + if (!amdgpu_device_has_dc_support(adev)) { + mutex_lock(&adev->pm.mutex); + amdgpu_dpm_get_active_displays(adev); + adev->pm.pm_display_cfg.num_display = adev->pm.dpm.new_active_crtc_count; + adev->pm.pm_display_cfg.vrefresh = amdgpu_dpm_get_vrefresh(adev); + adev->pm.pm_display_cfg.min_vblank_time = amdgpu_dpm_get_vblank_time(adev); + /* we have issues with mclk switching with refresh rates over 120 hz on the non-DC code. */ + if (adev->pm.pm_display_cfg.vrefresh > 120) + adev->pm.pm_display_cfg.min_vblank_time = 0; + if (adev->powerplay.pp_funcs->display_configuration_change) + adev->powerplay.pp_funcs->display_configuration_change( + adev->powerplay.pp_handle, + &adev->pm.pm_display_cfg); + mutex_unlock(&adev->pm.mutex); + } + amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL); + } else { mutex_lock(&adev->pm.mutex); amdgpu_dpm_get_active_displays(adev); - adev->pm.pm_display_cfg.num_display = adev->pm.dpm.new_active_crtc_count; - adev->pm.pm_display_cfg.vrefresh = amdgpu_dpm_get_vrefresh(adev); - adev->pm.pm_display_cfg.min_vblank_time = amdgpu_dpm_get_vblank_time(adev); - /* we have issues with mclk switching with refresh rates over 120 hz on the non-DC code. */ - if (adev->pm.pm_display_cfg.vrefresh > 120) - adev->pm.pm_display_cfg.min_vblank_time = 0; - if (adev->powerplay.pp_funcs->display_configuration_change) - adev->powerplay.pp_funcs->display_configuration_change( - adev->powerplay.pp_handle, - &adev->pm.pm_display_cfg); + amdgpu_dpm_change_power_state_locked(adev); mutex_unlock(&adev->pm.mutex); } - amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL); - } else { - mutex_lock(&adev->pm.mutex); - amdgpu_dpm_get_active_displays(adev); - amdgpu_dpm_change_power_state_locked(adev); - mutex_unlock(&adev->pm.mutex); } } @@ -2553,11 +2737,6 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a uint32_t query = 0; int size; - /* sanity check PP is enabled */ - if (!(adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->read_sensor)) - return -EINVAL; - /* GPU Clocks */ size = sizeof(value); seq_printf(m, "GFX Clocks and Power:\n"); @@ -2649,7 +2828,7 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data) if ((adev->flags & AMD_IS_PX) && (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) { seq_printf(m, "PX asic powered off\n"); - } else if (adev->powerplay.pp_funcs->debugfs_print_current_performance_level) { + } else if (!is_support_sw_smu(adev) && adev->powerplay.pp_funcs->debugfs_print_current_performance_level) { mutex_lock(&adev->pm.mutex); if (adev->powerplay.pp_funcs->debugfs_print_current_performance_level) adev->powerplay.pp_funcs->debugfs_print_current_performance_level(adev, m); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 3091488cd8cc..905cce1814f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -120,6 +120,7 @@ psp_cmd_submit_buf(struct psp_context *psp, { int ret; int index; + int timeout = 2000; memset(psp->cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE); @@ -133,8 +134,11 @@ psp_cmd_submit_buf(struct psp_context *psp, return ret; } - while (*((unsigned int *)psp->fence_buf) != index) + while (*((unsigned int *)psp->fence_buf) != index) { + if (--timeout == 0) + break; msleep(1); + } /* In some cases, psp response status is not 0 even there is no * problem while the command is submitted. Some version of PSP FW @@ -143,12 +147,14 @@ psp_cmd_submit_buf(struct psp_context *psp, * during psp initialization to avoid breaking hw_init and it doesn't * return -EINVAL. */ - if (psp->cmd_buf_mem->resp.status) { + if (psp->cmd_buf_mem->resp.status || !timeout) { if (ucode) DRM_WARN("failed to load ucode id (%d) ", ucode->ucode_id); DRM_WARN("psp command failed and response status is (%d)\n", psp->cmd_buf_mem->resp.status); + if (!timeout) + return -EINVAL; } /* get xGMI session id from response buffer */ @@ -181,13 +187,13 @@ static int psp_tmr_init(struct psp_context *psp) int ret; /* - * Allocate 3M memory aligned to 1M from Frame Buffer (local - * physical). + * According to HW engineer, they prefer the TMR address be "naturally + * aligned" , e.g. the start address be an integer divide of TMR size. * * Note: this memory need be reserved till the driver * uninitializes. */ - ret = amdgpu_bo_create_kernel(psp->adev, PSP_TMR_SIZE, 0x100000, + ret = amdgpu_bo_create_kernel(psp->adev, PSP_TMR_SIZE, PSP_TMR_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); @@ -466,6 +472,206 @@ static int psp_xgmi_initialize(struct psp_context *psp) return ret; } +// ras begin +static void psp_prep_ras_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint64_t ras_ta_mc, uint64_t ras_mc_shared, + uint32_t ras_ta_size, uint32_t shared_size) +{ + cmd->cmd_id = GFX_CMD_ID_LOAD_TA; + cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(ras_ta_mc); + cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(ras_ta_mc); + cmd->cmd.cmd_load_ta.app_len = ras_ta_size; + + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(ras_mc_shared); + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(ras_mc_shared); + cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size; +} + +static int psp_ras_init_shared_buf(struct psp_context *psp) +{ + int ret; + + /* + * Allocate 16k memory aligned to 4k from Frame Buffer (local + * physical) for ras ta <-> Driver + */ + ret = amdgpu_bo_create_kernel(psp->adev, PSP_RAS_SHARED_MEM_SIZE, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, + &psp->ras.ras_shared_bo, + &psp->ras.ras_shared_mc_addr, + &psp->ras.ras_shared_buf); + + return ret; +} + +static int psp_ras_load(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + /* + * TODO: bypass the loading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + memcpy(psp->fw_pri_buf, psp->ta_ras_start_addr, psp->ta_ras_ucode_size); + + psp_prep_ras_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, + psp->ras.ras_shared_mc_addr, + psp->ta_ras_ucode_size, PSP_RAS_SHARED_MEM_SIZE); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + + if (!ret) { + psp->ras.ras_initialized = 1; + psp->ras.session_id = cmd->resp.session_id; + } + + kfree(cmd); + + return ret; +} + +static void psp_prep_ras_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint32_t ras_session_id) +{ + cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA; + cmd->cmd.cmd_unload_ta.session_id = ras_session_id; +} + +static int psp_ras_unload(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + /* + * TODO: bypass the unloading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_ras_ta_unload_cmd_buf(cmd, psp->ras.session_id); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + + kfree(cmd); + + return ret; +} + +static void psp_prep_ras_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint32_t ta_cmd_id, + uint32_t ras_session_id) +{ + cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD; + cmd->cmd.cmd_invoke_cmd.session_id = ras_session_id; + cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; + /* Note: cmd_invoke_cmd.buf is not used for now */ +} + +int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + /* + * TODO: bypass the loading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_ras_ta_invoke_cmd_buf(cmd, ta_cmd_id, + psp->ras.session_id); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + + kfree(cmd); + + return ret; +} + +int psp_ras_enable_features(struct psp_context *psp, + union ta_ras_cmd_input *info, bool enable) +{ + struct ta_ras_shared_memory *ras_cmd; + int ret; + + if (!psp->ras.ras_initialized) + return -EINVAL; + + ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; + memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); + + if (enable) + ras_cmd->cmd_id = TA_RAS_COMMAND__ENABLE_FEATURES; + else + ras_cmd->cmd_id = TA_RAS_COMMAND__DISABLE_FEATURES; + + ras_cmd->ras_in_message = *info; + + ret = psp_ras_invoke(psp, ras_cmd->cmd_id); + if (ret) + return -EINVAL; + + return ras_cmd->ras_status; +} + +static int psp_ras_terminate(struct psp_context *psp) +{ + int ret; + + if (!psp->ras.ras_initialized) + return 0; + + ret = psp_ras_unload(psp); + if (ret) + return ret; + + psp->ras.ras_initialized = 0; + + /* free ras shared memory */ + amdgpu_bo_free_kernel(&psp->ras.ras_shared_bo, + &psp->ras.ras_shared_mc_addr, + &psp->ras.ras_shared_buf); + + return 0; +} + +static int psp_ras_initialize(struct psp_context *psp) +{ + int ret; + + if (!psp->ras.ras_initialized) { + ret = psp_ras_init_shared_buf(psp); + if (ret) + return ret; + } + + ret = psp_ras_load(psp); + if (ret) + return ret; + + return 0; +} +// ras end + static int psp_hw_start(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -473,25 +679,35 @@ static int psp_hw_start(struct psp_context *psp) if (!amdgpu_sriov_vf(adev) || !adev->in_gpu_reset) { ret = psp_bootloader_load_sysdrv(psp); - if (ret) + if (ret) { + DRM_ERROR("PSP load sysdrv failed!\n"); return ret; + } ret = psp_bootloader_load_sos(psp); - if (ret) + if (ret) { + DRM_ERROR("PSP load sos failed!\n"); return ret; + } } ret = psp_ring_create(psp, PSP_RING_TYPE__KM); - if (ret) + if (ret) { + DRM_ERROR("PSP create ring failed!\n"); return ret; + } ret = psp_tmr_load(psp); - if (ret) + if (ret) { + DRM_ERROR("PSP load tmr failed!\n"); return ret; + } ret = psp_asd_load(psp); - if (ret) + if (ret) { + DRM_ERROR("PSP load asd failed!\n"); return ret; + } if (adev->gmc.xgmi.num_physical_nodes > 1) { ret = psp_xgmi_initialize(psp); @@ -502,6 +718,15 @@ static int psp_hw_start(struct psp_context *psp) dev_err(psp->adev->dev, "XGMI: Failed to initialize XGMI session\n"); } + + + if (psp->adev->psp.ta_fw) { + ret = psp_ras_initialize(psp); + if (ret) + dev_err(psp->adev->dev, + "RAS: Failed to initialize RAS\n"); + } + return 0; } @@ -665,53 +890,52 @@ static int psp_load_fw(struct amdgpu_device *adev) &psp->fence_buf_mc_addr, &psp->fence_buf); if (ret) - goto failed_mem2; + goto failed; ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, (void **)&psp->cmd_buf_mem); if (ret) - goto failed_mem1; + goto failed; memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE); ret = psp_ring_init(psp, PSP_RING_TYPE__KM); - if (ret) - goto failed_mem; + if (ret) { + DRM_ERROR("PSP ring init failed!\n"); + goto failed; + } ret = psp_tmr_init(psp); - if (ret) - goto failed_mem; + if (ret) { + DRM_ERROR("PSP tmr init failed!\n"); + goto failed; + } ret = psp_asd_init(psp); - if (ret) - goto failed_mem; + if (ret) { + DRM_ERROR("PSP asd init failed!\n"); + goto failed; + } skip_memalloc: ret = psp_hw_start(psp); if (ret) - goto failed_mem; + goto failed; ret = psp_np_fw_load(psp); if (ret) - goto failed_mem; + goto failed; return 0; -failed_mem: - amdgpu_bo_free_kernel(&psp->cmd_buf_bo, - &psp->cmd_buf_mc_addr, - (void **)&psp->cmd_buf_mem); -failed_mem1: - amdgpu_bo_free_kernel(&psp->fence_buf_bo, - &psp->fence_buf_mc_addr, &psp->fence_buf); -failed_mem2: - amdgpu_bo_free_kernel(&psp->fw_pri_bo, - &psp->fw_pri_mc_addr, &psp->fw_pri_buf); failed: - kfree(psp->cmd); - psp->cmd = NULL; + /* + * all cleanup jobs (xgmi terminate, ras terminate, + * ring destroy, cmd/fence/fw buffers destory, + * psp->cmd destory) are delayed to psp_hw_fini + */ return ret; } @@ -753,6 +977,9 @@ static int psp_hw_fini(void *handle) psp->xgmi_context.initialized == 1) psp_xgmi_terminate(psp); + if (psp->adev->psp.ta_fw) + psp_ras_terminate(psp); + psp_ring_destroy(psp, PSP_RING_TYPE__KM); amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); @@ -786,6 +1013,14 @@ static int psp_suspend(void *handle) } } + if (psp->adev->psp.ta_fw) { + ret = psp_ras_terminate(psp); + if (ret) { + DRM_ERROR("Failed to terminate ras ta\n"); + return ret; + } + } + ret = psp_ring_stop(psp, PSP_RING_TYPE__KM); if (ret) { DRM_ERROR("PSP ring stop failed\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 2ef98cc755d6..cde113f07c96 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -28,11 +28,13 @@ #include "amdgpu.h" #include "psp_gfx_if.h" #include "ta_xgmi_if.h" +#include "ta_ras_if.h" #define PSP_FENCE_BUFFER_SIZE 0x1000 #define PSP_CMD_BUFFER_SIZE 0x1000 #define PSP_ASD_SHARED_MEM_SIZE 0x4000 #define PSP_XGMI_SHARED_MEM_SIZE 0x4000 +#define PSP_RAS_SHARED_MEM_SIZE 0x4000 #define PSP_1_MEG 0x100000 #define PSP_TMR_SIZE 0x400000 @@ -88,6 +90,9 @@ struct psp_funcs int (*xgmi_set_topology_info)(struct psp_context *psp, int number_devices, struct psp_xgmi_topology_info *topology); bool (*support_vmr_ring)(struct psp_context *psp); + int (*ras_trigger_error)(struct psp_context *psp, + struct ta_ras_trigger_error_input *info); + int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr); }; struct psp_xgmi_context { @@ -98,6 +103,16 @@ struct psp_xgmi_context { void *xgmi_shared_buf; }; +struct psp_ras_context { + /*ras fw*/ + bool ras_initialized; + uint32_t session_id; + struct amdgpu_bo *ras_shared_bo; + uint64_t ras_shared_mc_addr; + void *ras_shared_buf; + struct amdgpu_ras *ras; +}; + struct psp_context { struct amdgpu_device *adev; @@ -150,10 +165,15 @@ struct psp_context /* xgmi ta firmware and buffer */ const struct firmware *ta_fw; + uint32_t ta_fw_version; uint32_t ta_xgmi_ucode_version; uint32_t ta_xgmi_ucode_size; uint8_t *ta_xgmi_start_addr; + uint32_t ta_ras_ucode_version; + uint32_t ta_ras_ucode_size; + uint8_t *ta_ras_start_addr; struct psp_xgmi_context xgmi_context; + struct psp_ras_context ras; }; struct amdgpu_psp_funcs { @@ -207,6 +227,13 @@ struct psp_xgmi_topology_info { #define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i)) +#define psp_ras_trigger_error(psp, info) \ + ((psp)->funcs->ras_trigger_error ? \ + (psp)->funcs->ras_trigger_error((psp), (info)) : -EINVAL) +#define psp_ras_cure_posion(psp, addr) \ + ((psp)->funcs->ras_cure_posion ? \ + (psp)->funcs->ras_cure_posion(psp, (addr)) : -EINVAL) + extern const struct amd_ip_funcs psp_ip_funcs; extern const struct amdgpu_ip_block_version psp_v3_1_ip_block; @@ -217,6 +244,11 @@ extern const struct amdgpu_ip_block_version psp_v10_0_ip_block; int psp_gpu_reset(struct amdgpu_device *adev); int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id); + +int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id); +int psp_ras_enable_features(struct psp_context *psp, + union ta_ras_cmd_input *info, bool enable); + extern const struct amdgpu_ip_block_version psp_v11_0_ip_block; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c new file mode 100644 index 000000000000..22bd21efe6b1 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -0,0 +1,1482 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * + */ +#include <linux/debugfs.h> +#include <linux/list.h> +#include <linux/module.h> +#include "amdgpu.h" +#include "amdgpu_ras.h" +#include "amdgpu_atomfirmware.h" + +struct ras_ih_data { + /* interrupt bottom half */ + struct work_struct ih_work; + int inuse; + /* IP callback */ + ras_ih_cb cb; + /* full of entries */ + unsigned char *ring; + unsigned int ring_size; + unsigned int element_size; + unsigned int aligned_element_size; + unsigned int rptr; + unsigned int wptr; +}; + +struct ras_fs_data { + char sysfs_name[32]; + char debugfs_name[32]; +}; + +struct ras_err_data { + unsigned long ue_count; + unsigned long ce_count; +}; + +struct ras_err_handler_data { + /* point to bad pages array */ + struct { + unsigned long bp; + struct amdgpu_bo *bo; + } *bps; + /* the count of entries */ + int count; + /* the space can place new entries */ + int space_left; + /* last reserved entry's index + 1 */ + int last_reserved; +}; + +struct ras_manager { + struct ras_common_if head; + /* reference count */ + int use; + /* ras block link */ + struct list_head node; + /* the device */ + struct amdgpu_device *adev; + /* debugfs */ + struct dentry *ent; + /* sysfs */ + struct device_attribute sysfs_attr; + int attr_inuse; + + /* fs node name */ + struct ras_fs_data fs_data; + + /* IH data */ + struct ras_ih_data ih_data; + + struct ras_err_data err_data; +}; + +const char *ras_error_string[] = { + "none", + "parity", + "single_correctable", + "multi_uncorrectable", + "poison", +}; + +const char *ras_block_string[] = { + "umc", + "sdma", + "gfx", + "mmhub", + "athub", + "pcie_bif", + "hdp", + "xgmi_wafl", + "df", + "smn", + "sem", + "mp0", + "mp1", + "fuse", +}; + +#define ras_err_str(i) (ras_error_string[ffs(i)]) +#define ras_block_str(i) (ras_block_string[i]) + +#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1 +#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS) + +static void amdgpu_ras_self_test(struct amdgpu_device *adev) +{ + /* TODO */ +} + +static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct ras_manager *obj = (struct ras_manager *)file_inode(f)->i_private; + struct ras_query_if info = { + .head = obj->head, + }; + ssize_t s; + char val[128]; + + if (amdgpu_ras_error_query(obj->adev, &info)) + return -EINVAL; + + s = snprintf(val, sizeof(val), "%s: %lu\n%s: %lu\n", + "ue", info.ue_count, + "ce", info.ce_count); + if (*pos >= s) + return 0; + + s -= *pos; + s = min_t(u64, s, size); + + + if (copy_to_user(buf, &val[*pos], s)) + return -EINVAL; + + *pos += s; + + return s; +} + +static const struct file_operations amdgpu_ras_debugfs_ops = { + .owner = THIS_MODULE, + .read = amdgpu_ras_debugfs_read, + .write = NULL, + .llseek = default_llseek +}; + +static int amdgpu_ras_find_block_id_by_name(const char *name, int *block_id) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ras_block_string); i++) { + *block_id = i; + if (strcmp(name, ras_block_str(i)) == 0) + return 0; + } + return -EINVAL; +} + +static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, + const char __user *buf, size_t size, + loff_t *pos, struct ras_debug_if *data) +{ + ssize_t s = min_t(u64, 64, size); + char str[65]; + char block_name[33]; + char err[9] = "ue"; + int op = -1; + int block_id; + u64 address, value; + + if (*pos) + return -EINVAL; + *pos = size; + + memset(str, 0, sizeof(str)); + memset(data, 0, sizeof(*data)); + + if (copy_from_user(str, buf, s)) + return -EINVAL; + + if (sscanf(str, "disable %32s", block_name) == 1) + op = 0; + else if (sscanf(str, "enable %32s %8s", block_name, err) == 2) + op = 1; + else if (sscanf(str, "inject %32s %8s", block_name, err) == 2) + op = 2; + else if (str[0] && str[1] && str[2] && str[3]) + /* ascii string, but commands are not matched. */ + return -EINVAL; + + if (op != -1) { + if (amdgpu_ras_find_block_id_by_name(block_name, &block_id)) + return -EINVAL; + + data->head.block = block_id; + data->head.type = memcmp("ue", err, 2) == 0 ? + AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE : + AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE; + data->op = op; + + if (op == 2) { + if (sscanf(str, "%*s %*s %*s %llu %llu", + &address, &value) != 2) + if (sscanf(str, "%*s %*s %*s 0x%llx 0x%llx", + &address, &value) != 2) + return -EINVAL; + data->inject.address = address; + data->inject.value = value; + } + } else { + if (size < sizeof(*data)) + return -EINVAL; + + if (copy_from_user(data, buf, sizeof(*data))) + return -EINVAL; + } + + return 0; +} +/* + * DOC: ras debugfs control interface + * + * It accepts struct ras_debug_if who has two members. + * + * First member: ras_debug_if::head or ras_debug_if::inject. + * + * head is used to indicate which IP block will be under control. + * + * head has four members, they are block, type, sub_block_index, name. + * block: which IP will be under control. + * type: what kind of error will be enabled/disabled/injected. + * sub_block_index: some IPs have subcomponets. say, GFX, sDMA. + * name: the name of IP. + * + * inject has two more members than head, they are address, value. + * As their names indicate, inject operation will write the + * value to the address. + * + * Second member: struct ras_debug_if::op. + * It has three kinds of operations. + * 0: disable RAS on the block. Take ::head as its data. + * 1: enable RAS on the block. Take ::head as its data. + * 2: inject errors on the block. Take ::inject as its data. + * + * How to use the interface? + * programs: + * copy the struct ras_debug_if in your codes and initialize it. + * write the struct to the control node. + * + * bash: + * echo op block [error [address value]] > .../ras/ras_ctrl + * op: disable, enable, inject + * disable: only block is needed + * enable: block and error are needed + * inject: error, address, value are needed + * block: umc, smda, gfx, ......... + * see ras_block_string[] for details + * error: ue, ce + * ue: multi_uncorrectable + * ce: single_correctable + * + * here are some examples for bash commands, + * echo inject umc ue 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl + * echo inject umc ce 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl + * echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl + * + * How to check the result? + * + * For disable/enable, please check ras features at + * /sys/class/drm/card[0/1/2...]/device/ras/features + * + * For inject, please check corresponding err count at + * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count + * + * NOTE: operation is only allowed on blocks which are supported. + * Please check ras mask at /sys/module/amdgpu/parameters/ras_mask + */ +static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; + struct ras_debug_if data; + int ret = 0; + + ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data); + if (ret) + return -EINVAL; + + if (!amdgpu_ras_is_supported(adev, data.head.block)) + return -EINVAL; + + switch (data.op) { + case 0: + ret = amdgpu_ras_feature_enable(adev, &data.head, 0); + break; + case 1: + ret = amdgpu_ras_feature_enable(adev, &data.head, 1); + break; + case 2: + ret = amdgpu_ras_error_inject(adev, &data.inject); + break; + default: + ret = -EINVAL; + break; + }; + + if (ret) + return -EINVAL; + + return size; +} + +static const struct file_operations amdgpu_ras_debugfs_ctrl_ops = { + .owner = THIS_MODULE, + .read = NULL, + .write = amdgpu_ras_debugfs_ctrl_write, + .llseek = default_llseek +}; + +static ssize_t amdgpu_ras_sysfs_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ras_manager *obj = container_of(attr, struct ras_manager, sysfs_attr); + struct ras_query_if info = { + .head = obj->head, + }; + + if (amdgpu_ras_error_query(obj->adev, &info)) + return -EINVAL; + + return snprintf(buf, PAGE_SIZE, "%s: %lu\n%s: %lu\n", + "ue", info.ue_count, + "ce", info.ce_count); +} + +/* obj begin */ + +#define get_obj(obj) do { (obj)->use++; } while (0) +#define alive_obj(obj) ((obj)->use) + +static inline void put_obj(struct ras_manager *obj) +{ + if (obj && --obj->use == 0) + list_del(&obj->node); + if (obj && obj->use < 0) { + DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", obj->head.name); + } +} + +/* make one obj and return it. */ +static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev, + struct ras_common_if *head) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_manager *obj; + + if (!con) + return NULL; + + if (head->block >= AMDGPU_RAS_BLOCK_COUNT) + return NULL; + + obj = &con->objs[head->block]; + /* already exist. return obj? */ + if (alive_obj(obj)) + return NULL; + + obj->head = *head; + obj->adev = adev; + list_add(&obj->node, &con->head); + get_obj(obj); + + return obj; +} + +/* return an obj equal to head, or the first when head is NULL */ +static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, + struct ras_common_if *head) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_manager *obj; + int i; + + if (!con) + return NULL; + + if (head) { + if (head->block >= AMDGPU_RAS_BLOCK_COUNT) + return NULL; + + obj = &con->objs[head->block]; + + if (alive_obj(obj)) { + WARN_ON(head->block != obj->head.block); + return obj; + } + } else { + for (i = 0; i < AMDGPU_RAS_BLOCK_COUNT; i++) { + obj = &con->objs[i]; + if (alive_obj(obj)) { + WARN_ON(i != obj->head.block); + return obj; + } + } + } + + return NULL; +} +/* obj end */ + +/* feature ctl begin */ +static int amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev, + struct ras_common_if *head) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + return con->hw_supported & BIT(head->block); +} + +static int amdgpu_ras_is_feature_enabled(struct amdgpu_device *adev, + struct ras_common_if *head) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + return con->features & BIT(head->block); +} + +/* + * if obj is not created, then create one. + * set feature enable flag. + */ +static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev, + struct ras_common_if *head, int enable) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_manager *obj = amdgpu_ras_find_obj(adev, head); + + /* If hardware does not support ras, then do not create obj. + * But if hardware support ras, we can create the obj. + * Ras framework checks con->hw_supported to see if it need do + * corresponding initialization. + * IP checks con->support to see if it need disable ras. + */ + if (!amdgpu_ras_is_feature_allowed(adev, head)) + return 0; + if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head))) + return 0; + + if (enable) { + if (!obj) { + obj = amdgpu_ras_create_obj(adev, head); + if (!obj) + return -EINVAL; + } else { + /* In case we create obj somewhere else */ + get_obj(obj); + } + con->features |= BIT(head->block); + } else { + if (obj && amdgpu_ras_is_feature_enabled(adev, head)) { + con->features &= ~BIT(head->block); + put_obj(obj); + } + } + + return 0; +} + +/* wrapper of psp_ras_enable_features */ +int amdgpu_ras_feature_enable(struct amdgpu_device *adev, + struct ras_common_if *head, bool enable) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + union ta_ras_cmd_input info; + int ret; + + if (!con) + return -EINVAL; + + if (!enable) { + info.disable_features = (struct ta_ras_disable_features_input) { + .block_id = amdgpu_ras_block_to_ta(head->block), + .error_type = amdgpu_ras_error_to_ta(head->type), + }; + } else { + info.enable_features = (struct ta_ras_enable_features_input) { + .block_id = amdgpu_ras_block_to_ta(head->block), + .error_type = amdgpu_ras_error_to_ta(head->type), + }; + } + + /* Do not enable if it is not allowed. */ + WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head)); + /* Are we alerady in that state we are going to set? */ + if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head))) + return 0; + + ret = psp_ras_enable_features(&adev->psp, &info, enable); + if (ret) { + DRM_ERROR("RAS ERROR: %s %s feature failed ret %d\n", + enable ? "enable":"disable", + ras_block_str(head->block), + ret); + return -EINVAL; + } + + /* setup the obj */ + __amdgpu_ras_feature_enable(adev, head, enable); + + return 0; +} + +/* Only used in device probe stage and called only once. */ +int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev, + struct ras_common_if *head, bool enable) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + int ret; + + if (!con) + return -EINVAL; + + if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) { + /* If ras is enabled by vbios, we set up ras object first in + * both case. For enable, that is all what we need do. For + * disable, we need perform a ras TA disable cmd after that. + */ + ret = __amdgpu_ras_feature_enable(adev, head, 1); + if (ret) + return ret; + + if (!enable) + ret = amdgpu_ras_feature_enable(adev, head, 0); + } else + ret = amdgpu_ras_feature_enable(adev, head, enable); + + return ret; +} + +static int amdgpu_ras_disable_all_features(struct amdgpu_device *adev, + bool bypass) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_manager *obj, *tmp; + + list_for_each_entry_safe(obj, tmp, &con->head, node) { + /* bypass psp. + * aka just release the obj and corresponding flags + */ + if (bypass) { + if (__amdgpu_ras_feature_enable(adev, &obj->head, 0)) + break; + } else { + if (amdgpu_ras_feature_enable(adev, &obj->head, 0)) + break; + } + } + + return con->features; +} + +static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev, + bool bypass) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + int ras_block_count = AMDGPU_RAS_BLOCK_COUNT; + int i; + const enum amdgpu_ras_error_type default_ras_type = + AMDGPU_RAS_ERROR__NONE; + + for (i = 0; i < ras_block_count; i++) { + struct ras_common_if head = { + .block = i, + .type = default_ras_type, + .sub_block_index = 0, + }; + strcpy(head.name, ras_block_str(i)); + if (bypass) { + /* + * bypass psp. vbios enable ras for us. + * so just create the obj + */ + if (__amdgpu_ras_feature_enable(adev, &head, 1)) + break; + } else { + if (amdgpu_ras_feature_enable(adev, &head, 1)) + break; + } + } + + return con->features; +} +/* feature ctl end */ + +/* query/inject/cure begin */ +int amdgpu_ras_error_query(struct amdgpu_device *adev, + struct ras_query_if *info) +{ + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); + + if (!obj) + return -EINVAL; + /* TODO might read the register to read the count */ + + info->ue_count = obj->err_data.ue_count; + info->ce_count = obj->err_data.ce_count; + + return 0; +} + +/* wrapper of psp_ras_trigger_error */ +int amdgpu_ras_error_inject(struct amdgpu_device *adev, + struct ras_inject_if *info) +{ + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); + struct ta_ras_trigger_error_input block_info = { + .block_id = amdgpu_ras_block_to_ta(info->head.block), + .inject_error_type = amdgpu_ras_error_to_ta(info->head.type), + .sub_block_index = info->head.sub_block_index, + .address = info->address, + .value = info->value, + }; + int ret = 0; + + if (!obj) + return -EINVAL; + + ret = psp_ras_trigger_error(&adev->psp, &block_info); + if (ret) + DRM_ERROR("RAS ERROR: inject %s error failed ret %d\n", + ras_block_str(info->head.block), + ret); + + return ret; +} + +int amdgpu_ras_error_cure(struct amdgpu_device *adev, + struct ras_cure_if *info) +{ + /* psp fw has no cure interface for now. */ + return 0; +} + +/* get the total error counts on all IPs */ +int amdgpu_ras_query_error_count(struct amdgpu_device *adev, + bool is_ce) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_manager *obj; + struct ras_err_data data = {0, 0}; + + if (!con) + return -EINVAL; + + list_for_each_entry(obj, &con->head, node) { + struct ras_query_if info = { + .head = obj->head, + }; + + if (amdgpu_ras_error_query(adev, &info)) + return -EINVAL; + + data.ce_count += info.ce_count; + data.ue_count += info.ue_count; + } + + return is_ce ? data.ce_count : data.ue_count; +} +/* query/inject/cure end */ + + +/* sysfs begin */ + +static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct amdgpu_ras *con = + container_of(attr, struct amdgpu_ras, features_attr); + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + struct ras_common_if head; + int ras_block_count = AMDGPU_RAS_BLOCK_COUNT; + int i; + ssize_t s; + struct ras_manager *obj; + + s = scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features); + + for (i = 0; i < ras_block_count; i++) { + head.block = i; + + if (amdgpu_ras_is_feature_enabled(adev, &head)) { + obj = amdgpu_ras_find_obj(adev, &head); + s += scnprintf(&buf[s], PAGE_SIZE - s, + "%s: %s\n", + ras_block_str(i), + ras_err_str(obj->head.type)); + } else + s += scnprintf(&buf[s], PAGE_SIZE - s, + "%s: disabled\n", + ras_block_str(i)); + } + + return s; +} + +static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct attribute *attrs[] = { + &con->features_attr.attr, + NULL + }; + struct attribute_group group = { + .name = "ras", + .attrs = attrs, + }; + + con->features_attr = (struct device_attribute) { + .attr = { + .name = "features", + .mode = S_IRUGO, + }, + .show = amdgpu_ras_sysfs_features_read, + }; + sysfs_attr_init(attrs[0]); + + return sysfs_create_group(&adev->dev->kobj, &group); +} + +static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct attribute *attrs[] = { + &con->features_attr.attr, + NULL + }; + struct attribute_group group = { + .name = "ras", + .attrs = attrs, + }; + + sysfs_remove_group(&adev->dev->kobj, &group); + + return 0; +} + +int amdgpu_ras_sysfs_create(struct amdgpu_device *adev, + struct ras_fs_if *head) +{ + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head->head); + + if (!obj || obj->attr_inuse) + return -EINVAL; + + get_obj(obj); + + memcpy(obj->fs_data.sysfs_name, + head->sysfs_name, + sizeof(obj->fs_data.sysfs_name)); + + obj->sysfs_attr = (struct device_attribute){ + .attr = { + .name = obj->fs_data.sysfs_name, + .mode = S_IRUGO, + }, + .show = amdgpu_ras_sysfs_read, + }; + sysfs_attr_init(&obj->sysfs_attr.attr); + + if (sysfs_add_file_to_group(&adev->dev->kobj, + &obj->sysfs_attr.attr, + "ras")) { + put_obj(obj); + return -EINVAL; + } + + obj->attr_inuse = 1; + + return 0; +} + +int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev, + struct ras_common_if *head) +{ + struct ras_manager *obj = amdgpu_ras_find_obj(adev, head); + + if (!obj || !obj->attr_inuse) + return -EINVAL; + + sysfs_remove_file_from_group(&adev->dev->kobj, + &obj->sysfs_attr.attr, + "ras"); + obj->attr_inuse = 0; + put_obj(obj); + + return 0; +} + +static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_manager *obj, *tmp; + + list_for_each_entry_safe(obj, tmp, &con->head, node) { + amdgpu_ras_sysfs_remove(adev, &obj->head); + } + + amdgpu_ras_sysfs_remove_feature_node(adev); + + return 0; +} +/* sysfs end */ + +/* debugfs begin */ +static int amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct drm_minor *minor = adev->ddev->primary; + struct dentry *root = minor->debugfs_root, *dir; + struct dentry *ent; + + dir = debugfs_create_dir("ras", root); + if (IS_ERR(dir)) + return -EINVAL; + + con->dir = dir; + + ent = debugfs_create_file("ras_ctrl", + S_IWUGO | S_IRUGO, con->dir, + adev, &amdgpu_ras_debugfs_ctrl_ops); + if (IS_ERR(ent)) { + debugfs_remove(con->dir); + return -EINVAL; + } + + con->ent = ent; + return 0; +} + +int amdgpu_ras_debugfs_create(struct amdgpu_device *adev, + struct ras_fs_if *head) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head->head); + struct dentry *ent; + + if (!obj || obj->ent) + return -EINVAL; + + get_obj(obj); + + memcpy(obj->fs_data.debugfs_name, + head->debugfs_name, + sizeof(obj->fs_data.debugfs_name)); + + ent = debugfs_create_file(obj->fs_data.debugfs_name, + S_IWUGO | S_IRUGO, con->dir, + obj, &amdgpu_ras_debugfs_ops); + + if (IS_ERR(ent)) + return -EINVAL; + + obj->ent = ent; + + return 0; +} + +int amdgpu_ras_debugfs_remove(struct amdgpu_device *adev, + struct ras_common_if *head) +{ + struct ras_manager *obj = amdgpu_ras_find_obj(adev, head); + + if (!obj || !obj->ent) + return 0; + + debugfs_remove(obj->ent); + obj->ent = NULL; + put_obj(obj); + + return 0; +} + +static int amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_manager *obj, *tmp; + + list_for_each_entry_safe(obj, tmp, &con->head, node) { + amdgpu_ras_debugfs_remove(adev, &obj->head); + } + + debugfs_remove(con->ent); + debugfs_remove(con->dir); + con->dir = NULL; + con->ent = NULL; + + return 0; +} +/* debugfs end */ + +/* ras fs */ + +static int amdgpu_ras_fs_init(struct amdgpu_device *adev) +{ + amdgpu_ras_sysfs_create_feature_node(adev); + amdgpu_ras_debugfs_create_ctrl_node(adev); + + return 0; +} + +static int amdgpu_ras_fs_fini(struct amdgpu_device *adev) +{ + amdgpu_ras_debugfs_remove_all(adev); + amdgpu_ras_sysfs_remove_all(adev); + return 0; +} +/* ras fs end */ + +/* ih begin */ +static void amdgpu_ras_interrupt_handler(struct ras_manager *obj) +{ + struct ras_ih_data *data = &obj->ih_data; + struct amdgpu_iv_entry entry; + int ret; + + while (data->rptr != data->wptr) { + rmb(); + memcpy(&entry, &data->ring[data->rptr], + data->element_size); + + wmb(); + data->rptr = (data->aligned_element_size + + data->rptr) % data->ring_size; + + /* Let IP handle its data, maybe we need get the output + * from the callback to udpate the error type/count, etc + */ + if (data->cb) { + ret = data->cb(obj->adev, &entry); + /* ue will trigger an interrupt, and in that case + * we need do a reset to recovery the whole system. + * But leave IP do that recovery, here we just dispatch + * the error. + */ + if (ret == AMDGPU_RAS_UE) { + obj->err_data.ue_count++; + } + /* Might need get ce count by register, but not all IP + * saves ce count, some IP just use one bit or two bits + * to indicate ce happened. + */ + } + } +} + +static void amdgpu_ras_interrupt_process_handler(struct work_struct *work) +{ + struct ras_ih_data *data = + container_of(work, struct ras_ih_data, ih_work); + struct ras_manager *obj = + container_of(data, struct ras_manager, ih_data); + + amdgpu_ras_interrupt_handler(obj); +} + +int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev, + struct ras_dispatch_if *info) +{ + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); + struct ras_ih_data *data = &obj->ih_data; + + if (!obj) + return -EINVAL; + + if (data->inuse == 0) + return 0; + + /* Might be overflow... */ + memcpy(&data->ring[data->wptr], info->entry, + data->element_size); + + wmb(); + data->wptr = (data->aligned_element_size + + data->wptr) % data->ring_size; + + schedule_work(&data->ih_work); + + return 0; +} + +int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev, + struct ras_ih_if *info) +{ + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); + struct ras_ih_data *data; + + if (!obj) + return -EINVAL; + + data = &obj->ih_data; + if (data->inuse == 0) + return 0; + + cancel_work_sync(&data->ih_work); + + kfree(data->ring); + memset(data, 0, sizeof(*data)); + put_obj(obj); + + return 0; +} + +int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev, + struct ras_ih_if *info) +{ + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); + struct ras_ih_data *data; + + if (!obj) { + /* in case we registe the IH before enable ras feature */ + obj = amdgpu_ras_create_obj(adev, &info->head); + if (!obj) + return -EINVAL; + } else + get_obj(obj); + + data = &obj->ih_data; + /* add the callback.etc */ + *data = (struct ras_ih_data) { + .inuse = 0, + .cb = info->cb, + .element_size = sizeof(struct amdgpu_iv_entry), + .rptr = 0, + .wptr = 0, + }; + + INIT_WORK(&data->ih_work, amdgpu_ras_interrupt_process_handler); + + data->aligned_element_size = ALIGN(data->element_size, 8); + /* the ring can store 64 iv entries. */ + data->ring_size = 64 * data->aligned_element_size; + data->ring = kmalloc(data->ring_size, GFP_KERNEL); + if (!data->ring) { + put_obj(obj); + return -ENOMEM; + } + + /* IH is ready */ + data->inuse = 1; + + return 0; +} + +static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_manager *obj, *tmp; + + list_for_each_entry_safe(obj, tmp, &con->head, node) { + struct ras_ih_if info = { + .head = obj->head, + }; + amdgpu_ras_interrupt_remove_handler(adev, &info); + } + + return 0; +} +/* ih end */ + +/* recovery begin */ +static void amdgpu_ras_do_recovery(struct work_struct *work) +{ + struct amdgpu_ras *ras = + container_of(work, struct amdgpu_ras, recovery_work); + + amdgpu_device_gpu_recover(ras->adev, 0); + atomic_set(&ras->in_recovery, 0); +} + +static int amdgpu_ras_release_vram(struct amdgpu_device *adev, + struct amdgpu_bo **bo_ptr) +{ + /* no need to free it actually. */ + amdgpu_bo_free_kernel(bo_ptr, NULL, NULL); + return 0; +} + +/* reserve vram with size@offset */ +static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev, + uint64_t offset, uint64_t size, + struct amdgpu_bo **bo_ptr) +{ + struct ttm_operation_ctx ctx = { false, false }; + struct amdgpu_bo_param bp; + int r = 0; + int i; + struct amdgpu_bo *bo; + + if (bo_ptr) + *bo_ptr = NULL; + memset(&bp, 0, sizeof(bp)); + bp.size = size; + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_NO_CPU_ACCESS; + bp.type = ttm_bo_type_kernel; + bp.resv = NULL; + + r = amdgpu_bo_create(adev, &bp, &bo); + if (r) + return -EINVAL; + + r = amdgpu_bo_reserve(bo, false); + if (r) + goto error_reserve; + + offset = ALIGN(offset, PAGE_SIZE); + for (i = 0; i < bo->placement.num_placement; ++i) { + bo->placements[i].fpfn = offset >> PAGE_SHIFT; + bo->placements[i].lpfn = (offset + size) >> PAGE_SHIFT; + } + + ttm_bo_mem_put(&bo->tbo, &bo->tbo.mem); + r = ttm_bo_mem_space(&bo->tbo, &bo->placement, &bo->tbo.mem, &ctx); + if (r) + goto error_pin; + + r = amdgpu_bo_pin_restricted(bo, + AMDGPU_GEM_DOMAIN_VRAM, + offset, + offset + size); + if (r) + goto error_pin; + + if (bo_ptr) + *bo_ptr = bo; + + amdgpu_bo_unreserve(bo); + return r; + +error_pin: + amdgpu_bo_unreserve(bo); +error_reserve: + amdgpu_bo_unref(&bo); + return r; +} + +/* alloc/realloc bps array */ +static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev, + struct ras_err_handler_data *data, int pages) +{ + unsigned int old_space = data->count + data->space_left; + unsigned int new_space = old_space + pages; + unsigned int align_space = ALIGN(new_space, 1024); + void *tmp = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL); + + if (!tmp) + return -ENOMEM; + + if (data->bps) { + memcpy(tmp, data->bps, + data->count * sizeof(*data->bps)); + kfree(data->bps); + } + + data->bps = tmp; + data->space_left += align_space - old_space; + return 0; +} + +/* it deal with vram only. */ +int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, + unsigned long *bps, int pages) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_err_handler_data *data; + int i = pages; + int ret = 0; + + if (!con || !con->eh_data || !bps || pages <= 0) + return 0; + + mutex_lock(&con->recovery_lock); + data = con->eh_data; + if (!data) + goto out; + + if (data->space_left <= pages) + if (amdgpu_ras_realloc_eh_data_space(adev, data, pages)) { + ret = -ENOMEM; + goto out; + } + + while (i--) + data->bps[data->count++].bp = bps[i]; + + data->space_left -= pages; +out: + mutex_unlock(&con->recovery_lock); + + return ret; +} + +/* called in gpu recovery/init */ +int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_err_handler_data *data; + uint64_t bp; + struct amdgpu_bo *bo; + int i; + + if (!con || !con->eh_data) + return 0; + + mutex_lock(&con->recovery_lock); + data = con->eh_data; + if (!data) + goto out; + /* reserve vram at driver post stage. */ + for (i = data->last_reserved; i < data->count; i++) { + bp = data->bps[i].bp; + + if (amdgpu_ras_reserve_vram(adev, bp << PAGE_SHIFT, + PAGE_SIZE, &bo)) + DRM_ERROR("RAS ERROR: reserve vram %llx fail\n", bp); + + data->bps[i].bo = bo; + data->last_reserved = i + 1; + } +out: + mutex_unlock(&con->recovery_lock); + return 0; +} + +/* called when driver unload */ +static int amdgpu_ras_release_bad_pages(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_err_handler_data *data; + struct amdgpu_bo *bo; + int i; + + if (!con || !con->eh_data) + return 0; + + mutex_lock(&con->recovery_lock); + data = con->eh_data; + if (!data) + goto out; + + for (i = data->last_reserved - 1; i >= 0; i--) { + bo = data->bps[i].bo; + + amdgpu_ras_release_vram(adev, &bo); + + data->bps[i].bo = bo; + data->last_reserved = i; + } +out: + mutex_unlock(&con->recovery_lock); + return 0; +} + +static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev) +{ + /* TODO + * write the array to eeprom when SMU disabled. + */ + return 0; +} + +static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev) +{ + /* TODO + * read the array to eeprom when SMU disabled. + */ + return 0; +} + +static int amdgpu_ras_recovery_init(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_err_handler_data **data = &con->eh_data; + + *data = kmalloc(sizeof(**data), + GFP_KERNEL|__GFP_ZERO); + if (!*data) + return -ENOMEM; + + mutex_init(&con->recovery_lock); + INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery); + atomic_set(&con->in_recovery, 0); + con->adev = adev; + + amdgpu_ras_load_bad_pages(adev); + amdgpu_ras_reserve_bad_pages(adev); + + return 0; +} + +static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_err_handler_data *data = con->eh_data; + + cancel_work_sync(&con->recovery_work); + amdgpu_ras_save_bad_pages(adev); + amdgpu_ras_release_bad_pages(adev); + + mutex_lock(&con->recovery_lock); + con->eh_data = NULL; + kfree(data->bps); + kfree(data); + mutex_unlock(&con->recovery_lock); + + return 0; +} +/* recovery end */ + +/* + * check hardware's ras ability which will be saved in hw_supported. + * if hardware does not support ras, we can skip some ras initializtion and + * forbid some ras operations from IP. + * if software itself, say boot parameter, limit the ras ability. We still + * need allow IP do some limited operations, like disable. In such case, + * we have to initialize ras as normal. but need check if operation is + * allowed or not in each function. + */ +static void amdgpu_ras_check_supported(struct amdgpu_device *adev, + uint32_t *hw_supported, uint32_t *supported) +{ + *hw_supported = 0; + *supported = 0; + + if (amdgpu_sriov_vf(adev) || + adev->asic_type != CHIP_VEGA20) + return; + + if (adev->is_atom_fw && + (amdgpu_atomfirmware_mem_ecc_supported(adev) || + amdgpu_atomfirmware_sram_ecc_supported(adev))) + *hw_supported = AMDGPU_RAS_BLOCK_MASK; + + *supported = amdgpu_ras_enable == 0 ? + 0 : *hw_supported & amdgpu_ras_mask; +} + +int amdgpu_ras_init(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + if (con) + return 0; + + con = kmalloc(sizeof(struct amdgpu_ras) + + sizeof(struct ras_manager) * AMDGPU_RAS_BLOCK_COUNT, + GFP_KERNEL|__GFP_ZERO); + if (!con) + return -ENOMEM; + + con->objs = (struct ras_manager *)(con + 1); + + amdgpu_ras_set_context(adev, con); + + amdgpu_ras_check_supported(adev, &con->hw_supported, + &con->supported); + con->features = 0; + INIT_LIST_HEAD(&con->head); + /* Might need get this flag from vbios. */ + con->flags = RAS_DEFAULT_FLAGS; + + if (amdgpu_ras_recovery_init(adev)) + goto recovery_out; + + amdgpu_ras_mask &= AMDGPU_RAS_BLOCK_MASK; + + if (amdgpu_ras_fs_init(adev)) + goto fs_out; + + amdgpu_ras_self_test(adev); + + DRM_INFO("RAS INFO: ras initialized successfully, " + "hardware ability[%x] ras_mask[%x]\n", + con->hw_supported, con->supported); + return 0; +fs_out: + amdgpu_ras_recovery_fini(adev); +recovery_out: + amdgpu_ras_set_context(adev, NULL); + kfree(con); + + return -EINVAL; +} + +/* do some init work after IP late init as dependence */ +void amdgpu_ras_post_init(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_manager *obj, *tmp; + + if (!con) + return; + + if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) { + /* Set up all other IPs which are not implemented. There is a + * tricky thing that IP's actual ras error type should be + * MULTI_UNCORRECTABLE, but as driver does not handle it, so + * ERROR_NONE make sense anyway. + */ + amdgpu_ras_enable_all_features(adev, 1); + + /* We enable ras on all hw_supported block, but as boot + * parameter might disable some of them and one or more IP has + * not implemented yet. So we disable them on behalf. + */ + list_for_each_entry_safe(obj, tmp, &con->head, node) { + if (!amdgpu_ras_is_supported(adev, obj->head.block)) { + amdgpu_ras_feature_enable(adev, &obj->head, 0); + /* there should be no any reference. */ + WARN_ON(alive_obj(obj)); + } + } + } +} + +/* do some fini work before IP fini as dependence */ +int amdgpu_ras_pre_fini(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + if (!con) + return 0; + + /* Need disable ras on all IPs here before ip [hw/sw]fini */ + amdgpu_ras_disable_all_features(adev, 0); + amdgpu_ras_recovery_fini(adev); + return 0; +} + +int amdgpu_ras_fini(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + if (!con) + return 0; + + amdgpu_ras_fs_fini(adev); + amdgpu_ras_interrupt_remove_all(adev); + + WARN(con->features, "Feature mask is not cleared"); + + if (con->features) + amdgpu_ras_disable_all_features(adev, 1); + + amdgpu_ras_set_context(adev, NULL); + kfree(con); + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h new file mode 100644 index 000000000000..eaef5edefc34 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -0,0 +1,294 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * + */ +#ifndef _AMDGPU_RAS_H +#define _AMDGPU_RAS_H + +#include <linux/debugfs.h> +#include <linux/list.h> +#include "amdgpu.h" +#include "amdgpu_psp.h" +#include "ta_ras_if.h" + +enum amdgpu_ras_block { + AMDGPU_RAS_BLOCK__UMC = 0, + AMDGPU_RAS_BLOCK__SDMA, + AMDGPU_RAS_BLOCK__GFX, + AMDGPU_RAS_BLOCK__MMHUB, + AMDGPU_RAS_BLOCK__ATHUB, + AMDGPU_RAS_BLOCK__PCIE_BIF, + AMDGPU_RAS_BLOCK__HDP, + AMDGPU_RAS_BLOCK__XGMI_WAFL, + AMDGPU_RAS_BLOCK__DF, + AMDGPU_RAS_BLOCK__SMN, + AMDGPU_RAS_BLOCK__SEM, + AMDGPU_RAS_BLOCK__MP0, + AMDGPU_RAS_BLOCK__MP1, + AMDGPU_RAS_BLOCK__FUSE, + + AMDGPU_RAS_BLOCK__LAST +}; + +#define AMDGPU_RAS_BLOCK_COUNT AMDGPU_RAS_BLOCK__LAST +#define AMDGPU_RAS_BLOCK_MASK ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1) + +enum amdgpu_ras_error_type { + AMDGPU_RAS_ERROR__NONE = 0, + AMDGPU_RAS_ERROR__PARITY = 1, + AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE = 2, + AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE = 4, + AMDGPU_RAS_ERROR__POISON = 8, +}; + +enum amdgpu_ras_ret { + AMDGPU_RAS_SUCCESS = 0, + AMDGPU_RAS_FAIL, + AMDGPU_RAS_UE, + AMDGPU_RAS_CE, + AMDGPU_RAS_PT, +}; + +struct ras_common_if { + enum amdgpu_ras_block block; + enum amdgpu_ras_error_type type; + uint32_t sub_block_index; + /* block name */ + char name[32]; +}; + +typedef int (*ras_ih_cb)(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry); + +struct amdgpu_ras { + /* ras infrastructure */ + /* for ras itself. */ + uint32_t hw_supported; + /* for IP to check its ras ability. */ + uint32_t supported; + uint32_t features; + struct list_head head; + /* debugfs */ + struct dentry *dir; + /* debugfs ctrl */ + struct dentry *ent; + /* sysfs */ + struct device_attribute features_attr; + /* block array */ + struct ras_manager *objs; + + /* gpu recovery */ + struct work_struct recovery_work; + atomic_t in_recovery; + struct amdgpu_device *adev; + /* error handler data */ + struct ras_err_handler_data *eh_data; + struct mutex recovery_lock; + + uint32_t flags; +}; + +/* interfaces for IP */ + +struct ras_fs_if { + struct ras_common_if head; + char sysfs_name[32]; + char debugfs_name[32]; +}; + +struct ras_query_if { + struct ras_common_if head; + unsigned long ue_count; + unsigned long ce_count; +}; + +struct ras_inject_if { + struct ras_common_if head; + uint64_t address; + uint64_t value; +}; + +struct ras_cure_if { + struct ras_common_if head; + uint64_t address; +}; + +struct ras_ih_if { + struct ras_common_if head; + ras_ih_cb cb; +}; + +struct ras_dispatch_if { + struct ras_common_if head; + struct amdgpu_iv_entry *entry; +}; + +struct ras_debug_if { + union { + struct ras_common_if head; + struct ras_inject_if inject; + }; + int op; +}; +/* work flow + * vbios + * 1: ras feature enable (enabled by default) + * psp + * 2: ras framework init (in ip_init) + * IP + * 3: IH add + * 4: debugfs/sysfs create + * 5: query/inject + * 6: debugfs/sysfs remove + * 7: IH remove + * 8: feature disable + */ + +#define amdgpu_ras_get_context(adev) ((adev)->psp.ras.ras) +#define amdgpu_ras_set_context(adev, ras_con) ((adev)->psp.ras.ras = (ras_con)) + +/* check if ras is supported on block, say, sdma, gfx */ +static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev, + unsigned int block) +{ + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + return ras && (ras->supported & (1 << block)); +} + +int amdgpu_ras_query_error_count(struct amdgpu_device *adev, + bool is_ce); + +/* error handling functions */ +int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, + unsigned long *bps, int pages); + +int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev); + +static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev, + bool is_baco) +{ + /* remove me when gpu reset works on vega20 A1. */ +#if 0 + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) + schedule_work(&ras->recovery_work); +#endif + return 0; +} + +static inline enum ta_ras_block +amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) { + switch (block) { + case AMDGPU_RAS_BLOCK__UMC: + return TA_RAS_BLOCK__UMC; + case AMDGPU_RAS_BLOCK__SDMA: + return TA_RAS_BLOCK__SDMA; + case AMDGPU_RAS_BLOCK__GFX: + return TA_RAS_BLOCK__GFX; + case AMDGPU_RAS_BLOCK__MMHUB: + return TA_RAS_BLOCK__MMHUB; + case AMDGPU_RAS_BLOCK__ATHUB: + return TA_RAS_BLOCK__ATHUB; + case AMDGPU_RAS_BLOCK__PCIE_BIF: + return TA_RAS_BLOCK__PCIE_BIF; + case AMDGPU_RAS_BLOCK__HDP: + return TA_RAS_BLOCK__HDP; + case AMDGPU_RAS_BLOCK__XGMI_WAFL: + return TA_RAS_BLOCK__XGMI_WAFL; + case AMDGPU_RAS_BLOCK__DF: + return TA_RAS_BLOCK__DF; + case AMDGPU_RAS_BLOCK__SMN: + return TA_RAS_BLOCK__SMN; + case AMDGPU_RAS_BLOCK__SEM: + return TA_RAS_BLOCK__SEM; + case AMDGPU_RAS_BLOCK__MP0: + return TA_RAS_BLOCK__MP0; + case AMDGPU_RAS_BLOCK__MP1: + return TA_RAS_BLOCK__MP1; + case AMDGPU_RAS_BLOCK__FUSE: + return TA_RAS_BLOCK__FUSE; + default: + WARN_ONCE(1, "RAS ERROR: unexpected block id %d\n", block); + return TA_RAS_BLOCK__UMC; + } +} + +static inline enum ta_ras_error_type +amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) { + switch (error) { + case AMDGPU_RAS_ERROR__NONE: + return TA_RAS_ERROR__NONE; + case AMDGPU_RAS_ERROR__PARITY: + return TA_RAS_ERROR__PARITY; + case AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE: + return TA_RAS_ERROR__SINGLE_CORRECTABLE; + case AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE: + return TA_RAS_ERROR__MULTI_UNCORRECTABLE; + case AMDGPU_RAS_ERROR__POISON: + return TA_RAS_ERROR__POISON; + default: + WARN_ONCE(1, "RAS ERROR: unexpected error type %d\n", error); + return TA_RAS_ERROR__NONE; + } +} + +/* called in ip_init and ip_fini */ +int amdgpu_ras_init(struct amdgpu_device *adev); +void amdgpu_ras_post_init(struct amdgpu_device *adev); +int amdgpu_ras_fini(struct amdgpu_device *adev); +int amdgpu_ras_pre_fini(struct amdgpu_device *adev); + +int amdgpu_ras_feature_enable(struct amdgpu_device *adev, + struct ras_common_if *head, bool enable); + +int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev, + struct ras_common_if *head, bool enable); + +int amdgpu_ras_sysfs_create(struct amdgpu_device *adev, + struct ras_fs_if *head); + +int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev, + struct ras_common_if *head); + +int amdgpu_ras_debugfs_create(struct amdgpu_device *adev, + struct ras_fs_if *head); + +int amdgpu_ras_debugfs_remove(struct amdgpu_device *adev, + struct ras_common_if *head); + +int amdgpu_ras_error_query(struct amdgpu_device *adev, + struct ras_query_if *info); + +int amdgpu_ras_error_inject(struct amdgpu_device *adev, + struct ras_inject_if *info); + +int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev, + struct ras_ih_if *info); + +int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev, + struct ras_ih_if *info); + +int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev, + struct ras_dispatch_if *info); +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 335a0edf114b..8f5026c123ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -248,6 +248,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, */ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) sched_hw_submission = max(sched_hw_submission, 256); + else if (ring == &adev->sdma.instance[0].page) + sched_hw_submission = 256; if (ring->adev == NULL) { if (adev->num_rings >= AMDGPU_MAX_RINGS) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 16b1a6ae5ba6..1ba9ba3b54f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -28,9 +28,8 @@ #define AMDGPU_MAX_SDMA_INSTANCES 2 enum amdgpu_sdma_irq { - AMDGPU_SDMA_IRQ_TRAP0 = 0, - AMDGPU_SDMA_IRQ_TRAP1, - + AMDGPU_SDMA_IRQ_INSTANCE0 = 0, + AMDGPU_SDMA_IRQ_INSTANCE1, AMDGPU_SDMA_IRQ_LAST }; @@ -49,9 +48,11 @@ struct amdgpu_sdma { struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES]; struct amdgpu_irq_src trap_irq; struct amdgpu_irq_src illegal_inst_irq; + struct amdgpu_irq_src ecc_irq; int num_instances; uint32_t srbm_soft_reset; bool has_page_queue; + struct ras_common_if *ras_if; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 73e71e61dc99..0c52d1f9fe0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -50,8 +50,6 @@ #include "amdgpu_sdma.h" #include "bif/bif_4_1_d.h" -#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT) - static int amdgpu_map_buffer(struct ttm_buffer_object *bo, struct ttm_mem_reg *mem, unsigned num_pages, uint64_t offset, unsigned window, @@ -1424,6 +1422,13 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, struct dma_fence *f; int i; + /* Don't evict VM page tables while they are busy, otherwise we can't + * cleanly handle page faults. + */ + if (bo->type == ttm_bo_type_kernel && + !reservation_object_test_signaled_rcu(bo->resv, true)) + return false; + /* If bo is a KFD BO, check if the bo belongs to the current process. * If true, then return false as any KFD process needs all its BOs to * be resident to run successfully @@ -1671,7 +1676,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) r = ttm_bo_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->ddev->anon_inode->i_mapping, - DRM_FILE_PAGE_OFFSET, adev->need_dma32); if (r) { DRM_ERROR("failed initializing buffer object driver(%d).\n", r); @@ -1877,14 +1881,9 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma) { - struct drm_file *file_priv; - struct amdgpu_device *adev; - - if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET)) - return -EINVAL; + struct drm_file *file_priv = filp->private_data; + struct amdgpu_device *adev = file_priv->minor->dev->dev_private; - file_priv = filp->private_data; - adev = file_priv->minor->dev->dev_private; if (adev == NULL) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 462a04e0f5e6..7d484fad3909 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -36,6 +36,7 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) /* enable virtual display */ adev->mode_info.num_crtc = 1; adev->enable_virtual_display = true; + adev->ddev->driver->driver_features &= ~DRIVER_ATOMIC; adev->cg_flags = 0; adev->pg_flags = 0; } @@ -375,4 +376,53 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) } } +static uint32_t parse_clk(char *buf, bool min) +{ + char *ptr = buf; + uint32_t clk = 0; + + do { + ptr = strchr(ptr, ':'); + if (!ptr) + break; + ptr+=2; + clk = simple_strtoul(ptr, NULL, 10); + } while (!min); + + return clk * 100; +} + +uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest) +{ + char *buf = NULL; + uint32_t clk = 0; + + buf = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + adev->virt.ops->get_pp_clk(adev, PP_SCLK, buf); + clk = parse_clk(buf, lowest); + + kfree(buf); + + return clk; +} + +uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest) +{ + char *buf = NULL; + uint32_t clk = 0; + + buf = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + adev->virt.ops->get_pp_clk(adev, PP_MCLK, buf); + clk = parse_clk(buf, lowest); + + kfree(buf); + + return clk; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 722deefc0a7e..584947b7ccf3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -57,6 +57,8 @@ struct amdgpu_virt_ops { int (*reset_gpu)(struct amdgpu_device *adev); int (*wait_reset)(struct amdgpu_device *adev); void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3); + int (*get_pp_clk)(struct amdgpu_device *adev, u32 type, char *buf); + int (*force_dpm_level)(struct amdgpu_device *adev, u32 level); }; /* @@ -83,6 +85,8 @@ enum AMDGIM_FEATURE_FLAG { AMDGIM_FEATURE_GIM_LOAD_UCODES = 0x2, /* VRAM LOST by GIM */ AMDGIM_FEATURE_GIM_FLR_VRAMLOST = 0x4, + /* HW PERF SIM in GIM */ + AMDGIM_FEATURE_HW_PERF_SIMULATION = (1 << 3), }; struct amd_sriov_msg_pf2vf_info_header { @@ -252,6 +256,8 @@ struct amdgpu_virt { struct amdgpu_vf_error_buffer vf_errors; struct amdgpu_virt_fw_reserve fw_reserve; uint32_t gim_feature; + /* protect DPM events to GIM */ + struct mutex dpm_mutex; }; #define amdgpu_sriov_enabled(adev) \ @@ -278,6 +284,9 @@ static inline bool is_virtual_machine(void) #endif } +#define amdgim_is_hwperf(adev) \ + ((adev)->virt.gim_feature & AMDGIM_FEATURE_HW_PERF_SIMULATION) + bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); void amdgpu_virt_init_setting(struct amdgpu_device *adev); uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); @@ -295,5 +304,7 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size, unsigned int key, unsigned int chksum); void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev); +uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest); +uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 16fcb56c232b..a07c85815b7a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -34,6 +34,7 @@ #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" #include "amdgpu_gmc.h" +#include "amdgpu_xgmi.h" /** * DOC: GPUVM @@ -66,50 +67,6 @@ INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last, #undef LAST /** - * struct amdgpu_pte_update_params - Local structure - * - * Encapsulate some VM table update parameters to reduce - * the number of function parameters - * - */ -struct amdgpu_pte_update_params { - - /** - * @adev: amdgpu device we do this update for - */ - struct amdgpu_device *adev; - - /** - * @vm: optional amdgpu_vm we do this update for - */ - struct amdgpu_vm *vm; - - /** - * @src: address where to copy page table entries from - */ - uint64_t src; - - /** - * @ib: indirect buffer to fill with commands - */ - struct amdgpu_ib *ib; - - /** - * @func: Function which actually does the update - */ - void (*func)(struct amdgpu_pte_update_params *params, - struct amdgpu_bo *bo, uint64_t pe, - uint64_t addr, unsigned count, uint32_t incr, - uint64_t flags); - /** - * @pages_addr: - * - * DMA addresses to use for mapping, used during VM update by CPU - */ - dma_addr_t *pages_addr; -}; - -/** * struct amdgpu_prt_cb - Helper to disable partial resident texture feature from a fence callback */ struct amdgpu_prt_cb { @@ -183,6 +140,22 @@ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, } /** + * amdgpu_vm_num_ats_entries - return the number of ATS entries in the root PD + * + * @adev: amdgpu_device pointer + * + * Returns: + * The number of entries in the root page directory which needs the ATS setting. + */ +static unsigned amdgpu_vm_num_ats_entries(struct amdgpu_device *adev) +{ + unsigned shift; + + shift = amdgpu_vm_level_shift(adev, adev->vm_manager.root_level); + return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT); +} + +/** * amdgpu_vm_entries_mask - the mask to get the entry number of a PD/PT * * @adev: amdgpu_device pointer @@ -333,7 +306,7 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, return; vm->bulk_moveable = false; - if (bo->tbo.type == ttm_bo_type_kernel) + if (bo->tbo.type == ttm_bo_type_kernel && bo->parent) amdgpu_vm_bo_relocated(base); else amdgpu_vm_bo_idle(base); @@ -505,61 +478,39 @@ static void amdgpu_vm_pt_next(struct amdgpu_device *adev, } /** - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT + * amdgpu_vm_pt_first_dfs - start a deep first search * - * @adev: amdgpu_device pointer + * @adev: amdgpu_device structure * @vm: amdgpu_vm structure - * @start: start addr of the walk * @cursor: state to initialize * - * Start a walk and go directly to the leaf node. - */ -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev, - struct amdgpu_vm *vm, uint64_t start, - struct amdgpu_vm_pt_cursor *cursor) -{ - amdgpu_vm_pt_start(adev, vm, start, cursor); - while (amdgpu_vm_pt_descendant(adev, cursor)); -} - -/** - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT - * - * @adev: amdgpu_device pointer - * @cursor: current state - * - * Walk the PD/PT tree to the next leaf node. + * Starts a deep first traversal of the PD/PT tree. */ -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev, +static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt_cursor *start, struct amdgpu_vm_pt_cursor *cursor) { - amdgpu_vm_pt_next(adev, cursor); - if (cursor->pfn != ~0ll) - while (amdgpu_vm_pt_descendant(adev, cursor)); + if (start) + *cursor = *start; + else + amdgpu_vm_pt_start(adev, vm, 0, cursor); + while (amdgpu_vm_pt_descendant(adev, cursor)); } /** - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the hierarchy - */ -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor) \ - for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor)); \ - (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev), &(cursor))) - -/** - * amdgpu_vm_pt_first_dfs - start a deep first search + * amdgpu_vm_pt_continue_dfs - check if the deep first search should continue * - * @adev: amdgpu_device structure - * @vm: amdgpu_vm structure - * @cursor: state to initialize + * @start: starting point for the search + * @entry: current entry * - * Starts a deep first traversal of the PD/PT tree. + * Returns: + * True when the search should continue, false otherwise. */ -static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt_cursor *cursor) +static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start, + struct amdgpu_vm_pt *entry) { - amdgpu_vm_pt_start(adev, vm, 0, cursor); - while (amdgpu_vm_pt_descendant(adev, cursor)); + return entry && (!start || entry != start->entry); } /** @@ -587,11 +538,11 @@ static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev, /** * for_each_amdgpu_vm_pt_dfs_safe - safe deep first search of all PDs/PTs */ -#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry) \ - for (amdgpu_vm_pt_first_dfs((adev), (vm), &(cursor)), \ +#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) \ + for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)), \ (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\ - (entry); (entry) = (cursor).entry, \ - amdgpu_vm_pt_next_dfs((adev), &(cursor))) + amdgpu_vm_pt_continue_dfs((start), (entry)); \ + (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor))) /** * amdgpu_vm_get_pd_bo - add the VM PD to a validation list @@ -712,18 +663,11 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (bo->tbo.type != ttm_bo_type_kernel) { amdgpu_vm_bo_moved(bo_base); } else { - if (vm->use_cpu_for_update) - r = amdgpu_bo_kmap(bo, NULL); + vm->update_funcs->map_table(bo); + if (bo->parent) + amdgpu_vm_bo_relocated(bo_base); else - r = amdgpu_ttm_alloc_gart(&bo->tbo); - if (r) - break; - if (bo->shadow) { - r = amdgpu_ttm_alloc_gart(&bo->shadow->tbo); - if (r) - break; - } - amdgpu_vm_bo_relocated(bo_base); + amdgpu_vm_bo_idle(bo_base); } } @@ -751,8 +695,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) * @adev: amdgpu_device pointer * @vm: VM to clear BO from * @bo: BO to clear - * @level: level this BO is at - * @pte_support_ats: indicate ATS support from PTE * * Root PD needs to be reserved when calling this. * @@ -760,99 +702,112 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) * 0 on success, errno otherwise. */ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, - struct amdgpu_vm *vm, struct amdgpu_bo *bo, - unsigned level, bool pte_support_ats) + struct amdgpu_vm *vm, + struct amdgpu_bo *bo) { struct ttm_operation_ctx ctx = { true, false }; - struct dma_fence *fence = NULL; + unsigned level = adev->vm_manager.root_level; + struct amdgpu_vm_update_params params; + struct amdgpu_bo *ancestor = bo; unsigned entries, ats_entries; - struct amdgpu_ring *ring; - struct amdgpu_job *job; uint64_t addr; int r; + /* Figure out our place in the hierarchy */ + if (ancestor->parent) { + ++level; + while (ancestor->parent->parent) { + ++level; + ancestor = ancestor->parent; + } + } + entries = amdgpu_bo_size(bo) / 8; + if (!vm->pte_support_ats) { + ats_entries = 0; + + } else if (!bo->parent) { + ats_entries = amdgpu_vm_num_ats_entries(adev); + ats_entries = min(ats_entries, entries); + entries -= ats_entries; + + } else { + struct amdgpu_vm_pt *pt; - if (pte_support_ats) { - if (level == adev->vm_manager.root_level) { - ats_entries = amdgpu_vm_level_shift(adev, level); - ats_entries += AMDGPU_GPU_PAGE_SHIFT; - ats_entries = AMDGPU_GMC_HOLE_START >> ats_entries; - ats_entries = min(ats_entries, entries); - entries -= ats_entries; + pt = container_of(ancestor->vm_bo, struct amdgpu_vm_pt, base); + ats_entries = amdgpu_vm_num_ats_entries(adev); + if ((pt - vm->root.entries) >= ats_entries) { + ats_entries = 0; } else { ats_entries = entries; entries = 0; } - } else { - ats_entries = 0; } - ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched); - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (r) - goto error; + return r; + + if (bo->shadow) { + r = ttm_bo_validate(&bo->shadow->tbo, &bo->shadow->placement, + &ctx); + if (r) + return r; + } - r = amdgpu_ttm_alloc_gart(&bo->tbo); + r = vm->update_funcs->map_table(bo); if (r) return r; - r = amdgpu_job_alloc_with_ib(adev, 64, &job); + memset(¶ms, 0, sizeof(params)); + params.adev = adev; + params.vm = vm; + + r = vm->update_funcs->prepare(¶ms, AMDGPU_FENCE_OWNER_KFD, NULL); if (r) - goto error; + return r; - addr = amdgpu_bo_gpu_offset(bo); + addr = 0; if (ats_entries) { - uint64_t ats_value; + uint64_t value = 0, flags; - ats_value = AMDGPU_PTE_DEFAULT_ATC; - if (level != AMDGPU_VM_PTB) - ats_value |= AMDGPU_PDE_PTE; + flags = AMDGPU_PTE_DEFAULT_ATC; + if (level != AMDGPU_VM_PTB) { + /* Handle leaf PDEs as PTEs */ + flags |= AMDGPU_PDE_PTE; + amdgpu_gmc_get_vm_pde(adev, level, &value, &flags); + } + + r = vm->update_funcs->update(¶ms, bo, addr, 0, ats_entries, + value, flags); + if (r) + return r; - amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0, - ats_entries, 0, ats_value); addr += ats_entries * 8; } if (entries) { - uint64_t value = 0; - - /* Workaround for fault priority problem on GMC9 */ - if (level == AMDGPU_VM_PTB && adev->asic_type >= CHIP_VEGA10) - value = AMDGPU_PTE_EXECUTABLE; + uint64_t value = 0, flags = 0; + + if (adev->asic_type >= CHIP_VEGA10) { + if (level != AMDGPU_VM_PTB) { + /* Handle leaf PDEs as PTEs */ + flags |= AMDGPU_PDE_PTE; + amdgpu_gmc_get_vm_pde(adev, level, + &value, &flags); + } else { + /* Workaround for fault priority problem on GMC9 */ + flags = AMDGPU_PTE_EXECUTABLE; + } + } - amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0, - entries, 0, value); + r = vm->update_funcs->update(¶ms, bo, addr, 0, entries, + value, flags); + if (r) + return r; } - amdgpu_ring_pad_ib(ring, &job->ibs[0]); - - WARN_ON(job->ibs[0].length_dw > 64); - r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv, - AMDGPU_FENCE_OWNER_KFD, false); - if (r) - goto error_free; - - r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_UNDEFINED, - &fence); - if (r) - goto error_free; - - amdgpu_bo_fence(bo, fence, true); - dma_fence_put(fence); - - if (bo->shadow) - return amdgpu_vm_clear_bo(adev, vm, bo->shadow, - level, pte_support_ats); - - return 0; - -error_free: - amdgpu_job_free(job); - -error: - return r; + return vm->update_funcs->commit(¶ms, NULL); } /** @@ -883,89 +838,56 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, } /** - * amdgpu_vm_alloc_pts - Allocate page tables. + * amdgpu_vm_alloc_pts - Allocate a specific page table * * @adev: amdgpu_device pointer * @vm: VM to allocate page tables for - * @saddr: Start address which needs to be allocated - * @size: Size from start address we need. + * @cursor: Which page table to allocate * - * Make sure the page directories and page tables are allocated + * Make sure a specific page table or directory is allocated. * * Returns: - * 0 on success, errno otherwise. + * 1 if page table needed to be allocated, 0 if page table was already + * allocated, negative errno if an error occurred. */ -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - uint64_t saddr, uint64_t size) +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt_cursor *cursor) { - struct amdgpu_vm_pt_cursor cursor; + struct amdgpu_vm_pt *entry = cursor->entry; + struct amdgpu_bo_param bp; struct amdgpu_bo *pt; - bool ats = false; - uint64_t eaddr; int r; - /* validate the parameters */ - if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK) - return -EINVAL; - - eaddr = saddr + size - 1; - - if (vm->pte_support_ats) - ats = saddr < AMDGPU_GMC_HOLE_START; - - saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + if (cursor->level < AMDGPU_VM_PTB && !entry->entries) { + unsigned num_entries; - if (eaddr >= adev->vm_manager.max_pfn) { - dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n", - eaddr, adev->vm_manager.max_pfn); - return -EINVAL; + num_entries = amdgpu_vm_num_entries(adev, cursor->level); + entry->entries = kvmalloc_array(num_entries, + sizeof(*entry->entries), + GFP_KERNEL | __GFP_ZERO); + if (!entry->entries) + return -ENOMEM; } - for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) { - struct amdgpu_vm_pt *entry = cursor.entry; - struct amdgpu_bo_param bp; - - if (cursor.level < AMDGPU_VM_PTB) { - unsigned num_entries; - - num_entries = amdgpu_vm_num_entries(adev, cursor.level); - entry->entries = kvmalloc_array(num_entries, - sizeof(*entry->entries), - GFP_KERNEL | - __GFP_ZERO); - if (!entry->entries) - return -ENOMEM; - } - - - if (entry->base.bo) - continue; - - amdgpu_vm_bo_param(adev, vm, cursor.level, &bp); + if (entry->base.bo) + return 0; - r = amdgpu_bo_create(adev, &bp, &pt); - if (r) - return r; + amdgpu_vm_bo_param(adev, vm, cursor->level, &bp); - if (vm->use_cpu_for_update) { - r = amdgpu_bo_kmap(pt, NULL); - if (r) - goto error_free_pt; - } - - /* Keep a reference to the root directory to avoid - * freeing them up in the wrong order. - */ - pt->parent = amdgpu_bo_ref(cursor.parent->base.bo); + r = amdgpu_bo_create(adev, &bp, &pt); + if (r) + return r; - amdgpu_vm_bo_base_init(&entry->base, vm, pt); + /* Keep a reference to the root directory to avoid + * freeing them up in the wrong order. + */ + pt->parent = amdgpu_bo_ref(cursor->parent->base.bo); + amdgpu_vm_bo_base_init(&entry->base, vm, pt); - r = amdgpu_vm_clear_bo(adev, vm, pt, cursor.level, ats); - if (r) - goto error_free_pt; - } + r = amdgpu_vm_clear_bo(adev, vm, pt); + if (r) + goto error_free_pt; return 0; @@ -976,31 +898,45 @@ error_free_pt: } /** + * amdgpu_vm_free_table - fre one PD/PT + * + * @entry: PDE to free + */ +static void amdgpu_vm_free_table(struct amdgpu_vm_pt *entry) +{ + if (entry->base.bo) { + entry->base.bo->vm_bo = NULL; + list_del(&entry->base.vm_status); + amdgpu_bo_unref(&entry->base.bo->shadow); + amdgpu_bo_unref(&entry->base.bo); + } + kvfree(entry->entries); + entry->entries = NULL; +} + +/** * amdgpu_vm_free_pts - free PD/PT levels * * @adev: amdgpu device structure * @vm: amdgpu vm structure + * @start: optional cursor where to start freeing PDs/PTs * * Free the page directory or page table level and all sub levels. */ static void amdgpu_vm_free_pts(struct amdgpu_device *adev, - struct amdgpu_vm *vm) + struct amdgpu_vm *vm, + struct amdgpu_vm_pt_cursor *start) { struct amdgpu_vm_pt_cursor cursor; struct amdgpu_vm_pt *entry; - for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry) { + vm->bulk_moveable = false; - if (entry->base.bo) { - entry->base.bo->vm_bo = NULL; - list_del(&entry->base.vm_status); - amdgpu_bo_unref(&entry->base.bo->shadow); - amdgpu_bo_unref(&entry->base.bo); - } - kvfree(entry->entries); - } + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) + amdgpu_vm_free_table(entry); - BUG_ON(vm->root.base.bo); + if (start) + amdgpu_vm_free_table(start->entry); } /** @@ -1212,66 +1148,6 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, } /** - * amdgpu_vm_do_set_ptes - helper to call the right asic function - * - * @params: see amdgpu_pte_update_params definition - * @bo: PD/PT to update - * @pe: addr of the page entry - * @addr: dst addr to write into pe - * @count: number of page entries to update - * @incr: increase next addr by incr bytes - * @flags: hw access flags - * - * Traces the parameters and calls the right asic functions - * to setup the page table using the DMA. - */ -static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params, - struct amdgpu_bo *bo, - uint64_t pe, uint64_t addr, - unsigned count, uint32_t incr, - uint64_t flags) -{ - pe += amdgpu_bo_gpu_offset(bo); - trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); - - if (count < 3) { - amdgpu_vm_write_pte(params->adev, params->ib, pe, - addr | flags, count, incr); - - } else { - amdgpu_vm_set_pte_pde(params->adev, params->ib, pe, addr, - count, incr, flags); - } -} - -/** - * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART - * - * @params: see amdgpu_pte_update_params definition - * @bo: PD/PT to update - * @pe: addr of the page entry - * @addr: dst addr to write into pe - * @count: number of page entries to update - * @incr: increase next addr by incr bytes - * @flags: hw access flags - * - * Traces the parameters and calls the DMA function to copy the PTEs. - */ -static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params, - struct amdgpu_bo *bo, - uint64_t pe, uint64_t addr, - unsigned count, uint32_t incr, - uint64_t flags) -{ - uint64_t src = (params->src + (addr >> 12) * 8); - - pe += amdgpu_bo_gpu_offset(bo); - trace_amdgpu_vm_copy_ptes(pe, src, count); - - amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count); -} - -/** * amdgpu_vm_map_gart - Resolve gart mapping of addr * * @pages_addr: optional DMA address to use for lookup @@ -1283,7 +1159,7 @@ static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params, * Returns: * The pointer for the page table entry. */ -static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) +uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) { uint64_t result; @@ -1298,88 +1174,31 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) return result; } -/** - * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU - * - * @params: see amdgpu_pte_update_params definition - * @bo: PD/PT to update - * @pe: kmap addr of the page entry - * @addr: dst addr to write into pe - * @count: number of page entries to update - * @incr: increase next addr by incr bytes - * @flags: hw access flags - * - * Write count number of PT/PD entries directly. - */ -static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params, - struct amdgpu_bo *bo, - uint64_t pe, uint64_t addr, - unsigned count, uint32_t incr, - uint64_t flags) -{ - unsigned int i; - uint64_t value; - - pe += (unsigned long)amdgpu_bo_kptr(bo); - - trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); - - for (i = 0; i < count; i++) { - value = params->pages_addr ? - amdgpu_vm_map_gart(params->pages_addr, addr) : - addr; - amdgpu_gmc_set_pte_pde(params->adev, (void *)(uintptr_t)pe, - i, value, flags); - addr += incr; - } -} - -/** - * amdgpu_vm_update_func - helper to call update function - * - * Calls the update function for both the given BO as well as its shadow. - */ -static void amdgpu_vm_update_func(struct amdgpu_pte_update_params *params, - struct amdgpu_bo *bo, - uint64_t pe, uint64_t addr, - unsigned count, uint32_t incr, - uint64_t flags) -{ - if (bo->shadow) - params->func(params, bo->shadow, pe, addr, count, incr, flags); - params->func(params, bo, pe, addr, count, incr, flags); -} - /* * amdgpu_vm_update_pde - update a single level in the hierarchy * * @param: parameters for the update * @vm: requested vm - * @parent: parent directory * @entry: entry to update * * Makes sure the requested entry in parent is up to date. */ -static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt *parent, - struct amdgpu_vm_pt *entry) +static int amdgpu_vm_update_pde(struct amdgpu_vm_update_params *params, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt *entry) { + struct amdgpu_vm_pt *parent = amdgpu_vm_pt_parent(entry); struct amdgpu_bo *bo = parent->base.bo, *pbo; uint64_t pde, pt, flags; unsigned level; - /* Don't update huge pages here */ - if (entry->huge) - return; - for (level = 0, pbo = bo->parent; pbo; ++level) pbo = pbo->parent; level += params->adev->vm_manager.root_level; amdgpu_gmc_get_pde_for_bo(entry->base.bo, level, &pt, &flags); pde = (entry - parent->entries) * 8; - amdgpu_vm_update_func(params, bo, pde, pt, 1, 0, flags); + return vm->update_funcs->update(params, bo, pde, pt, 1, 0, flags); } /* @@ -1396,7 +1215,7 @@ static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev, struct amdgpu_vm_pt_cursor cursor; struct amdgpu_vm_pt *entry; - for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry) + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) if (entry->base.bo && !entry->base.moved) amdgpu_vm_bo_relocated(&entry->base); } @@ -1415,89 +1234,39 @@ static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev, int amdgpu_vm_update_directories(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - struct amdgpu_pte_update_params params; - struct amdgpu_job *job; - unsigned ndw = 0; - int r = 0; + struct amdgpu_vm_update_params params; + int r; if (list_empty(&vm->relocated)) return 0; -restart: memset(¶ms, 0, sizeof(params)); params.adev = adev; + params.vm = vm; - if (vm->use_cpu_for_update) { - r = amdgpu_bo_sync_wait(vm->root.base.bo, - AMDGPU_FENCE_OWNER_VM, true); - if (unlikely(r)) - return r; - - params.func = amdgpu_vm_cpu_set_ptes; - } else { - ndw = 512 * 8; - r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); - if (r) - return r; - - params.ib = &job->ibs[0]; - params.func = amdgpu_vm_do_set_ptes; - } + r = vm->update_funcs->prepare(¶ms, AMDGPU_FENCE_OWNER_VM, NULL); + if (r) + return r; while (!list_empty(&vm->relocated)) { - struct amdgpu_vm_pt *pt, *entry; + struct amdgpu_vm_pt *entry; entry = list_first_entry(&vm->relocated, struct amdgpu_vm_pt, base.vm_status); amdgpu_vm_bo_idle(&entry->base); - pt = amdgpu_vm_pt_parent(entry); - if (!pt) - continue; - - amdgpu_vm_update_pde(¶ms, vm, pt, entry); - - if (!vm->use_cpu_for_update && - (ndw - params.ib->length_dw) < 32) - break; - } - - if (vm->use_cpu_for_update) { - /* Flush HDP */ - mb(); - amdgpu_asic_flush_hdp(adev, NULL); - } else if (params.ib->length_dw == 0) { - amdgpu_job_free(job); - } else { - struct amdgpu_bo *root = vm->root.base.bo; - struct amdgpu_ring *ring; - struct dma_fence *fence; - - ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, - sched); - - amdgpu_ring_pad_ib(ring, params.ib); - amdgpu_sync_resv(adev, &job->sync, root->tbo.resv, - AMDGPU_FENCE_OWNER_VM, false); - WARN_ON(params.ib->length_dw > ndw); - r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_VM, - &fence); + r = amdgpu_vm_update_pde(¶ms, vm, entry); if (r) goto error; - - amdgpu_bo_fence(root, fence, true); - dma_fence_put(vm->last_update); - vm->last_update = fence; } - if (!list_empty(&vm->relocated)) - goto restart; - + r = vm->update_funcs->commit(¶ms, &vm->last_update); + if (r) + goto error; return 0; error: amdgpu_vm_invalidate_pds(adev, vm); - amdgpu_job_free(job); return r; } @@ -1506,7 +1275,7 @@ error: * * Make sure to set the right flags for the PTEs at the desired level. */ -static void amdgpu_vm_update_flags(struct amdgpu_pte_update_params *params, +static void amdgpu_vm_update_flags(struct amdgpu_vm_update_params *params, struct amdgpu_bo *bo, unsigned level, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, @@ -1525,13 +1294,14 @@ static void amdgpu_vm_update_flags(struct amdgpu_pte_update_params *params, flags |= AMDGPU_PTE_EXECUTABLE; } - amdgpu_vm_update_func(params, bo, pe, addr, count, incr, flags); + params->vm->update_funcs->update(params, bo, pe, addr, count, incr, + flags); } /** * amdgpu_vm_fragment - get fragment for PTEs * - * @params: see amdgpu_pte_update_params definition + * @params: see amdgpu_vm_update_params definition * @start: first PTE to handle * @end: last PTE to handle * @flags: hw mapping flags @@ -1540,7 +1310,7 @@ static void amdgpu_vm_update_flags(struct amdgpu_pte_update_params *params, * * Returns the first possible fragment for the start and end address. */ -static void amdgpu_vm_fragment(struct amdgpu_pte_update_params *params, +static void amdgpu_vm_fragment(struct amdgpu_vm_update_params *params, uint64_t start, uint64_t end, uint64_t flags, unsigned int *frag, uint64_t *frag_end) { @@ -1573,7 +1343,7 @@ static void amdgpu_vm_fragment(struct amdgpu_pte_update_params *params, max_frag = 31; /* system pages are non continuously */ - if (params->src) { + if (params->pages_addr) { *frag = 0; *frag_end = end; return; @@ -1592,7 +1362,7 @@ static void amdgpu_vm_fragment(struct amdgpu_pte_update_params *params, /** * amdgpu_vm_update_ptes - make sure that page tables are valid * - * @params: see amdgpu_pte_update_params definition + * @params: see amdgpu_vm_update_params definition * @start: start of GPU address range * @end: end of GPU address range * @dst: destination address to map to, the next dst inside the function @@ -1603,7 +1373,7 @@ static void amdgpu_vm_fragment(struct amdgpu_pte_update_params *params, * Returns: * 0 for success, -EINVAL for failure. */ -static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, +static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, uint64_t start, uint64_t end, uint64_t dst, uint64_t flags) { @@ -1611,6 +1381,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, struct amdgpu_vm_pt_cursor cursor; uint64_t frag_start = start, frag_end; unsigned int frag; + int r; /* figure out the initial fragment */ amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end); @@ -1618,12 +1389,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, /* walk over the address space and update the PTs */ amdgpu_vm_pt_start(adev, params->vm, start, &cursor); while (cursor.pfn < end) { - struct amdgpu_bo *pt = cursor.entry->base.bo; unsigned shift, parent_shift, mask; uint64_t incr, entry_end, pe_start; + struct amdgpu_bo *pt; - if (!pt) - return -ENOENT; + r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor); + if (r) + return r; + + pt = cursor.entry->base.bo; /* The root level can't be a huge page */ if (cursor.level == adev->vm_manager.root_level) { @@ -1632,16 +1406,10 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, continue; } - /* If it isn't already handled it can't be a huge page */ - if (cursor.entry->huge) { - /* Add the entry to the relocated list to update it. */ - cursor.entry->huge = false; - amdgpu_vm_bo_relocated(&cursor.entry->base); - } - shift = amdgpu_vm_level_shift(adev, cursor.level); parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1); - if (adev->asic_type < CHIP_VEGA10) { + if (adev->asic_type < CHIP_VEGA10 && + (flags & AMDGPU_PTE_VALID)) { /* No huge page support before GMC v9 */ if (cursor.level != AMDGPU_VM_PTB) { if (!amdgpu_vm_pt_descendant(adev, &cursor)) @@ -1697,9 +1465,9 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, } while (frag_start < entry_end); if (amdgpu_vm_pt_descendant(adev, &cursor)) { - /* Mark all child entries as huge */ + /* Free all child entries */ while (cursor.pfn < frag_start) { - cursor.entry->huge = true; + amdgpu_vm_free_pts(adev, params->vm, &cursor); amdgpu_vm_pt_next(adev, &cursor); } @@ -1738,137 +1506,28 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, uint64_t flags, uint64_t addr, struct dma_fence **fence) { - struct amdgpu_ring *ring; + struct amdgpu_vm_update_params params; void *owner = AMDGPU_FENCE_OWNER_VM; - unsigned nptes, ncmds, ndw; - struct amdgpu_job *job; - struct amdgpu_pte_update_params params; - struct dma_fence *f = NULL; int r; memset(¶ms, 0, sizeof(params)); params.adev = adev; params.vm = vm; + params.pages_addr = pages_addr; /* sync to everything except eviction fences on unmapping */ if (!(flags & AMDGPU_PTE_VALID)) owner = AMDGPU_FENCE_OWNER_KFD; - if (vm->use_cpu_for_update) { - /* params.src is used as flag to indicate system Memory */ - if (pages_addr) - params.src = ~0; - - /* Wait for PT BOs to be idle. PTs share the same resv. object - * as the root PD BO - */ - r = amdgpu_bo_sync_wait(vm->root.base.bo, owner, true); - if (unlikely(r)) - return r; - - /* Wait for any BO move to be completed */ - if (exclusive) { - r = dma_fence_wait(exclusive, true); - if (unlikely(r)) - return r; - } - - params.func = amdgpu_vm_cpu_set_ptes; - params.pages_addr = pages_addr; - return amdgpu_vm_update_ptes(¶ms, start, last + 1, - addr, flags); - } - - ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched); - - nptes = last - start + 1; - - /* - * reserve space for two commands every (1 << BLOCK_SIZE) - * entries or 2k dwords (whatever is smaller) - */ - ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1); - - /* The second command is for the shadow pagetables. */ - if (vm->root.base.bo->shadow) - ncmds *= 2; - - /* padding, etc. */ - ndw = 64; - - if (pages_addr) { - /* copy commands needed */ - ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw; - - /* and also PTEs */ - ndw += nptes * 2; - - params.func = amdgpu_vm_do_copy_ptes; - - } else { - /* set page commands needed */ - ndw += ncmds * 10; - - /* extra commands for begin/end fragments */ - ncmds = 2 * adev->vm_manager.fragment_size; - if (vm->root.base.bo->shadow) - ncmds *= 2; - - ndw += 10 * ncmds; - - params.func = amdgpu_vm_do_set_ptes; - } - - r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); + r = vm->update_funcs->prepare(¶ms, owner, exclusive); if (r) return r; - params.ib = &job->ibs[0]; - - if (pages_addr) { - uint64_t *pte; - unsigned i; - - /* Put the PTEs at the end of the IB. */ - i = ndw - nptes * 2; - pte= (uint64_t *)&(job->ibs->ptr[i]); - params.src = job->ibs->gpu_addr + i * 4; - - for (i = 0; i < nptes; ++i) { - pte[i] = amdgpu_vm_map_gart(pages_addr, addr + i * - AMDGPU_GPU_PAGE_SIZE); - pte[i] |= flags; - } - addr = 0; - } - - r = amdgpu_sync_fence(adev, &job->sync, exclusive, false); - if (r) - goto error_free; - - r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv, - owner, false); - if (r) - goto error_free; - r = amdgpu_vm_update_ptes(¶ms, start, last + 1, addr, flags); if (r) - goto error_free; - - amdgpu_ring_pad_ib(ring, params.ib); - WARN_ON(params.ib->length_dw > ndw); - r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_VM, &f); - if (r) - goto error_free; - - amdgpu_bo_fence(vm->root.base.bo, f, true); - dma_fence_put(*fence); - *fence = f; - return 0; + return r; -error_free: - amdgpu_job_free(job); - return r; + return vm->update_funcs->commit(¶ms, fence); } /** @@ -1880,6 +1539,7 @@ error_free: * @vm: requested vm * @mapping: mapped range and flags to use for the update * @flags: HW flags for the mapping + * @bo_adev: amdgpu_device pointer that bo actually been allocated * @nodes: array of drm_mm_nodes with the MC addresses * @fence: optional resulting fence * @@ -1895,6 +1555,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo_va_mapping *mapping, uint64_t flags, + struct amdgpu_device *bo_adev, struct drm_mm_node *nodes, struct dma_fence **fence) { @@ -1949,7 +1610,6 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, if (pages_addr) { uint64_t count; - max_entries = min(max_entries, 16ull * 1024ull); for (count = 1; count < max_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE; ++count) { @@ -1969,7 +1629,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, } } else if (flags & AMDGPU_PTE_VALID) { - addr += adev->vm_manager.vram_base_offset; + addr += bo_adev->vm_manager.vram_base_offset; addr += pfn << PAGE_SHIFT; } @@ -2016,6 +1676,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct drm_mm_node *nodes; struct dma_fence *exclusive, **last_update; uint64_t flags; + struct amdgpu_device *bo_adev = adev; int r; if (clear || !bo) { @@ -2034,10 +1695,12 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, exclusive = reservation_object_get_excl(bo->tbo.resv); } - if (bo) + if (bo) { flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); - else + bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); + } else { flags = 0x0; + } if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv)) last_update = &vm->last_update; @@ -2054,7 +1717,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, list_for_each_entry(mapping, &bo_va->invalids, list) { r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm, - mapping, flags, nodes, + mapping, flags, bo_adev, nodes, last_update); if (r) return r; @@ -2374,6 +2037,16 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, INIT_LIST_HEAD(&bo_va->valids); INIT_LIST_HEAD(&bo_va->invalids); + if (bo && amdgpu_xgmi_same_hive(adev, amdgpu_ttm_adev(bo->tbo.bdev)) && + (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM)) { + bo_va->is_xgmi = true; + mutex_lock(&adev->vm_manager.lock_pstate); + /* Power up XGMI if it can be potentially used */ + if (++adev->vm_manager.xgmi_map_counter == 1) + amdgpu_xgmi_set_pstate(adev, 1); + mutex_unlock(&adev->vm_manager.lock_pstate); + } + return bo_va; } @@ -2792,6 +2465,14 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, } dma_fence_put(bo_va->last_pt_update); + + if (bo && bo_va->is_xgmi) { + mutex_lock(&adev->vm_manager.lock_pstate); + if (--adev->vm_manager.xgmi_map_counter == 0) + amdgpu_xgmi_set_pstate(adev, 0); + mutex_unlock(&adev->vm_manager.lock_pstate); + } + kfree(bo_va); } @@ -2949,20 +2630,16 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, adev->vm_manager.fragment_size); } -static struct amdgpu_retryfault_hashtable *init_fault_hash(void) +/** + * amdgpu_vm_wait_idle - wait for the VM to become idle + * + * @vm: VM object to wait for + * @timeout: timeout to wait for VM to become idle + */ +long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) { - struct amdgpu_retryfault_hashtable *fault_hash; - - fault_hash = kmalloc(sizeof(*fault_hash), GFP_KERNEL); - if (!fault_hash) - return fault_hash; - - INIT_CHASH_TABLE(fault_hash->hash, - AMDGPU_PAGEFAULT_HASH_BITS, 8, 0); - spin_lock_init(&fault_hash->lock); - fault_hash->count = 0; - - return fault_hash; + return reservation_object_wait_timeout_rcu(vm->root.base.bo->tbo.resv, + true, true, timeout); } /** @@ -3018,6 +2695,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->use_cpu_for_update ? "CPU" : "SDMA"); WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)), "CPU update of VM recommended only for large BAR system\n"); + + if (vm->use_cpu_for_update) + vm->update_funcs = &amdgpu_vm_cpu_funcs; + else + vm->update_funcs = &amdgpu_vm_sdma_funcs; vm->last_update = NULL; amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, &bp); @@ -3037,9 +2719,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, amdgpu_vm_bo_base_init(&vm->root.base, vm, root); - r = amdgpu_vm_clear_bo(adev, vm, root, - adev->vm_manager.root_level, - vm->pte_support_ats); + r = amdgpu_vm_clear_bo(adev, vm, root); if (r) goto error_unreserve; @@ -3058,12 +2738,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->pasid = pasid; } - vm->fault_hash = init_fault_hash(); - if (!vm->fault_hash) { - r = -ENOMEM; - goto error_free_root; - } - INIT_KFIFO(vm->faults); return 0; @@ -3134,9 +2808,8 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns * changing any other state, in case it fails. */ if (pte_support_ats != vm->pte_support_ats) { - r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, - adev->vm_manager.root_level, - pte_support_ats); + vm->pte_support_ats = pte_support_ats; + r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo); if (r) goto free_idr; } @@ -3144,7 +2817,6 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns /* Update VM state */ vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_COMPUTE); - vm->pte_support_ats = pte_support_ats; DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? "CPU" : "SDMA"); WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)), @@ -3219,15 +2891,10 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) struct amdgpu_bo_va_mapping *mapping, *tmp; bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt; struct amdgpu_bo *root; - u64 fault; int i, r; amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm); - /* Clear pending page faults from IH when the VM is destroyed */ - while (kfifo_get(&vm->faults, &fault)) - amdgpu_vm_clear_fault(vm->fault_hash, fault); - if (vm->pasid) { unsigned long flags; @@ -3236,9 +2903,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); } - kfree(vm->fault_hash); - vm->fault_hash = NULL; - drm_sched_entity_destroy(&vm->entity); if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { @@ -3267,10 +2931,11 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (r) { dev_err(adev->dev, "Leaking page tables because BO reservation failed\n"); } else { - amdgpu_vm_free_pts(adev, vm); + amdgpu_vm_free_pts(adev, vm, NULL); amdgpu_bo_unreserve(root); } amdgpu_bo_unref(&root); + WARN_ON(vm->root.base.bo); dma_fence_put(vm->last_update); for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) amdgpu_vmid_free_reserved(adev, vm, i); @@ -3315,6 +2980,9 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) idr_init(&adev->vm_manager.pasid_idr); spin_lock_init(&adev->vm_manager.pasid_lock); + + adev->vm_manager.xgmi_map_counter = 0; + mutex_init(&adev->vm_manager.lock_pstate); } /** @@ -3405,78 +3073,3 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) } } } - -/** - * amdgpu_vm_add_fault - Add a page fault record to fault hash table - * - * @fault_hash: fault hash table - * @key: 64-bit encoding of PASID and address - * - * This should be called when a retry page fault interrupt is - * received. If this is a new page fault, it will be added to a hash - * table. The return value indicates whether this is a new fault, or - * a fault that was already known and is already being handled. - * - * If there are too many pending page faults, this will fail. Retry - * interrupts should be ignored in this case until there is enough - * free space. - * - * Returns 0 if the fault was added, 1 if the fault was already known, - * -ENOSPC if there are too many pending faults. - */ -int amdgpu_vm_add_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key) -{ - unsigned long flags; - int r = -ENOSPC; - - if (WARN_ON_ONCE(!fault_hash)) - /* Should be allocated in amdgpu_vm_init - */ - return r; - - spin_lock_irqsave(&fault_hash->lock, flags); - - /* Only let the hash table fill up to 50% for best performance */ - if (fault_hash->count >= (1 << (AMDGPU_PAGEFAULT_HASH_BITS-1))) - goto unlock_out; - - r = chash_table_copy_in(&fault_hash->hash, key, NULL); - if (!r) - fault_hash->count++; - - /* chash_table_copy_in should never fail unless we're losing count */ - WARN_ON_ONCE(r < 0); - -unlock_out: - spin_unlock_irqrestore(&fault_hash->lock, flags); - return r; -} - -/** - * amdgpu_vm_clear_fault - Remove a page fault record - * - * @fault_hash: fault hash table - * @key: 64-bit encoding of PASID and address - * - * This should be called when a page fault has been handled. Any - * future interrupt with this key will be processed as a new - * page fault. - */ -void amdgpu_vm_clear_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key) -{ - unsigned long flags; - int r; - - if (!fault_hash) - return; - - spin_lock_irqsave(&fault_hash->lock, flags); - - r = chash_table_remove(&fault_hash->hash, key, NULL); - if (!WARN_ON_ONCE(r < 0)) { - fault_hash->count--; - WARN_ON_ONCE(fault_hash->count < 0); - } - - spin_unlock_irqrestore(&fault_hash->lock, flags); -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 81ff8177f092..91baf95212a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -30,7 +30,6 @@ #include <drm/gpu_scheduler.h> #include <drm/drm_file.h> #include <drm/ttm/ttm_bo_driver.h> -#include <linux/chash.h> #include "amdgpu_sync.h" #include "amdgpu_ring.h" @@ -140,7 +139,6 @@ struct amdgpu_vm_bo_base { struct amdgpu_vm_pt { struct amdgpu_vm_bo_base base; - bool huge; /* array of page tables, one for each directory entry */ struct amdgpu_vm_pt *entries; @@ -167,11 +165,6 @@ struct amdgpu_vm_pte_funcs { uint32_t incr, uint64_t flags); }; -#define AMDGPU_VM_FAULT(pasid, addr) (((u64)(pasid) << 48) | (addr)) -#define AMDGPU_VM_FAULT_PASID(fault) ((u64)(fault) >> 48) -#define AMDGPU_VM_FAULT_ADDR(fault) ((u64)(fault) & 0xfffffffff000ULL) - - struct amdgpu_task_info { char process_name[TASK_COMM_LEN]; char task_name[TASK_COMM_LEN]; @@ -179,11 +172,52 @@ struct amdgpu_task_info { pid_t tgid; }; -#define AMDGPU_PAGEFAULT_HASH_BITS 8 -struct amdgpu_retryfault_hashtable { - DECLARE_CHASH_TABLE(hash, AMDGPU_PAGEFAULT_HASH_BITS, 8, 0); - spinlock_t lock; - int count; +/** + * struct amdgpu_vm_update_params + * + * Encapsulate some VM table update parameters to reduce + * the number of function parameters + * + */ +struct amdgpu_vm_update_params { + + /** + * @adev: amdgpu device we do this update for + */ + struct amdgpu_device *adev; + + /** + * @vm: optional amdgpu_vm we do this update for + */ + struct amdgpu_vm *vm; + + /** + * @pages_addr: + * + * DMA addresses to use for mapping + */ + dma_addr_t *pages_addr; + + /** + * @job: job to used for hw submission + */ + struct amdgpu_job *job; + + /** + * @num_dw_left: number of dw left for the IB + */ + unsigned int num_dw_left; +}; + +struct amdgpu_vm_update_funcs { + int (*map_table)(struct amdgpu_bo *bo); + int (*prepare)(struct amdgpu_vm_update_params *p, void * owner, + struct dma_fence *exclusive); + int (*update)(struct amdgpu_vm_update_params *p, + struct amdgpu_bo *bo, uint64_t pe, uint64_t addr, + unsigned count, uint32_t incr, uint64_t flags); + int (*commit)(struct amdgpu_vm_update_params *p, + struct dma_fence **fence); }; struct amdgpu_vm { @@ -221,7 +255,10 @@ struct amdgpu_vm { struct amdgpu_vmid *reserved_vmid[AMDGPU_MAX_VMHUBS]; /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ - bool use_cpu_for_update; + bool use_cpu_for_update; + + /* Functions to use for VM table updates */ + const struct amdgpu_vm_update_funcs *update_funcs; /* Flag to indicate ATS support from PTE for GFX9 */ bool pte_support_ats; @@ -245,7 +282,6 @@ struct amdgpu_vm { struct ttm_lru_bulk_move lru_bulk_move; /* mark whether can do the bulk move */ bool bulk_moveable; - struct amdgpu_retryfault_hashtable *fault_hash; }; struct amdgpu_vm_manager { @@ -267,6 +303,7 @@ struct amdgpu_vm_manager { const struct amdgpu_vm_pte_funcs *vm_pte_funcs; struct drm_sched_rq *vm_pte_rqs[AMDGPU_MAX_RINGS]; unsigned vm_pte_num_rqs; + struct amdgpu_ring *page_fault; /* partial resident texture handling */ spinlock_t prt_lock; @@ -283,14 +320,23 @@ struct amdgpu_vm_manager { */ struct idr pasid_idr; spinlock_t pasid_lock; + + /* counter of mapped memory through xgmi */ + uint32_t xgmi_map_counter; + struct mutex lock_pstate; }; #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count))) #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr))) #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags))) +extern const struct amdgpu_vm_update_funcs amdgpu_vm_cpu_funcs; +extern const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs; + void amdgpu_vm_manager_init(struct amdgpu_device *adev); void amdgpu_vm_manager_fini(struct amdgpu_device *adev); + +long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout); int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int vm_context, unsigned int pasid); int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int pasid); @@ -303,9 +349,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm); int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, int (*callback)(void *p, struct amdgpu_bo *bo), void *param); -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - uint64_t saddr, uint64_t size); int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync); int amdgpu_vm_update_directories(struct amdgpu_device *adev, struct amdgpu_vm *vm); @@ -319,6 +362,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, bool clear); void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, struct amdgpu_bo *bo, bool evicted); +uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr); struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, struct amdgpu_bo *bo); struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, @@ -358,11 +402,6 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, struct amdgpu_vm *vm); - -int amdgpu_vm_add_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key); - -void amdgpu_vm_clear_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key); - void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c new file mode 100644 index 000000000000..5222d165abfc --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c @@ -0,0 +1,127 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "amdgpu_vm.h" +#include "amdgpu_object.h" +#include "amdgpu_trace.h" + +/** + * amdgpu_vm_cpu_map_table - make sure new PDs/PTs are kmapped + * + * @table: newly allocated or validated PD/PT + */ +static int amdgpu_vm_cpu_map_table(struct amdgpu_bo *table) +{ + return amdgpu_bo_kmap(table, NULL); +} + +/** + * amdgpu_vm_cpu_prepare - prepare page table update with the CPU + * + * @p: see amdgpu_vm_update_params definition + * @owner: owner we need to sync to + * @exclusive: exclusive move fence we need to sync to + * + * Returns: + * Negativ errno, 0 for success. + */ +static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p, void *owner, + struct dma_fence *exclusive) +{ + int r; + + /* Wait for PT BOs to be idle. PTs share the same resv. object + * as the root PD BO + */ + r = amdgpu_bo_sync_wait(p->vm->root.base.bo, owner, true); + if (unlikely(r)) + return r; + + /* Wait for any BO move to be completed */ + if (exclusive) { + r = dma_fence_wait(exclusive, true); + if (unlikely(r)) + return r; + } + + return 0; +} + +/** + * amdgpu_vm_cpu_update - helper to update page tables via CPU + * + * @p: see amdgpu_vm_update_params definition + * @bo: PD/PT to update + * @pe: kmap addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: hw access flags + * + * Write count number of PT/PD entries directly. + */ +static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p, + struct amdgpu_bo *bo, uint64_t pe, + uint64_t addr, unsigned count, uint32_t incr, + uint64_t flags) +{ + unsigned int i; + uint64_t value; + + pe += (unsigned long)amdgpu_bo_kptr(bo); + + trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); + + for (i = 0; i < count; i++) { + value = p->pages_addr ? + amdgpu_vm_map_gart(p->pages_addr, addr) : + addr; + amdgpu_gmc_set_pte_pde(p->adev, (void *)(uintptr_t)pe, + i, value, flags); + addr += incr; + } + return 0; +} + +/** + * amdgpu_vm_cpu_commit - commit page table update to the HW + * + * @p: see amdgpu_vm_update_params definition + * @fence: unused + * + * Make sure that the hardware sees the page table updates. + */ +static int amdgpu_vm_cpu_commit(struct amdgpu_vm_update_params *p, + struct dma_fence **fence) +{ + /* Flush HDP */ + mb(); + amdgpu_asic_flush_hdp(p->adev, NULL); + return 0; +} + +const struct amdgpu_vm_update_funcs amdgpu_vm_cpu_funcs = { + .map_table = amdgpu_vm_cpu_map_table, + .prepare = amdgpu_vm_cpu_prepare, + .update = amdgpu_vm_cpu_update, + .commit = amdgpu_vm_cpu_commit +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c new file mode 100644 index 000000000000..ddd181f5ed37 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -0,0 +1,270 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "amdgpu_vm.h" +#include "amdgpu_job.h" +#include "amdgpu_object.h" +#include "amdgpu_trace.h" + +#define AMDGPU_VM_SDMA_MIN_NUM_DW 256u +#define AMDGPU_VM_SDMA_MAX_NUM_DW (16u * 1024u) + +/** + * amdgpu_vm_sdma_map_table - make sure new PDs/PTs are GTT mapped + * + * @table: newly allocated or validated PD/PT + */ +static int amdgpu_vm_sdma_map_table(struct amdgpu_bo *table) +{ + int r; + + r = amdgpu_ttm_alloc_gart(&table->tbo); + if (r) + return r; + + if (table->shadow) + r = amdgpu_ttm_alloc_gart(&table->shadow->tbo); + + return r; +} + +/** + * amdgpu_vm_sdma_prepare - prepare SDMA command submission + * + * @p: see amdgpu_vm_update_params definition + * @owner: owner we need to sync to + * @exclusive: exclusive move fence we need to sync to + * + * Returns: + * Negativ errno, 0 for success. + */ +static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p, + void *owner, struct dma_fence *exclusive) +{ + struct amdgpu_bo *root = p->vm->root.base.bo; + unsigned int ndw = AMDGPU_VM_SDMA_MIN_NUM_DW; + int r; + + r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, &p->job); + if (r) + return r; + + r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false); + if (r) + return r; + + r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv, + owner, false); + if (r) + return r; + + p->num_dw_left = ndw; + return 0; +} + +/** + * amdgpu_vm_sdma_commit - commit SDMA command submission + * + * @p: see amdgpu_vm_update_params definition + * @fence: resulting fence + * + * Returns: + * Negativ errno, 0 for success. + */ +static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, + struct dma_fence **fence) +{ + struct amdgpu_bo *root = p->vm->root.base.bo; + struct amdgpu_ib *ib = p->job->ibs; + struct amdgpu_ring *ring; + struct dma_fence *f; + int r; + + ring = container_of(p->vm->entity.rq->sched, struct amdgpu_ring, sched); + + WARN_ON(ib->length_dw == 0); + amdgpu_ring_pad_ib(ring, ib); + WARN_ON(ib->length_dw > p->num_dw_left); + r = amdgpu_job_submit(p->job, &p->vm->entity, + AMDGPU_FENCE_OWNER_VM, &f); + if (r) + goto error; + + amdgpu_bo_fence(root, f, true); + if (fence) + swap(*fence, f); + dma_fence_put(f); + return 0; + +error: + amdgpu_job_free(p->job); + return r; +} + + +/** + * amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping + * + * @p: see amdgpu_vm_update_params definition + * @bo: PD/PT to update + * @pe: addr of the page entry + * @count: number of page entries to copy + * + * Traces the parameters and calls the DMA function to copy the PTEs. + */ +static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params *p, + struct amdgpu_bo *bo, uint64_t pe, + unsigned count) +{ + struct amdgpu_ib *ib = p->job->ibs; + uint64_t src = ib->gpu_addr; + + src += p->num_dw_left * 4; + + pe += amdgpu_bo_gpu_offset(bo); + trace_amdgpu_vm_copy_ptes(pe, src, count); + + amdgpu_vm_copy_pte(p->adev, ib, pe, src, count); +} + +/** + * amdgpu_vm_sdma_set_ptes - helper to call the right asic function + * + * @p: see amdgpu_vm_update_params definition + * @bo: PD/PT to update + * @pe: addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: hw access flags + * + * Traces the parameters and calls the right asic functions + * to setup the page table using the DMA. + */ +static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p, + struct amdgpu_bo *bo, uint64_t pe, + uint64_t addr, unsigned count, + uint32_t incr, uint64_t flags) +{ + struct amdgpu_ib *ib = p->job->ibs; + + pe += amdgpu_bo_gpu_offset(bo); + trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); + if (count < 3) { + amdgpu_vm_write_pte(p->adev, ib, pe, addr | flags, + count, incr); + } else { + amdgpu_vm_set_pte_pde(p->adev, ib, pe, addr, + count, incr, flags); + } +} + +/** + * amdgpu_vm_sdma_update - execute VM update + * + * @p: see amdgpu_vm_update_params definition + * @bo: PD/PT to update + * @pe: addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: hw access flags + * + * Reserve space in the IB, setup mapping buffer on demand and write commands to + * the IB. + */ +static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, + struct amdgpu_bo *bo, uint64_t pe, + uint64_t addr, unsigned count, uint32_t incr, + uint64_t flags) +{ + unsigned int i, ndw, nptes; + uint64_t *pte; + int r; + + do { + ndw = p->num_dw_left; + ndw -= p->job->ibs->length_dw; + + if (ndw < 32) { + r = amdgpu_vm_sdma_commit(p, NULL); + if (r) + return r; + + /* estimate how many dw we need */ + ndw = 32; + if (p->pages_addr) + ndw += count * 2; + ndw = max(ndw, AMDGPU_VM_SDMA_MIN_NUM_DW); + ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW); + + r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, &p->job); + if (r) + return r; + + p->num_dw_left = ndw; + } + + if (!p->pages_addr) { + /* set page commands needed */ + if (bo->shadow) + amdgpu_vm_sdma_set_ptes(p, bo->shadow, pe, addr, + count, incr, flags); + amdgpu_vm_sdma_set_ptes(p, bo, pe, addr, count, + incr, flags); + return 0; + } + + /* copy commands needed */ + ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw * + (bo->shadow ? 2 : 1); + + /* for padding */ + ndw -= 7; + + nptes = min(count, ndw / 2); + + /* Put the PTEs at the end of the IB. */ + p->num_dw_left -= nptes * 2; + pte = (uint64_t *)&(p->job->ibs->ptr[p->num_dw_left]); + for (i = 0; i < nptes; ++i, addr += incr) { + pte[i] = amdgpu_vm_map_gart(p->pages_addr, addr); + pte[i] |= flags; + } + + if (bo->shadow) + amdgpu_vm_sdma_copy_ptes(p, bo->shadow, pe, nptes); + amdgpu_vm_sdma_copy_ptes(p, bo, pe, nptes); + + pe += nptes * 8; + count -= nptes; + } while (count); + + return 0; +} + +const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs = { + .map_table = amdgpu_vm_sdma_map_table, + .prepare = amdgpu_vm_sdma_prepare, + .update = amdgpu_vm_sdma_update, + .commit = amdgpu_vm_sdma_commit +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 3f9d5d00c9b3..ec9ea3fdbb4a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -33,6 +33,85 @@ struct amdgpu_vram_mgr { }; /** + * DOC: mem_info_vram_total + * + * The amdgpu driver provides a sysfs API for reporting current total VRAM + * available on the device + * The file mem_info_vram_total is used for this and returns the total + * amount of VRAM in bytes + */ +static ssize_t amdgpu_mem_info_vram_total_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + return snprintf(buf, PAGE_SIZE, "%llu\n", adev->gmc.real_vram_size); +} + +/** + * DOC: mem_info_vis_vram_total + * + * The amdgpu driver provides a sysfs API for reporting current total + * visible VRAM available on the device + * The file mem_info_vis_vram_total is used for this and returns the total + * amount of visible VRAM in bytes + */ +static ssize_t amdgpu_mem_info_vis_vram_total_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + return snprintf(buf, PAGE_SIZE, "%llu\n", adev->gmc.visible_vram_size); +} + +/** + * DOC: mem_info_vram_used + * + * The amdgpu driver provides a sysfs API for reporting current total VRAM + * available on the device + * The file mem_info_vram_used is used for this and returns the total + * amount of currently used VRAM in bytes + */ +static ssize_t amdgpu_mem_info_vram_used_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + return snprintf(buf, PAGE_SIZE, "%llu\n", + amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM])); +} + +/** + * DOC: mem_info_vis_vram_used + * + * The amdgpu driver provides a sysfs API for reporting current total of + * used visible VRAM + * The file mem_info_vis_vram_used is used for this and returns the total + * amount of currently used visible VRAM in bytes + */ +static ssize_t amdgpu_mem_info_vis_vram_used_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + return snprintf(buf, PAGE_SIZE, "%llu\n", + amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM])); +} + +static DEVICE_ATTR(mem_info_vram_total, S_IRUGO, + amdgpu_mem_info_vram_total_show, NULL); +static DEVICE_ATTR(mem_info_vis_vram_total, S_IRUGO, + amdgpu_mem_info_vis_vram_total_show,NULL); +static DEVICE_ATTR(mem_info_vram_used, S_IRUGO, + amdgpu_mem_info_vram_used_show, NULL); +static DEVICE_ATTR(mem_info_vis_vram_used, S_IRUGO, + amdgpu_mem_info_vis_vram_used_show, NULL); + +/** * amdgpu_vram_mgr_init - init VRAM manager and DRM MM * * @man: TTM memory type manager @@ -43,7 +122,9 @@ struct amdgpu_vram_mgr { static int amdgpu_vram_mgr_init(struct ttm_mem_type_manager *man, unsigned long p_size) { + struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); struct amdgpu_vram_mgr *mgr; + int ret; mgr = kzalloc(sizeof(*mgr), GFP_KERNEL); if (!mgr) @@ -52,6 +133,29 @@ static int amdgpu_vram_mgr_init(struct ttm_mem_type_manager *man, drm_mm_init(&mgr->mm, 0, p_size); spin_lock_init(&mgr->lock); man->priv = mgr; + + /* Add the two VRAM-related sysfs files */ + ret = device_create_file(adev->dev, &dev_attr_mem_info_vram_total); + if (ret) { + DRM_ERROR("Failed to create device file mem_info_vram_total\n"); + return ret; + } + ret = device_create_file(adev->dev, &dev_attr_mem_info_vis_vram_total); + if (ret) { + DRM_ERROR("Failed to create device file mem_info_vis_vram_total\n"); + return ret; + } + ret = device_create_file(adev->dev, &dev_attr_mem_info_vram_used); + if (ret) { + DRM_ERROR("Failed to create device file mem_info_vram_used\n"); + return ret; + } + ret = device_create_file(adev->dev, &dev_attr_mem_info_vis_vram_used); + if (ret) { + DRM_ERROR("Failed to create device file mem_info_vis_vram_used\n"); + return ret; + } + return 0; } @@ -65,6 +169,7 @@ static int amdgpu_vram_mgr_init(struct ttm_mem_type_manager *man, */ static int amdgpu_vram_mgr_fini(struct ttm_mem_type_manager *man) { + struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); struct amdgpu_vram_mgr *mgr = man->priv; spin_lock(&mgr->lock); @@ -72,6 +177,10 @@ static int amdgpu_vram_mgr_fini(struct ttm_mem_type_manager *man) spin_unlock(&mgr->lock); kfree(mgr); man->priv = NULL; + device_remove_file(adev->dev, &dev_attr_mem_info_vram_total); + device_remove_file(adev->dev, &dev_attr_mem_info_vis_vram_total); + device_remove_file(adev->dev, &dev_attr_mem_info_vram_used); + device_remove_file(adev->dev, &dev_attr_mem_info_vis_vram_used); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 407dd16cc35c..a48c84c51775 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -24,6 +24,7 @@ #include <linux/list.h> #include "amdgpu.h" #include "amdgpu_xgmi.h" +#include "amdgpu_smu.h" static DEFINE_MUTEX(xgmi_mutex); @@ -34,12 +35,132 @@ static DEFINE_MUTEX(xgmi_mutex); static struct amdgpu_hive_info xgmi_hives[AMDGPU_MAX_XGMI_HIVE]; static unsigned hive_count = 0; - void *amdgpu_xgmi_hive_try_lock(struct amdgpu_hive_info *hive) { return &hive->device_list; } +static ssize_t amdgpu_xgmi_show_hive_id(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct amdgpu_hive_info *hive = + container_of(attr, struct amdgpu_hive_info, dev_attr); + + return snprintf(buf, PAGE_SIZE, "%llu\n", hive->hive_id); +} + +static int amdgpu_xgmi_sysfs_create(struct amdgpu_device *adev, + struct amdgpu_hive_info *hive) +{ + int ret = 0; + + if (WARN_ON(hive->kobj)) + return -EINVAL; + + hive->kobj = kobject_create_and_add("xgmi_hive_info", &adev->dev->kobj); + if (!hive->kobj) { + dev_err(adev->dev, "XGMI: Failed to allocate sysfs entry!\n"); + return -EINVAL; + } + + hive->dev_attr = (struct device_attribute) { + .attr = { + .name = "xgmi_hive_id", + .mode = S_IRUGO, + + }, + .show = amdgpu_xgmi_show_hive_id, + }; + + ret = sysfs_create_file(hive->kobj, &hive->dev_attr.attr); + if (ret) { + dev_err(adev->dev, "XGMI: Failed to create device file xgmi_hive_id\n"); + kobject_del(hive->kobj); + kobject_put(hive->kobj); + hive->kobj = NULL; + } + + return ret; +} + +static void amdgpu_xgmi_sysfs_destroy(struct amdgpu_device *adev, + struct amdgpu_hive_info *hive) +{ + sysfs_remove_file(hive->kobj, &hive->dev_attr.attr); + kobject_del(hive->kobj); + kobject_put(hive->kobj); + hive->kobj = NULL; +} + +static ssize_t amdgpu_xgmi_show_device_id(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + return snprintf(buf, PAGE_SIZE, "%llu\n", adev->gmc.xgmi.node_id); + +} + + +static DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL); + + +static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev, + struct amdgpu_hive_info *hive) +{ + int ret = 0; + char node[10] = { 0 }; + + /* Create xgmi device id file */ + ret = device_create_file(adev->dev, &dev_attr_xgmi_device_id); + if (ret) { + dev_err(adev->dev, "XGMI: Failed to create device file xgmi_device_id\n"); + return ret; + } + + /* Create sysfs link to hive info folder on the first device */ + if (adev != hive->adev) { + ret = sysfs_create_link(&adev->dev->kobj, hive->kobj, + "xgmi_hive_info"); + if (ret) { + dev_err(adev->dev, "XGMI: Failed to create link to hive info"); + goto remove_file; + } + } + + sprintf(node, "node%d", hive->number_devices); + /* Create sysfs link form the hive folder to yourself */ + ret = sysfs_create_link(hive->kobj, &adev->dev->kobj, node); + if (ret) { + dev_err(adev->dev, "XGMI: Failed to create link from hive info"); + goto remove_link; + } + + goto success; + + +remove_link: + sysfs_remove_link(&adev->dev->kobj, adev->ddev->unique); + +remove_file: + device_remove_file(adev->dev, &dev_attr_xgmi_device_id); + +success: + return ret; +} + +static void amdgpu_xgmi_sysfs_rem_dev_info(struct amdgpu_device *adev, + struct amdgpu_hive_info *hive) +{ + device_remove_file(adev->dev, &dev_attr_xgmi_device_id); + sysfs_remove_link(&adev->dev->kobj, adev->ddev->unique); + sysfs_remove_link(hive->kobj, adev->ddev->unique); +} + + + struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock) { int i; @@ -66,18 +187,50 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lo /* initialize new hive if not exist */ tmp = &xgmi_hives[hive_count++]; + + if (amdgpu_xgmi_sysfs_create(adev, tmp)) { + mutex_unlock(&xgmi_mutex); + return NULL; + } + + tmp->adev = adev; tmp->hive_id = adev->gmc.xgmi.hive_id; INIT_LIST_HEAD(&tmp->device_list); mutex_init(&tmp->hive_lock); mutex_init(&tmp->reset_lock); + if (lock) mutex_lock(&tmp->hive_lock); - + tmp->pstate = -1; mutex_unlock(&xgmi_mutex); return tmp; } +int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate) +{ + int ret = 0; + struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0); + + if (!hive) + return 0; + + if (hive->pstate == pstate) + return 0; + + dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate); + + if (is_support_sw_smu(adev)) + ret = smu_set_xgmi_pstate(&adev->smu, pstate); + if (ret) + dev_err(adev->dev, + "XGMI: Set pstate failure on device %llx, hive %llx, ret %d", + adev->gmc.xgmi.node_id, + adev->gmc.xgmi.hive_id, ret); + + return ret; +} + int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev) { int ret = -EINVAL; @@ -156,8 +309,17 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) break; } - dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n", - adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id); + if (!ret) + ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive); + + if (!ret) + dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n", + adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id); + else + dev_err(adev->dev, "XGMI: Failed to add node %d, hive 0x%llx ret: %d\n", + adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id, + ret); + mutex_unlock(&hive->hive_lock); exit: @@ -176,9 +338,11 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev) return; if (!(hive->number_devices--)) { + amdgpu_xgmi_sysfs_destroy(adev, hive); mutex_destroy(&hive->hive_lock); mutex_destroy(&hive->reset_lock); } else { + amdgpu_xgmi_sysfs_rem_dev_info(adev, hive); mutex_unlock(&hive->hive_lock); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 14bc60664159..3e9c91e9a4bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -29,13 +29,25 @@ struct amdgpu_hive_info { struct list_head device_list; struct psp_xgmi_topology_info topology_info; int number_devices; - struct mutex hive_lock, - reset_lock; + struct mutex hive_lock, reset_lock; + struct kobject *kobj; + struct device_attribute dev_attr; + struct amdgpu_device *adev; + int pstate; /*0 -- low , 1 -- high , -1 unknown*/ }; struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock); int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev); int amdgpu_xgmi_add_device(struct amdgpu_device *adev); void amdgpu_xgmi_remove_device(struct amdgpu_device *adev); +int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate); + +static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, + struct amdgpu_device *bo_adev) +{ + return (adev != bo_adev && + adev->gmc.xgmi.hive_id && + adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id); +} #endif diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 189599b694e8..d42808b05971 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -977,8 +977,8 @@ static int cik_sdma_sw_init(void *handle) r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, (i == 0) ? - AMDGPU_SDMA_IRQ_TRAP0 : - AMDGPU_SDMA_IRQ_TRAP1); + AMDGPU_SDMA_IRQ_INSTANCE0 : + AMDGPU_SDMA_IRQ_INSTANCE1); if (r) return r; } @@ -1114,7 +1114,7 @@ static int cik_sdma_set_trap_irq_state(struct amdgpu_device *adev, u32 sdma_cntl; switch (type) { - case AMDGPU_SDMA_IRQ_TRAP0: + case AMDGPU_SDMA_IRQ_INSTANCE0: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET); @@ -1130,7 +1130,7 @@ static int cik_sdma_set_trap_irq_state(struct amdgpu_device *adev, break; } break; - case AMDGPU_SDMA_IRQ_TRAP1: + case AMDGPU_SDMA_IRQ_INSTANCE1: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 305276c7e4bf..c0cb244f58cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -782,6 +782,25 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); + tilemode[18] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_8x16); + tilemode[19] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[20] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); tilemode[21] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | ARRAY_MODE(ARRAY_2D_TILED_THIN1) | PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index b8e50a34bdb3..02955e6e9dd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -3236,6 +3236,7 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) dev_warn(adev->dev, "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n", adev->asic_type); + /* fall through */ case CHIP_CARRIZO: modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index a11db2b1a63f..ba67d1023264 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -40,6 +40,8 @@ #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h" +#include "amdgpu_ras.h" + #define GFX9_NUM_GFX_RINGS 1 #define GFX9_MEC_HPD_SIZE 4096 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L @@ -576,6 +578,27 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) } } +static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_VEGA10: + case CHIP_VEGA12: + case CHIP_VEGA20: + break; + case CHIP_RAVEN: + if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) + break; + if ((adev->gfx.rlc_fw_version < 531) || + (adev->gfx.rlc_fw_version == 53815) || + (adev->gfx.rlc_feature_version < 1) || + !adev->gfx.rlc.is_rlc_v2_1) + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + break; + default: + break; + } +} + static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; @@ -828,6 +851,7 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) } out: + gfx_v9_0_check_if_need_gfxoff(adev); gfx_v9_0_check_fw_write_wait(adev); if (err) { dev_err(adev->dev, @@ -1639,6 +1663,18 @@ static int gfx_v9_0_sw_init(void *handle) if (r) return r; + /* ECC error */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR, + &adev->gfx.cp_ecc_error_irq); + if (r) + return r; + + /* FUE error */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR, + &adev->gfx.cp_ecc_error_irq); + if (r) + return r; + adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; gfx_v9_0_scratch_init(adev); @@ -1731,6 +1767,20 @@ static int gfx_v9_0_sw_fini(void *handle) int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) && + adev->gfx.ras_if) { + struct ras_common_if *ras_if = adev->gfx.ras_if; + struct ras_ih_if ih_info = { + .head = *ras_if, + }; + + amdgpu_ras_debugfs_remove(adev, ras_if); + amdgpu_ras_sysfs_remove(adev, ras_if); + amdgpu_ras_interrupt_remove_handler(adev, &ih_info); + amdgpu_ras_feature_enable(adev, ras_if, 0); + kfree(ras_if); + } + amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); @@ -3303,6 +3353,7 @@ static int gfx_v9_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); @@ -3492,6 +3543,80 @@ static int gfx_v9_0_early_init(void *handle) return 0; } +static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry); + +static int gfx_v9_0_ecc_late_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct ras_common_if **ras_if = &adev->gfx.ras_if; + struct ras_ih_if ih_info = { + .cb = gfx_v9_0_process_ras_data_cb, + }; + struct ras_fs_if fs_info = { + .sysfs_name = "gfx_err_count", + .debugfs_name = "gfx_err_inject", + }; + struct ras_common_if ras_block = { + .block = AMDGPU_RAS_BLOCK__GFX, + .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + .sub_block_index = 0, + .name = "gfx", + }; + int r; + + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { + amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0); + return 0; + } + + if (*ras_if) + goto resume; + + *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); + if (!*ras_if) + return -ENOMEM; + + **ras_if = ras_block; + + r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); + if (r) + goto feature; + + ih_info.head = **ras_if; + fs_info.head = **ras_if; + + r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); + if (r) + goto interrupt; + + r = amdgpu_ras_debugfs_create(adev, &fs_info); + if (r) + goto debugfs; + + r = amdgpu_ras_sysfs_create(adev, &fs_info); + if (r) + goto sysfs; +resume: + r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); + if (r) + goto irq; + + return 0; +irq: + amdgpu_ras_sysfs_remove(adev, *ras_if); +sysfs: + amdgpu_ras_debugfs_remove(adev, *ras_if); +debugfs: + amdgpu_ras_interrupt_remove_handler(adev, &ih_info); +interrupt: + amdgpu_ras_feature_enable(adev, *ras_if, 0); +feature: + kfree(*ras_if); + *ras_if = NULL; + return -EINVAL; +} + static int gfx_v9_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -3505,6 +3630,10 @@ static int gfx_v9_0_late_init(void *handle) if (r) return r; + r = gfx_v9_0_ecc_late_init(handle); + if (r) + return r; + return 0; } @@ -4541,6 +4670,45 @@ static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, return 0; } +#define ENABLE_ECC_ON_ME_PIPE(me, pipe) \ + WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ + CP_ECC_ERROR_INT_ENABLE, 1) + +#define DISABLE_ECC_ON_ME_PIPE(me, pipe) \ + WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ + CP_ECC_ERROR_INT_ENABLE, 0) + +static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, + CP_ECC_ERROR_INT_ENABLE, 0); + DISABLE_ECC_ON_ME_PIPE(1, 0); + DISABLE_ECC_ON_ME_PIPE(1, 1); + DISABLE_ECC_ON_ME_PIPE(1, 2); + DISABLE_ECC_ON_ME_PIPE(1, 3); + break; + + case AMDGPU_IRQ_STATE_ENABLE: + WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, + CP_ECC_ERROR_INT_ENABLE, 1); + ENABLE_ECC_ON_ME_PIPE(1, 0); + ENABLE_ECC_ON_ME_PIPE(1, 1); + ENABLE_ECC_ON_ME_PIPE(1, 2); + ENABLE_ECC_ON_ME_PIPE(1, 3); + break; + default: + break; + } + + return 0; +} + + static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, @@ -4657,6 +4825,34 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry) +{ + /* TODO ue will trigger an interrupt. */ + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + amdgpu_ras_reset_gpu(adev, 0); + return AMDGPU_RAS_UE; +} + +static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + struct ras_common_if *ras_if = adev->gfx.ras_if; + struct ras_dispatch_if ih_data = { + .entry = entry, + }; + + if (!ras_if) + return 0; + + ih_data.head = *ras_if; + + DRM_ERROR("CP ECC ERROR IRQ\n"); + amdgpu_ras_interrupt_dispatch(adev, &ih_data); + return 0; +} + static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { .name = "gfx_v9_0", .early_init = gfx_v9_0_early_init, @@ -4818,6 +5014,12 @@ static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = { .process = gfx_v9_0_priv_inst_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = { + .set = gfx_v9_0_set_cp_ecc_error_state, + .process = gfx_v9_0_cp_ecc_error_irq, +}; + + static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev) { adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; @@ -4828,6 +5030,9 @@ static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs; + + adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/ + adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs; } static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index f5edddf3b29d..7bb5359d0bbd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -143,7 +143,7 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev) /* XXX for emulation, Refer to closed source code.*/ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, 0); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); WREG32_SOC15(GC, 0, mmVM_L2_CNTL, tmp); @@ -236,7 +236,7 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) block_size); /* Send no-retry XNACK on fault to suppress VM fault storm. */ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 1); WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i, tmp); WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 9fc3296592fe..b06d876da2d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -225,7 +225,7 @@ static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev, u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; base <<= 24; - amdgpu_gmc_vram_location(adev, &adev->gmc, base); + amdgpu_gmc_vram_location(adev, mc, base); amdgpu_gmc_gart_location(adev, mc); } @@ -383,20 +383,6 @@ static uint64_t gmc_v6_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, return pd_addr; } -static int gmc_v6_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, - uint32_t gpu_page_idx, uint64_t addr, - uint64_t flags) -{ - void __iomem *ptr = (void *)cpu_pt_addr; - uint64_t value; - - value = addr & 0xFFFFFFFFFFFFF000ULL; - value |= flags; - writeq(value, ptr + (gpu_page_idx * 8)); - - return 0; -} - static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev, uint32_t flags) { @@ -886,7 +872,7 @@ static int gmc_v6_0_sw_init(void *handle) pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); dev_warn(adev->dev, "amdgpu: No coherent DMA available.\n"); } - adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits); + adev->need_swiotlb = drm_need_swiotlb(dma_bits); r = gmc_v6_0_init_microcode(adev); if (r) { @@ -1169,7 +1155,6 @@ static const struct amd_ip_funcs gmc_v6_0_ip_funcs = { static const struct amdgpu_gmc_funcs gmc_v6_0_gmc_funcs = { .flush_gpu_tlb = gmc_v6_0_flush_gpu_tlb, .emit_flush_gpu_tlb = gmc_v6_0_emit_flush_gpu_tlb, - .set_pte_pde = gmc_v6_0_set_pte_pde, .set_prt = gmc_v6_0_set_prt, .get_vm_pde = gmc_v6_0_get_vm_pde, .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 761dcfb2fec0..75aa3332aee2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -242,7 +242,7 @@ static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev, u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; base <<= 24; - amdgpu_gmc_vram_location(adev, &adev->gmc, base); + amdgpu_gmc_vram_location(adev, mc, base); amdgpu_gmc_gart_location(adev, mc); } @@ -460,31 +460,6 @@ static void gmc_v7_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid); } -/** - * gmc_v7_0_set_pte_pde - update the page tables using MMIO - * - * @adev: amdgpu_device pointer - * @cpu_pt_addr: cpu address of the page table - * @gpu_page_idx: entry in the page table to update - * @addr: dst addr to write into pte/pde - * @flags: access flags - * - * Update the page tables using the CPU. - */ -static int gmc_v7_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, - uint32_t gpu_page_idx, uint64_t addr, - uint64_t flags) -{ - void __iomem *ptr = (void *)cpu_pt_addr; - uint64_t value; - - value = addr & 0xFFFFFFFFFFFFF000ULL; - value |= flags; - writeq(value, ptr + (gpu_page_idx * 8)); - - return 0; -} - static uint64_t gmc_v7_0_get_vm_pte_flags(struct amdgpu_device *adev, uint32_t flags) { @@ -1030,7 +1005,7 @@ static int gmc_v7_0_sw_init(void *handle) pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); pr_warn("amdgpu: No coherent DMA available\n"); } - adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits); + adev->need_swiotlb = drm_need_swiotlb(dma_bits); r = gmc_v7_0_init_microcode(adev); if (r) { @@ -1376,7 +1351,6 @@ static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = { .flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb, .emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping, - .set_pte_pde = gmc_v7_0_set_pte_pde, .set_prt = gmc_v7_0_set_prt, .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags, .get_vm_pde = gmc_v7_0_get_vm_pde diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 34440672f938..8a3b5e6fc6c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -433,7 +433,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev, base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; base <<= 24; - amdgpu_gmc_vram_location(adev, &adev->gmc, base); + amdgpu_gmc_vram_location(adev, mc, base); amdgpu_gmc_gart_location(adev, mc); } @@ -662,50 +662,26 @@ static void gmc_v8_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid); } -/** - * gmc_v8_0_set_pte_pde - update the page tables using MMIO - * - * @adev: amdgpu_device pointer - * @cpu_pt_addr: cpu address of the page table - * @gpu_page_idx: entry in the page table to update - * @addr: dst addr to write into pte/pde - * @flags: access flags +/* + * PTE format on VI: + * 63:40 reserved + * 39:12 4k physical page base address + * 11:7 fragment + * 6 write + * 5 read + * 4 exe + * 3 reserved + * 2 snooped + * 1 system + * 0 valid * - * Update the page tables using the CPU. + * PDE format on VI: + * 63:59 block fragment size + * 58:40 reserved + * 39:1 physical base address of PTE + * bits 5:1 must be 0. + * 0 valid */ -static int gmc_v8_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, - uint32_t gpu_page_idx, uint64_t addr, - uint64_t flags) -{ - void __iomem *ptr = (void *)cpu_pt_addr; - uint64_t value; - - /* - * PTE format on VI: - * 63:40 reserved - * 39:12 4k physical page base address - * 11:7 fragment - * 6 write - * 5 read - * 4 exe - * 3 reserved - * 2 snooped - * 1 system - * 0 valid - * - * PDE format on VI: - * 63:59 block fragment size - * 58:40 reserved - * 39:1 physical base address of PTE - * bits 5:1 must be 0. - * 0 valid - */ - value = addr & 0x000000FFFFFFF000ULL; - value |= flags; - writeq(value, ptr + (gpu_page_idx * 8)); - - return 0; -} static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev, uint32_t flags) @@ -1155,7 +1131,7 @@ static int gmc_v8_0_sw_init(void *handle) pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); pr_warn("amdgpu: No coherent DMA available\n"); } - adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits); + adev->need_swiotlb = drm_need_swiotlb(dma_bits); r = gmc_v8_0_init_microcode(adev); if (r) { @@ -1743,7 +1719,6 @@ static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = { .flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb, .emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping, - .set_pte_pde = gmc_v8_0_set_pte_pde, .set_prt = gmc_v8_0_set_prt, .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags, .get_vm_pde = gmc_v8_0_get_vm_pde diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 2fe8397241ea..3fd79e07944d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -47,6 +47,8 @@ #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" +#include "amdgpu_ras.h" + /* add these here since we already include dce12 headers and these are for DCN */ #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX 2 @@ -84,121 +86,182 @@ static const struct soc15_reg_golden golden_settings_athub_1_0_0[] = SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL2, 0x00ff00ff, 0x00080008) }; -/* Ecc related register addresses, (BASE + reg offset) */ -/* Universal Memory Controller caps (may be fused). */ -/* UMCCH:UmcLocalCap */ -#define UMCLOCALCAPS_ADDR0 (0x00014306 + 0x00000000) -#define UMCLOCALCAPS_ADDR1 (0x00014306 + 0x00000800) -#define UMCLOCALCAPS_ADDR2 (0x00014306 + 0x00001000) -#define UMCLOCALCAPS_ADDR3 (0x00014306 + 0x00001800) -#define UMCLOCALCAPS_ADDR4 (0x00054306 + 0x00000000) -#define UMCLOCALCAPS_ADDR5 (0x00054306 + 0x00000800) -#define UMCLOCALCAPS_ADDR6 (0x00054306 + 0x00001000) -#define UMCLOCALCAPS_ADDR7 (0x00054306 + 0x00001800) -#define UMCLOCALCAPS_ADDR8 (0x00094306 + 0x00000000) -#define UMCLOCALCAPS_ADDR9 (0x00094306 + 0x00000800) -#define UMCLOCALCAPS_ADDR10 (0x00094306 + 0x00001000) -#define UMCLOCALCAPS_ADDR11 (0x00094306 + 0x00001800) -#define UMCLOCALCAPS_ADDR12 (0x000d4306 + 0x00000000) -#define UMCLOCALCAPS_ADDR13 (0x000d4306 + 0x00000800) -#define UMCLOCALCAPS_ADDR14 (0x000d4306 + 0x00001000) -#define UMCLOCALCAPS_ADDR15 (0x000d4306 + 0x00001800) - -/* Universal Memory Controller Channel config. */ -/* UMCCH:UMC_CONFIG */ -#define UMCCH_UMC_CONFIG_ADDR0 (0x00014040 + 0x00000000) -#define UMCCH_UMC_CONFIG_ADDR1 (0x00014040 + 0x00000800) -#define UMCCH_UMC_CONFIG_ADDR2 (0x00014040 + 0x00001000) -#define UMCCH_UMC_CONFIG_ADDR3 (0x00014040 + 0x00001800) -#define UMCCH_UMC_CONFIG_ADDR4 (0x00054040 + 0x00000000) -#define UMCCH_UMC_CONFIG_ADDR5 (0x00054040 + 0x00000800) -#define UMCCH_UMC_CONFIG_ADDR6 (0x00054040 + 0x00001000) -#define UMCCH_UMC_CONFIG_ADDR7 (0x00054040 + 0x00001800) -#define UMCCH_UMC_CONFIG_ADDR8 (0x00094040 + 0x00000000) -#define UMCCH_UMC_CONFIG_ADDR9 (0x00094040 + 0x00000800) -#define UMCCH_UMC_CONFIG_ADDR10 (0x00094040 + 0x00001000) -#define UMCCH_UMC_CONFIG_ADDR11 (0x00094040 + 0x00001800) -#define UMCCH_UMC_CONFIG_ADDR12 (0x000d4040 + 0x00000000) -#define UMCCH_UMC_CONFIG_ADDR13 (0x000d4040 + 0x00000800) -#define UMCCH_UMC_CONFIG_ADDR14 (0x000d4040 + 0x00001000) -#define UMCCH_UMC_CONFIG_ADDR15 (0x000d4040 + 0x00001800) - -/* Universal Memory Controller Channel Ecc config. */ -/* UMCCH:EccCtrl */ -#define UMCCH_ECCCTRL_ADDR0 (0x00014053 + 0x00000000) -#define UMCCH_ECCCTRL_ADDR1 (0x00014053 + 0x00000800) -#define UMCCH_ECCCTRL_ADDR2 (0x00014053 + 0x00001000) -#define UMCCH_ECCCTRL_ADDR3 (0x00014053 + 0x00001800) -#define UMCCH_ECCCTRL_ADDR4 (0x00054053 + 0x00000000) -#define UMCCH_ECCCTRL_ADDR5 (0x00054053 + 0x00000800) -#define UMCCH_ECCCTRL_ADDR6 (0x00054053 + 0x00001000) -#define UMCCH_ECCCTRL_ADDR7 (0x00054053 + 0x00001800) -#define UMCCH_ECCCTRL_ADDR8 (0x00094053 + 0x00000000) -#define UMCCH_ECCCTRL_ADDR9 (0x00094053 + 0x00000800) -#define UMCCH_ECCCTRL_ADDR10 (0x00094053 + 0x00001000) -#define UMCCH_ECCCTRL_ADDR11 (0x00094053 + 0x00001800) -#define UMCCH_ECCCTRL_ADDR12 (0x000d4053 + 0x00000000) -#define UMCCH_ECCCTRL_ADDR13 (0x000d4053 + 0x00000800) -#define UMCCH_ECCCTRL_ADDR14 (0x000d4053 + 0x00001000) -#define UMCCH_ECCCTRL_ADDR15 (0x000d4053 + 0x00001800) - -static const uint32_t ecc_umclocalcap_addrs[] = { - UMCLOCALCAPS_ADDR0, - UMCLOCALCAPS_ADDR1, - UMCLOCALCAPS_ADDR2, - UMCLOCALCAPS_ADDR3, - UMCLOCALCAPS_ADDR4, - UMCLOCALCAPS_ADDR5, - UMCLOCALCAPS_ADDR6, - UMCLOCALCAPS_ADDR7, - UMCLOCALCAPS_ADDR8, - UMCLOCALCAPS_ADDR9, - UMCLOCALCAPS_ADDR10, - UMCLOCALCAPS_ADDR11, - UMCLOCALCAPS_ADDR12, - UMCLOCALCAPS_ADDR13, - UMCLOCALCAPS_ADDR14, - UMCLOCALCAPS_ADDR15, +static const uint32_t ecc_umc_mcumc_ctrl_addrs[] = { + (0x000143c0 + 0x00000000), + (0x000143c0 + 0x00000800), + (0x000143c0 + 0x00001000), + (0x000143c0 + 0x00001800), + (0x000543c0 + 0x00000000), + (0x000543c0 + 0x00000800), + (0x000543c0 + 0x00001000), + (0x000543c0 + 0x00001800), + (0x000943c0 + 0x00000000), + (0x000943c0 + 0x00000800), + (0x000943c0 + 0x00001000), + (0x000943c0 + 0x00001800), + (0x000d43c0 + 0x00000000), + (0x000d43c0 + 0x00000800), + (0x000d43c0 + 0x00001000), + (0x000d43c0 + 0x00001800), + (0x001143c0 + 0x00000000), + (0x001143c0 + 0x00000800), + (0x001143c0 + 0x00001000), + (0x001143c0 + 0x00001800), + (0x001543c0 + 0x00000000), + (0x001543c0 + 0x00000800), + (0x001543c0 + 0x00001000), + (0x001543c0 + 0x00001800), + (0x001943c0 + 0x00000000), + (0x001943c0 + 0x00000800), + (0x001943c0 + 0x00001000), + (0x001943c0 + 0x00001800), + (0x001d43c0 + 0x00000000), + (0x001d43c0 + 0x00000800), + (0x001d43c0 + 0x00001000), + (0x001d43c0 + 0x00001800), }; -static const uint32_t ecc_umcch_umc_config_addrs[] = { - UMCCH_UMC_CONFIG_ADDR0, - UMCCH_UMC_CONFIG_ADDR1, - UMCCH_UMC_CONFIG_ADDR2, - UMCCH_UMC_CONFIG_ADDR3, - UMCCH_UMC_CONFIG_ADDR4, - UMCCH_UMC_CONFIG_ADDR5, - UMCCH_UMC_CONFIG_ADDR6, - UMCCH_UMC_CONFIG_ADDR7, - UMCCH_UMC_CONFIG_ADDR8, - UMCCH_UMC_CONFIG_ADDR9, - UMCCH_UMC_CONFIG_ADDR10, - UMCCH_UMC_CONFIG_ADDR11, - UMCCH_UMC_CONFIG_ADDR12, - UMCCH_UMC_CONFIG_ADDR13, - UMCCH_UMC_CONFIG_ADDR14, - UMCCH_UMC_CONFIG_ADDR15, +static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = { + (0x000143e0 + 0x00000000), + (0x000143e0 + 0x00000800), + (0x000143e0 + 0x00001000), + (0x000143e0 + 0x00001800), + (0x000543e0 + 0x00000000), + (0x000543e0 + 0x00000800), + (0x000543e0 + 0x00001000), + (0x000543e0 + 0x00001800), + (0x000943e0 + 0x00000000), + (0x000943e0 + 0x00000800), + (0x000943e0 + 0x00001000), + (0x000943e0 + 0x00001800), + (0x000d43e0 + 0x00000000), + (0x000d43e0 + 0x00000800), + (0x000d43e0 + 0x00001000), + (0x000d43e0 + 0x00001800), + (0x001143e0 + 0x00000000), + (0x001143e0 + 0x00000800), + (0x001143e0 + 0x00001000), + (0x001143e0 + 0x00001800), + (0x001543e0 + 0x00000000), + (0x001543e0 + 0x00000800), + (0x001543e0 + 0x00001000), + (0x001543e0 + 0x00001800), + (0x001943e0 + 0x00000000), + (0x001943e0 + 0x00000800), + (0x001943e0 + 0x00001000), + (0x001943e0 + 0x00001800), + (0x001d43e0 + 0x00000000), + (0x001d43e0 + 0x00000800), + (0x001d43e0 + 0x00001000), + (0x001d43e0 + 0x00001800), }; -static const uint32_t ecc_umcch_eccctrl_addrs[] = { - UMCCH_ECCCTRL_ADDR0, - UMCCH_ECCCTRL_ADDR1, - UMCCH_ECCCTRL_ADDR2, - UMCCH_ECCCTRL_ADDR3, - UMCCH_ECCCTRL_ADDR4, - UMCCH_ECCCTRL_ADDR5, - UMCCH_ECCCTRL_ADDR6, - UMCCH_ECCCTRL_ADDR7, - UMCCH_ECCCTRL_ADDR8, - UMCCH_ECCCTRL_ADDR9, - UMCCH_ECCCTRL_ADDR10, - UMCCH_ECCCTRL_ADDR11, - UMCCH_ECCCTRL_ADDR12, - UMCCH_ECCCTRL_ADDR13, - UMCCH_ECCCTRL_ADDR14, - UMCCH_ECCCTRL_ADDR15, +static const uint32_t ecc_umc_mcumc_status_addrs[] = { + (0x000143c2 + 0x00000000), + (0x000143c2 + 0x00000800), + (0x000143c2 + 0x00001000), + (0x000143c2 + 0x00001800), + (0x000543c2 + 0x00000000), + (0x000543c2 + 0x00000800), + (0x000543c2 + 0x00001000), + (0x000543c2 + 0x00001800), + (0x000943c2 + 0x00000000), + (0x000943c2 + 0x00000800), + (0x000943c2 + 0x00001000), + (0x000943c2 + 0x00001800), + (0x000d43c2 + 0x00000000), + (0x000d43c2 + 0x00000800), + (0x000d43c2 + 0x00001000), + (0x000d43c2 + 0x00001800), + (0x001143c2 + 0x00000000), + (0x001143c2 + 0x00000800), + (0x001143c2 + 0x00001000), + (0x001143c2 + 0x00001800), + (0x001543c2 + 0x00000000), + (0x001543c2 + 0x00000800), + (0x001543c2 + 0x00001000), + (0x001543c2 + 0x00001800), + (0x001943c2 + 0x00000000), + (0x001943c2 + 0x00000800), + (0x001943c2 + 0x00001000), + (0x001943c2 + 0x00001800), + (0x001d43c2 + 0x00000000), + (0x001d43c2 + 0x00000800), + (0x001d43c2 + 0x00001000), + (0x001d43c2 + 0x00001800), }; +static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 bits, i, tmp, reg; + + bits = 0x7f; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) { + reg = ecc_umc_mcumc_ctrl_addrs[i]; + tmp = RREG32(reg); + tmp &= ~bits; + WREG32(reg, tmp); + } + for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) { + reg = ecc_umc_mcumc_ctrl_mask_addrs[i]; + tmp = RREG32(reg); + tmp &= ~bits; + WREG32(reg, tmp); + } + break; + case AMDGPU_IRQ_STATE_ENABLE: + for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) { + reg = ecc_umc_mcumc_ctrl_addrs[i]; + tmp = RREG32(reg); + tmp |= bits; + WREG32(reg, tmp); + } + for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) { + reg = ecc_umc_mcumc_ctrl_mask_addrs[i]; + tmp = RREG32(reg); + tmp |= bits; + WREG32(reg, tmp); + } + break; + default: + break; + } + + return 0; +} + +static int gmc_v9_0_process_ras_data_cb(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry) +{ + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + amdgpu_ras_reset_gpu(adev, 0); + return AMDGPU_RAS_UE; +} + +static int gmc_v9_0_process_ecc_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + struct ras_common_if *ras_if = adev->gmc.ras_if; + struct ras_dispatch_if ih_data = { + .entry = entry, + }; + + if (!ras_if) + return 0; + + ih_data.head = *ras_if; + + amdgpu_ras_interrupt_dispatch(adev, &ih_data); + return 0; +} + static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, @@ -244,62 +307,6 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, return 0; } -/** - * vega10_ih_prescreen_iv - prescreen an interrupt vector - * - * @adev: amdgpu_device pointer - * - * Returns true if the interrupt vector should be further processed. - */ -static bool gmc_v9_0_prescreen_iv(struct amdgpu_device *adev, - struct amdgpu_iv_entry *entry, - uint64_t addr) -{ - struct amdgpu_vm *vm; - u64 key; - int r; - - /* No PASID, can't identify faulting process */ - if (!entry->pasid) - return true; - - /* Not a retry fault */ - if (!(entry->src_data[1] & 0x80)) - return true; - - /* Track retry faults in per-VM fault FIFO. */ - spin_lock(&adev->vm_manager.pasid_lock); - vm = idr_find(&adev->vm_manager.pasid_idr, entry->pasid); - if (!vm) { - /* VM not found, process it normally */ - spin_unlock(&adev->vm_manager.pasid_lock); - return true; - } - - key = AMDGPU_VM_FAULT(entry->pasid, addr); - r = amdgpu_vm_add_fault(vm->fault_hash, key); - - /* Hash table is full or the fault is already being processed, - * ignore further page faults - */ - if (r != 0) { - spin_unlock(&adev->vm_manager.pasid_lock); - return false; - } - /* No locking required with single writer and single reader */ - r = kfifo_put(&vm->faults, key); - if (!r) { - /* FIFO is full. Ignore it until there is space */ - amdgpu_vm_clear_fault(vm->fault_hash, key); - spin_unlock(&adev->vm_manager.pasid_lock); - return false; - } - - spin_unlock(&adev->vm_manager.pasid_lock); - /* It's the first fault for this address, process it normally */ - return true; -} - static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -312,9 +319,11 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; - if (!gmc_v9_0_prescreen_iv(adev, entry, addr)) + if (retry_fault && amdgpu_gmc_filter_faults(adev, addr, entry->pasid, + entry->timestamp)) return 1; /* This also prevents sending it to KFD */ + /* If it's the first fault for this address, process it normally */ if (!amdgpu_sriov_vf(adev)) { status = RREG32(hub->vm_l2_pro_fault_status); WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); @@ -350,10 +359,19 @@ static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = { .process = gmc_v9_0_process_interrupt, }; + +static const struct amdgpu_irq_src_funcs gmc_v9_0_ecc_funcs = { + .set = gmc_v9_0_ecc_interrupt_state, + .process = gmc_v9_0_process_ecc_irq, +}; + static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev) { adev->gmc.vm_fault.num_types = 1; adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs; + + adev->gmc.ecc_irq.num_types = 1; + adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs; } static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid, @@ -466,64 +484,37 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, amdgpu_ring_emit_wreg(ring, reg, pasid); } -/** - * gmc_v9_0_set_pte_pde - update the page tables using MMIO - * - * @adev: amdgpu_device pointer - * @cpu_pt_addr: cpu address of the page table - * @gpu_page_idx: entry in the page table to update - * @addr: dst addr to write into pte/pde - * @flags: access flags +/* + * PTE format on VEGA 10: + * 63:59 reserved + * 58:57 mtype + * 56 F + * 55 L + * 54 P + * 53 SW + * 52 T + * 50:48 reserved + * 47:12 4k physical page base address + * 11:7 fragment + * 6 write + * 5 read + * 4 exe + * 3 Z + * 2 snooped + * 1 system + * 0 valid * - * Update the page tables using the CPU. + * PDE format on VEGA 10: + * 63:59 block fragment size + * 58:55 reserved + * 54 P + * 53:48 reserved + * 47:6 physical base address of PD or PTE + * 5:3 reserved + * 2 C + * 1 system + * 0 valid */ -static int gmc_v9_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, - uint32_t gpu_page_idx, uint64_t addr, - uint64_t flags) -{ - void __iomem *ptr = (void *)cpu_pt_addr; - uint64_t value; - - /* - * PTE format on VEGA 10: - * 63:59 reserved - * 58:57 mtype - * 56 F - * 55 L - * 54 P - * 53 SW - * 52 T - * 50:48 reserved - * 47:12 4k physical page base address - * 11:7 fragment - * 6 write - * 5 read - * 4 exe - * 3 Z - * 2 snooped - * 1 system - * 0 valid - * - * PDE format on VEGA 10: - * 63:59 block fragment size - * 58:55 reserved - * 54 P - * 53:48 reserved - * 47:6 physical base address of PD or PTE - * 5:3 reserved - * 2 C - * 1 system - * 0 valid - */ - - /* - * The following is for PTE only. GART does not have PDEs. - */ - value = addr & 0x0000FFFFFFFFF000ULL; - value |= flags; - writeq(value, ptr + (gpu_page_idx * 8)); - return 0; -} static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev, uint32_t flags) @@ -593,7 +584,6 @@ static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb, .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping, - .set_pte_pde = gmc_v9_0_set_pte_pde, .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags, .get_vm_pde = gmc_v9_0_get_vm_pde }; @@ -620,85 +610,6 @@ static int gmc_v9_0_early_init(void *handle) return 0; } -static int gmc_v9_0_ecc_available(struct amdgpu_device *adev) -{ - uint32_t reg_val; - uint32_t reg_addr; - uint32_t field_val; - size_t i; - uint32_t fv2; - size_t lost_sheep; - - DRM_DEBUG("ecc: gmc_v9_0_ecc_available()\n"); - - lost_sheep = 0; - for (i = 0; i < ARRAY_SIZE(ecc_umclocalcap_addrs); ++i) { - reg_addr = ecc_umclocalcap_addrs[i]; - DRM_DEBUG("ecc: " - "UMCCH_UmcLocalCap[%zu]: reg_addr: 0x%08x\n", - i, reg_addr); - reg_val = RREG32(reg_addr); - field_val = REG_GET_FIELD(reg_val, UMCCH0_0_UmcLocalCap, - EccDis); - DRM_DEBUG("ecc: " - "reg_val: 0x%08x, " - "EccDis: 0x%08x, ", - reg_val, field_val); - if (field_val) { - DRM_ERROR("ecc: UmcLocalCap:EccDis is set.\n"); - ++lost_sheep; - } - } - - for (i = 0; i < ARRAY_SIZE(ecc_umcch_umc_config_addrs); ++i) { - reg_addr = ecc_umcch_umc_config_addrs[i]; - DRM_DEBUG("ecc: " - "UMCCH0_0_UMC_CONFIG[%zu]: reg_addr: 0x%08x", - i, reg_addr); - reg_val = RREG32(reg_addr); - field_val = REG_GET_FIELD(reg_val, UMCCH0_0_UMC_CONFIG, - DramReady); - DRM_DEBUG("ecc: " - "reg_val: 0x%08x, " - "DramReady: 0x%08x\n", - reg_val, field_val); - - if (!field_val) { - DRM_ERROR("ecc: UMC_CONFIG:DramReady is not set.\n"); - ++lost_sheep; - } - } - - for (i = 0; i < ARRAY_SIZE(ecc_umcch_eccctrl_addrs); ++i) { - reg_addr = ecc_umcch_eccctrl_addrs[i]; - DRM_DEBUG("ecc: " - "UMCCH_EccCtrl[%zu]: reg_addr: 0x%08x, ", - i, reg_addr); - reg_val = RREG32(reg_addr); - field_val = REG_GET_FIELD(reg_val, UMCCH0_0_EccCtrl, - WrEccEn); - fv2 = REG_GET_FIELD(reg_val, UMCCH0_0_EccCtrl, - RdEccEn); - DRM_DEBUG("ecc: " - "reg_val: 0x%08x, " - "WrEccEn: 0x%08x, " - "RdEccEn: 0x%08x\n", - reg_val, field_val, fv2); - - if (!field_val) { - DRM_DEBUG("ecc: WrEccEn is not set\n"); - ++lost_sheep; - } - if (!fv2) { - DRM_DEBUG("ecc: RdEccEn is not set\n"); - ++lost_sheep; - } - } - - DRM_DEBUG("ecc: lost_sheep: %zu\n", lost_sheep); - return lost_sheep == 0; -} - static bool gmc_v9_0_keep_stolen_memory(struct amdgpu_device *adev) { @@ -751,31 +662,119 @@ static int gmc_v9_0_allocate_vm_inv_eng(struct amdgpu_device *adev) return 0; } -static int gmc_v9_0_late_init(void *handle) +static int gmc_v9_0_ecc_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct ras_common_if **ras_if = &adev->gmc.ras_if; + struct ras_ih_if ih_info = { + .cb = gmc_v9_0_process_ras_data_cb, + }; + struct ras_fs_if fs_info = { + .sysfs_name = "umc_err_count", + .debugfs_name = "umc_err_inject", + }; + struct ras_common_if ras_block = { + .block = AMDGPU_RAS_BLOCK__UMC, + .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + .sub_block_index = 0, + .name = "umc", + }; int r; + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) { + amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0); + return 0; + } + /* handle resume path. */ + if (*ras_if) + goto resume; + + *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); + if (!*ras_if) + return -ENOMEM; + + **ras_if = ras_block; + + r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); + if (r) + goto feature; + + ih_info.head = **ras_if; + fs_info.head = **ras_if; + + r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); + if (r) + goto interrupt; + + r = amdgpu_ras_debugfs_create(adev, &fs_info); + if (r) + goto debugfs; + + r = amdgpu_ras_sysfs_create(adev, &fs_info); + if (r) + goto sysfs; +resume: + r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0); + if (r) + goto irq; + + return 0; +irq: + amdgpu_ras_sysfs_remove(adev, *ras_if); +sysfs: + amdgpu_ras_debugfs_remove(adev, *ras_if); +debugfs: + amdgpu_ras_interrupt_remove_handler(adev, &ih_info); +interrupt: + amdgpu_ras_feature_enable(adev, *ras_if, 0); +feature: + kfree(*ras_if); + *ras_if = NULL; + return -EINVAL; +} + + +static int gmc_v9_0_late_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool r; + if (!gmc_v9_0_keep_stolen_memory(adev)) amdgpu_bo_late_init(adev); r = gmc_v9_0_allocate_vm_inv_eng(adev); if (r) return r; + /* Check if ecc is available */ + if (!amdgpu_sriov_vf(adev)) { + switch (adev->asic_type) { + case CHIP_VEGA10: + case CHIP_VEGA20: + r = amdgpu_atomfirmware_mem_ecc_supported(adev); + if (!r) { + DRM_INFO("ECC is not present.\n"); + if (adev->df_funcs->enable_ecc_force_par_wr_rmw) + adev->df_funcs->enable_ecc_force_par_wr_rmw(adev, false); + } else { + DRM_INFO("ECC is active.\n"); + } - if (adev->asic_type == CHIP_VEGA10 && !amdgpu_sriov_vf(adev)) { - r = gmc_v9_0_ecc_available(adev); - if (r == 1) { - DRM_INFO("ECC is active.\n"); - } else if (r == 0) { - DRM_INFO("ECC is not present.\n"); - adev->df_funcs->enable_ecc_force_par_wr_rmw(adev, false); - } else { - DRM_ERROR("gmc_v9_0_ecc_available() failed. r: %d\n", r); - return r; + r = amdgpu_atomfirmware_sram_ecc_supported(adev); + if (!r) { + DRM_INFO("SRAM ECC is not present.\n"); + } else { + DRM_INFO("SRAM ECC is active.\n"); + } + break; + default: + break; } } + r = gmc_v9_0_ecc_late_init(handle); + if (r) + return r; + return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); } @@ -787,7 +786,7 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, base = mmhub_v1_0_get_fb_location(adev); /* add the xgmi offset of the physical node */ base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; - amdgpu_gmc_vram_location(adev, &adev->gmc, base); + amdgpu_gmc_vram_location(adev, mc, base); amdgpu_gmc_gart_location(adev, mc); if (!amdgpu_sriov_vf(adev)) amdgpu_gmc_agp_location(adev, mc); @@ -987,6 +986,12 @@ static int gmc_v9_0_sw_init(void *handle) if (r) return r; + /* interrupt sent to DF. */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0, + &adev->gmc.ecc_irq); + if (r) + return r; + /* Set the internal MC address mask * This is the max address of the GPU's * internal address space. @@ -1011,7 +1016,7 @@ static int gmc_v9_0_sw_init(void *handle) pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); printk(KERN_WARNING "amdgpu: No coherent DMA available.\n"); } - adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits); + adev->need_swiotlb = drm_need_swiotlb(dma_bits); if (adev->gmc.xgmi.supported) { r = gfxhub_v1_1_get_xgmi_info(adev); @@ -1052,6 +1057,22 @@ static int gmc_v9_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) && + adev->gmc.ras_if) { + struct ras_common_if *ras_if = adev->gmc.ras_if; + struct ras_ih_if ih_info = { + .head = *ras_if, + }; + + /*remove fs first*/ + amdgpu_ras_debugfs_remove(adev, ras_if); + amdgpu_ras_sysfs_remove(adev, ras_if); + /*remove the IH*/ + amdgpu_ras_interrupt_remove_handler(adev, &ih_info); + amdgpu_ras_feature_enable(adev, ras_if, 0); + kfree(ras_if); + } + amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); @@ -1198,6 +1219,7 @@ static int gmc_v9_0_hw_fini(void *handle) return 0; } + amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); gmc_v9_0_gart_disable(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index 0c9a2c03504e..f2e6b148ccad 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c @@ -2824,7 +2824,7 @@ static int kv_dpm_init(struct amdgpu_device *adev) pi->caps_tcp_ramping = true; } - if (adev->powerplay.pp_feature & PP_SCLK_DEEP_SLEEP_MASK) + if (adev->pm.pp_feature & PP_SCLK_DEEP_SLEEP_MASK) pi->caps_sclk_ds = true; else pi->caps_sclk_ds = false; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 1696644ec022..41a9a5779623 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -163,7 +163,7 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) /* XXX for emulation, Refer to closed source code.*/ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, 0); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp); @@ -256,7 +256,7 @@ static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) block_size); /* Send no-retry XNACK on fault to suppress VM fault storm. */ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 1); WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i, tmp); WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 73851ebb3833..8dbad496b29f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -157,6 +157,82 @@ static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev, xgpu_ai_mailbox_set_valid(adev, false); } +static int xgpu_ai_get_pp_clk(struct amdgpu_device *adev, u32 type, char *buf) +{ + int r = 0; + u32 req, val, size; + + if (!amdgim_is_hwperf(adev) || buf == NULL) + return -EBADRQC; + + switch(type) { + case PP_SCLK: + req = IDH_IRQ_GET_PP_SCLK; + break; + case PP_MCLK: + req = IDH_IRQ_GET_PP_MCLK; + break; + default: + return -EBADRQC; + } + + mutex_lock(&adev->virt.dpm_mutex); + + xgpu_ai_mailbox_trans_msg(adev, req, 0, 0, 0); + + r = xgpu_ai_poll_msg(adev, IDH_SUCCESS); + if (!r && adev->fw_vram_usage.va != NULL) { + val = RREG32_NO_KIQ( + SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW1)); + size = strnlen((((char *)adev->virt.fw_reserve.p_pf2vf) + + val), PAGE_SIZE); + + if (size < PAGE_SIZE) + strcpy(buf,((char *)adev->virt.fw_reserve.p_pf2vf + val)); + else + size = 0; + + r = size; + goto out; + } + + r = xgpu_ai_poll_msg(adev, IDH_FAIL); + if(r) + pr_info("%s DPM request failed", + (type == PP_SCLK)? "SCLK" : "MCLK"); + +out: + mutex_unlock(&adev->virt.dpm_mutex); + return r; +} + +static int xgpu_ai_force_dpm_level(struct amdgpu_device *adev, u32 level) +{ + int r = 0; + u32 req = IDH_IRQ_FORCE_DPM_LEVEL; + + if (!amdgim_is_hwperf(adev)) + return -EBADRQC; + + mutex_lock(&adev->virt.dpm_mutex); + xgpu_ai_mailbox_trans_msg(adev, req, level, 0, 0); + + r = xgpu_ai_poll_msg(adev, IDH_SUCCESS); + if (!r) + goto out; + + r = xgpu_ai_poll_msg(adev, IDH_FAIL); + if (!r) + pr_info("DPM request failed"); + else + pr_info("Mailbox is broken"); + +out: + mutex_unlock(&adev->virt.dpm_mutex); + return r; +} + static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, enum idh_request req) { @@ -375,4 +451,6 @@ const struct amdgpu_virt_ops xgpu_ai_virt_ops = { .reset_gpu = xgpu_ai_request_reset, .wait_reset = NULL, .trans_msg = xgpu_ai_mailbox_trans_msg, + .get_pp_clk = xgpu_ai_get_pp_clk, + .force_dpm_level = xgpu_ai_force_dpm_level, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h index b4a9ceea334b..39d151b79153 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h @@ -35,6 +35,10 @@ enum idh_request { IDH_REL_GPU_FINI_ACCESS, IDH_REQ_GPU_RESET_ACCESS, + IDH_IRQ_FORCE_DPM_LEVEL = 10, + IDH_IRQ_GET_PP_SCLK, + IDH_IRQ_GET_PP_MCLK, + IDH_LOG_VF_ERROR = 200, }; @@ -43,6 +47,8 @@ enum idh_event { IDH_READY_TO_ACCESS_GPU, IDH_FLR_NOTIFICATION, IDH_FLR_NOTIFICATION_CMPL, + IDH_SUCCESS, + IDH_FAIL, IDH_EVENT_MAX }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index 6a0fcd67662a..aef9d059ae52 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -515,7 +515,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work) /* wait until RCV_MSG become 3 */ if (xgpu_vi_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL)) { - pr_err("failed to recieve FLR_CMPL\n"); + pr_err("failed to receive FLR_CMPL\n"); return; } diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index cc967dbfd631..6590143c3f75 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -118,7 +118,8 @@ static void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev, if (use_doorbell) { ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index); - ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 2); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + BIF_IH_DOORBELL_RANGE, SIZE, 6); } else ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index f3a7d207af07..2f79765b4bdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -43,6 +43,7 @@ enum psp_gfx_crtl_cmd_id GFX_CTRL_CMD_ID_ENABLE_INT = 0x00050000, /* enable PSP-to-Gfx interrupt */ GFX_CTRL_CMD_ID_DISABLE_INT = 0x00060000, /* disable PSP-to-Gfx interrupt */ GFX_CTRL_CMD_ID_MODE1_RST = 0x00070000, /* trigger the Mode 1 reset */ + GFX_CTRL_CMD_ID_GBR_IH_SET = 0x00080000, /* set Gbr IH_RB_CNTL registers */ GFX_CTRL_CMD_ID_CONSUME_CMD = 0x000A0000, /* send interrupt to psp for updating write pointer of vf */ GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING = 0x000C0000, /* destroy GPCOM ring */ diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 860b70d80d3c..b91df7bd1d98 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -33,6 +33,9 @@ #include "sdma0/sdma0_4_0_offset.h" #include "nbio/nbio_7_4_offset.h" +#include "oss/osssys_4_0_offset.h" +#include "oss/osssys_4_0_sh_mask.h" + MODULE_FIRMWARE("amdgpu/vega20_sos.bin"); MODULE_FIRMWARE("amdgpu/vega20_asd.bin"); MODULE_FIRMWARE("amdgpu/vega20_ta.bin"); @@ -113,6 +116,13 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) adev->psp.ta_xgmi_ucode_size = le32_to_cpu(ta_hdr->ta_xgmi_size_bytes); adev->psp.ta_xgmi_start_addr = (uint8_t *)ta_hdr + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); + + adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version); + + adev->psp.ta_ras_ucode_version = le32_to_cpu(ta_hdr->ta_ras_ucode_version); + adev->psp.ta_ras_ucode_size = le32_to_cpu(ta_hdr->ta_ras_size_bytes); + adev->psp.ta_ras_start_addr = (uint8_t *)adev->psp.ta_xgmi_start_addr + + le32_to_cpu(ta_hdr->ta_ras_offset_bytes); } return 0; @@ -217,6 +227,37 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp) return ret; } +static void psp_v11_0_reroute_ih(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + uint32_t tmp; + + /* Change IH ring for VMC */ + tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1244b); + tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, CLIENT_TYPE, 1); + tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); + + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 3); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET); + + mdelay(20); + psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); + + /* Change IH ring for UMC */ + tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1216b); + tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); + + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 4); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET); + + mdelay(20); + psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); +} + static int psp_v11_0_ring_init(struct psp_context *psp, enum psp_ring_type ring_type) { @@ -224,6 +265,8 @@ static int psp_v11_0_ring_init(struct psp_context *psp, struct psp_ring *ring; struct amdgpu_device *adev = psp->adev; + psp_v11_0_reroute_ih(psp); + ring = &psp->km_ring; ring->ring_type = ring_type; @@ -631,7 +674,7 @@ static int psp_v11_0_xgmi_set_topology_info(struct psp_context *psp, for (i = 0; i < topology_info_input->num_nodes; i++) { topology_info_input->nodes[i].node_id = topology->nodes[i].node_id; topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops; - topology_info_input->nodes[i].is_sharing_enabled = topology->nodes[i].is_sharing_enabled; + topology_info_input->nodes[i].is_sharing_enabled = 1; topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine; } @@ -679,6 +722,54 @@ static int psp_v11_0_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id return 0; } +static int psp_v11_0_ras_trigger_error(struct psp_context *psp, + struct ta_ras_trigger_error_input *info) +{ + struct ta_ras_shared_memory *ras_cmd; + int ret; + + if (!psp->ras.ras_initialized) + return -EINVAL; + + ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; + memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); + + ras_cmd->cmd_id = TA_RAS_COMMAND__TRIGGER_ERROR; + ras_cmd->ras_in_message.trigger_error = *info; + + ret = psp_ras_invoke(psp, ras_cmd->cmd_id); + if (ret) + return -EINVAL; + + return ras_cmd->ras_status; +} + +static int psp_v11_0_ras_cure_posion(struct psp_context *psp, uint64_t *mode_ptr) +{ +#if 0 + // not support yet. + struct ta_ras_shared_memory *ras_cmd; + int ret; + + if (!psp->ras.ras_initialized) + return -EINVAL; + + ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; + memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); + + ras_cmd->cmd_id = TA_RAS_COMMAND__CURE_POISON; + ras_cmd->ras_in_message.cure_poison.mode_ptr = mode_ptr; + + ret = psp_ras_invoke(psp, ras_cmd->cmd_id); + if (ret) + return -EINVAL; + + return ras_cmd->ras_status; +#else + return -EINVAL; +#endif +} + static const struct psp_funcs psp_v11_0_funcs = { .init_microcode = psp_v11_0_init_microcode, .bootloader_load_sysdrv = psp_v11_0_bootloader_load_sysdrv, @@ -695,6 +786,8 @@ static const struct psp_funcs psp_v11_0_funcs = { .xgmi_get_hive_id = psp_v11_0_xgmi_get_hive_id, .xgmi_get_node_id = psp_v11_0_xgmi_get_node_id, .support_vmr_ring = psp_v11_0_support_vmr_ring, + .ras_trigger_error = psp_v11_0_ras_trigger_error, + .ras_cure_posion = psp_v11_0_ras_cure_posion, }; void psp_v11_0_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 0487e3a4e9e7..143f0fae69d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -37,6 +37,9 @@ #include "sdma0/sdma0_4_0_offset.h" #include "nbio/nbio_6_1_offset.h" +#include "oss/osssys_4_0_offset.h" +#include "oss/osssys_4_0_sh_mask.h" + MODULE_FIRMWARE("amdgpu/vega10_sos.bin"); MODULE_FIRMWARE("amdgpu/vega10_asd.bin"); MODULE_FIRMWARE("amdgpu/vega12_sos.bin"); @@ -252,6 +255,37 @@ static int psp_v3_1_ring_init(struct psp_context *psp, return 0; } +static void psp_v3_1_reroute_ih(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + uint32_t tmp; + + /* Change IH ring for VMC */ + tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1244b); + tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, CLIENT_TYPE, 1); + tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); + + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 3); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET); + + mdelay(20); + psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); + + /* Change IH ring for UMC */ + tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1216b); + tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); + + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 4); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET); + + mdelay(20); + psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); +} + static int psp_v3_1_ring_create(struct psp_context *psp, enum psp_ring_type ring_type) { @@ -260,6 +294,8 @@ static int psp_v3_1_ring_create(struct psp_context *psp, struct psp_ring *ring = &psp->km_ring; struct amdgpu_device *adev = psp->adev; + psp_v3_1_reroute_ih(psp); + /* Write low address of the ring to C2PMSG_69 */ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index cca3552b36ed..36196372e8db 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -870,8 +870,8 @@ static int sdma_v2_4_sw_init(void *handle) r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, (i == 0) ? - AMDGPU_SDMA_IRQ_TRAP0 : - AMDGPU_SDMA_IRQ_TRAP1); + AMDGPU_SDMA_IRQ_INSTANCE0 : + AMDGPU_SDMA_IRQ_INSTANCE1); if (r) return r; } @@ -1006,7 +1006,7 @@ static int sdma_v2_4_set_trap_irq_state(struct amdgpu_device *adev, u32 sdma_cntl; switch (type) { - case AMDGPU_SDMA_IRQ_TRAP0: + case AMDGPU_SDMA_IRQ_INSTANCE0: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET); @@ -1022,7 +1022,7 @@ static int sdma_v2_4_set_trap_irq_state(struct amdgpu_device *adev, break; } break; - case AMDGPU_SDMA_IRQ_TRAP1: + case AMDGPU_SDMA_IRQ_INSTANCE1: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 0ce8331baeb2..6d39544e7829 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -1154,8 +1154,8 @@ static int sdma_v3_0_sw_init(void *handle) r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, (i == 0) ? - AMDGPU_SDMA_IRQ_TRAP0 : - AMDGPU_SDMA_IRQ_TRAP1); + AMDGPU_SDMA_IRQ_INSTANCE0 : + AMDGPU_SDMA_IRQ_INSTANCE1); if (r) return r; } @@ -1340,7 +1340,7 @@ static int sdma_v3_0_set_trap_irq_state(struct amdgpu_device *adev, u32 sdma_cntl; switch (type) { - case AMDGPU_SDMA_IRQ_TRAP0: + case AMDGPU_SDMA_IRQ_INSTANCE0: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET); @@ -1356,7 +1356,7 @@ static int sdma_v3_0_set_trap_irq_state(struct amdgpu_device *adev, break; } break; - case AMDGPU_SDMA_IRQ_TRAP1: + case AMDGPU_SDMA_IRQ_INSTANCE1: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index c816e55d43a9..9c88ce513d78 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -41,6 +41,8 @@ #include "ivsrcid/sdma0/irqsrcs_sdma0_4_0.h" #include "ivsrcid/sdma1/irqsrcs_sdma1_4_0.h" +#include "amdgpu_ras.h" + MODULE_FIRMWARE("amdgpu/vega10_sdma.bin"); MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin"); MODULE_FIRMWARE("amdgpu/vega12_sdma.bin"); @@ -154,7 +156,6 @@ static const struct soc15_reg_golden golden_settings_sdma0_4_2[] = SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), - SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xFE000000, 0x00000000), }; static const struct soc15_reg_golden golden_settings_sdma1_4_2[] = { @@ -184,7 +185,6 @@ static const struct soc15_reg_golden golden_settings_sdma1_4_2[] = { SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0), - SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_WATERMK, 0xFE000000, 0x00000000), }; static const struct soc15_reg_golden golden_settings_sdma_rv1[] = @@ -849,7 +849,7 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i) wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL); wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, - F32_POLL_ENABLE, amdgpu_sriov_vf(adev)); + F32_POLL_ENABLE, amdgpu_sriov_vf(adev)? 1 : 0); WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, wptr_poll_cntl); /* enable DMA RB */ @@ -940,7 +940,7 @@ static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i) wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL); wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_PAGE_RB_WPTR_POLL_CNTL, - F32_POLL_ENABLE, amdgpu_sriov_vf(adev)); + F32_POLL_ENABLE, amdgpu_sriov_vf(adev)? 1 : 0); WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, wptr_poll_cntl); /* enable DMA RB */ @@ -1493,6 +1493,87 @@ static int sdma_v4_0_early_init(void *handle) return 0; } +static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry); + +static int sdma_v4_0_late_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct ras_common_if **ras_if = &adev->sdma.ras_if; + struct ras_ih_if ih_info = { + .cb = sdma_v4_0_process_ras_data_cb, + }; + struct ras_fs_if fs_info = { + .sysfs_name = "sdma_err_count", + .debugfs_name = "sdma_err_inject", + }; + struct ras_common_if ras_block = { + .block = AMDGPU_RAS_BLOCK__SDMA, + .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + .sub_block_index = 0, + .name = "sdma", + }; + int r; + + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) { + amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0); + return 0; + } + + /* handle resume path. */ + if (*ras_if) + goto resume; + + *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); + if (!*ras_if) + return -ENOMEM; + + **ras_if = ras_block; + + r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); + if (r) + goto feature; + + ih_info.head = **ras_if; + fs_info.head = **ras_if; + + r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); + if (r) + goto interrupt; + + r = amdgpu_ras_debugfs_create(adev, &fs_info); + if (r) + goto debugfs; + + r = amdgpu_ras_sysfs_create(adev, &fs_info); + if (r) + goto sysfs; +resume: + r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE0); + if (r) + goto irq; + + r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE1); + if (r) { + amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE0); + goto irq; + } + + return 0; +irq: + amdgpu_ras_sysfs_remove(adev, *ras_if); +sysfs: + amdgpu_ras_debugfs_remove(adev, *ras_if); +debugfs: + amdgpu_ras_interrupt_remove_handler(adev, &ih_info); +interrupt: + amdgpu_ras_feature_enable(adev, *ras_if, 0); +feature: + kfree(*ras_if); + *ras_if = NULL; + return -EINVAL; +} + static int sdma_v4_0_sw_init(void *handle) { struct amdgpu_ring *ring; @@ -1511,6 +1592,18 @@ static int sdma_v4_0_sw_init(void *handle) if (r) return r; + /* SDMA SRAM ECC event */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, SDMA0_4_0__SRCID__SDMA_SRAM_ECC, + &adev->sdma.ecc_irq); + if (r) + return r; + + /* SDMA SRAM ECC event */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, SDMA1_4_0__SRCID__SDMA_SRAM_ECC, + &adev->sdma.ecc_irq); + if (r) + return r; + for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; @@ -1526,8 +1619,8 @@ static int sdma_v4_0_sw_init(void *handle) r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, (i == 0) ? - AMDGPU_SDMA_IRQ_TRAP0 : - AMDGPU_SDMA_IRQ_TRAP1); + AMDGPU_SDMA_IRQ_INSTANCE0 : + AMDGPU_SDMA_IRQ_INSTANCE1); if (r) return r; @@ -1546,8 +1639,8 @@ static int sdma_v4_0_sw_init(void *handle) r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, (i == 0) ? - AMDGPU_SDMA_IRQ_TRAP0 : - AMDGPU_SDMA_IRQ_TRAP1); + AMDGPU_SDMA_IRQ_INSTANCE0 : + AMDGPU_SDMA_IRQ_INSTANCE1); if (r) return r; } @@ -1561,6 +1654,22 @@ static int sdma_v4_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA) && + adev->sdma.ras_if) { + struct ras_common_if *ras_if = adev->sdma.ras_if; + struct ras_ih_if ih_info = { + .head = *ras_if, + }; + + /*remove fs first*/ + amdgpu_ras_debugfs_remove(adev, ras_if); + amdgpu_ras_sysfs_remove(adev, ras_if); + /*remove the IH*/ + amdgpu_ras_interrupt_remove_handler(adev, &ih_info); + amdgpu_ras_feature_enable(adev, ras_if, 0); + kfree(ras_if); + } + for (i = 0; i < adev->sdma.num_instances; i++) { amdgpu_ring_fini(&adev->sdma.instance[i].ring); if (adev->sdma.has_page_queue) @@ -1598,6 +1707,9 @@ static int sdma_v4_0_hw_fini(void *handle) if (amdgpu_sriov_vf(adev)) return 0; + amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE0); + amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE1); + sdma_v4_0_ctx_switch_enable(adev, false); sdma_v4_0_enable(adev, false); @@ -1666,13 +1778,12 @@ static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev, unsigned type, enum amdgpu_interrupt_state state) { - unsigned int instance = (type == AMDGPU_SDMA_IRQ_TRAP0) ? 0 : 1; u32 sdma_cntl; - sdma_cntl = RREG32_SDMA(instance, mmSDMA0_CNTL); + sdma_cntl = RREG32_SDMA(type, mmSDMA0_CNTL); sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); - WREG32_SDMA(instance, mmSDMA0_CNTL, sdma_cntl); + WREG32_SDMA(type, mmSDMA0_CNTL, sdma_cntl); return 0; } @@ -1714,6 +1825,58 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev, return 0; } +static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry) +{ + uint32_t instance, err_source; + + switch (entry->client_id) { + case SOC15_IH_CLIENTID_SDMA0: + instance = 0; + break; + case SOC15_IH_CLIENTID_SDMA1: + instance = 1; + break; + default: + return 0; + } + + switch (entry->src_id) { + case SDMA0_4_0__SRCID__SDMA_SRAM_ECC: + err_source = 0; + break; + case SDMA0_4_0__SRCID__SDMA_ECC: + err_source = 1; + break; + default: + return 0; + } + + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + + amdgpu_ras_reset_gpu(adev, 0); + + return AMDGPU_RAS_UE; +} + +static int sdma_v4_0_process_ecc_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + struct ras_common_if *ras_if = adev->sdma.ras_if; + struct ras_dispatch_if ih_data = { + .entry = entry, + }; + + if (!ras_if) + return 0; + + ih_data.head = *ras_if; + + amdgpu_ras_interrupt_dispatch(adev, &ih_data); + return 0; +} + static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -1741,6 +1904,25 @@ static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev, return 0; } +static int sdma_v4_0_set_ecc_irq_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 sdma_edc_config; + + u32 reg_offset = (type == AMDGPU_SDMA_IRQ_INSTANCE0) ? + sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_EDC_CONFIG) : + sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_EDC_CONFIG); + + sdma_edc_config = RREG32(reg_offset); + sdma_edc_config = REG_SET_FIELD(sdma_edc_config, SDMA0_EDC_CONFIG, ECC_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32(reg_offset, sdma_edc_config); + + return 0; +} + static void sdma_v4_0_update_medium_grain_clock_gating( struct amdgpu_device *adev, bool enable) @@ -1906,7 +2088,7 @@ static void sdma_v4_0_get_clockgating_state(void *handle, u32 *flags) const struct amd_ip_funcs sdma_v4_0_ip_funcs = { .name = "sdma_v4_0", .early_init = sdma_v4_0_early_init, - .late_init = NULL, + .late_init = sdma_v4_0_late_init, .sw_init = sdma_v4_0_sw_init, .sw_fini = sdma_v4_0_sw_fini, .hw_init = sdma_v4_0_hw_init, @@ -2008,11 +2190,20 @@ static const struct amdgpu_irq_src_funcs sdma_v4_0_illegal_inst_irq_funcs = { .process = sdma_v4_0_process_illegal_inst_irq, }; +static const struct amdgpu_irq_src_funcs sdma_v4_0_ecc_irq_funcs = { + .set = sdma_v4_0_set_ecc_irq_state, + .process = sdma_v4_0_process_ecc_irq, +}; + + + static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev) { adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs; adev->sdma.illegal_inst_irq.funcs = &sdma_v4_0_illegal_inst_irq_funcs; + adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST; + adev->sdma.ecc_irq.funcs = &sdma_v4_0_ecc_irq_funcs; } /** @@ -2077,8 +2268,8 @@ static const struct amdgpu_buffer_funcs sdma_v4_0_buffer_funcs = { static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev) { adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs; - if (adev->sdma.has_page_queue) - adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page; + if (adev->sdma.has_page_queue && adev->sdma.num_instances > 1) + adev->mman.buffer_funcs_ring = &adev->sdma.instance[1].page; else adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; } @@ -2097,15 +2288,22 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev) unsigned i; adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs; - for (i = 0; i < adev->sdma.num_instances; i++) { - if (adev->sdma.has_page_queue) + if (adev->sdma.has_page_queue && adev->sdma.num_instances > 1) { + for (i = 1; i < adev->sdma.num_instances; i++) { sched = &adev->sdma.instance[i].page.sched; - else + adev->vm_manager.vm_pte_rqs[i - 1] = + &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; + } + adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances - 1; + adev->vm_manager.page_fault = &adev->sdma.instance[0].page; + } else { + for (i = 0; i < adev->sdma.num_instances; i++) { sched = &adev->sdma.instance[i].ring.sched; - adev->vm_manager.vm_pte_rqs[i] = - &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; + adev->vm_manager.vm_pte_rqs[i] = + &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; + } + adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; } - adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; } const struct amdgpu_ip_block_version sdma_v4_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index f15f196684ba..3eeefd40dae0 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -503,8 +503,8 @@ static int si_dma_sw_init(void *handle) r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, (i == 0) ? - AMDGPU_SDMA_IRQ_TRAP0 : - AMDGPU_SDMA_IRQ_TRAP1); + AMDGPU_SDMA_IRQ_INSTANCE0 : + AMDGPU_SDMA_IRQ_INSTANCE1); if (r) return r; } @@ -591,7 +591,7 @@ static int si_dma_set_trap_irq_state(struct amdgpu_device *adev, u32 sdma_cntl; switch (type) { - case AMDGPU_SDMA_IRQ_TRAP0: + case AMDGPU_SDMA_IRQ_INSTANCE0: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET); @@ -607,7 +607,7 @@ static int si_dma_set_trap_irq_state(struct amdgpu_device *adev, break; } break; - case AMDGPU_SDMA_IRQ_TRAP1: + case AMDGPU_SDMA_IRQ_INSTANCE1: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET); diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index 41e01a7f57a4..d57e75e5c71f 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -4098,14 +4098,13 @@ static int si_notify_smc_display_change(struct amdgpu_device *adev, static void si_program_response_times(struct amdgpu_device *adev) { - u32 voltage_response_time, backbias_response_time, acpi_delay_time, vbi_time_out; + u32 voltage_response_time, acpi_delay_time, vbi_time_out; u32 vddc_dly, acpi_dly, vbi_dly; u32 reference_clock; si_write_smc_soft_register(adev, SI_SMC_SOFT_REGISTER_mvdd_chg_time, 1); voltage_response_time = (u32)adev->pm.dpm.voltage_response_time; - backbias_response_time = (u32)adev->pm.dpm.backbias_response_time; if (voltage_response_time == 0) voltage_response_time = 1000; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index ed89a101f73f..4900e4958dec 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -63,6 +63,7 @@ #include "vcn_v1_0.h" #include "dce_virtual.h" #include "mxgpu_ai.h" +#include "amdgpu_smu.h" #define mmMP0_MISC_CGTT_CTRL0 0x01b9 #define mmMP0_MISC_CGTT_CTRL0_BASE_IDX 0 @@ -392,6 +393,7 @@ void soc15_program_register_sequence(struct amdgpu_device *adev, static int soc15_asic_mode1_reset(struct amdgpu_device *adev) { u32 i; + int ret = 0; amdgpu_atombios_scratch_regs_engine_hung(adev, true); @@ -402,7 +404,9 @@ static int soc15_asic_mode1_reset(struct amdgpu_device *adev) pci_save_state(adev->pdev); - psp_gpu_reset(adev); + ret = psp_gpu_reset(adev); + if (ret) + dev_err(adev->dev, "GPU mode1 reset failed\n"); pci_restore_state(adev->pdev); @@ -417,7 +421,7 @@ static int soc15_asic_mode1_reset(struct amdgpu_device *adev) amdgpu_atombios_scratch_regs_engine_hung(adev, false); - return 0; + return ret; } static int soc15_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap) @@ -451,6 +455,8 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev) dev_info(adev->dev, "GPU BACO reset\n"); + adev->in_baco_reset = 1; + return 0; } @@ -461,8 +467,15 @@ static int soc15_asic_reset(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: + case CHIP_VEGA12: soc15_asic_get_baco_capability(adev, &baco_reset); break; + case CHIP_VEGA20: + if (adev->psp.sos_fw_version >= 0x80067) + soc15_asic_get_baco_capability(adev, &baco_reset); + else + baco_reset = false; + break; default: baco_reset = false; break; @@ -602,8 +615,12 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) } amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); - if (!amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); + if (!amdgpu_sriov_vf(adev)) { + if (is_support_sw_smu(adev)) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + else + amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); + } if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) @@ -884,7 +901,8 @@ static int soc15_common_early_init(void *handle) adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN; } else if (adev->pdev->device == 0x15d8) { - adev->cg_flags = AMD_CG_SUPPORT_GFX_MGLS | + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_MGLS | AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS | @@ -927,7 +945,7 @@ static int soc15_common_early_init(void *handle) adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN; } - if (adev->powerplay.pp_feature & PP_GFXOFF_MASK) + if (adev->pm.pp_feature & PP_GFXOFF_MASK) adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_CP | AMD_PG_SUPPORT_RLC_SMU_HS; diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h new file mode 100644 index 000000000000..0b4e7b55595a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -0,0 +1,108 @@ +/****************************************************************************\ +* +* File Name ta_ras_if.h +* Project AMD PSP SW IP Module +* +* Description Interface to the RAS Trusted Application +* +* Copyright 2019 Advanced Micro Devices, Inc. +* +* Permission is hereby granted, free of charge, to any person obtaining a copy of this software +* and associated documentation files (the "Software"), to deal in the Software without restriction, +* including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, +* subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in all copies or substantial +* portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +* OTHER DEALINGS IN THE SOFTWARE. +*/ +#ifndef _TA_RAS_IF_H +#define _TA_RAS_IF_H + +/* Responses have bit 31 set */ +#define RSP_ID_MASK (1U << 31) +#define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK) + +#define TA_NUM_BLOCK_MAX 14 + +enum ras_command { + TA_RAS_COMMAND__ENABLE_FEATURES = 0, + TA_RAS_COMMAND__DISABLE_FEATURES, + TA_RAS_COMMAND__TRIGGER_ERROR, +}; + +enum ta_ras_status { + TA_RAS_STATUS__SUCCESS = 0x00, + TA_RAS_STATUS__RESET_NEEDED = 0x01, + TA_RAS_STATUS__ERROR_INVALID_PARAMETER = 0x02, + TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE = 0x03, + TA_RAS_STATUS__ERROR_RAS_DUPLICATE_CMD = 0x04, + TA_RAS_STATUS__ERROR_INJECTION_FAILED = 0x05 +}; + +enum ta_ras_block { + TA_RAS_BLOCK__UMC = 0, + TA_RAS_BLOCK__SDMA, + TA_RAS_BLOCK__GFX, + TA_RAS_BLOCK__MMHUB, + TA_RAS_BLOCK__ATHUB, + TA_RAS_BLOCK__PCIE_BIF, + TA_RAS_BLOCK__HDP, + TA_RAS_BLOCK__XGMI_WAFL, + TA_RAS_BLOCK__DF, + TA_RAS_BLOCK__SMN, + TA_RAS_BLOCK__SEM, + TA_RAS_BLOCK__MP0, + TA_RAS_BLOCK__MP1, + TA_RAS_BLOCK__FUSE = (TA_NUM_BLOCK_MAX - 1), +}; + +enum ta_ras_error_type { + TA_RAS_ERROR__NONE = 0, + TA_RAS_ERROR__PARITY = 1, + TA_RAS_ERROR__SINGLE_CORRECTABLE = 2, + TA_RAS_ERROR__MULTI_UNCORRECTABLE = 4, + TA_RAS_ERROR__POISON = 8 +}; + +struct ta_ras_enable_features_input { + enum ta_ras_block block_id; + enum ta_ras_error_type error_type; +}; + +struct ta_ras_disable_features_input { + enum ta_ras_block block_id; + enum ta_ras_error_type error_type; +}; + +struct ta_ras_trigger_error_input { + enum ta_ras_block block_id; + enum ta_ras_error_type inject_error_type; + uint32_t sub_block_index; + uint64_t address; + uint64_t value; +}; + +union ta_ras_cmd_input { + struct ta_ras_enable_features_input enable_features; + struct ta_ras_disable_features_input disable_features; + struct ta_ras_trigger_error_input trigger_error; +}; + +struct ta_ras_shared_memory { + uint32_t cmd_id; + uint32_t resp_id; + enum ta_ras_status ras_status; + uint32_t reserved; + union ta_ras_cmd_input ras_in_message; +}; + +#endif // TL_RAS_IF_H_ diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index bed78a778e3f..40363ca6c5f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -283,7 +283,7 @@ static int vce_v2_0_stop(struct amdgpu_device *adev) } if (vce_v2_0_wait_for_idle(adev)) { - DRM_INFO("VCE is busy, Can't set clock gateing"); + DRM_INFO("VCE is busy, Can't set clock gating"); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index aadc3e66ebd7..f3f5938430d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -382,6 +382,7 @@ static int vce_v4_0_start(struct amdgpu_device *adev) static int vce_v4_0_stop(struct amdgpu_device *adev) { + /* Disable VCPU */ WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001); /* hold on ECPU */ @@ -389,8 +390,8 @@ static int vce_v4_0_stop(struct amdgpu_device *adev) VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK); - /* clear BUSY flag */ - WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK); + /* clear VCE_STATUS */ + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0); /* Set Clock-Gating off */ /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG) @@ -922,6 +923,7 @@ static int vce_v4_0_set_clockgating_state(void *handle, return 0; } +#endif static int vce_v4_0_set_powergating_state(void *handle, enum amd_powergating_state state) @@ -935,16 +937,11 @@ static int vce_v4_0_set_powergating_state(void *handle, */ struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE)) - return 0; - if (state == AMD_PG_STATE_GATE) - /* XXX do we need a vce_v4_0_stop()? */ - return 0; + return vce_v4_0_stop(adev); else return vce_v4_0_start(adev); } -#endif static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, uint32_t flags) @@ -1059,7 +1056,7 @@ const struct amd_ip_funcs vce_v4_0_ip_funcs = { .soft_reset = NULL /* vce_v4_0_soft_reset */, .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */, .set_clockgating_state = vce_v4_0_set_clockgating_state, - .set_powergating_state = NULL /* vce_v4_0_set_powergating_state */, + .set_powergating_state = vce_v4_0_set_powergating_state, }; static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 6d1f804277f8..1b2f69a9a24e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -136,6 +136,25 @@ static uint32_t vega10_ih_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl return ih_rb_cntl; } +static uint32_t vega10_ih_doorbell_rptr(struct amdgpu_ih_ring *ih) +{ + u32 ih_doorbell_rtpr = 0; + + if (ih->use_doorbell) { + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, OFFSET, + ih->doorbell_index); + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, + ENABLE, 1); + } else { + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, + ENABLE, 0); + } + return ih_doorbell_rtpr; +} + /** * vega10_ih_irq_init - init and enable the interrupt ring * @@ -150,8 +169,8 @@ static uint32_t vega10_ih_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl static int vega10_ih_irq_init(struct amdgpu_device *adev) { struct amdgpu_ih_ring *ih; + u32 ih_rb_cntl; int ret = 0; - u32 ih_rb_cntl, ih_doorbell_rtpr; u32 tmp; /* disable irqs */ @@ -177,23 +196,11 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) upper_32_bits(ih->wptr_addr) & 0xFFFF); /* set rptr, wptr to 0 */ - WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0); WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0); - ih_doorbell_rtpr = RREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR); - if (adev->irq.ih.use_doorbell) { - ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, - IH_DOORBELL_RPTR, OFFSET, - adev->irq.ih.doorbell_index); - ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, - IH_DOORBELL_RPTR, - ENABLE, 1); - } else { - ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, - IH_DOORBELL_RPTR, - ENABLE, 0); - } - WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR, ih_doorbell_rtpr); + WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR, + vega10_ih_doorbell_rptr(ih)); ih = &adev->irq.ih1; if (ih->ring_size) { @@ -203,11 +210,18 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + WPTR_OVERFLOW_ENABLE, 0); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + RB_FULL_DRAIN_ENABLE, 1); WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); /* set rptr, wptr to 0 */ - WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0); WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0); + + WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR_RING1, + vega10_ih_doorbell_rptr(ih)); } ih = &adev->irq.ih2; @@ -216,13 +230,16 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI_RING2, (ih->gpu_addr >> 40) & 0xff); - ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); + ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); /* set rptr, wptr to 0 */ - WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0); WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0); + + WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR_RING2, + vega10_ih_doorbell_rptr(ih)); } tmp = RREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL); @@ -449,20 +466,23 @@ static int vega10_ih_sw_init(void *handle) if (r) return r; - if (adev->asic_type == CHIP_VEGA10) { - r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, PAGE_SIZE, true); - if (r) - return r; - - r = amdgpu_ih_ring_init(adev, &adev->irq.ih2, PAGE_SIZE, true); - if (r) - return r; - } - - /* TODO add doorbell for IH1 & IH2 as well */ adev->irq.ih.use_doorbell = true; adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1; + r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, PAGE_SIZE, true); + if (r) + return r; + + adev->irq.ih1.use_doorbell = true; + adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1; + + r = amdgpu_ih_ring_init(adev, &adev->irq.ih2, PAGE_SIZE, true); + if (r) + return r; + + adev->irq.ih2.use_doorbell = true; + adev->irq.ih2.doorbell_index = (adev->doorbell_index.ih + 2) << 1; + r = amdgpu_irq_init(adev); return r; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index cf9a49f49d3a..c1e4d44d6137 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -467,6 +467,8 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, memset(&kfd->doorbell_available_index, 0, sizeof(kfd->doorbell_available_index)); + atomic_set(&kfd->sram_ecc_flag, 0); + return kfd; } @@ -492,9 +494,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, { unsigned int size; - kfd->mec_fw_version = kfd->kfd2kgd->get_fw_version(kfd->kgd, + kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, KGD_ENGINE_MEC1); - kfd->sdma_fw_version = kfd->kfd2kgd->get_fw_version(kfd->kgd, + kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, KGD_ENGINE_SDMA1); kfd->shared_resources = *gpu_resources; @@ -662,6 +664,9 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd) return ret; count = atomic_dec_return(&kfd_locked); WARN_ONCE(count != 0, "KFD reset ref. error"); + + atomic_set(&kfd->sram_ecc_flag, 0); + return 0; } @@ -1025,6 +1030,12 @@ int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj) return 0; } +void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) +{ + if (kfd) + atomic_inc(&kfd->sram_ecc_flag); +} + #if defined(CONFIG_DEBUG_FS) /* This function will send a package to HIQ to hang the HWS diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index e9f0e0a1b41c..6e1d41c5bf86 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -1011,25 +1011,41 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid, void kfd_signal_reset_event(struct kfd_dev *dev) { struct kfd_hsa_hw_exception_data hw_exception_data; + struct kfd_hsa_memory_exception_data memory_exception_data; struct kfd_process *p; struct kfd_event *ev; unsigned int temp; uint32_t id, idx; + int reset_cause = atomic_read(&dev->sram_ecc_flag) ? + KFD_HW_EXCEPTION_ECC : + KFD_HW_EXCEPTION_GPU_HANG; /* Whole gpu reset caused by GPU hang and memory is lost */ memset(&hw_exception_data, 0, sizeof(hw_exception_data)); hw_exception_data.gpu_id = dev->id; hw_exception_data.memory_lost = 1; + hw_exception_data.reset_cause = reset_cause; + + memset(&memory_exception_data, 0, sizeof(memory_exception_data)); + memory_exception_data.ErrorType = KFD_MEM_ERR_SRAM_ECC; + memory_exception_data.gpu_id = dev->id; + memory_exception_data.failure.imprecise = true; idx = srcu_read_lock(&kfd_processes_srcu); hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { mutex_lock(&p->event_mutex); id = KFD_FIRST_NONSIGNAL_EVENT_ID; - idr_for_each_entry_continue(&p->event_idr, ev, id) + idr_for_each_entry_continue(&p->event_idr, ev, id) { if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) { ev->hw_exception_data = hw_exception_data; set_event(ev); } + if (ev->type == KFD_EVENT_TYPE_MEMORY && + reset_cause == KFD_HW_EXCEPTION_ECC) { + ev->memory_exception_data = memory_exception_data; + set_event(ev); + } + } mutex_unlock(&p->event_mutex); } srcu_read_unlock(&kfd_processes_srcu, idx); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 0eeee3c6d6dc..9e0230965675 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -276,6 +276,9 @@ struct kfd_dev { uint64_t hive_id; bool pci_atomic_requested; + + /* SRAM ECC flag */ + atomic_t sram_ecc_flag; }; enum kfd_mempool { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 09da91644f9f..2cb09e088dce 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -37,6 +37,7 @@ #include "kfd_device_queue_manager.h" #include "kfd_iommu.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_ras.h" /* topology_device_list - Master list of all topology devices */ static struct list_head topology_device_list; @@ -1197,6 +1198,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) void *crat_image = NULL; size_t image_size = 0; int proximity_domain; + struct amdgpu_ras *ctx; INIT_LIST_HEAD(&temp_topology_device_list); @@ -1328,6 +1330,20 @@ int kfd_topology_add_device(struct kfd_dev *gpu) dev->node_props.capability |= HSA_CAP_ATS_PRESENT; } + ctx = amdgpu_ras_get_context((struct amdgpu_device *)(dev->gpu->kgd)); + if (ctx) { + /* kfd only concerns sram ecc on GFX/SDMA and HBM ecc on UMC */ + dev->node_props.capability |= + (((ctx->features & BIT(AMDGPU_RAS_BLOCK__SDMA)) != 0) || + ((ctx->features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0)) ? + HSA_CAP_SRAM_EDCSUPPORTED : 0; + dev->node_props.capability |= ((ctx->features & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ? + HSA_CAP_MEM_EDCSUPPORTED : 0; + + dev->node_props.capability |= (ctx->features != 0) ? + HSA_CAP_RASEVENTNOTIFY : 0; + } + kfd_debug_print_topology(); if (!res) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 92a19be07344..84710cfd23c2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -48,6 +48,10 @@ #define HSA_CAP_DOORBELL_TYPE_2_0 0x2 #define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x00004000 +#define HSA_CAP_SRAM_EDCSUPPORTED 0x00080000 +#define HSA_CAP_MEM_EDCSUPPORTED 0x00100000 +#define HSA_CAP_RASEVENTNOTIFY 0x00200000 + struct kfd_node_properties { uint64_t hive_id; uint32_t cpu_cores_count; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3082b55b1e77..1854506e3e8f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -111,7 +111,8 @@ amdgpu_dm_update_connector_after_detect(struct amdgpu_dm_connector *aconnector); static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, struct drm_plane *plane, - unsigned long possible_crtcs); + unsigned long possible_crtcs, + const struct dc_plane_cap *plane_cap); static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, struct drm_plane *plane, uint32_t link_index); @@ -137,30 +138,6 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, static void handle_cursor_update(struct drm_plane *plane, struct drm_plane_state *old_plane_state); - - -static const enum drm_plane_type dm_plane_type_default[AMDGPU_MAX_PLANES] = { - DRM_PLANE_TYPE_PRIMARY, - DRM_PLANE_TYPE_PRIMARY, - DRM_PLANE_TYPE_PRIMARY, - DRM_PLANE_TYPE_PRIMARY, - DRM_PLANE_TYPE_PRIMARY, - DRM_PLANE_TYPE_PRIMARY, -}; - -static const enum drm_plane_type dm_plane_type_carizzo[AMDGPU_MAX_PLANES] = { - DRM_PLANE_TYPE_PRIMARY, - DRM_PLANE_TYPE_PRIMARY, - DRM_PLANE_TYPE_PRIMARY, - DRM_PLANE_TYPE_OVERLAY,/* YUV Capable Underlay */ -}; - -static const enum drm_plane_type dm_plane_type_stoney[AMDGPU_MAX_PLANES] = { - DRM_PLANE_TYPE_PRIMARY, - DRM_PLANE_TYPE_PRIMARY, - DRM_PLANE_TYPE_OVERLAY, /* YUV Capable Underlay */ -}; - /* * dm_vblank_get_counter * @@ -275,12 +252,22 @@ get_crtc_by_otg_inst(struct amdgpu_device *adev, return NULL; } +static inline bool amdgpu_dm_vrr_active(struct dm_crtc_state *dm_state) +{ + return dm_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE || + dm_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED; +} + static void dm_pflip_high_irq(void *interrupt_params) { struct amdgpu_crtc *amdgpu_crtc; struct common_irq_params *irq_params = interrupt_params; struct amdgpu_device *adev = irq_params->adev; unsigned long flags; + struct drm_pending_vblank_event *e; + struct dm_crtc_state *acrtc_state; + uint32_t vpos, hpos, v_blank_start, v_blank_end; + bool vrr_active; amdgpu_crtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - IRQ_TYPE_PFLIP); @@ -303,26 +290,116 @@ static void dm_pflip_high_irq(void *interrupt_params) return; } - /* Update to correct count(s) if racing with vblank irq */ - amdgpu_crtc->last_flip_vblank = drm_crtc_accurate_vblank_count(&amdgpu_crtc->base); + /* page flip completed. */ + e = amdgpu_crtc->event; + amdgpu_crtc->event = NULL; - /* wake up userspace */ - if (amdgpu_crtc->event) { - drm_crtc_send_vblank_event(&amdgpu_crtc->base, amdgpu_crtc->event); + if (!e) + WARN_ON(1); - /* page flip completed. clean up */ - amdgpu_crtc->event = NULL; + acrtc_state = to_dm_crtc_state(amdgpu_crtc->base.state); + vrr_active = amdgpu_dm_vrr_active(acrtc_state); + + /* Fixed refresh rate, or VRR scanout position outside front-porch? */ + if (!vrr_active || + !dc_stream_get_scanoutpos(acrtc_state->stream, &v_blank_start, + &v_blank_end, &hpos, &vpos) || + (vpos < v_blank_start)) { + /* Update to correct count and vblank timestamp if racing with + * vblank irq. This also updates to the correct vblank timestamp + * even in VRR mode, as scanout is past the front-porch atm. + */ + drm_crtc_accurate_vblank_count(&amdgpu_crtc->base); - } else - WARN_ON(1); + /* Wake up userspace by sending the pageflip event with proper + * count and timestamp of vblank of flip completion. + */ + if (e) { + drm_crtc_send_vblank_event(&amdgpu_crtc->base, e); + + /* Event sent, so done with vblank for this flip */ + drm_crtc_vblank_put(&amdgpu_crtc->base); + } + } else if (e) { + /* VRR active and inside front-porch: vblank count and + * timestamp for pageflip event will only be up to date after + * drm_crtc_handle_vblank() has been executed from late vblank + * irq handler after start of back-porch (vline 0). We queue the + * pageflip event for send-out by drm_crtc_handle_vblank() with + * updated timestamp and count, once it runs after us. + * + * We need to open-code this instead of using the helper + * drm_crtc_arm_vblank_event(), as that helper would + * call drm_crtc_accurate_vblank_count(), which we must + * not call in VRR mode while we are in front-porch! + */ + + /* sequence will be replaced by real count during send-out. */ + e->sequence = drm_crtc_vblank_count(&amdgpu_crtc->base); + e->pipe = amdgpu_crtc->crtc_id; + + list_add_tail(&e->base.link, &adev->ddev->vblank_event_list); + e = NULL; + } + + /* Keep track of vblank of this flip for flip throttling. We use the + * cooked hw counter, as that one incremented at start of this vblank + * of pageflip completion, so last_flip_vblank is the forbidden count + * for queueing new pageflips if vsync + VRR is enabled. + */ + amdgpu_crtc->last_flip_vblank = amdgpu_get_vblank_counter_kms(adev->ddev, + amdgpu_crtc->crtc_id); amdgpu_crtc->pflip_status = AMDGPU_FLIP_NONE; spin_unlock_irqrestore(&adev->ddev->event_lock, flags); - DRM_DEBUG_DRIVER("%s - crtc :%d[%p], pflip_stat:AMDGPU_FLIP_NONE\n", - __func__, amdgpu_crtc->crtc_id, amdgpu_crtc); + DRM_DEBUG_DRIVER("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_NONE, vrr[%d]-fp %d\n", + amdgpu_crtc->crtc_id, amdgpu_crtc, + vrr_active, (int) !e); +} + +static void dm_vupdate_high_irq(void *interrupt_params) +{ + struct common_irq_params *irq_params = interrupt_params; + struct amdgpu_device *adev = irq_params->adev; + struct amdgpu_crtc *acrtc; + struct dm_crtc_state *acrtc_state; + unsigned long flags; + + acrtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - IRQ_TYPE_VUPDATE); + + if (acrtc) { + acrtc_state = to_dm_crtc_state(acrtc->base.state); + + DRM_DEBUG_DRIVER("crtc:%d, vupdate-vrr:%d\n", acrtc->crtc_id, + amdgpu_dm_vrr_active(acrtc_state)); - drm_crtc_vblank_put(&amdgpu_crtc->base); + /* Core vblank handling is done here after end of front-porch in + * vrr mode, as vblank timestamping will give valid results + * while now done after front-porch. This will also deliver + * page-flip completion events that have been queued to us + * if a pageflip happened inside front-porch. + */ + if (amdgpu_dm_vrr_active(acrtc_state)) { + drm_crtc_handle_vblank(&acrtc->base); + + /* BTR processing for pre-DCE12 ASICs */ + if (acrtc_state->stream && + adev->family < AMDGPU_FAMILY_AI) { + spin_lock_irqsave(&adev->ddev->event_lock, flags); + mod_freesync_handle_v_update( + adev->dm.freesync_module, + acrtc_state->stream, + &acrtc_state->vrr_params); + + dc_stream_adjust_vmin_vmax( + adev->dm.dc, + acrtc_state->stream, + &acrtc_state->vrr_params.adjust); + spin_unlock_irqrestore(&adev->ddev->event_lock, flags); + } + } + } } static void dm_crtc_high_irq(void *interrupt_params) @@ -331,18 +408,33 @@ static void dm_crtc_high_irq(void *interrupt_params) struct amdgpu_device *adev = irq_params->adev; struct amdgpu_crtc *acrtc; struct dm_crtc_state *acrtc_state; + unsigned long flags; acrtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - IRQ_TYPE_VBLANK); if (acrtc) { - drm_crtc_handle_vblank(&acrtc->base); - amdgpu_dm_crtc_handle_crc_irq(&acrtc->base); - acrtc_state = to_dm_crtc_state(acrtc->base.state); - if (acrtc_state->stream && + DRM_DEBUG_DRIVER("crtc:%d, vupdate-vrr:%d\n", acrtc->crtc_id, + amdgpu_dm_vrr_active(acrtc_state)); + + /* Core vblank handling at start of front-porch is only possible + * in non-vrr mode, as only there vblank timestamping will give + * valid results while done in front-porch. Otherwise defer it + * to dm_vupdate_high_irq after end of front-porch. + */ + if (!amdgpu_dm_vrr_active(acrtc_state)) + drm_crtc_handle_vblank(&acrtc->base); + + /* Following stuff must happen at start of vblank, for crc + * computation and below-the-range btr support in vrr mode. + */ + amdgpu_dm_crtc_handle_crc_irq(&acrtc->base); + + if (acrtc_state->stream && adev->family >= AMDGPU_FAMILY_AI && acrtc_state->vrr_params.supported && acrtc_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE) { + spin_lock_irqsave(&adev->ddev->event_lock, flags); mod_freesync_handle_v_update( adev->dm.freesync_module, acrtc_state->stream, @@ -352,6 +444,7 @@ static void dm_crtc_high_irq(void *interrupt_params) adev->dm.dc, acrtc_state->stream, &acrtc_state->vrr_params.adjust); + spin_unlock_irqrestore(&adev->ddev->event_lock, flags); } } } @@ -462,6 +555,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (amdgpu_dc_feature_mask & DC_FBC_MASK) init_data.flags.fbc_support = true; + init_data.flags.power_down_display_on_boot = true; + /* Display Core create. */ adev->dm.dc = dc_create(&init_data); @@ -912,9 +1007,16 @@ static int dm_resume(void *handle) struct drm_plane *plane; struct drm_plane_state *new_plane_state; struct dm_plane_state *dm_new_plane_state; + struct dm_atomic_state *dm_state = to_dm_atomic_state(dm->atomic_obj.state); enum dc_connection_type new_connection_type = dc_connection_none; int i; + /* Recreate dc_state - DC invalidates it when setting power state to S3. */ + dc_release_state(dm_state->context); + dm_state->context = dc_create_state(dm->dc); + /* TODO: Remove dc_state->dccg, use dc->dccg directly. */ + dc_resource_state_construct(dm->dc, dm_state->context); + /* power on hardware */ dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); @@ -1457,6 +1559,27 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev) dm_crtc_high_irq, c_irq_params); } + /* Use VUPDATE interrupt */ + for (i = VISLANDS30_IV_SRCID_D1_V_UPDATE_INT; i <= VISLANDS30_IV_SRCID_D6_V_UPDATE_INT; i += 2) { + r = amdgpu_irq_add_id(adev, client_id, i, &adev->vupdate_irq); + if (r) { + DRM_ERROR("Failed to add vupdate irq id!\n"); + return r; + } + + int_params.int_context = INTERRUPT_HIGH_IRQ_CONTEXT; + int_params.irq_source = + dc_interrupt_to_irq_source(dc, i, 0); + + c_irq_params = &adev->dm.vupdate_params[int_params.irq_source - DC_IRQ_SOURCE_VUPDATE1]; + + c_irq_params->adev = adev; + c_irq_params->irq_src = int_params.irq_source; + + amdgpu_dm_irq_register_interrupt(adev, &int_params, + dm_vupdate_high_irq, c_irq_params); + } + /* Use GRPH_PFLIP interrupt */ for (i = VISLANDS30_IV_SRCID_D1_GRPH_PFLIP; i <= VISLANDS30_IV_SRCID_D6_GRPH_PFLIP; i += 2) { @@ -1542,6 +1665,34 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) dm_crtc_high_irq, c_irq_params); } + /* Use VUPDATE_NO_LOCK interrupt on DCN, which seems to correspond to + * the regular VUPDATE interrupt on DCE. We want DC_IRQ_SOURCE_VUPDATEx + * to trigger at end of each vblank, regardless of state of the lock, + * matching DCE behaviour. + */ + for (i = DCN_1_0__SRCID__OTG0_IHC_V_UPDATE_NO_LOCK_INTERRUPT; + i <= DCN_1_0__SRCID__OTG0_IHC_V_UPDATE_NO_LOCK_INTERRUPT + adev->mode_info.num_crtc - 1; + i++) { + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->vupdate_irq); + + if (r) { + DRM_ERROR("Failed to add vupdate irq id!\n"); + return r; + } + + int_params.int_context = INTERRUPT_HIGH_IRQ_CONTEXT; + int_params.irq_source = + dc_interrupt_to_irq_source(dc, i, 0); + + c_irq_params = &adev->dm.vupdate_params[int_params.irq_source - DC_IRQ_SOURCE_VUPDATE1]; + + c_irq_params->adev = adev; + c_irq_params->irq_src = int_params.irq_source; + + amdgpu_dm_irq_register_interrupt(adev, &int_params, + dm_vupdate_high_irq, c_irq_params); + } + /* Use GRPH_PFLIP interrupt */ for (i = DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT; i <= DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT + adev->mode_info.num_crtc - 1; @@ -1593,15 +1744,10 @@ static int dm_atomic_get_state(struct drm_atomic_state *state, struct amdgpu_device *adev = dev->dev_private; struct amdgpu_display_manager *dm = &adev->dm; struct drm_private_state *priv_state; - int ret; if (*dm_state) return 0; - ret = drm_modeset_lock(&dm->atomic_obj_lock, state->acquire_ctx); - if (ret) - return ret; - priv_state = drm_atomic_get_private_obj_state(state, &dm->atomic_obj); if (IS_ERR(priv_state)) return PTR_ERR(priv_state); @@ -1658,17 +1804,16 @@ dm_atomic_duplicate_state(struct drm_private_obj *obj) __drm_atomic_helper_private_obj_duplicate_state(obj, &new_state->base); - new_state->context = dc_create_state(); + old_state = to_dm_atomic_state(obj->state); + + if (old_state && old_state->context) + new_state->context = dc_copy_state(old_state->context); + if (!new_state->context) { kfree(new_state); return NULL; } - old_state = to_dm_atomic_state(obj->state); - if (old_state && old_state->context) - dc_resource_state_copy_construct(old_state->context, - new_state->context); - return &new_state->base; } @@ -1708,13 +1853,11 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) adev->ddev->mode_config.fb_base = adev->gmc.aper_base; - drm_modeset_lock_init(&adev->dm.atomic_obj_lock); - state = kzalloc(sizeof(*state), GFP_KERNEL); if (!state) return -ENOMEM; - state->context = dc_create_state(); + state->context = dc_create_state(adev->dm.dc); if (!state->context) { kfree(state); return -ENOMEM; @@ -1841,39 +1984,42 @@ amdgpu_dm_register_backlight_device(struct amdgpu_display_manager *dm) #endif static int initialize_plane(struct amdgpu_display_manager *dm, - struct amdgpu_mode_info *mode_info, - int plane_id) + struct amdgpu_mode_info *mode_info, int plane_id, + enum drm_plane_type plane_type, + const struct dc_plane_cap *plane_cap) { struct drm_plane *plane; unsigned long possible_crtcs; int ret = 0; plane = kzalloc(sizeof(struct drm_plane), GFP_KERNEL); - mode_info->planes[plane_id] = plane; - if (!plane) { DRM_ERROR("KMS: Failed to allocate plane\n"); return -ENOMEM; } - plane->type = mode_info->plane_type[plane_id]; + plane->type = plane_type; /* - * HACK: IGT tests expect that each plane can only have - * one possible CRTC. For now, set one CRTC for each - * plane that is not an underlay, but still allow multiple - * CRTCs for underlay planes. + * HACK: IGT tests expect that the primary plane for a CRTC + * can only have one possible CRTC. Only expose support for + * any CRTC if they're not going to be used as a primary plane + * for a CRTC - like overlay or underlay planes. */ possible_crtcs = 1 << plane_id; if (plane_id >= dm->dc->caps.max_streams) possible_crtcs = 0xff; - ret = amdgpu_dm_plane_init(dm, mode_info->planes[plane_id], possible_crtcs); + ret = amdgpu_dm_plane_init(dm, plane, possible_crtcs, plane_cap); if (ret) { DRM_ERROR("KMS: Failed to initialize plane\n"); + kfree(plane); return ret; } + if (mode_info) + mode_info->planes[plane_id] = plane; + return ret; } @@ -1916,8 +2062,9 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) struct amdgpu_encoder *aencoder = NULL; struct amdgpu_mode_info *mode_info = &adev->mode_info; uint32_t link_cnt; - int32_t total_overlay_planes, total_primary_planes; + int32_t primary_planes; enum dc_connection_type new_connection_type = dc_connection_none; + const struct dc_plane_cap *plane; link_cnt = dm->dc->caps.max_links; if (amdgpu_dm_mode_config_init(dm->adev)) { @@ -1925,24 +2072,53 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) return -EINVAL; } - /* Identify the number of planes to be initialized */ - total_overlay_planes = dm->dc->caps.max_slave_planes; - total_primary_planes = dm->dc->caps.max_planes - dm->dc->caps.max_slave_planes; + /* There is one primary plane per CRTC */ + primary_planes = dm->dc->caps.max_streams; + ASSERT(primary_planes <= AMDGPU_MAX_PLANES); - /* First initialize overlay planes, index starting after primary planes */ - for (i = (total_overlay_planes - 1); i >= 0; i--) { - if (initialize_plane(dm, mode_info, (total_primary_planes + i))) { - DRM_ERROR("KMS: Failed to initialize overlay plane\n"); + /* + * Initialize primary planes, implicit planes for legacy IOCTLS. + * Order is reversed to match iteration order in atomic check. + */ + for (i = (primary_planes - 1); i >= 0; i--) { + plane = &dm->dc->caps.planes[i]; + + if (initialize_plane(dm, mode_info, i, + DRM_PLANE_TYPE_PRIMARY, plane)) { + DRM_ERROR("KMS: Failed to initialize primary plane\n"); goto fail; } } - /* Initialize primary planes */ - for (i = (total_primary_planes - 1); i >= 0; i--) { - if (initialize_plane(dm, mode_info, i)) { - DRM_ERROR("KMS: Failed to initialize primary plane\n"); + /* + * Initialize overlay planes, index starting after primary planes. + * These planes have a higher DRM index than the primary planes since + * they should be considered as having a higher z-order. + * Order is reversed to match iteration order in atomic check. + * + * Only support DCN for now, and only expose one so we don't encourage + * userspace to use up all the pipes. + */ + for (i = 0; i < dm->dc->caps.max_planes; ++i) { + struct dc_plane_cap *plane = &dm->dc->caps.planes[i]; + + if (plane->type != DC_PLANE_TYPE_DCN_UNIVERSAL) + continue; + + if (!plane->blends_with_above || !plane->blends_with_below) + continue; + + if (!plane->pixel_format_support.argb8888) + continue; + + if (initialize_plane(dm, NULL, primary_planes + i, + DRM_PLANE_TYPE_OVERLAY, plane)) { + DRM_ERROR("KMS: Failed to initialize overlay plane\n"); goto fail; } + + /* Only create one overlay plane. */ + break; } for (i = 0; i < dm->dc->caps.max_streams; i++) @@ -2042,8 +2218,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) fail: kfree(aencoder); kfree(aconnector); - for (i = 0; i < dm->dc->caps.max_planes; i++) - kfree(mode_info->planes[i]); + return -EINVAL; } @@ -2124,53 +2299,45 @@ static int dm_early_init(void *handle) adev->mode_info.num_crtc = 6; adev->mode_info.num_hpd = 6; adev->mode_info.num_dig = 6; - adev->mode_info.plane_type = dm_plane_type_default; break; case CHIP_KAVERI: adev->mode_info.num_crtc = 4; adev->mode_info.num_hpd = 6; adev->mode_info.num_dig = 7; - adev->mode_info.plane_type = dm_plane_type_default; break; case CHIP_KABINI: case CHIP_MULLINS: adev->mode_info.num_crtc = 2; adev->mode_info.num_hpd = 6; adev->mode_info.num_dig = 6; - adev->mode_info.plane_type = dm_plane_type_default; break; case CHIP_FIJI: case CHIP_TONGA: adev->mode_info.num_crtc = 6; adev->mode_info.num_hpd = 6; adev->mode_info.num_dig = 7; - adev->mode_info.plane_type = dm_plane_type_default; break; case CHIP_CARRIZO: adev->mode_info.num_crtc = 3; adev->mode_info.num_hpd = 6; adev->mode_info.num_dig = 9; - adev->mode_info.plane_type = dm_plane_type_carizzo; break; case CHIP_STONEY: adev->mode_info.num_crtc = 2; adev->mode_info.num_hpd = 6; adev->mode_info.num_dig = 9; - adev->mode_info.plane_type = dm_plane_type_stoney; break; case CHIP_POLARIS11: case CHIP_POLARIS12: adev->mode_info.num_crtc = 5; adev->mode_info.num_hpd = 5; adev->mode_info.num_dig = 5; - adev->mode_info.plane_type = dm_plane_type_default; break; case CHIP_POLARIS10: case CHIP_VEGAM: adev->mode_info.num_crtc = 6; adev->mode_info.num_hpd = 6; adev->mode_info.num_dig = 6; - adev->mode_info.plane_type = dm_plane_type_default; break; case CHIP_VEGA10: case CHIP_VEGA12: @@ -2178,14 +2345,12 @@ static int dm_early_init(void *handle) adev->mode_info.num_crtc = 6; adev->mode_info.num_hpd = 6; adev->mode_info.num_dig = 6; - adev->mode_info.plane_type = dm_plane_type_default; break; #if defined(CONFIG_DRM_AMD_DC_DCN1_0) case CHIP_RAVEN: adev->mode_info.num_crtc = 4; adev->mode_info.num_hpd = 4; adev->mode_info.num_dig = 4; - adev->mode_info.plane_type = dm_plane_type_default; break; #endif default: @@ -2243,56 +2408,63 @@ static const struct drm_encoder_funcs amdgpu_dm_encoder_funcs = { .destroy = amdgpu_dm_encoder_destroy, }; -static bool fill_rects_from_plane_state(const struct drm_plane_state *state, - struct dc_plane_state *plane_state) + +static int fill_dc_scaling_info(const struct drm_plane_state *state, + struct dc_scaling_info *scaling_info) { - plane_state->src_rect.x = state->src_x >> 16; - plane_state->src_rect.y = state->src_y >> 16; - /* we ignore the mantissa for now and do not deal with floating pixels :( */ - plane_state->src_rect.width = state->src_w >> 16; + int scale_w, scale_h; - if (plane_state->src_rect.width == 0) - return false; + memset(scaling_info, 0, sizeof(*scaling_info)); - plane_state->src_rect.height = state->src_h >> 16; - if (plane_state->src_rect.height == 0) - return false; + /* Source is fixed 16.16 but we ignore mantissa for now... */ + scaling_info->src_rect.x = state->src_x >> 16; + scaling_info->src_rect.y = state->src_y >> 16; - plane_state->dst_rect.x = state->crtc_x; - plane_state->dst_rect.y = state->crtc_y; + scaling_info->src_rect.width = state->src_w >> 16; + if (scaling_info->src_rect.width == 0) + return -EINVAL; + + scaling_info->src_rect.height = state->src_h >> 16; + if (scaling_info->src_rect.height == 0) + return -EINVAL; + + scaling_info->dst_rect.x = state->crtc_x; + scaling_info->dst_rect.y = state->crtc_y; if (state->crtc_w == 0) - return false; + return -EINVAL; - plane_state->dst_rect.width = state->crtc_w; + scaling_info->dst_rect.width = state->crtc_w; if (state->crtc_h == 0) - return false; + return -EINVAL; - plane_state->dst_rect.height = state->crtc_h; + scaling_info->dst_rect.height = state->crtc_h; - plane_state->clip_rect = plane_state->dst_rect; + /* DRM doesn't specify clipping on destination output. */ + scaling_info->clip_rect = scaling_info->dst_rect; - switch (state->rotation & DRM_MODE_ROTATE_MASK) { - case DRM_MODE_ROTATE_0: - plane_state->rotation = ROTATION_ANGLE_0; - break; - case DRM_MODE_ROTATE_90: - plane_state->rotation = ROTATION_ANGLE_90; - break; - case DRM_MODE_ROTATE_180: - plane_state->rotation = ROTATION_ANGLE_180; - break; - case DRM_MODE_ROTATE_270: - plane_state->rotation = ROTATION_ANGLE_270; - break; - default: - plane_state->rotation = ROTATION_ANGLE_0; - break; - } + /* TODO: Validate scaling per-format with DC plane caps */ + scale_w = scaling_info->dst_rect.width * 1000 / + scaling_info->src_rect.width; - return true; + if (scale_w < 250 || scale_w > 16000) + return -EINVAL; + + scale_h = scaling_info->dst_rect.height * 1000 / + scaling_info->src_rect.height; + + if (scale_h < 250 || scale_h > 16000) + return -EINVAL; + + /* + * The "scaling_quality" can be ignored for now, quality = 0 has DC + * assume reasonable defaults based on the format. + */ + + return 0; } + static int get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb, uint64_t *tiling_flags) { @@ -2321,10 +2493,16 @@ static inline uint64_t get_dcc_address(uint64_t address, uint64_t tiling_flags) return offset ? (address + offset * 256) : 0; } -static bool fill_plane_dcc_attributes(struct amdgpu_device *adev, - const struct amdgpu_framebuffer *afb, - struct dc_plane_state *plane_state, - uint64_t info) +static int +fill_plane_dcc_attributes(struct amdgpu_device *adev, + const struct amdgpu_framebuffer *afb, + const enum surface_pixel_format format, + const enum dc_rotation_angle rotation, + const union plane_size *plane_size, + const union dc_tiling_info *tiling_info, + const uint64_t info, + struct dc_plane_dcc_param *dcc, + struct dc_plane_address *address) { struct dc *dc = adev->dm.dc; struct dc_dcc_surface_param input; @@ -2337,133 +2515,103 @@ static bool fill_plane_dcc_attributes(struct amdgpu_device *adev, memset(&output, 0, sizeof(output)); if (!offset) - return false; + return 0; + + if (format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) + return 0; if (!dc->cap_funcs.get_dcc_compression_cap) - return false; + return -EINVAL; - input.format = plane_state->format; - input.surface_size.width = - plane_state->plane_size.grph.surface_size.width; - input.surface_size.height = - plane_state->plane_size.grph.surface_size.height; - input.swizzle_mode = plane_state->tiling_info.gfx9.swizzle; + input.format = format; + input.surface_size.width = plane_size->grph.surface_size.width; + input.surface_size.height = plane_size->grph.surface_size.height; + input.swizzle_mode = tiling_info->gfx9.swizzle; - if (plane_state->rotation == ROTATION_ANGLE_0 || - plane_state->rotation == ROTATION_ANGLE_180) + if (rotation == ROTATION_ANGLE_0 || rotation == ROTATION_ANGLE_180) input.scan = SCAN_DIRECTION_HORIZONTAL; - else if (plane_state->rotation == ROTATION_ANGLE_90 || - plane_state->rotation == ROTATION_ANGLE_270) + else if (rotation == ROTATION_ANGLE_90 || rotation == ROTATION_ANGLE_270) input.scan = SCAN_DIRECTION_VERTICAL; if (!dc->cap_funcs.get_dcc_compression_cap(dc, &input, &output)) - return false; + return -EINVAL; if (!output.capable) - return false; + return -EINVAL; if (i64b == 0 && output.grph.rgb.independent_64b_blks != 0) - return false; + return -EINVAL; - plane_state->dcc.enable = 1; - plane_state->dcc.grph.meta_pitch = + dcc->enable = 1; + dcc->grph.meta_pitch = AMDGPU_TILING_GET(info, DCC_PITCH_MAX) + 1; - plane_state->dcc.grph.independent_64b_blks = i64b; + dcc->grph.independent_64b_blks = i64b; dcc_address = get_dcc_address(afb->address, info); - plane_state->address.grph.meta_addr.low_part = - lower_32_bits(dcc_address); - plane_state->address.grph.meta_addr.high_part = - upper_32_bits(dcc_address); + address->grph.meta_addr.low_part = lower_32_bits(dcc_address); + address->grph.meta_addr.high_part = upper_32_bits(dcc_address); - return true; + return 0; } -static int fill_plane_attributes_from_fb(struct amdgpu_device *adev, - struct dc_plane_state *plane_state, - const struct amdgpu_framebuffer *amdgpu_fb) -{ - uint64_t tiling_flags; - unsigned int awidth; - const struct drm_framebuffer *fb = &amdgpu_fb->base; - int ret = 0; - struct drm_format_name_buf format_name; - - ret = get_fb_info( - amdgpu_fb, - &tiling_flags); - - if (ret) - return ret; - - switch (fb->format->format) { - case DRM_FORMAT_C8: - plane_state->format = SURFACE_PIXEL_FORMAT_GRPH_PALETA_256_COLORS; - break; - case DRM_FORMAT_RGB565: - plane_state->format = SURFACE_PIXEL_FORMAT_GRPH_RGB565; - break; - case DRM_FORMAT_XRGB8888: - case DRM_FORMAT_ARGB8888: - plane_state->format = SURFACE_PIXEL_FORMAT_GRPH_ARGB8888; - break; - case DRM_FORMAT_XRGB2101010: - case DRM_FORMAT_ARGB2101010: - plane_state->format = SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010; - break; - case DRM_FORMAT_XBGR2101010: - case DRM_FORMAT_ABGR2101010: - plane_state->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010; - break; - case DRM_FORMAT_XBGR8888: - case DRM_FORMAT_ABGR8888: - plane_state->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR8888; - break; - case DRM_FORMAT_NV21: - plane_state->format = SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr; - break; - case DRM_FORMAT_NV12: - plane_state->format = SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb; - break; - default: - DRM_ERROR("Unsupported screen format %s\n", - drm_get_format_name(fb->format->format, &format_name)); - return -EINVAL; - } - - memset(&plane_state->address, 0, sizeof(plane_state->address)); - memset(&plane_state->tiling_info, 0, sizeof(plane_state->tiling_info)); - memset(&plane_state->dcc, 0, sizeof(plane_state->dcc)); - - if (plane_state->format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { - plane_state->address.type = PLN_ADDR_TYPE_GRAPHICS; - plane_state->plane_size.grph.surface_size.x = 0; - plane_state->plane_size.grph.surface_size.y = 0; - plane_state->plane_size.grph.surface_size.width = fb->width; - plane_state->plane_size.grph.surface_size.height = fb->height; - plane_state->plane_size.grph.surface_pitch = - fb->pitches[0] / fb->format->cpp[0]; - /* TODO: unhardcode */ - plane_state->color_space = COLOR_SPACE_SRGB; +static int +fill_plane_buffer_attributes(struct amdgpu_device *adev, + const struct amdgpu_framebuffer *afb, + const enum surface_pixel_format format, + const enum dc_rotation_angle rotation, + const uint64_t tiling_flags, + union dc_tiling_info *tiling_info, + union plane_size *plane_size, + struct dc_plane_dcc_param *dcc, + struct dc_plane_address *address) +{ + const struct drm_framebuffer *fb = &afb->base; + int ret; + memset(tiling_info, 0, sizeof(*tiling_info)); + memset(plane_size, 0, sizeof(*plane_size)); + memset(dcc, 0, sizeof(*dcc)); + memset(address, 0, sizeof(*address)); + + if (format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { + plane_size->grph.surface_size.x = 0; + plane_size->grph.surface_size.y = 0; + plane_size->grph.surface_size.width = fb->width; + plane_size->grph.surface_size.height = fb->height; + plane_size->grph.surface_pitch = + fb->pitches[0] / fb->format->cpp[0]; + + address->type = PLN_ADDR_TYPE_GRAPHICS; + address->grph.addr.low_part = lower_32_bits(afb->address); + address->grph.addr.high_part = upper_32_bits(afb->address); } else { - awidth = ALIGN(fb->width, 64); - plane_state->address.type = PLN_ADDR_TYPE_VIDEO_PROGRESSIVE; - plane_state->plane_size.video.luma_size.x = 0; - plane_state->plane_size.video.luma_size.y = 0; - plane_state->plane_size.video.luma_size.width = awidth; - plane_state->plane_size.video.luma_size.height = fb->height; - /* TODO: unhardcode */ - plane_state->plane_size.video.luma_pitch = awidth; - - plane_state->plane_size.video.chroma_size.x = 0; - plane_state->plane_size.video.chroma_size.y = 0; - plane_state->plane_size.video.chroma_size.width = awidth; - plane_state->plane_size.video.chroma_size.height = fb->height; - plane_state->plane_size.video.chroma_pitch = awidth / 2; - - /* TODO: unhardcode */ - plane_state->color_space = COLOR_SPACE_YCBCR709; + uint64_t chroma_addr = afb->address + fb->offsets[1]; + + plane_size->video.luma_size.x = 0; + plane_size->video.luma_size.y = 0; + plane_size->video.luma_size.width = fb->width; + plane_size->video.luma_size.height = fb->height; + plane_size->video.luma_pitch = + fb->pitches[0] / fb->format->cpp[0]; + + plane_size->video.chroma_size.x = 0; + plane_size->video.chroma_size.y = 0; + /* TODO: set these based on surface format */ + plane_size->video.chroma_size.width = fb->width / 2; + plane_size->video.chroma_size.height = fb->height / 2; + + plane_size->video.chroma_pitch = + fb->pitches[1] / fb->format->cpp[1]; + + address->type = PLN_ADDR_TYPE_VIDEO_PROGRESSIVE; + address->video_progressive.luma_addr.low_part = + lower_32_bits(afb->address); + address->video_progressive.luma_addr.high_part = + upper_32_bits(afb->address); + address->video_progressive.chroma_addr.low_part = + lower_32_bits(chroma_addr); + address->video_progressive.chroma_addr.high_part = + upper_32_bits(chroma_addr); } /* Fill GFX8 params */ @@ -2477,21 +2625,21 @@ static int fill_plane_attributes_from_fb(struct amdgpu_device *adev, num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS); /* XXX fix me for VI */ - plane_state->tiling_info.gfx8.num_banks = num_banks; - plane_state->tiling_info.gfx8.array_mode = + tiling_info->gfx8.num_banks = num_banks; + tiling_info->gfx8.array_mode = DC_ARRAY_2D_TILED_THIN1; - plane_state->tiling_info.gfx8.tile_split = tile_split; - plane_state->tiling_info.gfx8.bank_width = bankw; - plane_state->tiling_info.gfx8.bank_height = bankh; - plane_state->tiling_info.gfx8.tile_aspect = mtaspect; - plane_state->tiling_info.gfx8.tile_mode = + tiling_info->gfx8.tile_split = tile_split; + tiling_info->gfx8.bank_width = bankw; + tiling_info->gfx8.bank_height = bankh; + tiling_info->gfx8.tile_aspect = mtaspect; + tiling_info->gfx8.tile_mode = DC_ADDR_SURF_MICRO_TILING_DISPLAY; } else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == DC_ARRAY_1D_TILED_THIN1) { - plane_state->tiling_info.gfx8.array_mode = DC_ARRAY_1D_TILED_THIN1; + tiling_info->gfx8.array_mode = DC_ARRAY_1D_TILED_THIN1; } - plane_state->tiling_info.gfx8.pipe_config = + tiling_info->gfx8.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG); if (adev->asic_type == CHIP_VEGA10 || @@ -2499,60 +2647,249 @@ static int fill_plane_attributes_from_fb(struct amdgpu_device *adev, adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_RAVEN) { /* Fill GFX9 params */ - plane_state->tiling_info.gfx9.num_pipes = + tiling_info->gfx9.num_pipes = adev->gfx.config.gb_addr_config_fields.num_pipes; - plane_state->tiling_info.gfx9.num_banks = + tiling_info->gfx9.num_banks = adev->gfx.config.gb_addr_config_fields.num_banks; - plane_state->tiling_info.gfx9.pipe_interleave = + tiling_info->gfx9.pipe_interleave = adev->gfx.config.gb_addr_config_fields.pipe_interleave_size; - plane_state->tiling_info.gfx9.num_shader_engines = + tiling_info->gfx9.num_shader_engines = adev->gfx.config.gb_addr_config_fields.num_se; - plane_state->tiling_info.gfx9.max_compressed_frags = + tiling_info->gfx9.max_compressed_frags = adev->gfx.config.gb_addr_config_fields.max_compress_frags; - plane_state->tiling_info.gfx9.num_rb_per_se = + tiling_info->gfx9.num_rb_per_se = adev->gfx.config.gb_addr_config_fields.num_rb_per_se; - plane_state->tiling_info.gfx9.swizzle = + tiling_info->gfx9.swizzle = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE); - plane_state->tiling_info.gfx9.shaderEnable = 1; + tiling_info->gfx9.shaderEnable = 1; - fill_plane_dcc_attributes(adev, amdgpu_fb, plane_state, - tiling_flags); + ret = fill_plane_dcc_attributes(adev, afb, format, rotation, + plane_size, tiling_info, + tiling_flags, dcc, address); + if (ret) + return ret; } - plane_state->visible = true; - plane_state->scaling_quality.h_taps_c = 0; - plane_state->scaling_quality.v_taps_c = 0; + return 0; +} - /* is this needed? is plane_state zeroed at allocation? */ - plane_state->scaling_quality.h_taps = 0; - plane_state->scaling_quality.v_taps = 0; - plane_state->stereo_format = PLANE_STEREO_FORMAT_NONE; +static void +fill_blending_from_plane_state(const struct drm_plane_state *plane_state, + bool *per_pixel_alpha, bool *global_alpha, + int *global_alpha_value) +{ + *per_pixel_alpha = false; + *global_alpha = false; + *global_alpha_value = 0xff; - return ret; + if (plane_state->plane->type != DRM_PLANE_TYPE_OVERLAY) + return; + if (plane_state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI) { + static const uint32_t alpha_formats[] = { + DRM_FORMAT_ARGB8888, + DRM_FORMAT_RGBA8888, + DRM_FORMAT_ABGR8888, + }; + uint32_t format = plane_state->fb->format->format; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(alpha_formats); ++i) { + if (format == alpha_formats[i]) { + *per_pixel_alpha = true; + break; + } + } + } + + if (plane_state->alpha < 0xffff) { + *global_alpha = true; + *global_alpha_value = plane_state->alpha >> 8; + } } -static int fill_plane_attributes(struct amdgpu_device *adev, - struct dc_plane_state *dc_plane_state, - struct drm_plane_state *plane_state, - struct drm_crtc_state *crtc_state) +static int +fill_plane_color_attributes(const struct drm_plane_state *plane_state, + const enum surface_pixel_format format, + enum dc_color_space *color_space) { - const struct amdgpu_framebuffer *amdgpu_fb = + bool full_range; + + *color_space = COLOR_SPACE_SRGB; + + /* DRM color properties only affect non-RGB formats. */ + if (format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) + return 0; + + full_range = (plane_state->color_range == DRM_COLOR_YCBCR_FULL_RANGE); + + switch (plane_state->color_encoding) { + case DRM_COLOR_YCBCR_BT601: + if (full_range) + *color_space = COLOR_SPACE_YCBCR601; + else + *color_space = COLOR_SPACE_YCBCR601_LIMITED; + break; + + case DRM_COLOR_YCBCR_BT709: + if (full_range) + *color_space = COLOR_SPACE_YCBCR709; + else + *color_space = COLOR_SPACE_YCBCR709_LIMITED; + break; + + case DRM_COLOR_YCBCR_BT2020: + if (full_range) + *color_space = COLOR_SPACE_2020_YCBCR; + else + return -EINVAL; + break; + + default: + return -EINVAL; + } + + return 0; +} + +static int +fill_dc_plane_info_and_addr(struct amdgpu_device *adev, + const struct drm_plane_state *plane_state, + const uint64_t tiling_flags, + struct dc_plane_info *plane_info, + struct dc_plane_address *address) +{ + const struct drm_framebuffer *fb = plane_state->fb; + const struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane_state->fb); - const struct drm_crtc *crtc = plane_state->crtc; - int ret = 0; + struct drm_format_name_buf format_name; + int ret; - if (!fill_rects_from_plane_state(plane_state, dc_plane_state)) + memset(plane_info, 0, sizeof(*plane_info)); + + switch (fb->format->format) { + case DRM_FORMAT_C8: + plane_info->format = + SURFACE_PIXEL_FORMAT_GRPH_PALETA_256_COLORS; + break; + case DRM_FORMAT_RGB565: + plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_RGB565; + break; + case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_ARGB8888: + plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ARGB8888; + break; + case DRM_FORMAT_XRGB2101010: + case DRM_FORMAT_ARGB2101010: + plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010; + break; + case DRM_FORMAT_XBGR2101010: + case DRM_FORMAT_ABGR2101010: + plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010; + break; + case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_ABGR8888: + plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR8888; + break; + case DRM_FORMAT_NV21: + plane_info->format = SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr; + break; + case DRM_FORMAT_NV12: + plane_info->format = SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb; + break; + default: + DRM_ERROR( + "Unsupported screen format %s\n", + drm_get_format_name(fb->format->format, &format_name)); return -EINVAL; + } + + switch (plane_state->rotation & DRM_MODE_ROTATE_MASK) { + case DRM_MODE_ROTATE_0: + plane_info->rotation = ROTATION_ANGLE_0; + break; + case DRM_MODE_ROTATE_90: + plane_info->rotation = ROTATION_ANGLE_90; + break; + case DRM_MODE_ROTATE_180: + plane_info->rotation = ROTATION_ANGLE_180; + break; + case DRM_MODE_ROTATE_270: + plane_info->rotation = ROTATION_ANGLE_270; + break; + default: + plane_info->rotation = ROTATION_ANGLE_0; + break; + } - ret = fill_plane_attributes_from_fb( - crtc->dev->dev_private, - dc_plane_state, - amdgpu_fb); + plane_info->visible = true; + plane_info->stereo_format = PLANE_STEREO_FORMAT_NONE; + ret = fill_plane_color_attributes(plane_state, plane_info->format, + &plane_info->color_space); if (ret) return ret; + ret = fill_plane_buffer_attributes(adev, afb, plane_info->format, + plane_info->rotation, tiling_flags, + &plane_info->tiling_info, + &plane_info->plane_size, + &plane_info->dcc, address); + if (ret) + return ret; + + fill_blending_from_plane_state( + plane_state, &plane_info->per_pixel_alpha, + &plane_info->global_alpha, &plane_info->global_alpha_value); + + return 0; +} + +static int fill_dc_plane_attributes(struct amdgpu_device *adev, + struct dc_plane_state *dc_plane_state, + struct drm_plane_state *plane_state, + struct drm_crtc_state *crtc_state) +{ + const struct amdgpu_framebuffer *amdgpu_fb = + to_amdgpu_framebuffer(plane_state->fb); + struct dc_scaling_info scaling_info; + struct dc_plane_info plane_info; + uint64_t tiling_flags; + int ret; + + ret = fill_dc_scaling_info(plane_state, &scaling_info); + if (ret) + return ret; + + dc_plane_state->src_rect = scaling_info.src_rect; + dc_plane_state->dst_rect = scaling_info.dst_rect; + dc_plane_state->clip_rect = scaling_info.clip_rect; + dc_plane_state->scaling_quality = scaling_info.scaling_quality; + + ret = get_fb_info(amdgpu_fb, &tiling_flags); + if (ret) + return ret; + + ret = fill_dc_plane_info_and_addr(adev, plane_state, tiling_flags, + &plane_info, + &dc_plane_state->address); + if (ret) + return ret; + + dc_plane_state->format = plane_info.format; + dc_plane_state->color_space = plane_info.color_space; + dc_plane_state->format = plane_info.format; + dc_plane_state->plane_size = plane_info.plane_size; + dc_plane_state->rotation = plane_info.rotation; + dc_plane_state->horizontal_mirror = plane_info.horizontal_mirror; + dc_plane_state->stereo_format = plane_info.stereo_format; + dc_plane_state->tiling_info = plane_info.tiling_info; + dc_plane_state->visible = plane_info.visible; + dc_plane_state->per_pixel_alpha = plane_info.per_pixel_alpha; + dc_plane_state->global_alpha = plane_info.global_alpha; + dc_plane_state->global_alpha_value = plane_info.global_alpha_value; + dc_plane_state->dcc = plane_info.dcc; + /* * Always set input transfer function, since plane state is refreshed * every time. @@ -3124,6 +3461,8 @@ dm_crtc_duplicate_state(struct drm_crtc *crtc) dc_stream_retain(state->stream); } + state->active_planes = cur->active_planes; + state->interrupts_enabled = cur->interrupts_enabled; state->vrr_params = cur->vrr_params; state->vrr_infopacket = cur->vrr_infopacket; state->abm_level = cur->abm_level; @@ -3136,12 +3475,41 @@ dm_crtc_duplicate_state(struct drm_crtc *crtc) return &state->base; } +static inline int dm_set_vupdate_irq(struct drm_crtc *crtc, bool enable) +{ + enum dc_irq_source irq_source; + struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); + struct amdgpu_device *adev = crtc->dev->dev_private; + int rc; + + irq_source = IRQ_TYPE_VUPDATE + acrtc->otg_inst; + + rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY; + + DRM_DEBUG_DRIVER("crtc %d - vupdate irq %sabling: r=%d\n", + acrtc->crtc_id, enable ? "en" : "dis", rc); + return rc; +} static inline int dm_set_vblank(struct drm_crtc *crtc, bool enable) { enum dc_irq_source irq_source; struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); struct amdgpu_device *adev = crtc->dev->dev_private; + struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc->state); + int rc = 0; + + if (enable) { + /* vblank irq on -> Only need vupdate irq in vrr mode */ + if (amdgpu_dm_vrr_active(acrtc_state)) + rc = dm_set_vupdate_irq(crtc, true); + } else { + /* vblank irq off -> vupdate irq off */ + rc = dm_set_vupdate_irq(crtc, false); + } + + if (rc) + return rc; irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst; return dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY; @@ -3519,6 +3887,76 @@ static void dm_crtc_helper_disable(struct drm_crtc *crtc) { } +static bool does_crtc_have_active_cursor(struct drm_crtc_state *new_crtc_state) +{ + struct drm_device *dev = new_crtc_state->crtc->dev; + struct drm_plane *plane; + + drm_for_each_plane_mask(plane, dev, new_crtc_state->plane_mask) { + if (plane->type == DRM_PLANE_TYPE_CURSOR) + return true; + } + + return false; +} + +static int count_crtc_active_planes(struct drm_crtc_state *new_crtc_state) +{ + struct drm_atomic_state *state = new_crtc_state->state; + struct drm_plane *plane; + int num_active = 0; + + drm_for_each_plane_mask(plane, state->dev, new_crtc_state->plane_mask) { + struct drm_plane_state *new_plane_state; + + /* Cursor planes are "fake". */ + if (plane->type == DRM_PLANE_TYPE_CURSOR) + continue; + + new_plane_state = drm_atomic_get_new_plane_state(state, plane); + + if (!new_plane_state) { + /* + * The plane is enable on the CRTC and hasn't changed + * state. This means that it previously passed + * validation and is therefore enabled. + */ + num_active += 1; + continue; + } + + /* We need a framebuffer to be considered enabled. */ + num_active += (new_plane_state->fb != NULL); + } + + return num_active; +} + +/* + * Sets whether interrupts should be enabled on a specific CRTC. + * We require that the stream be enabled and that there exist active + * DC planes on the stream. + */ +static void +dm_update_crtc_interrupt_state(struct drm_crtc *crtc, + struct drm_crtc_state *new_crtc_state) +{ + struct dm_crtc_state *dm_new_crtc_state = + to_dm_crtc_state(new_crtc_state); + + dm_new_crtc_state->active_planes = 0; + dm_new_crtc_state->interrupts_enabled = false; + + if (!dm_new_crtc_state->stream) + return; + + dm_new_crtc_state->active_planes = + count_crtc_active_planes(new_crtc_state); + + dm_new_crtc_state->interrupts_enabled = + dm_new_crtc_state->active_planes > 0; +} + static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc, struct drm_crtc_state *state) { @@ -3527,6 +3965,14 @@ static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc, struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(state); int ret = -EINVAL; + /* + * Update interrupt state for the CRTC. This needs to happen whenever + * the CRTC has changed or whenever any of its planes have changed. + * Atomic check satisfies both of these requirements since the CRTC + * is added to the state by DRM during drm_atomic_helper_check_planes. + */ + dm_update_crtc_interrupt_state(crtc, state); + if (unlikely(!dm_crtc_state->stream && modeset_required(state, NULL, dm_crtc_state->stream))) { WARN_ON(1); @@ -3537,6 +3983,15 @@ static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc, if (!dm_crtc_state->stream) return 0; + /* + * We want at least one hardware plane enabled to use + * the stream with a cursor enabled. + */ + if (state->enable && state->active && + does_crtc_have_active_cursor(state) && + dm_crtc_state->active_planes == 0) + return -EINVAL; + if (dc_validate_stream(dc, dm_crtc_state->stream) == DC_OK) return 0; @@ -3583,11 +4038,8 @@ static void dm_drm_plane_reset(struct drm_plane *plane) amdgpu_state = kzalloc(sizeof(*amdgpu_state), GFP_KERNEL); WARN_ON(amdgpu_state == NULL); - if (amdgpu_state) { - plane->state = &amdgpu_state->base; - plane->state->plane = plane; - plane->state->rotation = DRM_MODE_ROTATE_0; - } + if (amdgpu_state) + __drm_atomic_helper_plane_reset(plane, &amdgpu_state->base); } static struct drm_plane_state * @@ -3637,10 +4089,8 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_gem_object *obj; struct amdgpu_device *adev; struct amdgpu_bo *rbo; - uint64_t chroma_addr = 0; struct dm_plane_state *dm_plane_state_new, *dm_plane_state_old; - uint64_t tiling_flags, dcc_address; - unsigned int awidth; + uint64_t tiling_flags; uint32_t domain; int r; @@ -3693,29 +4143,11 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane, dm_plane_state_old->dc_state != dm_plane_state_new->dc_state) { struct dc_plane_state *plane_state = dm_plane_state_new->dc_state; - if (plane_state->format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { - plane_state->address.grph.addr.low_part = lower_32_bits(afb->address); - plane_state->address.grph.addr.high_part = upper_32_bits(afb->address); - - dcc_address = - get_dcc_address(afb->address, tiling_flags); - plane_state->address.grph.meta_addr.low_part = - lower_32_bits(dcc_address); - plane_state->address.grph.meta_addr.high_part = - upper_32_bits(dcc_address); - } else { - awidth = ALIGN(new_state->fb->width, 64); - plane_state->address.type = PLN_ADDR_TYPE_VIDEO_PROGRESSIVE; - plane_state->address.video_progressive.luma_addr.low_part - = lower_32_bits(afb->address); - plane_state->address.video_progressive.luma_addr.high_part - = upper_32_bits(afb->address); - chroma_addr = afb->address + (u64)awidth * new_state->fb->height; - plane_state->address.video_progressive.chroma_addr.low_part - = lower_32_bits(chroma_addr); - plane_state->address.video_progressive.chroma_addr.high_part - = upper_32_bits(chroma_addr); - } + fill_plane_buffer_attributes( + adev, afb, plane_state->format, plane_state->rotation, + tiling_flags, &plane_state->tiling_info, + &plane_state->plane_size, &plane_state->dcc, + &plane_state->address); } return 0; @@ -3747,13 +4179,18 @@ static int dm_plane_atomic_check(struct drm_plane *plane, { struct amdgpu_device *adev = plane->dev->dev_private; struct dc *dc = adev->dm.dc; - struct dm_plane_state *dm_plane_state = to_dm_plane_state(state); + struct dm_plane_state *dm_plane_state; + struct dc_scaling_info scaling_info; + int ret; + + dm_plane_state = to_dm_plane_state(state); if (!dm_plane_state->dc_state) return 0; - if (!fill_rects_from_plane_state(state, dm_plane_state->dc_state)) - return -EINVAL; + ret = fill_dc_scaling_info(state, &scaling_info); + if (ret) + return ret; if (dc_validate_plane(dc, dm_plane_state->dc_state) == DC_OK) return 0; @@ -3826,64 +4263,115 @@ static const uint32_t rgb_formats[] = { DRM_FORMAT_ABGR2101010, DRM_FORMAT_XBGR8888, DRM_FORMAT_ABGR8888, + DRM_FORMAT_RGB565, }; -static const uint32_t yuv_formats[] = { - DRM_FORMAT_NV12, - DRM_FORMAT_NV21, +static const uint32_t overlay_formats[] = { + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_RGBA8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_RGB565 }; static const u32 cursor_formats[] = { DRM_FORMAT_ARGB8888 }; -static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, - struct drm_plane *plane, - unsigned long possible_crtcs) +static int get_plane_formats(const struct drm_plane *plane, + const struct dc_plane_cap *plane_cap, + uint32_t *formats, int max_formats) { - int res = -EPERM; + int i, num_formats = 0; + + /* + * TODO: Query support for each group of formats directly from + * DC plane caps. This will require adding more formats to the + * caps list. + */ switch (plane->type) { case DRM_PLANE_TYPE_PRIMARY: - res = drm_universal_plane_init( - dm->adev->ddev, - plane, - possible_crtcs, - &dm_plane_funcs, - rgb_formats, - ARRAY_SIZE(rgb_formats), - NULL, plane->type, NULL); + for (i = 0; i < ARRAY_SIZE(rgb_formats); ++i) { + if (num_formats >= max_formats) + break; + + formats[num_formats++] = rgb_formats[i]; + } + + if (plane_cap && plane_cap->pixel_format_support.nv12) + formats[num_formats++] = DRM_FORMAT_NV12; break; + case DRM_PLANE_TYPE_OVERLAY: - res = drm_universal_plane_init( - dm->adev->ddev, - plane, - possible_crtcs, - &dm_plane_funcs, - yuv_formats, - ARRAY_SIZE(yuv_formats), - NULL, plane->type, NULL); + for (i = 0; i < ARRAY_SIZE(overlay_formats); ++i) { + if (num_formats >= max_formats) + break; + + formats[num_formats++] = overlay_formats[i]; + } break; + case DRM_PLANE_TYPE_CURSOR: - res = drm_universal_plane_init( - dm->adev->ddev, - plane, - possible_crtcs, - &dm_plane_funcs, - cursor_formats, - ARRAY_SIZE(cursor_formats), - NULL, plane->type, NULL); + for (i = 0; i < ARRAY_SIZE(cursor_formats); ++i) { + if (num_formats >= max_formats) + break; + + formats[num_formats++] = cursor_formats[i]; + } break; } + return num_formats; +} + +static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, + struct drm_plane *plane, + unsigned long possible_crtcs, + const struct dc_plane_cap *plane_cap) +{ + uint32_t formats[32]; + int num_formats; + int res = -EPERM; + + num_formats = get_plane_formats(plane, plane_cap, formats, + ARRAY_SIZE(formats)); + + res = drm_universal_plane_init(dm->adev->ddev, plane, possible_crtcs, + &dm_plane_funcs, formats, num_formats, + NULL, plane->type, NULL); + if (res) + return res; + + if (plane->type == DRM_PLANE_TYPE_OVERLAY && + plane_cap && plane_cap->per_pixel_alpha) { + unsigned int blend_caps = BIT(DRM_MODE_BLEND_PIXEL_NONE) | + BIT(DRM_MODE_BLEND_PREMULTI); + + drm_plane_create_alpha_property(plane); + drm_plane_create_blend_mode_property(plane, blend_caps); + } + + if (plane->type == DRM_PLANE_TYPE_PRIMARY && + plane_cap && plane_cap->pixel_format_support.nv12) { + /* This only affects YUV formats. */ + drm_plane_create_color_properties( + plane, + BIT(DRM_COLOR_YCBCR_BT601) | + BIT(DRM_COLOR_YCBCR_BT709), + BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) | + BIT(DRM_COLOR_YCBCR_FULL_RANGE), + DRM_COLOR_YCBCR_BT709, DRM_COLOR_YCBCR_LIMITED_RANGE); + } + drm_plane_helper_add(plane, &dm_plane_helper_funcs); /* Create (reset) the plane state */ if (plane->funcs->reset) plane->funcs->reset(plane); - - return res; + return 0; } static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, @@ -3900,7 +4388,7 @@ static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, goto fail; cursor_plane->type = DRM_PLANE_TYPE_CURSOR; - res = amdgpu_dm_plane_init(dm, cursor_plane, 0); + res = amdgpu_dm_plane_init(dm, cursor_plane, 0, NULL); acrtc = kzalloc(sizeof(struct amdgpu_crtc), GFP_KERNEL); if (!acrtc) @@ -4334,6 +4822,8 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm, DRM_ERROR("Failed to create debugfs for connector"); goto out_free; } + aconnector->debugfs_dpcd_address = 0; + aconnector->debugfs_dpcd_size = 0; #endif if (connector_type == DRM_MODE_CONNECTOR_DisplayPort @@ -4473,9 +4963,13 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc, x = plane->state->crtc_x; y = plane->state->crtc_y; - /* avivo cursor are offset into the total surface */ - x += crtc->primary->state->src_x >> 16; - y += crtc->primary->state->src_y >> 16; + + if (crtc->primary->state) { + /* avivo cursor are offset into the total surface */ + x += crtc->primary->state->src_x >> 16; + y += crtc->primary->state->src_y >> 16; + } + if (x < 0) { xorigin = min(-x, amdgpu_crtc->max_cursor_width - 1); x = 0; @@ -4582,9 +5076,10 @@ static void update_freesync_state_on_stream( struct dc_plane_state *surface, u32 flip_timestamp_in_us) { - struct mod_vrr_params vrr_params = new_crtc_state->vrr_params; + struct mod_vrr_params vrr_params; struct dc_info_packet vrr_infopacket = {0}; - struct mod_freesync_config config = new_crtc_state->freesync_config; + struct amdgpu_device *adev = dm->adev; + unsigned long flags; if (!new_stream) return; @@ -4597,19 +5092,8 @@ static void update_freesync_state_on_stream( if (!new_stream->timing.h_total || !new_stream->timing.v_total) return; - if (new_crtc_state->vrr_supported && - config.min_refresh_in_uhz && - config.max_refresh_in_uhz) { - config.state = new_crtc_state->base.vrr_enabled ? - VRR_STATE_ACTIVE_VARIABLE : - VRR_STATE_INACTIVE; - } else { - config.state = VRR_STATE_UNSUPPORTED; - } - - mod_freesync_build_vrr_params(dm->freesync_module, - new_stream, - &config, &vrr_params); + spin_lock_irqsave(&adev->ddev->event_lock, flags); + vrr_params = new_crtc_state->vrr_params; if (surface) { mod_freesync_handle_preflip( @@ -4618,6 +5102,12 @@ static void update_freesync_state_on_stream( new_stream, flip_timestamp_in_us, &vrr_params); + + if (adev->family < AMDGPU_FAMILY_AI && + amdgpu_dm_vrr_active(new_crtc_state)) { + mod_freesync_handle_v_update(dm->freesync_module, + new_stream, &vrr_params); + } } mod_freesync_build_vrr_infopacket( @@ -4649,6 +5139,100 @@ static void update_freesync_state_on_stream( new_crtc_state->base.crtc->base.id, (int)new_crtc_state->base.vrr_enabled, (int)vrr_params.state); + + spin_unlock_irqrestore(&adev->ddev->event_lock, flags); +} + +static void pre_update_freesync_state_on_stream( + struct amdgpu_display_manager *dm, + struct dm_crtc_state *new_crtc_state) +{ + struct dc_stream_state *new_stream = new_crtc_state->stream; + struct mod_vrr_params vrr_params; + struct mod_freesync_config config = new_crtc_state->freesync_config; + struct amdgpu_device *adev = dm->adev; + unsigned long flags; + + if (!new_stream) + return; + + /* + * TODO: Determine why min/max totals and vrefresh can be 0 here. + * For now it's sufficient to just guard against these conditions. + */ + if (!new_stream->timing.h_total || !new_stream->timing.v_total) + return; + + spin_lock_irqsave(&adev->ddev->event_lock, flags); + vrr_params = new_crtc_state->vrr_params; + + if (new_crtc_state->vrr_supported && + config.min_refresh_in_uhz && + config.max_refresh_in_uhz) { + config.state = new_crtc_state->base.vrr_enabled ? + VRR_STATE_ACTIVE_VARIABLE : + VRR_STATE_INACTIVE; + } else { + config.state = VRR_STATE_UNSUPPORTED; + } + + mod_freesync_build_vrr_params(dm->freesync_module, + new_stream, + &config, &vrr_params); + + new_crtc_state->freesync_timing_changed |= + (memcmp(&new_crtc_state->vrr_params.adjust, + &vrr_params.adjust, + sizeof(vrr_params.adjust)) != 0); + + new_crtc_state->vrr_params = vrr_params; + spin_unlock_irqrestore(&adev->ddev->event_lock, flags); +} + +static void amdgpu_dm_handle_vrr_transition(struct dm_crtc_state *old_state, + struct dm_crtc_state *new_state) +{ + bool old_vrr_active = amdgpu_dm_vrr_active(old_state); + bool new_vrr_active = amdgpu_dm_vrr_active(new_state); + + if (!old_vrr_active && new_vrr_active) { + /* Transition VRR inactive -> active: + * While VRR is active, we must not disable vblank irq, as a + * reenable after disable would compute bogus vblank/pflip + * timestamps if it likely happened inside display front-porch. + * + * We also need vupdate irq for the actual core vblank handling + * at end of vblank. + */ + dm_set_vupdate_irq(new_state->base.crtc, true); + drm_crtc_vblank_get(new_state->base.crtc); + DRM_DEBUG_DRIVER("%s: crtc=%u VRR off->on: Get vblank ref\n", + __func__, new_state->base.crtc->base.id); + } else if (old_vrr_active && !new_vrr_active) { + /* Transition VRR active -> inactive: + * Allow vblank irq disable again for fixed refresh rate. + */ + dm_set_vupdate_irq(new_state->base.crtc, false); + drm_crtc_vblank_put(new_state->base.crtc); + DRM_DEBUG_DRIVER("%s: crtc=%u VRR on->off: Drop vblank ref\n", + __func__, new_state->base.crtc->base.id); + } +} + +static void amdgpu_dm_commit_cursors(struct drm_atomic_state *state) +{ + struct drm_plane *plane; + struct drm_plane_state *old_plane_state, *new_plane_state; + int i; + + /* + * TODO: Make this per-stream so we don't issue redundant updates for + * commits with multiple streams. + */ + for_each_oldnew_plane_in_state(state, plane, old_plane_state, + new_plane_state, i) + if (plane->type == DRM_PLANE_TYPE_CURSOR) + handle_cursor_update(plane, old_plane_state); } static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, @@ -4656,7 +5240,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, struct drm_device *dev, struct amdgpu_display_manager *dm, struct drm_crtc *pcrtc, - bool *wait_for_vblank) + bool wait_for_vblank) { uint32_t i, r; uint64_t timestamp_ns; @@ -4668,42 +5252,42 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, struct dm_crtc_state *acrtc_state = to_dm_crtc_state(new_pcrtc_state); struct dm_crtc_state *dm_old_crtc_state = to_dm_crtc_state(drm_atomic_get_old_crtc_state(state, pcrtc)); - int flip_count = 0, planes_count = 0, vpos, hpos; + int planes_count = 0, vpos, hpos; unsigned long flags; struct amdgpu_bo *abo; - uint64_t tiling_flags, dcc_address; - uint32_t target, target_vblank; - uint64_t last_flip_vblank; - bool vrr_active = acrtc_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE; - - struct { - struct dc_surface_update surface_updates[MAX_SURFACES]; - struct dc_flip_addrs flip_addrs[MAX_SURFACES]; - struct dc_stream_update stream_update; - } *flip; - + uint64_t tiling_flags; + uint32_t target_vblank, last_flip_vblank; + bool vrr_active = amdgpu_dm_vrr_active(acrtc_state); + bool pflip_present = false; struct { struct dc_surface_update surface_updates[MAX_SURFACES]; struct dc_plane_info plane_infos[MAX_SURFACES]; struct dc_scaling_info scaling_infos[MAX_SURFACES]; + struct dc_flip_addrs flip_addrs[MAX_SURFACES]; struct dc_stream_update stream_update; - } *full; + } *bundle; - flip = kzalloc(sizeof(*flip), GFP_KERNEL); - full = kzalloc(sizeof(*full), GFP_KERNEL); + bundle = kzalloc(sizeof(*bundle), GFP_KERNEL); - if (!flip || !full) { - dm_error("Failed to allocate update bundles\n"); + if (!bundle) { + dm_error("Failed to allocate update bundle\n"); goto cleanup; } + /* + * Disable the cursor first if we're disabling all the planes. + * It'll remain on the screen after the planes are re-enabled + * if we don't. + */ + if (acrtc_state->active_planes == 0) + amdgpu_dm_commit_cursors(state); + /* update planes when needed */ for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) { struct drm_crtc *crtc = new_plane_state->crtc; struct drm_crtc_state *new_crtc_state; struct drm_framebuffer *fb = new_plane_state->fb; - struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(fb); - bool pflip_needed; + bool plane_needs_flip; struct dc_plane_state *dc_plane; struct dm_plane_state *dm_new_plane_state = to_dm_plane_state(new_plane_state); @@ -4718,122 +5302,96 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, if (!new_crtc_state->active) continue; - pflip_needed = old_plane_state->fb && - old_plane_state->fb != new_plane_state->fb; - dc_plane = dm_new_plane_state->dc_state; - if (pflip_needed) { - /* - * Assume even ONE crtc with immediate flip means - * entire can't wait for VBLANK - * TODO Check if it's correct - */ - if (new_pcrtc_state->pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC) - *wait_for_vblank = false; + bundle->surface_updates[planes_count].surface = dc_plane; + if (new_pcrtc_state->color_mgmt_changed) { + bundle->surface_updates[planes_count].gamma = dc_plane->gamma_correction; + bundle->surface_updates[planes_count].in_transfer_func = dc_plane->in_transfer_func; + } - /* - * TODO This might fail and hence better not used, wait - * explicitly on fences instead - * and in general should be called for - * blocking commit to as per framework helpers - */ - abo = gem_to_amdgpu_bo(fb->obj[0]); - r = amdgpu_bo_reserve(abo, true); - if (unlikely(r != 0)) - DRM_ERROR("failed to reserve buffer before flip\n"); + fill_dc_scaling_info(new_plane_state, + &bundle->scaling_infos[planes_count]); - /* - * Wait for all fences on this FB. Do limited wait to avoid - * deadlock during GPU reset when this fence will not signal - * but we hold reservation lock for the BO. - */ - r = reservation_object_wait_timeout_rcu(abo->tbo.resv, - true, false, - msecs_to_jiffies(5000)); - if (unlikely(r == 0)) - DRM_ERROR("Waiting for fences timed out."); + bundle->surface_updates[planes_count].scaling_info = + &bundle->scaling_infos[planes_count]; + plane_needs_flip = old_plane_state->fb && new_plane_state->fb; + pflip_present = pflip_present || plane_needs_flip; - amdgpu_bo_get_tiling_flags(abo, &tiling_flags); + if (!plane_needs_flip) { + planes_count += 1; + continue; + } - amdgpu_bo_unreserve(abo); + abo = gem_to_amdgpu_bo(fb->obj[0]); - flip->flip_addrs[flip_count].address.grph.addr.low_part = lower_32_bits(afb->address); - flip->flip_addrs[flip_count].address.grph.addr.high_part = upper_32_bits(afb->address); + /* + * Wait for all fences on this FB. Do limited wait to avoid + * deadlock during GPU reset when this fence will not signal + * but we hold reservation lock for the BO. + */ + r = reservation_object_wait_timeout_rcu(abo->tbo.resv, true, + false, + msecs_to_jiffies(5000)); + if (unlikely(r <= 0)) + DRM_ERROR("Waiting for fences timed out or interrupted!"); - dcc_address = get_dcc_address(afb->address, tiling_flags); - flip->flip_addrs[flip_count].address.grph.meta_addr.low_part = lower_32_bits(dcc_address); - flip->flip_addrs[flip_count].address.grph.meta_addr.high_part = upper_32_bits(dcc_address); + /* + * TODO This might fail and hence better not used, wait + * explicitly on fences instead + * and in general should be called for + * blocking commit to as per framework helpers + */ + r = amdgpu_bo_reserve(abo, true); + if (unlikely(r != 0)) + DRM_ERROR("failed to reserve buffer before flip\n"); - flip->flip_addrs[flip_count].flip_immediate = - (crtc->state->pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0; + amdgpu_bo_get_tiling_flags(abo, &tiling_flags); - timestamp_ns = ktime_get_ns(); - flip->flip_addrs[flip_count].flip_timestamp_in_us = div_u64(timestamp_ns, 1000); - flip->surface_updates[flip_count].flip_addr = &flip->flip_addrs[flip_count]; - flip->surface_updates[flip_count].surface = dc_plane; + amdgpu_bo_unreserve(abo); - if (!flip->surface_updates[flip_count].surface) { - DRM_ERROR("No surface for CRTC: id=%d\n", - acrtc_attach->crtc_id); - continue; - } + fill_dc_plane_info_and_addr( + dm->adev, new_plane_state, tiling_flags, + &bundle->plane_infos[planes_count], + &bundle->flip_addrs[planes_count].address); - if (plane == pcrtc->primary) - update_freesync_state_on_stream( - dm, - acrtc_state, - acrtc_state->stream, - dc_plane, - flip->flip_addrs[flip_count].flip_timestamp_in_us); + bundle->surface_updates[planes_count].plane_info = + &bundle->plane_infos[planes_count]; - DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x\n", - __func__, - flip->flip_addrs[flip_count].address.grph.addr.high_part, - flip->flip_addrs[flip_count].address.grph.addr.low_part); + bundle->flip_addrs[planes_count].flip_immediate = + (crtc->state->pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0; - flip_count += 1; - } + timestamp_ns = ktime_get_ns(); + bundle->flip_addrs[planes_count].flip_timestamp_in_us = div_u64(timestamp_ns, 1000); + bundle->surface_updates[planes_count].flip_addr = &bundle->flip_addrs[planes_count]; + bundle->surface_updates[planes_count].surface = dc_plane; - full->surface_updates[planes_count].surface = dc_plane; - if (new_pcrtc_state->color_mgmt_changed) { - full->surface_updates[planes_count].gamma = dc_plane->gamma_correction; - full->surface_updates[planes_count].in_transfer_func = dc_plane->in_transfer_func; + if (!bundle->surface_updates[planes_count].surface) { + DRM_ERROR("No surface for CRTC: id=%d\n", + acrtc_attach->crtc_id); + continue; } + if (plane == pcrtc->primary) + update_freesync_state_on_stream( + dm, + acrtc_state, + acrtc_state->stream, + dc_plane, + bundle->flip_addrs[planes_count].flip_timestamp_in_us); - full->scaling_infos[planes_count].scaling_quality = dc_plane->scaling_quality; - full->scaling_infos[planes_count].src_rect = dc_plane->src_rect; - full->scaling_infos[planes_count].dst_rect = dc_plane->dst_rect; - full->scaling_infos[planes_count].clip_rect = dc_plane->clip_rect; - full->surface_updates[planes_count].scaling_info = &full->scaling_infos[planes_count]; - - - full->plane_infos[planes_count].color_space = dc_plane->color_space; - full->plane_infos[planes_count].format = dc_plane->format; - full->plane_infos[planes_count].plane_size = dc_plane->plane_size; - full->plane_infos[planes_count].rotation = dc_plane->rotation; - full->plane_infos[planes_count].horizontal_mirror = dc_plane->horizontal_mirror; - full->plane_infos[planes_count].stereo_format = dc_plane->stereo_format; - full->plane_infos[planes_count].tiling_info = dc_plane->tiling_info; - full->plane_infos[planes_count].visible = dc_plane->visible; - full->plane_infos[planes_count].per_pixel_alpha = dc_plane->per_pixel_alpha; - full->plane_infos[planes_count].dcc = dc_plane->dcc; - full->surface_updates[planes_count].plane_info = &full->plane_infos[planes_count]; + DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x\n", + __func__, + bundle->flip_addrs[planes_count].address.grph.addr.high_part, + bundle->flip_addrs[planes_count].address.grph.addr.low_part); planes_count += 1; } - /* - * TODO: For proper atomic behaviour, we should be calling into DC once with - * all the changes. However, DC refuses to do pageflips and non-pageflip - * changes in the same call. Change DC to respect atomic behaviour, - * hopefully eliminating dc_*_update structs in their entirety. - */ - if (flip_count) { + if (pflip_present) { if (!vrr_active) { /* Use old throttling in non-vrr fixed refresh rate mode * to keep flip scheduling based on target vblank counts @@ -4841,7 +5399,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, * clients using the GLX_OML_sync_control extension or * DRI3/Present extension with defined target_msc. */ - last_flip_vblank = drm_crtc_vblank_count(pcrtc); + last_flip_vblank = amdgpu_get_vblank_counter_kms(dm->ddev, acrtc_attach->crtc_id); } else { /* For variable refresh rate mode only: @@ -4857,11 +5415,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, spin_unlock_irqrestore(&pcrtc->dev->event_lock, flags); } - target = (uint32_t)last_flip_vblank + *wait_for_vblank; - - /* Prepare wait for target vblank early - before the fence-waits */ - target_vblank = target - (uint32_t)drm_crtc_vblank_count(pcrtc) + - amdgpu_get_vblank_counter_kms(pcrtc->dev, acrtc_attach->crtc_id); + target_vblank = last_flip_vblank + wait_for_vblank; /* * Wait until we're out of the vertical blank period before the one @@ -4892,54 +5446,102 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, if (acrtc_state->stream) { if (acrtc_state->freesync_timing_changed) - flip->stream_update.adjust = + bundle->stream_update.adjust = &acrtc_state->stream->adjust; if (acrtc_state->freesync_vrr_info_changed) - flip->stream_update.vrr_infopacket = + bundle->stream_update.vrr_infopacket = &acrtc_state->stream->vrr_infopacket; } - - mutex_lock(&dm->dc_lock); - dc_commit_updates_for_stream(dm->dc, - flip->surface_updates, - flip_count, - acrtc_state->stream, - &flip->stream_update, - dc_state); - mutex_unlock(&dm->dc_lock); } - if (planes_count) { + /* Update the planes if changed or disable if we don't have any. */ + if (planes_count || acrtc_state->active_planes == 0) { if (new_pcrtc_state->mode_changed) { - full->stream_update.src = acrtc_state->stream->src; - full->stream_update.dst = acrtc_state->stream->dst; + bundle->stream_update.src = acrtc_state->stream->src; + bundle->stream_update.dst = acrtc_state->stream->dst; } if (new_pcrtc_state->color_mgmt_changed) - full->stream_update.out_transfer_func = acrtc_state->stream->out_transfer_func; + bundle->stream_update.out_transfer_func = acrtc_state->stream->out_transfer_func; acrtc_state->stream->abm_level = acrtc_state->abm_level; if (acrtc_state->abm_level != dm_old_crtc_state->abm_level) - full->stream_update.abm_level = &acrtc_state->abm_level; + bundle->stream_update.abm_level = &acrtc_state->abm_level; mutex_lock(&dm->dc_lock); dc_commit_updates_for_stream(dm->dc, - full->surface_updates, + bundle->surface_updates, planes_count, acrtc_state->stream, - &full->stream_update, + &bundle->stream_update, dc_state); mutex_unlock(&dm->dc_lock); } - for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) - if (plane->type == DRM_PLANE_TYPE_CURSOR) - handle_cursor_update(plane, old_plane_state); + /* + * Update cursor state *after* programming all the planes. + * This avoids redundant programming in the case where we're going + * to be disabling a single plane - those pipes are being disabled. + */ + if (acrtc_state->active_planes) + amdgpu_dm_commit_cursors(state); cleanup: - kfree(flip); - kfree(full); + kfree(bundle); +} + +/* + * Enable interrupts on CRTCs that are newly active, undergone + * a modeset, or have active planes again. + * + * Done in two passes, based on the for_modeset flag: + * Pass 1: For CRTCs going through modeset + * Pass 2: For CRTCs going from 0 to n active planes + * + * Interrupts can only be enabled after the planes are programmed, + * so this requires a two-pass approach since we don't want to + * just defer the interrupts until after commit planes every time. + */ +static void amdgpu_dm_enable_crtc_interrupts(struct drm_device *dev, + struct drm_atomic_state *state, + bool for_modeset) +{ + struct amdgpu_device *adev = dev->dev_private; + struct drm_crtc *crtc; + struct drm_crtc_state *old_crtc_state, *new_crtc_state; + int i; + + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, + new_crtc_state, i) { + struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); + struct dm_crtc_state *dm_new_crtc_state = + to_dm_crtc_state(new_crtc_state); + struct dm_crtc_state *dm_old_crtc_state = + to_dm_crtc_state(old_crtc_state); + bool modeset = drm_atomic_crtc_needs_modeset(new_crtc_state); + bool run_pass; + + run_pass = (for_modeset && modeset) || + (!for_modeset && !modeset && + !dm_old_crtc_state->interrupts_enabled); + + if (!run_pass) + continue; + + if (!dm_new_crtc_state->interrupts_enabled) + continue; + + manage_dm_interrupts(adev, acrtc, true); + +#ifdef CONFIG_DEBUG_FS + /* The stream has changed so CRC capture needs to re-enabled. */ + if (dm_new_crtc_state->crc_enabled) { + dm_new_crtc_state->crc_enabled = false; + amdgpu_dm_crtc_set_crc_source(crtc, "auto"); + } +#endif + } } /* @@ -4953,8 +5555,7 @@ cleanup: static void amdgpu_dm_crtc_copy_transient_flags(struct drm_crtc_state *crtc_state, struct dc_stream_state *stream_state) { - stream_state->mode_changed = - crtc_state->mode_changed || crtc_state->active_changed; + stream_state->mode_changed = drm_atomic_crtc_needs_modeset(crtc_state); } static int amdgpu_dm_atomic_commit(struct drm_device *dev, @@ -4967,30 +5568,41 @@ static int amdgpu_dm_atomic_commit(struct drm_device *dev, int i; /* - * We evade vblanks and pflips on crtc that - * should be changed. We do it here to flush & disable - * interrupts before drm_swap_state is called in drm_atomic_helper_commit - * it will update crtc->dm_crtc_state->stream pointer which is used in - * the ISRs. + * We evade vblank and pflip interrupts on CRTCs that are undergoing + * a modeset, being disabled, or have no active planes. + * + * It's done in atomic commit rather than commit tail for now since + * some of these interrupt handlers access the current CRTC state and + * potentially the stream pointer itself. + * + * Since the atomic state is swapped within atomic commit and not within + * commit tail this would leave to new state (that hasn't been committed yet) + * being accesssed from within the handlers. + * + * TODO: Fix this so we can do this in commit tail and not have to block + * in atomic check. */ for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { struct dm_crtc_state *dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); struct dm_crtc_state *dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); - if (drm_atomic_crtc_needs_modeset(new_crtc_state) - && dm_old_crtc_state->stream) { + if (dm_old_crtc_state->interrupts_enabled && + (!dm_new_crtc_state->interrupts_enabled || + drm_atomic_crtc_needs_modeset(new_crtc_state))) { /* - * If the stream is removed and CRC capture was - * enabled on the CRTC the extra vblank reference - * needs to be dropped since CRC capture will be - * disabled. + * Drop the extra vblank reference added by CRC + * capture if applicable. */ - if (!dm_new_crtc_state->stream - && dm_new_crtc_state->crc_enabled) { + if (dm_new_crtc_state->crc_enabled) drm_crtc_vblank_put(crtc); + + /* + * Only keep CRC capture enabled if there's + * still a stream for the CRTC. + */ + if (!dm_new_crtc_state->stream) dm_new_crtc_state->crc_enabled = false; - } manage_dm_interrupts(adev, acrtc, false); } @@ -5037,7 +5649,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) dc_state = dm_state->context; } else { /* No state changes, retain current state. */ - dc_state_temp = dc_create_state(); + dc_state_temp = dc_create_state(dm->dc); ASSERT(dc_state_temp); dc_state = dc_state_temp; dc_resource_state_copy_construct_current(dm->dc, dc_state); @@ -5206,45 +5818,41 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) mutex_unlock(&dm->dc_lock); } + /* Count number of newly disabled CRTCs for dropping PM refs later. */ for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, - new_crtc_state, i) { - /* - * loop to enable interrupts on newly arrived crtc - */ - struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); - bool modeset_needed; - + new_crtc_state, i) { if (old_crtc_state->active && !new_crtc_state->active) crtc_disable_count++; dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); - modeset_needed = modeset_required( - new_crtc_state, - dm_new_crtc_state->stream, - dm_old_crtc_state->stream); - if (dm_new_crtc_state->stream == NULL || !modeset_needed) - continue; - - manage_dm_interrupts(adev, acrtc, true); + /* Update freesync active state. */ + pre_update_freesync_state_on_stream(dm, dm_new_crtc_state); -#ifdef CONFIG_DEBUG_FS - /* The stream has changed so CRC capture needs to re-enabled. */ - if (dm_new_crtc_state->crc_enabled) - amdgpu_dm_crtc_set_crc_source(crtc, "auto"); -#endif + /* Handle vrr on->off / off->on transitions */ + amdgpu_dm_handle_vrr_transition(dm_old_crtc_state, + dm_new_crtc_state); } + /* Enable interrupts for CRTCs going through a modeset. */ + amdgpu_dm_enable_crtc_interrupts(dev, state, true); + + for_each_new_crtc_in_state(state, crtc, new_crtc_state, j) + if (new_crtc_state->pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC) + wait_for_vblank = false; + /* update planes when needed per crtc*/ for_each_new_crtc_in_state(state, crtc, new_crtc_state, j) { dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); if (dm_new_crtc_state->stream) amdgpu_dm_commit_planes(state, dc_state, dev, - dm, crtc, &wait_for_vblank); + dm, crtc, wait_for_vblank); } + /* Enable interrupts for CRTCs going from 0 to n active planes. */ + amdgpu_dm_enable_crtc_interrupts(dev, state, false); /* * send vblank event on all events not handled in flip and @@ -5484,21 +6092,12 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, struct amdgpu_dm_connector *aconnector = NULL; struct drm_connector_state *drm_new_conn_state = NULL, *drm_old_conn_state = NULL; struct dm_connector_state *dm_new_conn_state = NULL, *dm_old_conn_state = NULL; - struct drm_plane_state *new_plane_state = NULL; new_stream = NULL; dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); acrtc = to_amdgpu_crtc(crtc); - - new_plane_state = drm_atomic_get_new_plane_state(state, new_crtc_state->crtc->primary); - - if (new_crtc_state->enable && new_plane_state && !new_plane_state->fb) { - ret = -EINVAL; - goto fail; - } - aconnector = amdgpu_dm_find_first_crtc_matching_connector(state, crtc); /* TODO This hack should go away */ @@ -5661,6 +6260,9 @@ skip_modeset: update_stream_scaling_settings( &new_crtc_state->mode, dm_new_conn_state, dm_new_crtc_state->stream); + /* ABM settings */ + dm_new_crtc_state->abm_level = dm_new_conn_state->abm_level; + /* * Color management settings. We also update color properties * when a modeset is needed, to ensure it gets reprogrammed. @@ -5685,6 +6287,69 @@ fail: return ret; } +static bool should_reset_plane(struct drm_atomic_state *state, + struct drm_plane *plane, + struct drm_plane_state *old_plane_state, + struct drm_plane_state *new_plane_state) +{ + struct drm_plane *other; + struct drm_plane_state *old_other_state, *new_other_state; + struct drm_crtc_state *new_crtc_state; + int i; + + /* + * TODO: Remove this hack once the checks below are sufficient + * enough to determine when we need to reset all the planes on + * the stream. + */ + if (state->allow_modeset) + return true; + + /* Exit early if we know that we're adding or removing the plane. */ + if (old_plane_state->crtc != new_plane_state->crtc) + return true; + + /* old crtc == new_crtc == NULL, plane not in context. */ + if (!new_plane_state->crtc) + return false; + + new_crtc_state = + drm_atomic_get_new_crtc_state(state, new_plane_state->crtc); + + if (!new_crtc_state) + return true; + + if (drm_atomic_crtc_needs_modeset(new_crtc_state)) + return true; + + /* + * If there are any new primary or overlay planes being added or + * removed then the z-order can potentially change. To ensure + * correct z-order and pipe acquisition the current DC architecture + * requires us to remove and recreate all existing planes. + * + * TODO: Come up with a more elegant solution for this. + */ + for_each_oldnew_plane_in_state(state, other, old_other_state, new_other_state, i) { + if (other->type == DRM_PLANE_TYPE_CURSOR) + continue; + + if (old_other_state->crtc != new_plane_state->crtc && + new_other_state->crtc != new_plane_state->crtc) + continue; + + if (old_other_state->crtc != new_other_state->crtc) + return true; + + /* TODO: Remove this once we can handle fast format changes. */ + if (old_other_state->fb && new_other_state->fb && + old_other_state->fb->format != new_other_state->fb->format) + return true; + } + + return false; +} + static int dm_update_plane_state(struct dc *dc, struct drm_atomic_state *state, struct drm_plane *plane, @@ -5699,8 +6364,7 @@ static int dm_update_plane_state(struct dc *dc, struct drm_crtc_state *old_crtc_state, *new_crtc_state; struct dm_crtc_state *dm_new_crtc_state, *dm_old_crtc_state; struct dm_plane_state *dm_new_plane_state, *dm_old_plane_state; - /* TODO return page_flip_needed() function */ - bool pflip_needed = !state->allow_modeset; + bool needs_reset; int ret = 0; @@ -5713,10 +6377,12 @@ static int dm_update_plane_state(struct dc *dc, if (plane->type == DRM_PLANE_TYPE_CURSOR) return 0; + needs_reset = should_reset_plane(state, plane, old_plane_state, + new_plane_state); + /* Remove any changed/removed planes */ if (!enable) { - if (pflip_needed && - plane->type != DRM_PLANE_TYPE_OVERLAY) + if (!needs_reset) return 0; if (!old_plane_crtc) @@ -5767,7 +6433,7 @@ static int dm_update_plane_state(struct dc *dc, if (!dm_new_crtc_state->stream) return 0; - if (pflip_needed && plane->type != DRM_PLANE_TYPE_OVERLAY) + if (!needs_reset) return 0; WARN_ON(dm_new_plane_state->dc_state); @@ -5779,7 +6445,7 @@ static int dm_update_plane_state(struct dc *dc, DRM_DEBUG_DRIVER("Enabling DRM plane: %d on DRM crtc %d\n", plane->base.id, new_plane_crtc->base.id); - ret = fill_plane_attributes( + ret = fill_dc_plane_attributes( new_plane_crtc->dev->dev_private, dc_new_plane_state, new_plane_state, @@ -5827,10 +6493,11 @@ static int dm_update_plane_state(struct dc *dc, } static int -dm_determine_update_type_for_commit(struct dc *dc, +dm_determine_update_type_for_commit(struct amdgpu_display_manager *dm, struct drm_atomic_state *state, enum surface_update_type *out_type) { + struct dc *dc = dm->dc; struct dm_atomic_state *dm_state = NULL, *old_dm_state = NULL; int i, j, num_plane, ret = 0; struct drm_plane_state *old_plane_state, *new_plane_state; @@ -5844,21 +6511,22 @@ dm_determine_update_type_for_commit(struct dc *dc, struct dc_stream_status *status = NULL; struct dc_surface_update *updates; - struct dc_plane_state *surface; enum surface_update_type update_type = UPDATE_TYPE_FAST; updates = kcalloc(MAX_SURFACES, sizeof(*updates), GFP_KERNEL); - surface = kcalloc(MAX_SURFACES, sizeof(*surface), GFP_KERNEL); - if (!updates || !surface) { - DRM_ERROR("Plane or surface update failed to allocate"); + if (!updates) { + DRM_ERROR("Failed to allocate plane updates\n"); /* Set type to FULL to avoid crashing in DC*/ update_type = UPDATE_TYPE_FULL; goto cleanup; } for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { - struct dc_stream_update stream_update = { 0 }; + struct dc_scaling_info scaling_info; + struct dc_stream_update stream_update; + + memset(&stream_update, 0, sizeof(stream_update)); new_dm_crtc_state = to_dm_crtc_state(new_crtc_state); old_dm_crtc_state = to_dm_crtc_state(old_crtc_state); @@ -5886,23 +6554,12 @@ dm_determine_update_type_for_commit(struct dc *dc, goto cleanup; } - if (!state->allow_modeset) - continue; - if (crtc != new_plane_crtc) continue; - updates[num_plane].surface = &surface[num_plane]; + updates[num_plane].surface = new_dm_plane_state->dc_state; if (new_crtc_state->mode_changed) { - updates[num_plane].surface->src_rect = - new_dm_plane_state->dc_state->src_rect; - updates[num_plane].surface->dst_rect = - new_dm_plane_state->dc_state->dst_rect; - updates[num_plane].surface->rotation = - new_dm_plane_state->dc_state->rotation; - updates[num_plane].surface->in_transfer_func = - new_dm_plane_state->dc_state->in_transfer_func; stream_update.dst = new_dm_crtc_state->stream->dst; stream_update.src = new_dm_crtc_state->stream->src; } @@ -5918,6 +6575,13 @@ dm_determine_update_type_for_commit(struct dc *dc, new_dm_crtc_state->stream->out_transfer_func; } + ret = fill_dc_scaling_info(new_plane_state, + &scaling_info); + if (ret) + goto cleanup; + + updates[num_plane].scaling_info = &scaling_info; + num_plane++; } @@ -5937,8 +6601,14 @@ dm_determine_update_type_for_commit(struct dc *dc, status = dc_stream_get_status_from_state(old_dm_state->context, new_dm_crtc_state->stream); + /* + * TODO: DC modifies the surface during this call so we need + * to lock here - find a way to do this without locking. + */ + mutex_lock(&dm->dc_lock); update_type = dc_check_update_surfaces_for_stream(dc, updates, num_plane, &stream_update, status); + mutex_unlock(&dm->dc_lock); if (update_type > UPDATE_TYPE_MED) { update_type = UPDATE_TYPE_FULL; @@ -5948,7 +6618,6 @@ dm_determine_update_type_for_commit(struct dc *dc, cleanup: kfree(updates); - kfree(surface); *out_type = update_type; return ret; @@ -6132,7 +6801,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, lock_and_validation_needed = true; } - ret = dm_determine_update_type_for_commit(dc, state, &update_type); + ret = dm_determine_update_type_for_commit(&adev->dm, state, &update_type); if (ret) goto fail; @@ -6147,9 +6816,6 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, */ if (lock_and_validation_needed && overall_update_type <= UPDATE_TYPE_FAST) WARN(1, "Global lock should be Set, overall_update_type should be UPDATE_TYPE_MED or UPDATE_TYPE_FULL"); - else if (!lock_and_validation_needed && overall_update_type > UPDATE_TYPE_FAST) - WARN(1, "Global lock should NOT be set, overall_update_type should be UPDATE_TYPE_FAST"); - if (overall_update_type > UPDATE_TYPE_FAST) { ret = dm_atomic_get_state(state, &dm_state); @@ -6160,7 +6826,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, if (ret) goto fail; - if (dc_validate_global_state(dc, dm_state->context) != DC_OK) { + if (dc_validate_global_state(dc, dm_state->context, false) != DC_OK) { ret = -EINVAL; goto fail; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index fbd161ddc3f4..978ff14a7d45 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -132,8 +132,6 @@ struct amdgpu_display_manager { */ struct drm_private_obj atomic_obj; - struct drm_modeset_lock atomic_obj_lock; - /** * @dc_lock: * @@ -184,6 +182,15 @@ struct amdgpu_display_manager { struct common_irq_params vblank_params[DC_IRQ_SOURCE_VBLANK6 - DC_IRQ_SOURCE_VBLANK1 + 1]; + /** + * @vupdate_params: + * + * Vertical update IRQ parameters, passed to registered handlers when + * triggered. + */ + struct common_irq_params + vupdate_params[DC_IRQ_SOURCE_VUPDATE6 - DC_IRQ_SOURCE_VUPDATE1 + 1]; + spinlock_t irq_handler_list_table_lock; struct backlight_device *backlight_dev; @@ -240,6 +247,10 @@ struct amdgpu_dm_connector { struct mutex hpd_lock; bool fake_enable; +#ifdef CONFIG_DEBUG_FS + uint32_t debugfs_dpcd_address; + uint32_t debugfs_dpcd_size; +#endif }; #define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base) @@ -260,6 +271,9 @@ struct dm_crtc_state { struct drm_crtc_state base; struct dc_stream_state *stream; + int active_planes; + bool interrupts_enabled; + int crc_skip_count; bool crc_enabled; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 216e48cec716..7258c992a2bf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -126,46 +126,51 @@ int amdgpu_dm_set_regamma_lut(struct dm_crtc_state *crtc) crtc->base.state->dev->dev_private; struct drm_color_lut *lut; uint32_t lut_size; - struct dc_gamma *gamma; + struct dc_gamma *gamma = NULL; enum dc_transfer_func_type old_type = stream->out_transfer_func->type; bool ret; - if (!blob) { + if (!blob && adev->asic_type <= CHIP_RAVEN) { /* By default, use the SRGB predefined curve.*/ stream->out_transfer_func->type = TF_TYPE_PREDEFINED; stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB; return 0; } - lut = (struct drm_color_lut *)blob->data; - lut_size = blob->length / sizeof(struct drm_color_lut); - - gamma = dc_create_gamma(); - if (!gamma) - return -ENOMEM; + if (blob) { + lut = (struct drm_color_lut *)blob->data; + lut_size = blob->length / sizeof(struct drm_color_lut); + + gamma = dc_create_gamma(); + if (!gamma) + return -ENOMEM; + + gamma->num_entries = lut_size; + if (gamma->num_entries == MAX_COLOR_LEGACY_LUT_ENTRIES) + gamma->type = GAMMA_RGB_256; + else if (gamma->num_entries == MAX_COLOR_LUT_ENTRIES) + gamma->type = GAMMA_CS_TFM_1D; + else { + /* Invalid lut size */ + dc_gamma_release(&gamma); + return -EINVAL; + } - gamma->num_entries = lut_size; - if (gamma->num_entries == MAX_COLOR_LEGACY_LUT_ENTRIES) - gamma->type = GAMMA_RGB_256; - else if (gamma->num_entries == MAX_COLOR_LUT_ENTRIES) - gamma->type = GAMMA_CS_TFM_1D; - else { - /* Invalid lut size */ - dc_gamma_release(&gamma); - return -EINVAL; + /* Convert drm_lut into dc_gamma */ + __drm_lut_to_dc_gamma(lut, gamma, gamma->type == GAMMA_RGB_256); } - /* Convert drm_lut into dc_gamma */ - __drm_lut_to_dc_gamma(lut, gamma, gamma->type == GAMMA_RGB_256); - - /* Call color module to translate into something DC understands. Namely - * a transfer function. + /* predefined gamma ROM only exist for RAVEN and pre-RAVEN ASIC, + * set canRomBeUsed accordingly */ stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS; ret = mod_color_calculate_regamma_params(stream->out_transfer_func, - gamma, true, adev->asic_type <= CHIP_RAVEN, NULL); - dc_gamma_release(&gamma); + gamma, true, adev->asic_type <= CHIP_RAVEN, NULL); + + if (gamma) + dc_gamma_release(&gamma); + if (!ret) { stream->out_transfer_func->type = old_type; DRM_ERROR("Out of memory when calculating regamma params\n"); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 4a55cde027cf..1d5fc5ad3bee 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -29,6 +29,7 @@ #include "amdgpu.h" #include "amdgpu_dm.h" #include "amdgpu_dm_debugfs.h" +#include "dm_helpers.h" /* function description * get/ set DP configuration: lane_count, link_rate, spread_spectrum @@ -688,8 +689,131 @@ static int vrr_range_show(struct seq_file *m, void *data) return 0; } + +/* function description + * + * generic SDP message access for testing + * + * debugfs sdp_message is located at /syskernel/debug/dri/0/DP-x + * + * SDP header + * Hb0 : Secondary-Data Packet ID + * Hb1 : Secondary-Data Packet type + * Hb2 : Secondary-Data-packet-specific header, Byte 0 + * Hb3 : Secondary-Data-packet-specific header, Byte 1 + * + * for using custom sdp message: input 4 bytes SDP header and 32 bytes raw data + */ +static ssize_t dp_sdp_message_debugfs_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + int r; + uint8_t data[36]; + struct amdgpu_dm_connector *connector = file_inode(f)->i_private; + struct dm_crtc_state *acrtc_state; + uint32_t write_size = 36; + + if (connector->base.status != connector_status_connected) + return -ENODEV; + + if (size == 0) + return 0; + + acrtc_state = to_dm_crtc_state(connector->base.state->crtc->state); + + r = copy_from_user(data, buf, write_size); + + write_size -= r; + + dc_stream_send_dp_sdp(acrtc_state->stream, data, write_size); + + return write_size; +} + DEFINE_SHOW_ATTRIBUTE(vrr_range); +static ssize_t dp_dpcd_address_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + int r; + struct amdgpu_dm_connector *connector = file_inode(f)->i_private; + + if (size < sizeof(connector->debugfs_dpcd_address)) + return 0; + + r = copy_from_user(&connector->debugfs_dpcd_address, + buf, sizeof(connector->debugfs_dpcd_address)); + + return size - r; +} + +static ssize_t dp_dpcd_size_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + int r; + struct amdgpu_dm_connector *connector = file_inode(f)->i_private; + + if (size < sizeof(connector->debugfs_dpcd_size)) + return 0; + + r = copy_from_user(&connector->debugfs_dpcd_size, + buf, sizeof(connector->debugfs_dpcd_size)); + + if (connector->debugfs_dpcd_size > 256) + connector->debugfs_dpcd_size = 0; + + return size - r; +} + +static ssize_t dp_dpcd_data_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + int r; + char *data; + struct amdgpu_dm_connector *connector = file_inode(f)->i_private; + struct dc_link *link = connector->dc_link; + uint32_t write_size = connector->debugfs_dpcd_size; + + if (size < write_size) + return 0; + + data = kzalloc(write_size, GFP_KERNEL); + if (!data) + return 0; + + r = copy_from_user(data, buf, write_size); + + dm_helpers_dp_write_dpcd(link->ctx, link, + connector->debugfs_dpcd_address, data, write_size - r); + kfree(data); + return write_size - r; +} + +static ssize_t dp_dpcd_data_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + int r; + char *data; + struct amdgpu_dm_connector *connector = file_inode(f)->i_private; + struct dc_link *link = connector->dc_link; + uint32_t read_size = connector->debugfs_dpcd_size; + + if (size < read_size) + return 0; + + data = kzalloc(read_size, GFP_KERNEL); + if (!data) + return 0; + + dm_helpers_dp_read_dpcd(link->ctx, link, + connector->debugfs_dpcd_address, data, read_size); + + r = copy_to_user(buf, data, read_size); + + kfree(data); + return read_size - r; +} + static const struct file_operations dp_link_settings_debugfs_fops = { .owner = THIS_MODULE, .read = dp_link_settings_read, @@ -710,6 +834,31 @@ static const struct file_operations dp_phy_test_pattern_fops = { .llseek = default_llseek }; +static const struct file_operations sdp_message_fops = { + .owner = THIS_MODULE, + .write = dp_sdp_message_debugfs_write, + .llseek = default_llseek +}; + +static const struct file_operations dp_dpcd_address_debugfs_fops = { + .owner = THIS_MODULE, + .write = dp_dpcd_address_write, + .llseek = default_llseek +}; + +static const struct file_operations dp_dpcd_size_debugfs_fops = { + .owner = THIS_MODULE, + .write = dp_dpcd_size_write, + .llseek = default_llseek +}; + +static const struct file_operations dp_dpcd_data_debugfs_fops = { + .owner = THIS_MODULE, + .read = dp_dpcd_data_read, + .write = dp_dpcd_data_write, + .llseek = default_llseek +}; + static const struct { char *name; const struct file_operations *fops; @@ -717,7 +866,11 @@ static const struct { {"link_settings", &dp_link_settings_debugfs_fops}, {"phy_settings", &dp_phy_settings_debugfs_fop}, {"test_pattern", &dp_phy_test_pattern_fops}, - {"vrr_range", &vrr_range_fops} + {"vrr_range", &vrr_range_fops}, + {"sdp_message", &sdp_message_fops}, + {"aux_dpcd_address", &dp_dpcd_address_debugfs_fops}, + {"aux_dpcd_size", &dp_dpcd_size_debugfs_fops}, + {"aux_dpcd_data", &dp_dpcd_data_debugfs_fops} }; int connector_debugfs_init(struct amdgpu_dm_connector *connector) @@ -842,6 +995,35 @@ static const struct drm_info_list amdgpu_dm_debugfs_list[] = { {"amdgpu_target_backlight_pwm", &target_backlight_read}, }; +/* + * Sets the DC visual confirm debug option from the given string. + * Example usage: echo 1 > /sys/kernel/debug/dri/0/amdgpu_visual_confirm + */ +static int visual_confirm_set(void *data, u64 val) +{ + struct amdgpu_device *adev = data; + + adev->dm.dc->debug.visual_confirm = (enum visual_confirm)val; + + return 0; +} + +/* + * Reads the DC visual confirm debug option value into the given buffer. + * Example usage: cat /sys/kernel/debug/dri/0/amdgpu_dm_visual_confirm + */ +static int visual_confirm_get(void *data, u64 *val) +{ + struct amdgpu_device *adev = data; + + *val = adev->dm.dc->debug.visual_confirm; + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(visual_confirm_fops, visual_confirm_get, + visual_confirm_set, "%llu\n"); + int dtn_debugfs_init(struct amdgpu_device *adev) { static const struct file_operations dtn_log_fops = { @@ -867,5 +1049,13 @@ int dtn_debugfs_init(struct amdgpu_device *adev) adev, &dtn_log_fops); - return PTR_ERR_OR_ZERO(ent); + if (IS_ERR(ent)) + return PTR_ERR(ent); + + ent = debugfs_create_file_unsafe("amdgpu_dm_visual_confirm", 0644, root, + adev, &visual_confirm_fops); + if (IS_ERR(ent)) + return PTR_ERR(ent); + + return 0; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index b39766bd2840..e6cd67342df8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -264,7 +264,7 @@ bool dm_helpers_dp_mst_write_payload_allocation_table( } /* - * poll pending down reply before clear payload allocation table + * poll pending down reply */ void dm_helpers_dp_mst_poll_pending_down_reply( struct dc_context *ctx, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index cd10f77cdeb0..fd22b4474dbf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -674,11 +674,30 @@ static int amdgpu_dm_set_crtc_irq_state(struct amdgpu_device *adev, __func__); } +static int amdgpu_dm_set_vupdate_irq_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int crtc_id, + enum amdgpu_interrupt_state state) +{ + return dm_irq_state( + adev, + source, + crtc_id, + state, + IRQ_TYPE_VUPDATE, + __func__); +} + static const struct amdgpu_irq_src_funcs dm_crtc_irq_funcs = { .set = amdgpu_dm_set_crtc_irq_state, .process = amdgpu_dm_irq_handler, }; +static const struct amdgpu_irq_src_funcs dm_vupdate_irq_funcs = { + .set = amdgpu_dm_set_vupdate_irq_state, + .process = amdgpu_dm_irq_handler, +}; + static const struct amdgpu_irq_src_funcs dm_pageflip_irq_funcs = { .set = amdgpu_dm_set_pflip_irq_state, .process = amdgpu_dm_irq_handler, @@ -695,6 +714,9 @@ void amdgpu_dm_set_irq_funcs(struct amdgpu_device *adev) adev->crtc_irq.num_types = adev->mode_info.num_crtc; adev->crtc_irq.funcs = &dm_crtc_irq_funcs; + adev->vupdate_irq.num_types = adev->mode_info.num_crtc; + adev->vupdate_irq.funcs = &dm_vupdate_irq_funcs; + adev->pageflip_irq.num_types = adev->mode_info.num_crtc; adev->pageflip_irq.funcs = &dm_pageflip_irq_funcs; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index c4ea3a91f17a..6e205ee36ac3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -84,6 +84,7 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, { ssize_t result = 0; struct aux_payload payload; + enum aux_channel_operation_result operation_result; if (WARN_ON(msg->size > 16)) return -E2BIG; @@ -97,13 +98,27 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, payload.mot = (msg->request & DP_AUX_I2C_MOT) != 0; payload.defer_delay = 0; - result = dc_link_aux_transfer(TO_DM_AUX(aux)->ddc_service, &payload); + result = dc_link_aux_transfer_raw(TO_DM_AUX(aux)->ddc_service, &payload, + &operation_result); if (payload.write) result = msg->size; - if (result < 0) /* DC doesn't know about kernel error codes */ - result = -EIO; + if (result < 0) + switch (operation_result) { + case AUX_CHANNEL_OPERATION_SUCCEEDED: + break; + case AUX_CHANNEL_OPERATION_FAILED_HPD_DISCON: + case AUX_CHANNEL_OPERATION_FAILED_REASON_UNKNOWN: + result = -EIO; + break; + case AUX_CHANNEL_OPERATION_FAILED_INVALID_REPLY: + result = -EBUSY; + break; + case AUX_CHANNEL_OPERATION_FAILED_TIMEOUT: + result = -ETIMEDOUT; + break; + } return result; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c index a114954d6a5b..350e7a620d45 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c @@ -33,6 +33,7 @@ #include "amdgpu_dm_irq.h" #include "amdgpu_pm.h" #include "dm_pp_smu.h" +#include "amdgpu_smu.h" bool dm_pp_apply_display_requirements( @@ -40,6 +41,7 @@ bool dm_pp_apply_display_requirements( const struct dm_pp_display_configuration *pp_display_cfg) { struct amdgpu_device *adev = ctx->driver_context; + struct smu_context *smu = &adev->smu; int i; if (adev->pm.dpm_enabled) { @@ -105,6 +107,9 @@ bool dm_pp_apply_display_requirements( adev->powerplay.pp_funcs->display_configuration_change( adev->powerplay.pp_handle, &adev->pm.pm_display_cfg); + else + smu_display_configuration_change(smu, + &adev->pm.pm_display_cfg); amdgpu_pm_compute_clocks(adev); } @@ -308,6 +313,12 @@ bool dm_pp_get_clock_levels_by_type( if (adev->powerplay.pp_funcs->get_clock_by_type(pp_handle, dc_to_pp_clock_type(clk_type), &pp_clks)) { /* Error in pplib. Provide default values. */ + return true; + } + } else if (adev->smu.funcs && adev->smu.funcs->get_clock_by_type) { + if (smu_get_clock_by_type(&adev->smu, + dc_to_pp_clock_type(clk_type), + &pp_clks)) { get_default_clock_levels(clk_type, dc_clks); return true; } @@ -324,6 +335,13 @@ bool dm_pp_get_clock_levels_by_type( validation_clks.memory_max_clock = 80000; validation_clks.level = 0; } + } else if (adev->smu.funcs && adev->smu.funcs->get_max_high_clocks) { + if (smu_get_max_high_clocks(&adev->smu, &validation_clks)) { + DRM_INFO("DM_PPLIB: Warning: using default validation clocks!\n"); + validation_clks.engine_max_clock = 72000; + validation_clks.memory_max_clock = 80000; + validation_clks.level = 0; + } } DRM_INFO("DM_PPLIB: Validation clocks:\n"); @@ -374,14 +392,21 @@ bool dm_pp_get_clock_levels_by_type_with_latency( void *pp_handle = adev->powerplay.pp_handle; struct pp_clock_levels_with_latency pp_clks = { 0 }; const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + int ret; + + if (pp_funcs && pp_funcs->get_clock_by_type_with_latency) { + ret = pp_funcs->get_clock_by_type_with_latency(pp_handle, + dc_to_pp_clock_type(clk_type), + &pp_clks); + if (ret) + return false; + } else if (adev->smu.ppt_funcs && adev->smu.ppt_funcs->get_clock_by_type_with_latency) { + if (smu_get_clock_by_type_with_latency(&adev->smu, + dc_to_pp_clock_type(clk_type), + &pp_clks)) + return false; + } - if (!pp_funcs || !pp_funcs->get_clock_by_type_with_latency) - return false; - - if (pp_funcs->get_clock_by_type_with_latency(pp_handle, - dc_to_pp_clock_type(clk_type), - &pp_clks)) - return false; pp_to_dc_clock_levels_with_latency(&pp_clks, clk_level_info, clk_type); @@ -397,14 +422,20 @@ bool dm_pp_get_clock_levels_by_type_with_voltage( void *pp_handle = adev->powerplay.pp_handle; struct pp_clock_levels_with_voltage pp_clk_info = {0}; const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (!pp_funcs || !pp_funcs->get_clock_by_type_with_voltage) - return false; - - if (pp_funcs->get_clock_by_type_with_voltage(pp_handle, - dc_to_pp_clock_type(clk_type), - &pp_clk_info)) - return false; + int ret; + + if (pp_funcs && pp_funcs->get_clock_by_type_with_voltage) { + ret = pp_funcs->get_clock_by_type_with_voltage(pp_handle, + dc_to_pp_clock_type(clk_type), + &pp_clk_info); + if (ret) + return false; + } else if (adev->smu.ppt_funcs && adev->smu.ppt_funcs->get_clock_by_type_with_voltage) { + if (smu_get_clock_by_type_with_voltage(&adev->smu, + dc_to_pp_clock_type(clk_type), + &pp_clk_info)) + return false; + } pp_to_dc_clock_levels_with_voltage(&pp_clk_info, clk_level_info, clk_type); @@ -445,6 +476,10 @@ bool dm_pp_apply_clock_for_voltage_request( ret = adev->powerplay.pp_funcs->display_clock_voltage_request( adev->powerplay.pp_handle, &pp_clock_request); + else if (adev->smu.funcs && + adev->smu.funcs->display_clock_voltage_request) + ret = smu_display_clock_voltage_request(&adev->smu, + &pp_clock_request); if (ret) return false; return true; @@ -462,6 +497,8 @@ bool dm_pp_get_static_clocks( ret = adev->powerplay.pp_funcs->get_current_clocks( adev->powerplay.pp_handle, &pp_clk_info); + else if (adev->smu.funcs) + ret = smu_get_current_clocks(&adev->smu, &pp_clk_info); if (ret) return false; @@ -472,27 +509,6 @@ bool dm_pp_get_static_clocks( return true; } -void pp_rv_set_display_requirement(struct pp_smu *pp, - struct pp_smu_display_requirement_rv *req) -{ - const struct dc_context *ctx = pp->dm; - struct amdgpu_device *adev = ctx->driver_context; - void *pp_handle = adev->powerplay.pp_handle; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - struct pp_display_clock_request clock = {0}; - - if (!pp_funcs || !pp_funcs->display_clock_voltage_request) - return; - - clock.clock_type = amd_pp_dcf_clock; - clock.clock_freq_in_khz = req->hard_min_dcefclk_mhz * 1000; - pp_funcs->display_clock_voltage_request(pp_handle, &clock); - - clock.clock_type = amd_pp_f_clock; - clock.clock_freq_in_khz = req->hard_min_fclk_mhz * 1000; - pp_funcs->display_clock_voltage_request(pp_handle, &clock); -} - void pp_rv_set_wm_ranges(struct pp_smu *pp, struct pp_smu_wm_range_sets *ranges) { @@ -508,9 +524,6 @@ void pp_rv_set_wm_ranges(struct pp_smu *pp, wm_with_clock_ranges.num_wm_dmif_sets = ranges->num_reader_wm_sets; wm_with_clock_ranges.num_wm_mcif_sets = ranges->num_writer_wm_sets; - if (!pp_funcs || !pp_funcs->set_watermarks_for_clocks_ranges) - return; - for (i = 0; i < wm_with_clock_ranges.num_wm_dmif_sets; i++) { if (ranges->reader_wm_sets[i].wm_inst > 3) wm_dce_clocks[i].wm_set_id = WM_SET_A; @@ -543,7 +556,13 @@ void pp_rv_set_wm_ranges(struct pp_smu *pp, ranges->writer_wm_sets[i].min_drain_clk_mhz * 1000; } - pp_funcs->set_watermarks_for_clocks_ranges(pp_handle, &wm_with_clock_ranges); + if (pp_funcs && pp_funcs->set_watermarks_for_clocks_ranges) + pp_funcs->set_watermarks_for_clocks_ranges(pp_handle, + &wm_with_clock_ranges); + else if (adev->smu.funcs && + adev->smu.funcs->set_watermarks_for_clock_ranges) + smu_set_watermarks_for_clock_ranges(&adev->smu, + &wm_with_clock_ranges); } void pp_rv_set_pme_wa_enable(struct pp_smu *pp) @@ -553,10 +572,10 @@ void pp_rv_set_pme_wa_enable(struct pp_smu *pp) void *pp_handle = adev->powerplay.pp_handle; const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - if (!pp_funcs || !pp_funcs->notify_smu_enable_pwe) - return; - - pp_funcs->notify_smu_enable_pwe(pp_handle); + if (pp_funcs && pp_funcs->notify_smu_enable_pwe) + pp_funcs->notify_smu_enable_pwe(pp_handle); + else if (adev->smu.funcs) + smu_notify_smu_enable_pwe(&adev->smu); } void pp_rv_set_active_display_count(struct pp_smu *pp, int count) @@ -611,17 +630,16 @@ void pp_rv_set_hard_min_fclk_by_freq(struct pp_smu *pp, int mhz) pp_funcs->set_hard_min_fclk_by_freq(pp_handle, mhz); } -void dm_pp_get_funcs_rv( +void dm_pp_get_funcs( struct dc_context *ctx, - struct pp_smu_funcs_rv *funcs) + struct pp_smu_funcs *funcs) { - funcs->pp_smu.dm = ctx; - funcs->set_display_requirement = pp_rv_set_display_requirement; - funcs->set_wm_ranges = pp_rv_set_wm_ranges; - funcs->set_pme_wa_enable = pp_rv_set_pme_wa_enable; - funcs->set_display_count = pp_rv_set_active_display_count; - funcs->set_min_deep_sleep_dcfclk = pp_rv_set_min_deep_sleep_dcfclk; - funcs->set_hard_min_dcfclk_by_freq = pp_rv_set_hard_min_dcefclk_by_freq; - funcs->set_hard_min_fclk_by_freq = pp_rv_set_hard_min_fclk_by_freq; + funcs->rv_funcs.pp_smu.dm = ctx; + funcs->rv_funcs.set_wm_ranges = pp_rv_set_wm_ranges; + funcs->rv_funcs.set_pme_wa_enable = pp_rv_set_pme_wa_enable; + funcs->rv_funcs.set_display_count = pp_rv_set_active_display_count; + funcs->rv_funcs.set_min_deep_sleep_dcfclk = pp_rv_set_min_deep_sleep_dcfclk; + funcs->rv_funcs.set_hard_min_dcfclk_by_freq = pp_rv_set_hard_min_dcefclk_by_freq; + funcs->rv_funcs.set_hard_min_fclk_by_freq = pp_rv_set_hard_min_fclk_by_freq; } diff --git a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c index f28989860fd8..1e9a2d352068 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c +++ b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c @@ -449,6 +449,11 @@ static inline unsigned int clamp_ux_dy( return min_clamp; } +unsigned int dc_fixpt_u4d19(struct fixed31_32 arg) +{ + return ux_dy(arg.value, 4, 19); +} + unsigned int dc_fixpt_u3d19(struct fixed31_32 arg) { return ux_dy(arg.value, 3, 19); diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index eb62d10bb65c..1b4b51657f5e 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -247,6 +247,53 @@ static enum dcn_bw_defs tl_pixel_format_to_bw_defs(enum surface_pixel_format for } } +enum source_macro_tile_size swizzle_mode_to_macro_tile_size(enum swizzle_mode_values sw_mode) +{ + switch (sw_mode) { + /* for 4/8/16 high tiles */ + case DC_SW_LINEAR: + return dm_4k_tile; + case DC_SW_4KB_S: + case DC_SW_4KB_S_X: + return dm_4k_tile; + case DC_SW_64KB_S: + case DC_SW_64KB_S_X: + case DC_SW_64KB_S_T: + return dm_64k_tile; + case DC_SW_VAR_S: + case DC_SW_VAR_S_X: + return dm_256k_tile; + + /* For 64bpp 2 high tiles */ + case DC_SW_4KB_D: + case DC_SW_4KB_D_X: + return dm_4k_tile; + case DC_SW_64KB_D: + case DC_SW_64KB_D_X: + case DC_SW_64KB_D_T: + return dm_64k_tile; + case DC_SW_VAR_D: + case DC_SW_VAR_D_X: + return dm_256k_tile; + + case DC_SW_4KB_R: + case DC_SW_4KB_R_X: + return dm_4k_tile; + case DC_SW_64KB_R: + case DC_SW_64KB_R_X: + return dm_64k_tile; + case DC_SW_VAR_R: + case DC_SW_VAR_R_X: + return dm_256k_tile; + + /* Unsupported swizzle modes for dcn */ + case DC_SW_256B_S: + default: + ASSERT(0); /* Not supported */ + return 0; + } +} + static void pipe_ctx_to_e2e_pipe_params ( const struct pipe_ctx *pipe, struct _vcs_dpi_display_pipe_params_st *input) @@ -287,46 +334,7 @@ static void pipe_ctx_to_e2e_pipe_params ( input->src.cur0_src_width = 128; /* TODO: Cursor calcs, not curently stored */ input->src.cur0_bpp = 32; - switch (pipe->plane_state->tiling_info.gfx9.swizzle) { - /* for 4/8/16 high tiles */ - case DC_SW_LINEAR: - input->src.macro_tile_size = dm_4k_tile; - break; - case DC_SW_4KB_S: - case DC_SW_4KB_S_X: - input->src.macro_tile_size = dm_4k_tile; - break; - case DC_SW_64KB_S: - case DC_SW_64KB_S_X: - case DC_SW_64KB_S_T: - input->src.macro_tile_size = dm_64k_tile; - break; - case DC_SW_VAR_S: - case DC_SW_VAR_S_X: - input->src.macro_tile_size = dm_256k_tile; - break; - - /* For 64bpp 2 high tiles */ - case DC_SW_4KB_D: - case DC_SW_4KB_D_X: - input->src.macro_tile_size = dm_4k_tile; - break; - case DC_SW_64KB_D: - case DC_SW_64KB_D_X: - case DC_SW_64KB_D_T: - input->src.macro_tile_size = dm_64k_tile; - break; - case DC_SW_VAR_D: - case DC_SW_VAR_D_X: - input->src.macro_tile_size = dm_256k_tile; - break; - - /* Unsupported swizzle modes for dcn */ - case DC_SW_256B_S: - default: - ASSERT(0); /* Not supported */ - break; - } + input->src.macro_tile_size = swizzle_mode_to_macro_tile_size(pipe->plane_state->tiling_info.gfx9.swizzle); switch (pipe->plane_state->rotation) { case ROTATION_ANGLE_0: @@ -466,7 +474,7 @@ static void dcn_bw_calc_rq_dlg_ttu( input.clks_cfg.dcfclk_mhz = v->dcfclk; input.clks_cfg.dispclk_mhz = v->dispclk; input.clks_cfg.dppclk_mhz = v->dppclk; - input.clks_cfg.refclk_mhz = dc->res_pool->ref_clock_inKhz / 1000.0; + input.clks_cfg.refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0; input.clks_cfg.socclk_mhz = v->socclk; input.clks_cfg.voltage = v->voltage_level; // dc->dml.logger = pool->base.logger; @@ -536,28 +544,28 @@ static void calc_wm_sets_and_perf_params( v->fabric_and_dram_bandwidth = v->fabric_and_dram_bandwidth_vnom0p8; dispclkdppclkdcfclk_deep_sleep_prefetch_parameters_watermarks_and_performance_calculation(v); - context->bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = v->stutter_exit_watermark * 1000; - context->bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = v->stutter_enter_plus_exit_watermark * 1000; - context->bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = v->dram_clock_change_watermark * 1000; - context->bw.dcn.watermarks.b.pte_meta_urgent_ns = v->ptemeta_urgent_watermark * 1000; - context->bw.dcn.watermarks.b.urgent_ns = v->urgent_watermark * 1000; + context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = v->ptemeta_urgent_watermark * 1000; + context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = v->urgent_watermark * 1000; v->dcfclk_per_state[1] = v->dcfclkv_nom0p8; v->dcfclk_per_state[0] = v->dcfclkv_nom0p8; v->dcfclk = v->dcfclkv_nom0p8; dispclkdppclkdcfclk_deep_sleep_prefetch_parameters_watermarks_and_performance_calculation(v); - context->bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = v->stutter_exit_watermark * 1000; - context->bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = v->stutter_enter_plus_exit_watermark * 1000; - context->bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = v->dram_clock_change_watermark * 1000; - context->bw.dcn.watermarks.c.pte_meta_urgent_ns = v->ptemeta_urgent_watermark * 1000; - context->bw.dcn.watermarks.c.urgent_ns = v->urgent_watermark * 1000; + context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = v->ptemeta_urgent_watermark * 1000; + context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = v->urgent_watermark * 1000; } if (v->voltage_level < 3) { @@ -571,14 +579,14 @@ static void calc_wm_sets_and_perf_params( v->dcfclk = v->dcfclkv_max0p9; dispclkdppclkdcfclk_deep_sleep_prefetch_parameters_watermarks_and_performance_calculation(v); - context->bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = v->stutter_exit_watermark * 1000; - context->bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = v->stutter_enter_plus_exit_watermark * 1000; - context->bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = v->dram_clock_change_watermark * 1000; - context->bw.dcn.watermarks.d.pte_meta_urgent_ns = v->ptemeta_urgent_watermark * 1000; - context->bw.dcn.watermarks.d.urgent_ns = v->urgent_watermark * 1000; + context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = v->ptemeta_urgent_watermark * 1000; + context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = v->urgent_watermark * 1000; } v->fabric_and_dram_bandwidth_per_state[2] = v->fabric_and_dram_bandwidth_vnom0p8; @@ -591,20 +599,20 @@ static void calc_wm_sets_and_perf_params( v->dcfclk = v->dcfclk_per_state[v->voltage_level]; dispclkdppclkdcfclk_deep_sleep_prefetch_parameters_watermarks_and_performance_calculation(v); - context->bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = v->stutter_exit_watermark * 1000; - context->bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = v->stutter_enter_plus_exit_watermark * 1000; - context->bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = v->dram_clock_change_watermark * 1000; - context->bw.dcn.watermarks.a.pte_meta_urgent_ns = v->ptemeta_urgent_watermark * 1000; - context->bw.dcn.watermarks.a.urgent_ns = v->urgent_watermark * 1000; + context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = v->ptemeta_urgent_watermark * 1000; + context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = v->urgent_watermark * 1000; if (v->voltage_level >= 2) { - context->bw.dcn.watermarks.b = context->bw.dcn.watermarks.a; - context->bw.dcn.watermarks.c = context->bw.dcn.watermarks.a; + context->bw_ctx.bw.dcn.watermarks.b = context->bw_ctx.bw.dcn.watermarks.a; + context->bw_ctx.bw.dcn.watermarks.c = context->bw_ctx.bw.dcn.watermarks.a; } if (v->voltage_level >= 3) - context->bw.dcn.watermarks.d = context->bw.dcn.watermarks.a; + context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a; } #endif @@ -693,8 +701,15 @@ static void hack_bounding_box(struct dcn_bw_internal_vars *v, bool dcn_validate_bandwidth( struct dc *dc, - struct dc_state *context) + struct dc_state *context, + bool fast_validate) { + /* + * we want a breakdown of the various stages of validation, which the + * perf_trace macro doesn't support + */ + BW_VAL_TRACE_SETUP(); + const struct resource_pool *pool = dc->res_pool; struct dcn_bw_internal_vars *v = &context->dcn_bw_vars; int i, input_idx; @@ -703,6 +718,9 @@ bool dcn_validate_bandwidth( float bw_limit; PERFORMANCE_TRACE_START(); + + BW_VAL_TRACE_COUNT(); + if (dcn_bw_apply_registry_override(dc)) dcn_bw_sync_calcs_and_dml(dc); @@ -1000,13 +1018,16 @@ bool dcn_validate_bandwidth( dc->debug.sr_enter_plus_exit_time_dpm0_ns / 1000.0f; if (dc->debug.sr_exit_time_dpm0_ns) v->sr_exit_time = dc->debug.sr_exit_time_dpm0_ns / 1000.0f; - dc->dml.soc.sr_enter_plus_exit_time_us = v->sr_enter_plus_exit_time; - dc->dml.soc.sr_exit_time_us = v->sr_exit_time; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = v->sr_enter_plus_exit_time; + context->bw_ctx.dml.soc.sr_exit_time_us = v->sr_exit_time; mode_support_and_system_configuration(v); } - if (v->voltage_level != 5) { + BW_VAL_TRACE_END_VOLTAGE_LEVEL(); + + if (v->voltage_level != number_of_states_plus_one && !fast_validate) { float bw_consumed = v->total_bandwidth_consumed_gbyte_per_second; + if (bw_consumed < v->fabric_and_dram_bandwidth_vmin0p65) bw_consumed = v->fabric_and_dram_bandwidth_vmin0p65; else if (bw_consumed < v->fabric_and_dram_bandwidth_vmid0p72) @@ -1027,58 +1048,60 @@ bool dcn_validate_bandwidth( */ dispclkdppclkdcfclk_deep_sleep_prefetch_parameters_watermarks_and_performance_calculation(v); - context->bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = v->stutter_exit_watermark * 1000; - context->bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = v->stutter_enter_plus_exit_watermark * 1000; - context->bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = v->dram_clock_change_watermark * 1000; - context->bw.dcn.watermarks.a.pte_meta_urgent_ns = v->ptemeta_urgent_watermark * 1000; - context->bw.dcn.watermarks.a.urgent_ns = v->urgent_watermark * 1000; - context->bw.dcn.watermarks.b = context->bw.dcn.watermarks.a; - context->bw.dcn.watermarks.c = context->bw.dcn.watermarks.a; - context->bw.dcn.watermarks.d = context->bw.dcn.watermarks.a; + context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = v->ptemeta_urgent_watermark * 1000; + context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = v->urgent_watermark * 1000; + context->bw_ctx.bw.dcn.watermarks.b = context->bw_ctx.bw.dcn.watermarks.a; + context->bw_ctx.bw.dcn.watermarks.c = context->bw_ctx.bw.dcn.watermarks.a; + context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a; - context->bw.dcn.clk.fclk_khz = (int)(bw_consumed * 1000000 / + context->bw_ctx.bw.dcn.clk.fclk_khz = (int)(bw_consumed * 1000000 / (ddr4_dram_factor_single_Channel * v->number_of_channels)); if (bw_consumed == v->fabric_and_dram_bandwidth_vmin0p65) { - context->bw.dcn.clk.fclk_khz = (int)(bw_consumed * 1000000 / 32); + context->bw_ctx.bw.dcn.clk.fclk_khz = (int)(bw_consumed * 1000000 / 32); } - context->bw.dcn.clk.dcfclk_deep_sleep_khz = (int)(v->dcf_clk_deep_sleep * 1000); - context->bw.dcn.clk.dcfclk_khz = (int)(v->dcfclk * 1000); + context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = (int)(v->dcf_clk_deep_sleep * 1000); + context->bw_ctx.bw.dcn.clk.dcfclk_khz = (int)(v->dcfclk * 1000); - context->bw.dcn.clk.dispclk_khz = (int)(v->dispclk * 1000); + context->bw_ctx.bw.dcn.clk.dispclk_khz = (int)(v->dispclk * 1000); if (dc->debug.max_disp_clk == true) - context->bw.dcn.clk.dispclk_khz = (int)(dc->dcn_soc->max_dispclk_vmax0p9 * 1000); + context->bw_ctx.bw.dcn.clk.dispclk_khz = (int)(dc->dcn_soc->max_dispclk_vmax0p9 * 1000); - if (context->bw.dcn.clk.dispclk_khz < + if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz) { - context->bw.dcn.clk.dispclk_khz = + context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz; } - context->bw.dcn.clk.dppclk_khz = context->bw.dcn.clk.dispclk_khz / v->dispclk_dppclk_ratio; - context->bw.dcn.clk.phyclk_khz = v->phyclk_per_state[v->voltage_level]; + context->bw_ctx.bw.dcn.clk.dppclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz / v->dispclk_dppclk_ratio; + context->bw_ctx.bw.dcn.clk.phyclk_khz = v->phyclk_per_state[v->voltage_level]; switch (v->voltage_level) { case 0: - context->bw.dcn.clk.max_supported_dppclk_khz = + context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = (int)(dc->dcn_soc->max_dppclk_vmin0p65 * 1000); break; case 1: - context->bw.dcn.clk.max_supported_dppclk_khz = + context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = (int)(dc->dcn_soc->max_dppclk_vmid0p72 * 1000); break; case 2: - context->bw.dcn.clk.max_supported_dppclk_khz = + context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = (int)(dc->dcn_soc->max_dppclk_vnom0p8 * 1000); break; default: - context->bw.dcn.clk.max_supported_dppclk_khz = + context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = (int)(dc->dcn_soc->max_dppclk_vmax0p9 * 1000); break; } + BW_VAL_TRACE_END_WATERMARKS(); + for (i = 0, input_idx = 0; i < pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; @@ -1141,7 +1164,7 @@ bool dcn_validate_bandwidth( hsplit_pipe->pipe_dlg_param.vblank_end = pipe->pipe_dlg_param.vblank_end; } else { /* pipe not split previously needs split */ - hsplit_pipe = find_idle_secondary_pipe(&context->res_ctx, pool); + hsplit_pipe = find_idle_secondary_pipe(&context->res_ctx, pool, pipe); ASSERT(hsplit_pipe); split_stream_across_pipes( &context->res_ctx, pool, @@ -1169,13 +1192,17 @@ bool dcn_validate_bandwidth( input_idx++; } + } else if (v->voltage_level == number_of_states_plus_one) { + BW_VAL_TRACE_SKIP(fail); + } else if (fast_validate) { + BW_VAL_TRACE_SKIP(fast); } if (v->voltage_level == 0) { - dc->dml.soc.sr_enter_plus_exit_time_us = + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->dcn_soc->sr_enter_plus_exit_time; - dc->dml.soc.sr_exit_time_us = dc->dcn_soc->sr_exit_time; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->dcn_soc->sr_exit_time; } /* @@ -1188,6 +1215,7 @@ bool dcn_validate_bandwidth( kernel_fpu_end(); PERFORMANCE_TRACE_END(); + BW_VAL_TRACE_FINISH(); if (bw_limit_pass && v->voltage_level != 5) return true; @@ -1395,12 +1423,14 @@ void dcn_bw_update_from_pplib(struct dc *dc) void dcn_bw_notify_pplib_of_wm_ranges(struct dc *dc) { - struct pp_smu_funcs_rv *pp = dc->res_pool->pp_smu; + struct pp_smu_funcs_rv *pp = NULL; struct pp_smu_wm_range_sets ranges = {0}; int min_fclk_khz, min_dcfclk_khz, socclk_khz; const int overdrive = 5000000; /* 5 GHz to cover Overdrive */ - if (!pp->set_wm_ranges) + if (dc->res_pool->pp_smu) + pp = &dc->res_pool->pp_smu->rv_funcs; + if (!pp || !pp->set_wm_ranges) return; kernel_fpu_begin(); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index a6cda201c964..18c775a950cc 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -524,6 +524,14 @@ void dc_link_set_preferred_link_settings(struct dc *dc, struct dc_stream_state *link_stream; struct dc_link_settings store_settings = *link_setting; + link->preferred_link_setting = store_settings; + + /* Retrain with preferred link settings only relevant for + * DP signal type + */ + if (!dc_is_dp_signal(link->connector_signal)) + return; + for (i = 0; i < MAX_PIPES; i++) { pipe = &dc->current_state->res_ctx.pipe_ctx[i]; if (pipe->stream && pipe->stream->link) { @@ -538,7 +546,10 @@ void dc_link_set_preferred_link_settings(struct dc *dc, link_stream = link->dc->current_state->res_ctx.pipe_ctx[i].stream; - link->preferred_link_setting = store_settings; + /* Cannot retrain link if backend is off */ + if (link_stream->dpms_off) + return; + if (link_stream) decide_link_settings(link_stream, &store_settings); @@ -573,6 +584,28 @@ void dc_link_set_test_pattern(struct dc_link *link, cust_pattern_size); } +uint32_t dc_link_bandwidth_kbps( + const struct dc_link *link, + const struct dc_link_settings *link_setting) +{ + uint32_t link_bw_kbps = link_setting->link_rate * LINK_RATE_REF_FREQ_IN_KHZ; /* bytes per sec */ + + link_bw_kbps *= 8; /* 8 bits per byte*/ + link_bw_kbps *= link_setting->lane_count; + + return link_bw_kbps; + +} + +const struct dc_link_settings *dc_link_get_link_cap( + const struct dc_link *link) +{ + if (link->preferred_link_setting.lane_count != LANE_COUNT_UNKNOWN && + link->preferred_link_setting.link_rate != LINK_RATE_UNKNOWN) + return &link->preferred_link_setting; + return &link->verified_link_cap; +} + static void destruct(struct dc *dc) { dc_release_state(dc->current_state); @@ -621,6 +654,10 @@ static bool construct(struct dc *dc, #endif enum dce_version dc_version = DCE_VERSION_UNKNOWN; + dc->config = init_params->flags; + + memcpy(&dc->bb_overrides, &init_params->bb_overrides, sizeof(dc->bb_overrides)); + dc_dceip = kzalloc(sizeof(*dc_dceip), GFP_KERNEL); if (!dc_dceip) { dm_error("%s: failed to create dceip\n", __func__); @@ -668,13 +705,6 @@ static bool construct(struct dc *dc, dc_ctx->dc_stream_id_count = 0; dc->ctx = dc_ctx; - dc->current_state = dc_create_state(); - - if (!dc->current_state) { - dm_error("%s: failed to create validate ctx\n", __func__); - goto fail; - } - /* Create logger */ dc_ctx->dce_environment = init_params->dce_environment; @@ -722,14 +752,22 @@ static bool construct(struct dc *dc, goto fail; } - dc->res_pool = dc_create_resource_pool( - dc, - init_params->num_virtual_links, - dc_version, - init_params->asic_id); + dc->res_pool = dc_create_resource_pool(dc, init_params, dc_version); if (!dc->res_pool) goto fail; + /* Creation of current_state must occur after dc->dml + * is initialized in dc_create_resource_pool because + * on creation it copies the contents of dc->dml + */ + + dc->current_state = dc_create_state(dc); + + if (!dc->current_state) { + dm_error("%s: failed to create validate ctx\n", __func__); + goto fail; + } + dc_resource_state_construct(dc, dc->current_state); if (!create_links(dc, init_params->num_virtual_links)) @@ -746,7 +784,7 @@ fail: static void disable_dangling_plane(struct dc *dc, struct dc_state *context) { int i, j; - struct dc_state *dangling_context = dc_create_state(); + struct dc_state *dangling_context = dc_create_state(dc); struct dc_state *current_ctx; if (dangling_context == NULL) @@ -811,8 +849,6 @@ struct dc *dc_create(const struct dc_init_data *init_params) if (dc->res_pool->dmcu != NULL) dc->versions.dmcu_version = dc->res_pool->dmcu->dmcu_version; - dc->config = init_params->flags; - dc->build_id = DC_BUILD_ID; DC_LOG_DC("Display Core initialized\n"); @@ -969,7 +1005,7 @@ static bool context_changed( return false; } -bool dc_validate_seamless_boot_timing(struct dc *dc, +bool dc_validate_seamless_boot_timing(const struct dc *dc, const struct dc_sink *sink, struct dc_crtc_timing *crtc_timing) { @@ -1060,7 +1096,13 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c if (!dcb->funcs->is_accelerated_mode(dcb)) dc->hwss.enable_accelerated_mode(dc, context); - dc->hwss.prepare_bandwidth(dc, context); + for (i = 0; i < context->stream_count; i++) { + if (context->streams[i]->apply_seamless_boot_optimization) + dc->optimize_seamless_boot = true; + } + + if (!dc->optimize_seamless_boot) + dc->hwss.prepare_bandwidth(dc, context); /* re-program planes for existing stream, in case we need to * free up plane resource for later use @@ -1135,12 +1177,15 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c dc_enable_stereo(dc, context, dc_streams, context->stream_count); - /* pplib is notified if disp_num changed */ - dc->hwss.optimize_bandwidth(dc, context); + if (!dc->optimize_seamless_boot) + /* pplib is notified if disp_num changed */ + dc->hwss.optimize_bandwidth(dc, context); for (i = 0; i < context->stream_count; i++) context->streams[i]->mode_changed = false; + memset(&context->commit_hints, 0, sizeof(context->commit_hints)); + dc_release_state(dc->current_state); dc->current_state = context; @@ -1177,7 +1222,7 @@ bool dc_post_update_surfaces_to_stream(struct dc *dc) int i; struct dc_state *context = dc->current_state; - if (dc->optimized_required == false) + if (!dc->optimized_required || dc->optimize_seamless_boot) return true; post_surface_trace(dc); @@ -1195,18 +1240,60 @@ bool dc_post_update_surfaces_to_stream(struct dc *dc) return true; } -struct dc_state *dc_create_state(void) +struct dc_state *dc_create_state(struct dc *dc) { struct dc_state *context = kzalloc(sizeof(struct dc_state), GFP_KERNEL); if (!context) return NULL; + /* Each context must have their own instance of VBA and in order to + * initialize and obtain IP and SOC the base DML instance from DC is + * initially copied into every context + */ +#ifdef CONFIG_DRM_AMD_DC_DCN1_0 + memcpy(&context->bw_ctx.dml, &dc->dml, sizeof(struct display_mode_lib)); +#endif kref_init(&context->refcount); + return context; } +struct dc_state *dc_copy_state(struct dc_state *src_ctx) +{ + int i, j; + struct dc_state *new_ctx = kzalloc(sizeof(struct dc_state), + GFP_KERNEL); + + if (!new_ctx) + return NULL; + + memcpy(new_ctx, src_ctx, sizeof(struct dc_state)); + + for (i = 0; i < MAX_PIPES; i++) { + struct pipe_ctx *cur_pipe = &new_ctx->res_ctx.pipe_ctx[i]; + + if (cur_pipe->top_pipe) + cur_pipe->top_pipe = &new_ctx->res_ctx.pipe_ctx[cur_pipe->top_pipe->pipe_idx]; + + if (cur_pipe->bottom_pipe) + cur_pipe->bottom_pipe = &new_ctx->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx]; + + } + + for (i = 0; i < new_ctx->stream_count; i++) { + dc_stream_retain(new_ctx->streams[i]); + for (j = 0; j < new_ctx->stream_status[i].plane_count; j++) + dc_plane_state_retain( + new_ctx->stream_status[i].plane_states[j]); + } + + kref_init(&new_ctx->refcount); + + return new_ctx; +} + void dc_retain_state(struct dc_state *context) { kref_get(&context->refcount); @@ -1666,6 +1753,7 @@ static void commit_planes_do_stream_update(struct dc *dc, continue; if (stream_update->dpms_off) { + dc->hwss.pipe_control_lock(dc, pipe_ctx, true); if (*stream_update->dpms_off) { core_link_disable_stream(pipe_ctx, KEEP_ACQUIRED_RESOURCE); dc->hwss.optimize_bandwidth(dc, dc->current_state); @@ -1673,6 +1761,7 @@ static void commit_planes_do_stream_update(struct dc *dc, dc->hwss.prepare_bandwidth(dc, dc->current_state); core_link_enable_stream(dc->current_state, pipe_ctx); } + dc->hwss.pipe_control_lock(dc, pipe_ctx, false); } if (stream_update->abm_level && pipe_ctx->stream_res.abm) { @@ -1700,7 +1789,16 @@ static void commit_planes_for_stream(struct dc *dc, int i, j; struct pipe_ctx *top_pipe_to_program = NULL; - if (update_type == UPDATE_TYPE_FULL) { + if (dc->optimize_seamless_boot && surface_count > 0) { + /* Optimize seamless boot flag keeps clocks and watermarks high until + * first flip. After first flip, optimization is required to lower + * bandwidth. + */ + dc->optimize_seamless_boot = false; + dc->optimized_required = true; + } + + if (update_type == UPDATE_TYPE_FULL && !dc->optimize_seamless_boot) { dc->hwss.prepare_bandwidth(dc, context); context_clock_trace(dc, context); } @@ -1800,7 +1898,7 @@ void dc_commit_updates_for_stream(struct dc *dc, if (update_type >= UPDATE_TYPE_FULL) { /* initialize scratch memory for building context */ - context = dc_create_state(); + context = dc_create_state(dc); if (context == NULL) { DC_ERROR("Failed to allocate new validate context!\n"); return; @@ -2099,13 +2197,13 @@ void dc_link_remove_remote_sink(struct dc_link *link, struct dc_sink *sink) void get_clock_requirements_for_state(struct dc_state *state, struct AsicStateEx *info) { - info->displayClock = (unsigned int)state->bw.dcn.clk.dispclk_khz; - info->engineClock = (unsigned int)state->bw.dcn.clk.dcfclk_khz; - info->memoryClock = (unsigned int)state->bw.dcn.clk.dramclk_khz; - info->maxSupportedDppClock = (unsigned int)state->bw.dcn.clk.max_supported_dppclk_khz; - info->dppClock = (unsigned int)state->bw.dcn.clk.dppclk_khz; - info->socClock = (unsigned int)state->bw.dcn.clk.socclk_khz; - info->dcfClockDeepSleep = (unsigned int)state->bw.dcn.clk.dcfclk_deep_sleep_khz; - info->fClock = (unsigned int)state->bw.dcn.clk.fclk_khz; - info->phyClock = (unsigned int)state->bw.dcn.clk.phyclk_khz; + info->displayClock = (unsigned int)state->bw_ctx.bw.dcn.clk.dispclk_khz; + info->engineClock = (unsigned int)state->bw_ctx.bw.dcn.clk.dcfclk_khz; + info->memoryClock = (unsigned int)state->bw_ctx.bw.dcn.clk.dramclk_khz; + info->maxSupportedDppClock = (unsigned int)state->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz; + info->dppClock = (unsigned int)state->bw_ctx.bw.dcn.clk.dppclk_khz; + info->socClock = (unsigned int)state->bw_ctx.bw.dcn.clk.socclk_khz; + info->dcfClockDeepSleep = (unsigned int)state->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz; + info->fClock = (unsigned int)state->bw_ctx.bw.dcn.clk.fclk_khz; + info->phyClock = (unsigned int)state->bw_ctx.bw.dcn.clk.phyclk_khz; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c index 73d049506618..5903e7822f98 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c @@ -351,19 +351,19 @@ void context_clock_trace( DC_LOGGER_INIT(dc->ctx->logger); CLOCK_TRACE("Current: dispclk_khz:%d max_dppclk_khz:%d dcfclk_khz:%d\n" "dcfclk_deep_sleep_khz:%d fclk_khz:%d socclk_khz:%d\n", - context->bw.dcn.clk.dispclk_khz, - context->bw.dcn.clk.dppclk_khz, - context->bw.dcn.clk.dcfclk_khz, - context->bw.dcn.clk.dcfclk_deep_sleep_khz, - context->bw.dcn.clk.fclk_khz, - context->bw.dcn.clk.socclk_khz); + context->bw_ctx.bw.dcn.clk.dispclk_khz, + context->bw_ctx.bw.dcn.clk.dppclk_khz, + context->bw_ctx.bw.dcn.clk.dcfclk_khz, + context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz, + context->bw_ctx.bw.dcn.clk.fclk_khz, + context->bw_ctx.bw.dcn.clk.socclk_khz); CLOCK_TRACE("Calculated: dispclk_khz:%d max_dppclk_khz:%d dcfclk_khz:%d\n" "dcfclk_deep_sleep_khz:%d fclk_khz:%d socclk_khz:%d\n", - context->bw.dcn.clk.dispclk_khz, - context->bw.dcn.clk.dppclk_khz, - context->bw.dcn.clk.dcfclk_khz, - context->bw.dcn.clk.dcfclk_deep_sleep_khz, - context->bw.dcn.clk.fclk_khz, - context->bw.dcn.clk.socclk_khz); + context->bw_ctx.bw.dcn.clk.dispclk_khz, + context->bw_ctx.bw.dcn.clk.dppclk_khz, + context->bw_ctx.bw.dcn.clk.dcfclk_khz, + context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz, + context->bw_ctx.bw.dcn.clk.fclk_khz, + context->bw_ctx.bw.dcn.clk.socclk_khz); #endif } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index ea18e9c2d8ce..b37ecc3ede61 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -58,7 +58,6 @@ ******************************************************************************/ enum { - LINK_RATE_REF_FREQ_IN_MHZ = 27, PEAK_FACTOR_X1000 = 1006, /* * Some receivers fail to train on first try and are good @@ -515,6 +514,40 @@ static void link_disconnect_remap(struct dc_sink *prev_sink, struct dc_link *lin } +static void read_edp_current_link_settings_on_detect(struct dc_link *link) +{ + union lane_count_set lane_count_set = { {0} }; + uint8_t link_bw_set; + uint8_t link_rate_set; + + // Read DPCD 00101h to find out the number of lanes currently set + core_link_read_dpcd(link, DP_LANE_COUNT_SET, + &lane_count_set.raw, sizeof(lane_count_set)); + link->cur_link_settings.lane_count = lane_count_set.bits.LANE_COUNT_SET; + + // Read DPCD 00100h to find if standard link rates are set + core_link_read_dpcd(link, DP_LINK_BW_SET, + &link_bw_set, sizeof(link_bw_set)); + + if (link_bw_set == 0) { + /* If standard link rates are not being used, + * Read DPCD 00115h to find the link rate set used + */ + core_link_read_dpcd(link, DP_LINK_RATE_SET, + &link_rate_set, sizeof(link_rate_set)); + + if (link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) { + link->cur_link_settings.link_rate = + link->dpcd_caps.edp_supported_link_rates[link_rate_set]; + link->cur_link_settings.link_rate_set = link_rate_set; + link->cur_link_settings.use_link_rate_set = true; + } + } else { + link->cur_link_settings.link_rate = link_bw_set; + link->cur_link_settings.use_link_rate_set = false; + } +} + static bool detect_dp( struct dc_link *link, struct display_sink_capability *sink_caps, @@ -640,7 +673,8 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) bool same_dpcd = true; enum dc_connection_type new_connection_type = dc_connection_none; DC_LOGGER_INIT(link->ctx->logger); - if (link->connector_signal == SIGNAL_TYPE_VIRTUAL) + + if (dc_is_virtual_signal(link->connector_signal)) return false; if (false == dc_link_detect_sink(link, &new_connection_type)) { @@ -648,9 +682,14 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) return false; } - if (link->connector_signal == SIGNAL_TYPE_EDP && - link->local_sink) - return true; + if (link->connector_signal == SIGNAL_TYPE_EDP) { + /* On detect, we want to make sure current link settings are + * up to date, especially if link was powered on by GOP. + */ + read_edp_current_link_settings_on_detect(link); + if (link->local_sink) + return true; + } if (link->connector_signal == SIGNAL_TYPE_LVDS && link->local_sink) @@ -720,9 +759,8 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) same_dpcd = false; } /* Active dongle plug in without display or downstream unplug*/ - if (link->type == dc_connection_active_dongle - && link->dpcd_caps.sink_count. - bits.SINK_COUNT == 0) { + if (link->type == dc_connection_active_dongle && + link->dpcd_caps.sink_count.bits.SINK_COUNT == 0) { if (prev_sink != NULL) { /* Downstream unplug */ dc_sink_release(prev_sink); @@ -1172,8 +1210,6 @@ static bool construct( goto create_fail; } - - /* TODO: #DAL3 Implement id to str function.*/ LINK_INFO("Connector[%d] description:" "signal %d\n", @@ -1207,7 +1243,7 @@ static bool construct( link->link_enc = link->dc->res_pool->funcs->link_enc_create( &enc_init_data); - if( link->link_enc == NULL) { + if (link->link_enc == NULL) { DC_ERROR("Failed to create link encoder!\n"); goto link_enc_create_fail; } @@ -1399,9 +1435,24 @@ static enum dc_status enable_link_dp( /* get link settings for video mode timing */ decide_link_settings(stream, &link_settings); + if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP) { + /* If link settings are different than current and link already enabled + * then need to disable before programming to new rate. + */ + if (link->link_status.link_active && + (link->cur_link_settings.lane_count != link_settings.lane_count || + link->cur_link_settings.link_rate != link_settings.link_rate)) { + dp_disable_link_phy(link, pipe_ctx->stream->signal); + } + + /*in case it is not on*/ + link->dc->hwss.edp_power_control(link, true); + link->dc->hwss.edp_wait_for_hpd_ready(link, true); + } + pipe_ctx->stream_res.pix_clk_params.requested_sym_clk = link_settings.link_rate * LINK_RATE_REF_FREQ_IN_KHZ; - state->dccg->funcs->update_clocks(state->dccg, state, false); + state->clk_mgr->funcs->update_clocks(state->clk_mgr, state, false); dp_enable_link_phy( link, @@ -1442,15 +1493,9 @@ static enum dc_status enable_link_edp( struct pipe_ctx *pipe_ctx) { enum dc_status status; - struct dc_stream_state *stream = pipe_ctx->stream; - struct dc_link *link = stream->link; - /*in case it is not on*/ - link->dc->hwss.edp_power_control(link, true); - link->dc->hwss.edp_wait_for_hpd_ready(link, true); status = enable_link_dp(state, pipe_ctx); - return status; } @@ -1466,14 +1511,14 @@ static enum dc_status enable_link_dp_mst( if (link->cur_link_settings.lane_count != LANE_COUNT_UNKNOWN) return DC_OK; + /* clear payload table */ + dm_helpers_dp_mst_clear_payload_allocation_table(link->ctx, link); + /* to make sure the pending down rep can be processed - * before clear payload table + * before enabling the link */ dm_helpers_dp_mst_poll_pending_down_reply(link->ctx, link); - /* clear payload table */ - dm_helpers_dp_mst_clear_payload_allocation_table(link->ctx, link); - /* set the sink to MST mode before enabling the link */ dp_enable_mst_on_sink(link, true); @@ -1982,7 +2027,7 @@ static void enable_link_hdmi(struct pipe_ctx *pipe_ctx) pipe_ctx->stream->signal, stream->phy_pix_clk); - if (pipe_ctx->stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) + if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) dal_ddc_service_read_scdc_data(link->ddc); } @@ -2074,11 +2119,28 @@ static void disable_link(struct dc_link *link, enum signal_type signal) } } +static uint32_t get_timing_pixel_clock_100hz(const struct dc_crtc_timing *timing) +{ + + uint32_t pxl_clk = timing->pix_clk_100hz; + + if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) + pxl_clk /= 2; + else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) + pxl_clk = pxl_clk * 2 / 3; + + if (timing->display_color_depth == COLOR_DEPTH_101010) + pxl_clk = pxl_clk * 10 / 8; + else if (timing->display_color_depth == COLOR_DEPTH_121212) + pxl_clk = pxl_clk * 12 / 8; + + return pxl_clk; +} + static bool dp_active_dongle_validate_timing( const struct dc_crtc_timing *timing, const struct dpcd_caps *dpcd_caps) { - unsigned int required_pix_clk_100hz = timing->pix_clk_100hz; const struct dc_dongle_caps *dongle_caps = &dpcd_caps->dongle_caps; switch (dpcd_caps->dongle_type) { @@ -2115,13 +2177,6 @@ static bool dp_active_dongle_validate_timing( return false; } - - /* Check Color Depth and Pixel Clock */ - if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) - required_pix_clk_100hz /= 2; - else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) - required_pix_clk_100hz = required_pix_clk_100hz * 2 / 3; - switch (timing->display_color_depth) { case COLOR_DEPTH_666: case COLOR_DEPTH_888: @@ -2130,14 +2185,11 @@ static bool dp_active_dongle_validate_timing( case COLOR_DEPTH_101010: if (dongle_caps->dp_hdmi_max_bpc < 10) return false; - required_pix_clk_100hz = required_pix_clk_100hz * 10 / 8; break; case COLOR_DEPTH_121212: if (dongle_caps->dp_hdmi_max_bpc < 12) return false; - required_pix_clk_100hz = required_pix_clk_100hz * 12 / 8; break; - case COLOR_DEPTH_141414: case COLOR_DEPTH_161616: default: @@ -2145,7 +2197,7 @@ static bool dp_active_dongle_validate_timing( return false; } - if (required_pix_clk_100hz > (dongle_caps->dp_hdmi_max_pixel_clk * 10)) + if (get_timing_pixel_clock_100hz(timing) > (dongle_caps->dp_hdmi_max_pixel_clk_in_khz * 10)) return false; return true; @@ -2166,7 +2218,7 @@ enum dc_status dc_link_validate_mode_timing( return DC_OK; /* Passive Dongle */ - if (0 != max_pix_clk && timing->pix_clk_100hz > max_pix_clk) + if (max_pix_clk != 0 && get_timing_pixel_clock_100hz(timing) > max_pix_clk) return DC_EXCEED_DONGLE_CAP; /* Active Dongle*/ @@ -2284,14 +2336,13 @@ void core_link_resume(struct dc_link *link) static struct fixed31_32 get_pbn_per_slot(struct dc_stream_state *stream) { - struct dc_link_settings *link_settings = - &stream->link->cur_link_settings; - uint32_t link_rate_in_mbps = - link_settings->link_rate * LINK_RATE_REF_FREQ_IN_MHZ; - struct fixed31_32 mbps = dc_fixpt_from_int( - link_rate_in_mbps * link_settings->lane_count); - - return dc_fixpt_div_int(mbps, 54); + struct fixed31_32 mbytes_per_sec; + uint32_t link_rate_in_mbytes_per_sec = dc_link_bandwidth_kbps(stream->link, &stream->link->cur_link_settings); + link_rate_in_mbytes_per_sec /= 8000; /* Kbits to MBytes */ + + mbytes_per_sec = dc_fixpt_from_int(link_rate_in_mbytes_per_sec); + + return dc_fixpt_div_int(mbytes_per_sec, 54); } static int get_color_depth(enum dc_color_depth color_depth) @@ -2316,7 +2367,7 @@ static struct fixed31_32 get_pbn_from_timing(struct pipe_ctx *pipe_ctx) uint32_t denominator; bpc = get_color_depth(pipe_ctx->stream_res.pix_clk_params.color_depth); - kbps = pipe_ctx->stream_res.pix_clk_params.requested_pix_clk_100hz / 10 * bpc * 3; + kbps = dc_bandwidth_in_kbps_from_timing(&pipe_ctx->stream->timing); /* * margin 5300ppm + 300ppm ~ 0.6% as per spec, factor is 1.006 @@ -2551,12 +2602,12 @@ void core_link_enable_stream( struct dc_state *state, struct pipe_ctx *pipe_ctx) { - struct dc *core_dc = pipe_ctx->stream->ctx->dc; + struct dc *core_dc = pipe_ctx->stream->ctx->dc; struct dc_stream_state *stream = pipe_ctx->stream; enum dc_status status; DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger); - if (pipe_ctx->stream->signal != SIGNAL_TYPE_VIRTUAL) { + if (!dc_is_virtual_signal(pipe_ctx->stream->signal)) { stream->link->link_enc->funcs->setup( stream->link->link_enc, pipe_ctx->stream->signal); @@ -2570,9 +2621,10 @@ void core_link_enable_stream( pipe_ctx->stream_res.stream_enc->funcs->dp_set_stream_attribute( pipe_ctx->stream_res.stream_enc, &stream->timing, - stream->output_color_space); + stream->output_color_space, + stream->link->dpcd_caps.dprx_feature.bits.SST_SPLIT_SDP_CAP); - if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) + if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal)) pipe_ctx->stream_res.stream_enc->funcs->hdmi_set_stream_attribute( pipe_ctx->stream_res.stream_enc, &stream->timing, @@ -2736,3 +2788,49 @@ void dc_link_enable_hpd_filter(struct dc_link *link, bool enable) } } +uint32_t dc_bandwidth_in_kbps_from_timing( + const struct dc_crtc_timing *timing) +{ + uint32_t bits_per_channel = 0; + uint32_t kbps; + + switch (timing->display_color_depth) { + case COLOR_DEPTH_666: + bits_per_channel = 6; + break; + case COLOR_DEPTH_888: + bits_per_channel = 8; + break; + case COLOR_DEPTH_101010: + bits_per_channel = 10; + break; + case COLOR_DEPTH_121212: + bits_per_channel = 12; + break; + case COLOR_DEPTH_141414: + bits_per_channel = 14; + break; + case COLOR_DEPTH_161616: + bits_per_channel = 16; + break; + default: + break; + } + + ASSERT(bits_per_channel != 0); + + kbps = timing->pix_clk_100hz / 10; + kbps *= bits_per_channel; + + if (timing->flags.Y_ONLY != 1) { + /*Only YOnly make reduce bandwidth by 1/3 compares to RGB*/ + kbps *= 3; + if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) + kbps /= 2; + else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) + kbps = kbps * 2 / 3; + } + + return kbps; + +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c index b7ee63cd8dc7..f02092a0dc76 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c @@ -573,12 +573,28 @@ bool dal_ddc_service_query_ddc_data( return ret; } -int dc_link_aux_transfer(struct ddc_service *ddc, - struct aux_payload *payload) +/* dc_link_aux_transfer_raw() - Attempt to transfer + * the given aux payload. This function does not perform + * retries or handle error states. The reply is returned + * in the payload->reply and the result through + * *operation_result. Returns the number of bytes transferred, + * or -1 on a failure. + */ +int dc_link_aux_transfer_raw(struct ddc_service *ddc, + struct aux_payload *payload, + enum aux_channel_operation_result *operation_result) { - return dce_aux_transfer(ddc, payload); + return dce_aux_transfer_raw(ddc, payload, operation_result); } +/* dc_link_aux_transfer_with_retries() - Attempt to submit an + * aux payload, retrying on timeouts, defers, and busy states + * as outlined in the DP spec. Returns true if the request + * was successful. + * + * Unless you want to implement your own retry semantics, this + * is probably the one you want. + */ bool dc_link_aux_transfer_with_retries(struct ddc_service *ddc, struct aux_payload *payload) { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 09d301216076..1ee544a32ebb 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -93,12 +93,10 @@ static void dpcd_set_link_settings( struct dc_link *link, const struct link_training_settings *lt_settings) { - uint8_t rate = (uint8_t) - (lt_settings->link_settings.link_rate); + uint8_t rate; union down_spread_ctrl downspread = { {0} }; union lane_count_set lane_count_set = { {0} }; - uint8_t link_set_buffer[2]; downspread.raw = (uint8_t) (lt_settings->link_settings.link_spread); @@ -111,29 +109,42 @@ static void dpcd_set_link_settings( lane_count_set.bits.POST_LT_ADJ_REQ_GRANTED = link->dpcd_caps.max_ln_count.bits.POST_LT_ADJ_REQ_SUPPORTED; - link_set_buffer[0] = rate; - link_set_buffer[1] = lane_count_set.raw; - - core_link_write_dpcd(link, DP_LINK_BW_SET, - link_set_buffer, 2); core_link_write_dpcd(link, DP_DOWNSPREAD_CTRL, &downspread.raw, sizeof(downspread)); + core_link_write_dpcd(link, DP_LANE_COUNT_SET, + &lane_count_set.raw, 1); + if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_14 && - (link->dpcd_caps.link_rate_set >= 1 && - link->dpcd_caps.link_rate_set <= 8)) { + lt_settings->link_settings.use_link_rate_set == true) { + rate = 0; + core_link_write_dpcd(link, DP_LINK_BW_SET, &rate, 1); core_link_write_dpcd(link, DP_LINK_RATE_SET, - &link->dpcd_caps.link_rate_set, 1); + <_settings->link_settings.link_rate_set, 1); + } else { + rate = (uint8_t) (lt_settings->link_settings.link_rate); + core_link_write_dpcd(link, DP_LINK_BW_SET, &rate, 1); } - DC_LOG_HW_LINK_TRAINING("%s\n %x rate = %x\n %x lane = %x\n %x spread = %x\n", - __func__, - DP_LINK_BW_SET, - lt_settings->link_settings.link_rate, - DP_LANE_COUNT_SET, - lt_settings->link_settings.lane_count, - DP_DOWNSPREAD_CTRL, - lt_settings->link_settings.link_spread); + if (rate) { + DC_LOG_HW_LINK_TRAINING("%s\n %x rate = %x\n %x lane = %x\n %x spread = %x\n", + __func__, + DP_LINK_BW_SET, + lt_settings->link_settings.link_rate, + DP_LANE_COUNT_SET, + lt_settings->link_settings.lane_count, + DP_DOWNSPREAD_CTRL, + lt_settings->link_settings.link_spread); + } else { + DC_LOG_HW_LINK_TRAINING("%s\n %x rate set = %x\n %x lane = %x\n %x spread = %x\n", + __func__, + DP_LINK_RATE_SET, + lt_settings->link_settings.link_rate_set, + DP_LANE_COUNT_SET, + lt_settings->link_settings.lane_count, + DP_DOWNSPREAD_CTRL, + lt_settings->link_settings.link_spread); + } } @@ -952,6 +963,8 @@ enum link_training_result dc_link_dp_perform_link_training( lt_settings.link_settings.link_rate = link_setting->link_rate; lt_settings.link_settings.lane_count = link_setting->lane_count; + lt_settings.link_settings.use_link_rate_set = link_setting->use_link_rate_set; + lt_settings.link_settings.link_rate_set = link_setting->link_rate_set; /*@todo[vdevulap] move SS to LS, should not be handled by displaypath*/ @@ -1075,7 +1088,7 @@ static struct dc_link_settings get_max_link_cap(struct dc_link *link) { /* Set Default link settings */ struct dc_link_settings max_link_cap = {LANE_COUNT_FOUR, LINK_RATE_HIGH, - LINK_SPREAD_05_DOWNSPREAD_30KHZ}; + LINK_SPREAD_05_DOWNSPREAD_30KHZ, false, 0}; /* Higher link settings based on feature supported */ if (link->link_enc->features.flags.bits.IS_HBR2_CAPABLE) @@ -1520,69 +1533,6 @@ static bool decide_fallback_link_setting( return true; } -static uint32_t bandwidth_in_kbps_from_timing( - const struct dc_crtc_timing *timing) -{ - uint32_t bits_per_channel = 0; - uint32_t kbps; - - switch (timing->display_color_depth) { - case COLOR_DEPTH_666: - bits_per_channel = 6; - break; - case COLOR_DEPTH_888: - bits_per_channel = 8; - break; - case COLOR_DEPTH_101010: - bits_per_channel = 10; - break; - case COLOR_DEPTH_121212: - bits_per_channel = 12; - break; - case COLOR_DEPTH_141414: - bits_per_channel = 14; - break; - case COLOR_DEPTH_161616: - bits_per_channel = 16; - break; - default: - break; - } - - ASSERT(bits_per_channel != 0); - - kbps = timing->pix_clk_100hz / 10; - kbps *= bits_per_channel; - - if (timing->flags.Y_ONLY != 1) { - /*Only YOnly make reduce bandwidth by 1/3 compares to RGB*/ - kbps *= 3; - if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) - kbps /= 2; - else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) - kbps = kbps * 2 / 3; - } - - return kbps; - -} - -static uint32_t bandwidth_in_kbps_from_link_settings( - const struct dc_link_settings *link_setting) -{ - uint32_t link_rate_in_kbps = link_setting->link_rate * - LINK_RATE_REF_FREQ_IN_KHZ; - - uint32_t lane_count = link_setting->lane_count; - uint32_t kbps = link_rate_in_kbps; - - kbps *= lane_count; - kbps *= 8; /* 8 bits per byte*/ - - return kbps; - -} - bool dp_validate_mode_timing( struct dc_link *link, const struct dc_crtc_timing *timing) @@ -1598,8 +1548,7 @@ bool dp_validate_mode_timing( timing->v_addressable == (uint32_t) 480) return true; - /* We always use verified link settings */ - link_setting = &link->verified_link_cap; + link_setting = dc_link_get_link_cap(link); /* TODO: DYNAMIC_VALIDATION needs to be implemented */ /*if (flags.DYNAMIC_VALIDATION == 1 && @@ -1607,8 +1556,8 @@ bool dp_validate_mode_timing( link_setting = &link->verified_link_cap; */ - req_bw = bandwidth_in_kbps_from_timing(timing); - max_bw = bandwidth_in_kbps_from_link_settings(link_setting); + req_bw = dc_bandwidth_in_kbps_from_timing(timing); + max_bw = dc_link_bandwidth_kbps(link, link_setting); if (req_bw <= max_bw) { /* remember the biggest mode here, during @@ -1629,58 +1578,78 @@ bool dp_validate_mode_timing( return false; } -void decide_link_settings(struct dc_stream_state *stream, - struct dc_link_settings *link_setting) +static bool decide_dp_link_settings(struct dc_link *link, struct dc_link_settings *link_setting, uint32_t req_bw) { - struct dc_link_settings initial_link_setting = { - LANE_COUNT_ONE, LINK_RATE_LOW, LINK_SPREAD_DISABLED}; + LANE_COUNT_ONE, LINK_RATE_LOW, LINK_SPREAD_DISABLED, false, 0}; struct dc_link_settings current_link_setting = initial_link_setting; - struct dc_link *link; - uint32_t req_bw; uint32_t link_bw; - req_bw = bandwidth_in_kbps_from_timing(&stream->timing); - - link = stream->link; - - /* if preferred is specified through AMDDP, use it, if it's enough - * to drive the mode + /* search for the minimum link setting that: + * 1. is supported according to the link training result + * 2. could support the b/w requested by the timing */ - if (link->preferred_link_setting.lane_count != - LANE_COUNT_UNKNOWN && - link->preferred_link_setting.link_rate != - LINK_RATE_UNKNOWN) { - *link_setting = link->preferred_link_setting; - return; - } + while (current_link_setting.link_rate <= + link->verified_link_cap.link_rate) { + link_bw = dc_link_bandwidth_kbps( + link, + ¤t_link_setting); + if (req_bw <= link_bw) { + *link_setting = current_link_setting; + return true; + } - /* MST doesn't perform link training for now - * TODO: add MST specific link training routine - */ - if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { - *link_setting = link->verified_link_cap; - return; + if (current_link_setting.lane_count < + link->verified_link_cap.lane_count) { + current_link_setting.lane_count = + increase_lane_count( + current_link_setting.lane_count); + } else { + current_link_setting.link_rate = + increase_link_rate( + current_link_setting.link_rate); + current_link_setting.lane_count = + initial_link_setting.lane_count; + } } - /* EDP use the link cap setting */ - if (link->connector_signal == SIGNAL_TYPE_EDP) { + return false; +} + +static bool decide_edp_link_settings(struct dc_link *link, struct dc_link_settings *link_setting, uint32_t req_bw) +{ + struct dc_link_settings initial_link_setting; + struct dc_link_settings current_link_setting; + uint32_t link_bw; + + if (link->dpcd_caps.dpcd_rev.raw < DPCD_REV_14 || + link->dpcd_caps.edp_supported_link_rates_count == 0 || + link->dc->config.optimize_edp_link_rate == false) { *link_setting = link->verified_link_cap; - return; + return true; } + memset(&initial_link_setting, 0, sizeof(initial_link_setting)); + initial_link_setting.lane_count = LANE_COUNT_ONE; + initial_link_setting.link_rate = link->dpcd_caps.edp_supported_link_rates[0]; + initial_link_setting.link_spread = LINK_SPREAD_DISABLED; + initial_link_setting.use_link_rate_set = true; + initial_link_setting.link_rate_set = 0; + current_link_setting = initial_link_setting; + /* search for the minimum link setting that: * 1. is supported according to the link training result * 2. could support the b/w requested by the timing */ while (current_link_setting.link_rate <= link->verified_link_cap.link_rate) { - link_bw = bandwidth_in_kbps_from_link_settings( + link_bw = dc_link_bandwidth_kbps( + link, ¤t_link_setting); if (req_bw <= link_bw) { *link_setting = current_link_setting; - return; + return true; } if (current_link_setting.lane_count < @@ -1689,13 +1658,53 @@ void decide_link_settings(struct dc_stream_state *stream, increase_lane_count( current_link_setting.lane_count); } else { - current_link_setting.link_rate = - increase_link_rate( - current_link_setting.link_rate); - current_link_setting.lane_count = - initial_link_setting.lane_count; + if (current_link_setting.link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) { + current_link_setting.link_rate_set++; + current_link_setting.link_rate = + link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set]; + current_link_setting.lane_count = + initial_link_setting.lane_count; + } else + break; } } + return false; +} + +void decide_link_settings(struct dc_stream_state *stream, + struct dc_link_settings *link_setting) +{ + struct dc_link *link; + uint32_t req_bw; + + req_bw = dc_bandwidth_in_kbps_from_timing(&stream->timing); + + link = stream->link; + + /* if preferred is specified through AMDDP, use it, if it's enough + * to drive the mode + */ + if (link->preferred_link_setting.lane_count != + LANE_COUNT_UNKNOWN && + link->preferred_link_setting.link_rate != + LINK_RATE_UNKNOWN) { + *link_setting = link->preferred_link_setting; + return; + } + + /* MST doesn't perform link training for now + * TODO: add MST specific link training routine + */ + if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { + *link_setting = link->verified_link_cap; + return; + } + + if (link->connector_signal == SIGNAL_TYPE_EDP) { + if (decide_edp_link_settings(link, link_setting, req_bw)) + return; + } else if (decide_dp_link_settings(link, link_setting, req_bw)) + return; BREAK_TO_DEBUGGER(); ASSERT(link->verified_link_cap.lane_count != LANE_COUNT_UNKNOWN); @@ -2155,11 +2164,7 @@ bool is_mst_supported(struct dc_link *link) bool is_dp_active_dongle(const struct dc_link *link) { - enum display_dongle_type dongle_type = link->dpcd_caps.dongle_type; - - return (dongle_type == DISPLAY_DONGLE_DP_VGA_CONVERTER) || - (dongle_type == DISPLAY_DONGLE_DP_DVI_CONVERTER) || - (dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER); + return link->dpcd_caps.is_branch_dev; } static int translate_dpcd_max_bpc(enum dpcd_downstream_port_max_bpc bpc) @@ -2180,6 +2185,30 @@ static int translate_dpcd_max_bpc(enum dpcd_downstream_port_max_bpc bpc) return -1; } +static void read_dp_device_vendor_id(struct dc_link *link) +{ + struct dp_device_vendor_id dp_id; + + /* read IEEE branch device id */ + core_link_read_dpcd( + link, + DP_BRANCH_OUI, + (uint8_t *)&dp_id, + sizeof(dp_id)); + + link->dpcd_caps.branch_dev_id = + (dp_id.ieee_oui[0] << 16) + + (dp_id.ieee_oui[1] << 8) + + dp_id.ieee_oui[2]; + + memmove( + link->dpcd_caps.branch_dev_name, + dp_id.ieee_device_id, + sizeof(dp_id.ieee_device_id)); +} + + + static void get_active_converter_info( uint8_t data, struct dc_link *link) { @@ -2193,6 +2222,9 @@ static void get_active_converter_info( return; } + /* DPCD 0x5 bit 0 = 1, it indicate it's branch device */ + link->dpcd_caps.is_branch_dev = ds_port.fields.PORT_PRESENT; + switch (ds_port.fields.PORT_TYPE) { case DOWNSTREAM_VGA: link->dpcd_caps.dongle_type = DISPLAY_DONGLE_DP_VGA_CONVERTER; @@ -2234,8 +2266,8 @@ static void get_active_converter_info( hdmi_caps = {.raw = det_caps[3] }; union dwnstream_port_caps_byte2 hdmi_color_caps = {.raw = det_caps[2] }; - link->dpcd_caps.dongle_caps.dp_hdmi_max_pixel_clk = - det_caps[1] * 25000; + link->dpcd_caps.dongle_caps.dp_hdmi_max_pixel_clk_in_khz = + det_caps[1] * 2500; link->dpcd_caps.dongle_caps.is_dp_hdmi_s3d_converter = hdmi_caps.bits.FRAME_SEQ_TO_FRAME_PACK; @@ -2252,7 +2284,7 @@ static void get_active_converter_info( translate_dpcd_max_bpc( hdmi_color_caps.bits.MAX_BITS_PER_COLOR_COMPONENT); - if (link->dpcd_caps.dongle_caps.dp_hdmi_max_pixel_clk != 0) + if (link->dpcd_caps.dongle_caps.dp_hdmi_max_pixel_clk_in_khz != 0) link->dpcd_caps.dongle_caps.extendedCapValid = true; } @@ -2263,27 +2295,6 @@ static void get_active_converter_info( ddc_service_set_dongle_type(link->ddc, link->dpcd_caps.dongle_type); { - struct dp_device_vendor_id dp_id; - - /* read IEEE branch device id */ - core_link_read_dpcd( - link, - DP_BRANCH_OUI, - (uint8_t *)&dp_id, - sizeof(dp_id)); - - link->dpcd_caps.branch_dev_id = - (dp_id.ieee_oui[0] << 16) + - (dp_id.ieee_oui[1] << 8) + - dp_id.ieee_oui[2]; - - memmove( - link->dpcd_caps.branch_dev_name, - dp_id.ieee_device_id, - sizeof(dp_id.ieee_device_id)); - } - - { struct dp_sink_hw_fw_revision dp_hw_fw_revision; core_link_read_dpcd( @@ -2347,6 +2358,10 @@ static bool retrieve_link_cap(struct dc_link *link) { uint8_t dpcd_data[DP_ADAPTER_CAP - DP_DPCD_REV + 1]; + /*Only need to read 1 byte starting from DP_DPRX_FEATURE_ENUMERATION_LIST. + */ + uint8_t dpcd_dprx_data = '\0'; + struct dp_device_vendor_id sink_id; union down_stream_port_count down_strm_port_count; union edp_configuration_cap edp_config_cap; @@ -2383,7 +2398,10 @@ static bool retrieve_link_cap(struct dc_link *link) aux_rd_interval.raw = dpcd_data[DP_TRAINING_AUX_RD_INTERVAL]; - if (aux_rd_interval.bits.EXT_RECIEVER_CAP_FIELD_PRESENT == 1) { + link->dpcd_caps.ext_receiver_cap_field_present = + aux_rd_interval.bits.EXT_RECEIVER_CAP_FIELD_PRESENT == 1 ? true:false; + + if (aux_rd_interval.bits.EXT_RECEIVER_CAP_FIELD_PRESENT == 1) { uint8_t ext_cap_data[16]; memset(ext_cap_data, '\0', sizeof(ext_cap_data)); @@ -2404,11 +2422,44 @@ static bool retrieve_link_cap(struct dc_link *link) } link->dpcd_caps.dpcd_rev.raw = - dpcd_data[DP_DPCD_REV - DP_DPCD_REV]; + dpcd_data[DP_DPCD_REV - DP_DPCD_REV]; + + if (link->dpcd_caps.dpcd_rev.raw >= 0x14) { + for (i = 0; i < read_dpcd_retry_cnt; i++) { + status = core_link_read_dpcd( + link, + DP_DPRX_FEATURE_ENUMERATION_LIST, + &dpcd_dprx_data, + sizeof(dpcd_dprx_data)); + if (status == DC_OK) + break; + } + + link->dpcd_caps.dprx_feature.raw = dpcd_dprx_data; + + if (status != DC_OK) + dm_error("%s: Read DPRX caps data failed.\n", __func__); + } + + else { + link->dpcd_caps.dprx_feature.raw = 0; + } + + + /* Error condition checking... + * It is impossible for Sink to report Max Lane Count = 0. + * It is possible for Sink to report Max Link Rate = 0, if it is + * an eDP device that is reporting specialized link rates in the + * SUPPORTED_LINK_RATE table. + */ + if (dpcd_data[DP_MAX_LANE_COUNT - DP_DPCD_REV] == 0) + return false; ds_port.byte = dpcd_data[DP_DOWNSTREAMPORT_PRESENT - DP_DPCD_REV]; + read_dp_device_vendor_id(link); + get_active_converter_info(ds_port.byte, link); dp_wa_power_up_0010FA(link, dpcd_data, sizeof(dpcd_data)); @@ -2536,31 +2587,31 @@ enum dc_link_rate linkRateInKHzToLinkRateMultiplier(uint32_t link_rate_in_khz) void detect_edp_sink_caps(struct dc_link *link) { - uint8_t supported_link_rates[16] = {0}; + uint8_t supported_link_rates[16]; uint32_t entry; uint32_t link_rate_in_khz; enum dc_link_rate link_rate = LINK_RATE_UNKNOWN; retrieve_link_cap(link); + link->dpcd_caps.edp_supported_link_rates_count = 0; + memset(supported_link_rates, 0, sizeof(supported_link_rates)); - if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_14) { + if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_14 && + link->dc->config.optimize_edp_link_rate) { // Read DPCD 00010h - 0001Fh 16 bytes at one shot core_link_read_dpcd(link, DP_SUPPORTED_LINK_RATES, supported_link_rates, sizeof(supported_link_rates)); - link->dpcd_caps.link_rate_set = 0; for (entry = 0; entry < 16; entry += 2) { // DPCD register reports per-lane link rate = 16-bit link rate capability - // value X 200 kHz. Need multipler to find link rate in kHz. + // value X 200 kHz. Need multiplier to find link rate in kHz. link_rate_in_khz = (supported_link_rates[entry+1] * 0x100 + supported_link_rates[entry]) * 200; if (link_rate_in_khz != 0) { link_rate = linkRateInKHzToLinkRateMultiplier(link_rate_in_khz); - if (link->reported_link_cap.link_rate < link_rate) { - link->reported_link_cap.link_rate = link_rate; - link->dpcd_caps.link_rate_set = entry; - } + link->dpcd_caps.edp_supported_link_rates[link->dpcd_caps.edp_supported_link_rates_count] = link_rate; + link->dpcd_caps.edp_supported_link_rates_count++; } } } @@ -2601,6 +2652,7 @@ static void set_crtc_test_pattern(struct dc_link *link, enum dc_color_depth color_depth = pipe_ctx-> stream->timing.display_color_depth; struct bit_depth_reduction_params params; + struct output_pixel_processor *opp = pipe_ctx->stream_res.opp; memset(¶ms, 0, sizeof(params)); @@ -2640,8 +2692,7 @@ static void set_crtc_test_pattern(struct dc_link *link, { /* disable bit depth reduction */ pipe_ctx->stream->bit_depth_params = params; - pipe_ctx->stream_res.opp->funcs-> - opp_program_bit_depth_reduction(pipe_ctx->stream_res.opp, ¶ms); + opp->funcs->opp_program_bit_depth_reduction(opp, ¶ms); if (pipe_ctx->stream_res.tg->funcs->set_test_pattern) pipe_ctx->stream_res.tg->funcs->set_test_pattern(pipe_ctx->stream_res.tg, controller_test_pattern, color_depth); @@ -2650,11 +2701,9 @@ static void set_crtc_test_pattern(struct dc_link *link, case DP_TEST_PATTERN_VIDEO_MODE: { /* restore bitdepth reduction */ - resource_build_bit_depth_reduction_params(pipe_ctx->stream, - ¶ms); + resource_build_bit_depth_reduction_params(pipe_ctx->stream, ¶ms); pipe_ctx->stream->bit_depth_params = params; - pipe_ctx->stream_res.opp->funcs-> - opp_program_bit_depth_reduction(pipe_ctx->stream_res.opp, ¶ms); + opp->funcs->opp_program_bit_depth_reduction(opp, ¶ms); if (pipe_ctx->stream_res.tg->funcs->set_test_pattern) pipe_ctx->stream_res.tg->funcs->set_test_pattern(pipe_ctx->stream_res.tg, CONTROLLER_DP_TEST_PATTERN_VIDEOMODE, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c index f7f7515f65f4..b0dea759cd86 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c @@ -58,6 +58,8 @@ void dp_enable_link_phy( const struct dc_link_settings *link_settings) { struct link_encoder *link_enc = link->link_enc; + struct dc *core_dc = link->ctx->dc; + struct dmcu *dmcu = core_dc->res_pool->dmcu; struct pipe_ctx *pipes = link->dc->current_state->res_ctx.pipe_ctx; @@ -84,6 +86,9 @@ void dp_enable_link_phy( } } + if (dmcu != NULL && dmcu->funcs->lock_phy) + dmcu->funcs->lock_phy(dmcu); + if (dc_is_dp_sst_signal(signal)) { link_enc->funcs->enable_dp_output( link_enc, @@ -95,6 +100,10 @@ void dp_enable_link_phy( link_settings, clock_source); } + + if (dmcu != NULL && dmcu->funcs->unlock_phy) + dmcu->funcs->unlock_phy(dmcu); + link->cur_link_settings = *link_settings; dp_receiver_power_ctrl(link, true); @@ -150,15 +159,25 @@ bool edp_receiver_ready_T7(struct dc_link *link) void dp_disable_link_phy(struct dc_link *link, enum signal_type signal) { + struct dc *core_dc = link->ctx->dc; + struct dmcu *dmcu = core_dc->res_pool->dmcu; + if (!link->wa_flags.dp_keep_receiver_powered) dp_receiver_power_ctrl(link, false); if (signal == SIGNAL_TYPE_EDP) { link->link_enc->funcs->disable_output(link->link_enc, signal); link->dc->hwss.edp_power_control(link, false); - } else + } else { + if (dmcu != NULL && dmcu->funcs->lock_phy) + dmcu->funcs->lock_phy(dmcu); + link->link_enc->funcs->disable_output(link->link_enc, signal); + if (dmcu != NULL && dmcu->funcs->unlock_phy) + dmcu->funcs->unlock_phy(dmcu); + } + /* Clear current link setting.*/ memset(&link->cur_link_settings, 0, sizeof(link->cur_link_settings)); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 349ab8017776..eac7186e4f08 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -31,6 +31,8 @@ #include "opp.h" #include "timing_generator.h" #include "transform.h" +#include "dccg.h" +#include "dchubbub.h" #include "dpp.h" #include "core_types.h" #include "set_mode_types.h" @@ -104,44 +106,43 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id) return dc_version; } -struct resource_pool *dc_create_resource_pool( - struct dc *dc, - int num_virtual_links, - enum dce_version dc_version, - struct hw_asic_id asic_id) +struct resource_pool *dc_create_resource_pool(struct dc *dc, + const struct dc_init_data *init_data, + enum dce_version dc_version) { struct resource_pool *res_pool = NULL; switch (dc_version) { case DCE_VERSION_8_0: res_pool = dce80_create_resource_pool( - num_virtual_links, dc); + init_data->num_virtual_links, dc); break; case DCE_VERSION_8_1: res_pool = dce81_create_resource_pool( - num_virtual_links, dc); + init_data->num_virtual_links, dc); break; case DCE_VERSION_8_3: res_pool = dce83_create_resource_pool( - num_virtual_links, dc); + init_data->num_virtual_links, dc); break; case DCE_VERSION_10_0: res_pool = dce100_create_resource_pool( - num_virtual_links, dc); + init_data->num_virtual_links, dc); break; case DCE_VERSION_11_0: res_pool = dce110_create_resource_pool( - num_virtual_links, dc, asic_id); + init_data->num_virtual_links, dc, + init_data->asic_id); break; case DCE_VERSION_11_2: case DCE_VERSION_11_22: res_pool = dce112_create_resource_pool( - num_virtual_links, dc); + init_data->num_virtual_links, dc); break; case DCE_VERSION_12_0: case DCE_VERSION_12_1: res_pool = dce120_create_resource_pool( - num_virtual_links, dc); + init_data->num_virtual_links, dc); break; #if defined(CONFIG_DRM_AMD_DC_DCN1_0) @@ -149,8 +150,7 @@ struct resource_pool *dc_create_resource_pool( #if defined(CONFIG_DRM_AMD_DC_DCN1_01) case DCN_VERSION_1_01: #endif - res_pool = dcn10_create_resource_pool( - num_virtual_links, dc); + res_pool = dcn10_create_resource_pool(init_data, dc); break; #endif @@ -163,7 +163,28 @@ struct resource_pool *dc_create_resource_pool( if (dc->ctx->dc_bios->funcs->get_firmware_info( dc->ctx->dc_bios, &fw_info) == BP_RESULT_OK) { - res_pool->ref_clock_inKhz = fw_info.pll_info.crystal_frequency; + res_pool->ref_clocks.xtalin_clock_inKhz = fw_info.pll_info.crystal_frequency; + + if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { + // On FPGA these dividers are currently not configured by GDB + res_pool->ref_clocks.dccg_ref_clock_inKhz = res_pool->ref_clocks.xtalin_clock_inKhz; + res_pool->ref_clocks.dchub_ref_clock_inKhz = res_pool->ref_clocks.xtalin_clock_inKhz; + } else if (res_pool->dccg && res_pool->hubbub) { + // If DCCG reference frequency cannot be determined (usually means not set to xtalin) then this is a critical error + // as this value must be known for DCHUB programming + (res_pool->dccg->funcs->get_dccg_ref_freq)(res_pool->dccg, + fw_info.pll_info.crystal_frequency, + &res_pool->ref_clocks.dccg_ref_clock_inKhz); + + // Similarly, if DCHUB reference frequency cannot be determined, then it is also a critical error + (res_pool->hubbub->funcs->get_dchub_ref_freq)(res_pool->hubbub, + res_pool->ref_clocks.dccg_ref_clock_inKhz, + &res_pool->ref_clocks.dchub_ref_clock_inKhz); + } else { + // Not all ASICs have DCCG sw component + res_pool->ref_clocks.dccg_ref_clock_inKhz = res_pool->ref_clocks.xtalin_clock_inKhz; + res_pool->ref_clocks.dchub_ref_clock_inKhz = res_pool->ref_clocks.xtalin_clock_inKhz; + } } else ASSERT_CRITICAL(false); } @@ -260,6 +281,7 @@ bool resource_construct( pool->stream_enc_count++; } } + dc->caps.dynamic_audio = false; if (pool->audio_count < pool->stream_enc_count) { dc->caps.dynamic_audio = true; @@ -1014,24 +1036,60 @@ enum dc_status resource_build_scaling_params_for_context( struct pipe_ctx *find_idle_secondary_pipe( struct resource_context *res_ctx, - const struct resource_pool *pool) + const struct resource_pool *pool, + const struct pipe_ctx *primary_pipe) { int i; struct pipe_ctx *secondary_pipe = NULL; /* - * search backwards for the second pipe to keep pipe - * assignment more consistent + * We add a preferred pipe mapping to avoid the chance that + * MPCCs already in use will need to be reassigned to other trees. + * For example, if we went with the strict, assign backwards logic: + * + * (State 1) + * Display A on, no surface, top pipe = 0 + * Display B on, no surface, top pipe = 1 + * + * (State 2) + * Display A on, no surface, top pipe = 0 + * Display B on, surface enable, top pipe = 1, bottom pipe = 5 + * + * (State 3) + * Display A on, surface enable, top pipe = 0, bottom pipe = 5 + * Display B on, surface enable, top pipe = 1, bottom pipe = 4 + * + * The state 2->3 transition requires remapping MPCC 5 from display B + * to display A. + * + * However, with the preferred pipe logic, state 2 would look like: + * + * (State 2) + * Display A on, no surface, top pipe = 0 + * Display B on, surface enable, top pipe = 1, bottom pipe = 4 + * + * This would then cause 2->3 to not require remapping any MPCCs. */ - - for (i = pool->pipe_count - 1; i >= 0; i--) { - if (res_ctx->pipe_ctx[i].stream == NULL) { - secondary_pipe = &res_ctx->pipe_ctx[i]; - secondary_pipe->pipe_idx = i; - break; + if (primary_pipe) { + int preferred_pipe_idx = (pool->pipe_count - 1) - primary_pipe->pipe_idx; + if (res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) { + secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx]; + secondary_pipe->pipe_idx = preferred_pipe_idx; } } + /* + * search backwards for the second pipe to keep pipe + * assignment more consistent + */ + if (!secondary_pipe) + for (i = pool->pipe_count - 1; i >= 0; i--) { + if (res_ctx->pipe_ctx[i].stream == NULL) { + secondary_pipe = &res_ctx->pipe_ctx[i]; + secondary_pipe->pipe_idx = i; + break; + } + } return secondary_pipe; } @@ -1214,6 +1272,9 @@ bool dc_add_plane_to_context( free_pipe->clock_source = tail_pipe->clock_source; free_pipe->top_pipe = tail_pipe; tail_pipe->bottom_pipe = free_pipe; + } else if (free_pipe->bottom_pipe && free_pipe->bottom_pipe->plane_state == NULL) { + ASSERT(free_pipe->bottom_pipe->stream_res.opp != free_pipe->stream_res.opp); + free_pipe->bottom_pipe->plane_state = plane_state; } /* assign new surfaces*/ @@ -1224,6 +1285,35 @@ bool dc_add_plane_to_context( return true; } +struct pipe_ctx *dc_res_get_odm_bottom_pipe(struct pipe_ctx *pipe_ctx) +{ + struct pipe_ctx *bottom_pipe = pipe_ctx->bottom_pipe; + + /* ODM should only be updated once per otg */ + if (pipe_ctx->top_pipe) + return NULL; + + while (bottom_pipe) { + if (bottom_pipe->stream_res.opp != pipe_ctx->stream_res.opp) + break; + bottom_pipe = bottom_pipe->bottom_pipe; + } + + return bottom_pipe; +} + +bool dc_res_is_odm_head_pipe(struct pipe_ctx *pipe_ctx) +{ + struct pipe_ctx *top_pipe = pipe_ctx->top_pipe; + + if (!top_pipe) + return false; + if (top_pipe && top_pipe->stream_res.opp == pipe_ctx->stream_res.opp) + return false; + + return true; +} + bool dc_remove_plane_from_context( const struct dc *dc, struct dc_stream_state *stream, @@ -1247,10 +1337,14 @@ bool dc_remove_plane_from_context( /* release pipe for plane*/ for (i = pool->pipe_count - 1; i >= 0; i--) { - struct pipe_ctx *pipe_ctx; + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - if (context->res_ctx.pipe_ctx[i].plane_state == plane_state) { - pipe_ctx = &context->res_ctx.pipe_ctx[i]; + if (pipe_ctx->plane_state == plane_state) { + if (dc_res_is_odm_head_pipe(pipe_ctx)) { + pipe_ctx->plane_state = NULL; + pipe_ctx->bottom_pipe = NULL; + continue; + } if (pipe_ctx->top_pipe) pipe_ctx->top_pipe->bottom_pipe = pipe_ctx->bottom_pipe; @@ -1268,8 +1362,9 @@ bool dc_remove_plane_from_context( */ if (!pipe_ctx->top_pipe) { pipe_ctx->plane_state = NULL; - pipe_ctx->bottom_pipe = NULL; - } else { + if (!dc_res_get_odm_bottom_pipe(pipe_ctx)) + pipe_ctx->bottom_pipe = NULL; + } else { memset(pipe_ctx, 0, sizeof(*pipe_ctx)); } } @@ -1674,6 +1769,9 @@ enum dc_status dc_remove_stream_from_ctx( for (i = 0; i < MAX_PIPES; i++) { if (new_ctx->res_ctx.pipe_ctx[i].stream == stream && !new_ctx->res_ctx.pipe_ctx[i].top_pipe) { + struct pipe_ctx *odm_pipe = + dc_res_get_odm_bottom_pipe(&new_ctx->res_ctx.pipe_ctx[i]); + del_pipe = &new_ctx->res_ctx.pipe_ctx[i]; ASSERT(del_pipe->stream_res.stream_enc); @@ -1698,6 +1796,8 @@ enum dc_status dc_remove_stream_from_ctx( dc->res_pool->funcs->remove_stream_from_ctx(dc, new_ctx, stream); memset(del_pipe, 0, sizeof(*del_pipe)); + if (odm_pipe) + memset(odm_pipe, 0, sizeof(*odm_pipe)); break; } @@ -1855,6 +1955,7 @@ enum dc_status resource_map_pool_resources( struct dc_context *dc_ctx = dc->ctx; struct pipe_ctx *pipe_ctx = NULL; int pipe_idx = -1; + struct dc_bios *dcb = dc->ctx->dc_bios; /* TODO Check if this is needed */ /*if (!resource_is_stream_unchanged(old_context, stream)) { @@ -1869,6 +1970,13 @@ enum dc_status resource_map_pool_resources( calculate_phy_pix_clks(stream); + /* TODO: Check Linux */ + if (dc->config.allow_seamless_boot_optimization && + !dcb->funcs->is_accelerated_mode(dcb)) { + if (dc_validate_seamless_boot_timing(dc, stream->sink, &stream->timing)) + stream->apply_seamless_boot_optimization = true; + } + if (stream->apply_seamless_boot_optimization) pipe_idx = acquire_resource_from_hw_enabled_state( &context->res_ctx, @@ -1951,7 +2059,7 @@ void dc_resource_state_construct( const struct dc *dc, struct dc_state *dst_ctx) { - dst_ctx->dccg = dc->res_pool->clk_mgr; + dst_ctx->clk_mgr = dc->res_pool->clk_mgr; } /** @@ -1959,12 +2067,14 @@ void dc_resource_state_construct( * Checks HW resource availability and bandwidth requirement. * @dc: dc struct for this driver * @new_ctx: state to be validated + * @fast_validate: set to true if only yes/no to support matters * * Return: DC_OK if the result can be programmed. Otherwise, an error code. */ enum dc_status dc_validate_global_state( struct dc *dc, - struct dc_state *new_ctx) + struct dc_state *new_ctx, + bool fast_validate) { enum dc_status result = DC_ERROR_UNEXPECTED; int i, j; @@ -2019,7 +2129,7 @@ enum dc_status dc_validate_global_state( result = resource_build_scaling_params_for_context(dc, new_ctx); if (result == DC_OK) - if (!dc->res_pool->funcs->validate_bandwidth(dc, new_ctx)) + if (!dc->res_pool->funcs->validate_bandwidth(dc, new_ctx, fast_validate)) result = DC_FAIL_BANDWIDTH_VALIDATE; return result; @@ -2315,6 +2425,21 @@ static void set_spd_info_packet( *info_packet = stream->vrr_infopacket; } +static void set_dp_sdp_info_packet( + struct dc_info_packet *info_packet, + struct dc_stream_state *stream) +{ + /* SPD info packet for custom sdp message */ + + /* Return if false. If true, + * set the corresponding bit in the info packet + */ + if (!stream->dpsdp_infopacket.valid) + return; + + *info_packet = stream->dpsdp_infopacket; +} + static void set_hdr_static_info_packet( struct dc_info_packet *info_packet, struct dc_stream_state *stream) @@ -2411,6 +2536,7 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx) info->spd.valid = false; info->hdrsmd.valid = false; info->vsc.valid = false; + info->dpsdp.valid = false; signal = pipe_ctx->stream->signal; @@ -2430,6 +2556,8 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx) set_spd_info_packet(&info->spd, pipe_ctx->stream); set_hdr_static_info_packet(&info->hdrsmd, pipe_ctx->stream); + + set_dp_sdp_info_packet(&info->dpsdp, pipe_ctx->stream); } patch_gamut_packet_checksum(&info->gamut); @@ -2657,10 +2785,11 @@ enum dc_status dc_validate_stream(struct dc *dc, struct dc_stream_state *stream) if (!tg->funcs->validate_timing(tg, &stream->timing)) res = DC_FAIL_CONTROLLER_VALIDATE; - if (res == DC_OK) + if (res == DC_OK) { if (!link->link_enc->funcs->validate_output_with_stream( link->link_enc, stream)) res = DC_FAIL_ENC_VALIDATE; + } /* TODO: validate audio ASIC caps, encoder */ diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 996298c35f42..96e97d25d639 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -29,6 +29,9 @@ #include "resource.h" #include "ipp.h" #include "timing_generator.h" +#if defined(CONFIG_DRM_AMD_DC_DCN1_0) +#include "dcn10/dcn10_hw_sequencer.h" +#endif #define DC_LOGGER dc->ctx->logger @@ -160,6 +163,27 @@ struct dc_stream_state *dc_create_stream_for_sink( return stream; } +struct dc_stream_state *dc_copy_stream(const struct dc_stream_state *stream) +{ + struct dc_stream_state *new_stream; + + new_stream = kzalloc(sizeof(struct dc_stream_state), GFP_KERNEL); + if (!new_stream) + return NULL; + + memcpy(new_stream, stream, sizeof(struct dc_stream_state)); + + if (new_stream->sink) + dc_sink_retain(new_stream->sink); + + if (new_stream->out_transfer_func) + dc_transfer_func_retain(new_stream->out_transfer_func); + + kref_init(&new_stream->refcount); + + return new_stream; +} + /** * dc_stream_get_status_from_state - Get stream status from given dc state * @state: DC state to find the stream status in @@ -196,6 +220,35 @@ struct dc_stream_status *dc_stream_get_status( return dc_stream_get_status_from_state(dc->current_state, stream); } +static void delay_cursor_until_vupdate(struct pipe_ctx *pipe_ctx, struct dc *dc) +{ +#if defined(CONFIG_DRM_AMD_DC_DCN1_0) + unsigned int vupdate_line; + unsigned int lines_to_vupdate, us_to_vupdate, vpos, nvpos; + struct dc_stream_state *stream = pipe_ctx->stream; + unsigned int us_per_line; + + if (stream->ctx->asic_id.chip_family == FAMILY_RV && + ASIC_REV_IS_RAVEN(stream->ctx->asic_id.hw_internal_rev)) { + + vupdate_line = get_vupdate_offset_from_vsync(pipe_ctx); + if (!dc_stream_get_crtc_position(dc, &stream, 1, &vpos, &nvpos)) + return; + + if (vpos >= vupdate_line) + return; + + us_per_line = stream->timing.h_total * 10000 / stream->timing.pix_clk_100hz; + lines_to_vupdate = vupdate_line - vpos; + us_to_vupdate = lines_to_vupdate * us_per_line; + + /* 70 us is a conservative estimate of cursor update time*/ + if (us_to_vupdate < 70) + udelay(us_to_vupdate); + } +#endif +} + /** * dc_stream_set_cursor_attributes() - Update cursor attributes and set cursor surface address */ @@ -234,6 +287,8 @@ bool dc_stream_set_cursor_attributes( if (!pipe_to_program) { pipe_to_program = pipe_ctx; + + delay_cursor_until_vupdate(pipe_ctx, core_dc); core_dc->hwss.pipe_control_lock(core_dc, pipe_to_program, true); } @@ -278,11 +333,13 @@ bool dc_stream_set_cursor_position( (!pipe_ctx->plane_res.mi && !pipe_ctx->plane_res.hubp) || !pipe_ctx->plane_state || (!pipe_ctx->plane_res.xfm && !pipe_ctx->plane_res.dpp) || - !pipe_ctx->plane_res.ipp) + (!pipe_ctx->plane_res.ipp && !pipe_ctx->plane_res.dpp)) continue; if (!pipe_to_program) { pipe_to_program = pipe_ctx; + + delay_cursor_until_vupdate(pipe_ctx, core_dc); core_dc->hwss.pipe_control_lock(core_dc, pipe_to_program, true); } @@ -314,6 +371,68 @@ uint32_t dc_stream_get_vblank_counter(const struct dc_stream_state *stream) return 0; } +static void build_dp_sdp_info_frame(struct pipe_ctx *pipe_ctx, + const uint8_t *custom_sdp_message, + unsigned int sdp_message_size) +{ + uint8_t i; + struct encoder_info_frame *info = &pipe_ctx->stream_res.encoder_info_frame; + + /* set valid info */ + info->dpsdp.valid = true; + + /* set sdp message header */ + info->dpsdp.hb0 = custom_sdp_message[0]; /* package id */ + info->dpsdp.hb1 = custom_sdp_message[1]; /* package type */ + info->dpsdp.hb2 = custom_sdp_message[2]; /* package specific byte 0 any data */ + info->dpsdp.hb3 = custom_sdp_message[3]; /* package specific byte 0 any data */ + + /* set sdp message data */ + for (i = 0; i < 32; i++) + info->dpsdp.sb[i] = (custom_sdp_message[i+4]); + +} + +static void invalid_dp_sdp_info_frame(struct pipe_ctx *pipe_ctx) +{ + struct encoder_info_frame *info = &pipe_ctx->stream_res.encoder_info_frame; + + /* in-valid info */ + info->dpsdp.valid = false; +} + +bool dc_stream_send_dp_sdp(const struct dc_stream_state *stream, + const uint8_t *custom_sdp_message, + unsigned int sdp_message_size) +{ + int i; + struct dc *core_dc; + struct resource_context *res_ctx; + + if (stream == NULL) { + dm_error("DC: dc_stream is NULL!\n"); + return false; + } + + core_dc = stream->ctx->dc; + res_ctx = &core_dc->current_state->res_ctx; + + for (i = 0; i < MAX_PIPES; i++) { + struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i]; + + if (pipe_ctx->stream != stream) + continue; + + build_dp_sdp_info_frame(pipe_ctx, custom_sdp_message, sdp_message_size); + + core_dc->hwss.update_info_frame(pipe_ctx); + + invalid_dp_sdp_info_frame(pipe_ctx); + } + + return true; +} + bool dc_stream_get_scanoutpos(const struct dc_stream_state *stream, uint32_t *v_blank_start, uint32_t *v_blank_end, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c index ee6bd50f60b8..a5e86f9b148f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c @@ -119,6 +119,19 @@ const struct dc_plane_status *dc_plane_get_status( if (core_dc->current_state == NULL) return NULL; + /* Find the current plane state and set its pending bit to false */ + for (i = 0; i < core_dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = + &core_dc->current_state->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->plane_state != plane_state) + continue; + + pipe_ctx->plane_state->status.is_flip_pending = false; + + break; + } + for (i = 0; i < core_dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &core_dc->current_state->res_ctx.pipe_ctx[i]; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 0515095574e7..44e4b0465587 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -39,9 +39,10 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.17" +#define DC_VER "3.2.27" #define MAX_SURFACES 3 +#define MAX_PLANES 6 #define MAX_STREAMS 6 #define MAX_SINKS_PER_LINK 4 @@ -53,6 +54,41 @@ struct dc_versions { struct dmcu_version dmcu_version; }; +enum dc_plane_type { + DC_PLANE_TYPE_INVALID, + DC_PLANE_TYPE_DCE_RGB, + DC_PLANE_TYPE_DCE_UNDERLAY, + DC_PLANE_TYPE_DCN_UNIVERSAL, +}; + +struct dc_plane_cap { + enum dc_plane_type type; + uint32_t blends_with_above : 1; + uint32_t blends_with_below : 1; + uint32_t per_pixel_alpha : 1; + struct { + uint32_t argb8888 : 1; + uint32_t nv12 : 1; + uint32_t fp16 : 1; + } pixel_format_support; + // max upscaling factor x1000 + // upscaling factors are always >= 1 + // for example, 1080p -> 8K is 4.0, or 4000 raw value + struct { + uint32_t argb8888; + uint32_t nv12; + uint32_t fp16; + } max_upscale_factor; + // max downscale factor x1000 + // downscale factors are always <= 1 + // for example, 8K -> 1080p is 0.25, or 250 raw value + struct { + uint32_t argb8888; + uint32_t nv12; + uint32_t fp16; + } max_downscale_factor; +}; + struct dc_caps { uint32_t max_streams; uint32_t max_links; @@ -73,6 +109,7 @@ struct dc_caps { bool force_dp_tps4_for_cp2520; bool disable_dp_clk_share; bool psp_setup_panel_mode; + struct dc_plane_cap planes[MAX_PLANES]; }; struct dc_dcc_surface_param { @@ -164,6 +201,10 @@ struct dc_config { bool gpu_vm_support; bool disable_disp_pll_sharing; bool fbc_support; + bool optimize_edp_link_rate; + bool disable_fractional_pwm; + bool allow_seamless_boot_optimization; + bool power_down_display_on_boot; }; enum visual_confirm { @@ -203,7 +244,59 @@ struct dc_clocks { int fclk_khz; int phyclk_khz; int dramclk_khz; -}; + bool p_state_change_support; +}; + +struct dc_bw_validation_profile { + bool enable; + + unsigned long long total_ticks; + unsigned long long voltage_level_ticks; + unsigned long long watermark_ticks; + unsigned long long rq_dlg_ticks; + + unsigned long long total_count; + unsigned long long skip_fast_count; + unsigned long long skip_pass_count; + unsigned long long skip_fail_count; +}; + +#define BW_VAL_TRACE_SETUP() \ + unsigned long long end_tick = 0; \ + unsigned long long voltage_level_tick = 0; \ + unsigned long long watermark_tick = 0; \ + unsigned long long start_tick = dc->debug.bw_val_profile.enable ? \ + dm_get_timestamp(dc->ctx) : 0 + +#define BW_VAL_TRACE_COUNT() \ + if (dc->debug.bw_val_profile.enable) \ + dc->debug.bw_val_profile.total_count++ + +#define BW_VAL_TRACE_SKIP(status) \ + if (dc->debug.bw_val_profile.enable) { \ + if (!voltage_level_tick) \ + voltage_level_tick = dm_get_timestamp(dc->ctx); \ + dc->debug.bw_val_profile.skip_ ## status ## _count++; \ + } + +#define BW_VAL_TRACE_END_VOLTAGE_LEVEL() \ + if (dc->debug.bw_val_profile.enable) \ + voltage_level_tick = dm_get_timestamp(dc->ctx) + +#define BW_VAL_TRACE_END_WATERMARKS() \ + if (dc->debug.bw_val_profile.enable) \ + watermark_tick = dm_get_timestamp(dc->ctx) + +#define BW_VAL_TRACE_FINISH() \ + if (dc->debug.bw_val_profile.enable) { \ + end_tick = dm_get_timestamp(dc->ctx); \ + dc->debug.bw_val_profile.total_ticks += end_tick - start_tick; \ + dc->debug.bw_val_profile.voltage_level_ticks += voltage_level_tick - start_tick; \ + if (watermark_tick) { \ + dc->debug.bw_val_profile.watermark_ticks += watermark_tick - voltage_level_tick; \ + dc->debug.bw_val_profile.rq_dlg_ticks += end_tick - watermark_tick; \ + } \ + } struct dc_debug_options { enum visual_confirm visual_confirm; @@ -257,6 +350,8 @@ struct dc_debug_options { bool skip_detection_link_training; unsigned int force_odm_combine; //bit vector based on otg inst unsigned int force_fclk_khz; + bool disable_tri_buf; + struct dc_bw_validation_profile bw_val_profile; }; struct dc_debug_data { @@ -265,6 +360,14 @@ struct dc_debug_data { uint32_t auxErrorCount; }; +struct dc_bounding_box_overrides { + int sr_exit_time_ns; + int sr_enter_plus_exit_time_ns; + int urgent_latency_ns; + int percent_of_ideal_drambw; + int dram_clock_change_latency_ns; +}; + struct dc_state; struct resource_pool; struct dce_hwseq; @@ -274,6 +377,7 @@ struct dc { struct dc_cap_funcs cap_funcs; struct dc_config config; struct dc_debug_options debug; + struct dc_bounding_box_overrides bb_overrides; struct dc_context *ctx; uint8_t link_count; @@ -298,8 +402,12 @@ struct dc { struct hw_sequencer_funcs hwss; struct dce_hwseq *hwseq; + /* Require to optimize clocks and bandwidth for added/removed planes */ bool optimized_required; + /* Require to maintain clocks and bandwidth for UEFI enabled HW */ + bool optimize_seamless_boot; + /* FBC compressor */ struct compressor *fbc_compressor; @@ -327,6 +435,7 @@ struct dc_init_data { struct hw_asic_id asic_id; void *driver; /* ctx */ struct cgs_device *cgs_device; + struct dc_bounding_box_overrides bb_overrides; int num_virtual_links; /* @@ -597,7 +706,7 @@ struct dc_validation_set { uint8_t plane_count; }; -bool dc_validate_seamless_boot_timing(struct dc *dc, +bool dc_validate_seamless_boot_timing(const struct dc *dc, const struct dc_sink *sink, struct dc_crtc_timing *crtc_timing); @@ -605,9 +714,14 @@ enum dc_status dc_validate_plane(struct dc *dc, const struct dc_plane_state *pla void get_clock_requirements_for_state(struct dc_state *state, struct AsicStateEx *info); +/* + * fast_validate: we return after determining if we can support the new state, + * but before we populate the programming info + */ enum dc_status dc_validate_global_state( struct dc *dc, - struct dc_state *new_ctx); + struct dc_state *new_ctx, + bool fast_validate); void dc_resource_state_construct( @@ -636,7 +750,8 @@ void dc_resource_state_destruct(struct dc_state *context); bool dc_commit_state(struct dc *dc, struct dc_state *context); -struct dc_state *dc_create_state(void); +struct dc_state *dc_create_state(struct dc *dc); +struct dc_state *dc_copy_state(struct dc_state *src_ctx); void dc_retain_state(struct dc_state *context); void dc_release_state(struct dc_state *context); @@ -648,9 +763,16 @@ struct dpcd_caps { union dpcd_rev dpcd_rev; union max_lane_count max_ln_count; union max_down_spread max_down_spread; + union dprx_feature dprx_feature; + + /* valid only for eDP v1.4 or higher*/ + uint8_t edp_supported_link_rates_count; + enum dc_link_rate edp_supported_link_rates[8]; /* dongle type (DP converter, CV smart dongle) */ enum display_dongle_type dongle_type; + /* branch device or sink device */ + bool is_branch_dev; /* Dongle's downstream count. */ union sink_count sink_count; /* If dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER, @@ -666,11 +788,11 @@ struct dpcd_caps { int8_t branch_dev_name[6]; int8_t branch_hw_revision; int8_t branch_fw_revision[2]; - uint8_t link_rate_set; bool allow_invalid_MSA_timing_param; bool panel_mode_edp; bool dpcd_display_control_capable; + bool ext_receiver_cap_field_present; }; #include "dc_link.h" diff --git a/drivers/gpu/drm/amd/display/dc/dc_ddc_types.h b/drivers/gpu/drm/amd/display/dc/dc_ddc_types.h index 05c8c31d8b31..4ef97f65e55d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_ddc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_ddc_types.h @@ -68,6 +68,8 @@ enum aux_transaction_reply { AUX_TRANSACTION_REPLY_AUX_ACK = 0x00, AUX_TRANSACTION_REPLY_AUX_NACK = 0x01, AUX_TRANSACTION_REPLY_AUX_DEFER = 0x02, + AUX_TRANSACTION_REPLY_I2C_OVER_AUX_NACK = 0x04, + AUX_TRANSACTION_REPLY_I2C_OVER_AUX_DEFER = 0x08, AUX_TRANSACTION_REPLY_I2C_ACK = 0x00, AUX_TRANSACTION_REPLY_I2C_NACK = 0x10, diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index d4eab33c453b..11c68a399267 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -94,6 +94,8 @@ struct dc_link_settings { enum dc_lane_count lane_count; enum dc_link_rate link_rate; enum dc_link_spread link_spread; + bool use_link_rate_set; + uint8_t link_rate_set; }; struct dc_lane_settings { @@ -420,10 +422,24 @@ union edp_configuration_cap { uint8_t raw; }; +union dprx_feature { + struct { + uint8_t GTC_CAP:1; // bit 0: DP 1.3+ + uint8_t SST_SPLIT_SDP_CAP:1; // bit 1: DP 1.4 + uint8_t AV_SYNC_CAP:1; // bit 2: DP 1.3+ + uint8_t VSC_SDP_COLORIMETRY_SUPPORTED:1; // bit 3: DP 1.3+ + uint8_t VSC_EXT_VESA_SDP_SUPPORTED:1; // bit 4: DP 1.4 + uint8_t VSC_EXT_VESA_SDP_CHAINING_SUPPORTED:1; // bit 5: DP 1.4 + uint8_t VSC_EXT_CEA_SDP_SUPPORTED:1; // bit 6: DP 1.4 + uint8_t VSC_EXT_CEA_SDP_CHAINING_SUPPORTED:1; // bit 7: DP 1.4 + } bits; + uint8_t raw; +}; + union training_aux_rd_interval { struct { uint8_t TRAINIG_AUX_RD_INTERVAL:7; - uint8_t EXT_RECIEVER_CAP_FIELD_PRESENT:1; + uint8_t EXT_RECEIVER_CAP_FIELD_PRESENT:1; } bits; uint8_t raw; }; diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c index 597d38393379..5e6c5eff49cf 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c @@ -51,20 +51,16 @@ static inline void set_reg_field_value_masks( field_value_mask->mask = field_value_mask->mask | mask; } -uint32_t generic_reg_update_ex(const struct dc_context *ctx, - uint32_t addr, uint32_t reg_val, int n, +static void set_reg_field_values(struct dc_reg_value_masks *field_value_mask, + uint32_t addr, int n, uint8_t shift1, uint32_t mask1, uint32_t field_value1, - ...) + va_list ap) { - struct dc_reg_value_masks field_value_mask = {0}; uint32_t shift, mask, field_value; int i = 1; - va_list ap; - va_start(ap, field_value1); - /* gather all bits value/mask getting updated in this register */ - set_reg_field_value_masks(&field_value_mask, + set_reg_field_value_masks(field_value_mask, field_value1, mask1, shift1); while (i < n) { @@ -72,10 +68,48 @@ uint32_t generic_reg_update_ex(const struct dc_context *ctx, mask = va_arg(ap, uint32_t); field_value = va_arg(ap, uint32_t); - set_reg_field_value_masks(&field_value_mask, + set_reg_field_value_masks(field_value_mask, field_value, mask, shift); i++; } +} + +uint32_t generic_reg_update_ex(const struct dc_context *ctx, + uint32_t addr, int n, + uint8_t shift1, uint32_t mask1, uint32_t field_value1, + ...) +{ + struct dc_reg_value_masks field_value_mask = {0}; + uint32_t reg_val; + va_list ap; + + va_start(ap, field_value1); + + set_reg_field_values(&field_value_mask, addr, n, shift1, mask1, + field_value1, ap); + + va_end(ap); + + /* mmio write directly */ + reg_val = dm_read_reg(ctx, addr); + reg_val = (reg_val & ~field_value_mask.mask) | field_value_mask.value; + dm_write_reg(ctx, addr, reg_val); + return reg_val; +} + +uint32_t generic_reg_set_ex(const struct dc_context *ctx, + uint32_t addr, uint32_t reg_val, int n, + uint8_t shift1, uint32_t mask1, uint32_t field_value1, + ...) +{ + struct dc_reg_value_masks field_value_mask = {0}; + va_list ap; + + va_start(ap, field_value1); + + set_reg_field_values(&field_value_mask, addr, n, shift1, mask1, + field_value1, ap); + va_end(ap); @@ -85,6 +119,24 @@ uint32_t generic_reg_update_ex(const struct dc_context *ctx, return reg_val; } +uint32_t dm_read_reg_func( + const struct dc_context *ctx, + uint32_t address, + const char *func_name) +{ + uint32_t value; +#ifdef DM_CHECK_ADDR_0 + if (address == 0) { + DC_ERR("invalid register read; address = 0\n"); + return 0; + } +#endif + value = cgs_read_register(ctx->cgs_device, address); + trace_amdgpu_dc_rreg(&ctx->perf_trace->read_count, address, value); + + return value; +} + uint32_t generic_reg_get(const struct dc_context *ctx, uint32_t addr, uint8_t shift, uint32_t mask, uint32_t *field_value) { @@ -235,7 +287,7 @@ uint32_t generic_reg_get(const struct dc_context *ctx, } */ -uint32_t generic_reg_wait(const struct dc_context *ctx, +void generic_reg_wait(const struct dc_context *ctx, uint32_t addr, uint32_t shift, uint32_t mask, uint32_t condition_value, unsigned int delay_between_poll_us, unsigned int time_out_num_tries, const char *func_name, int line) @@ -265,7 +317,7 @@ uint32_t generic_reg_wait(const struct dc_context *ctx, DC_LOG_DC("REG_WAIT taking a while: %dms in %s line:%d\n", delay_between_poll_us * i / 1000, func_name, line); - return reg_val; + return; } } @@ -275,8 +327,6 @@ uint32_t generic_reg_wait(const struct dc_context *ctx, if (!IS_FPGA_MAXIMUS_DC(ctx->dce_environment)) BREAK_TO_DEBUGGER(); - - return reg_val; } void generic_write_indirect_reg(const struct dc_context *ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index 8fc223defed4..7b9429e30d82 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -120,6 +120,7 @@ struct dc_link { /* MST record stream using this link */ struct link_flags { bool dp_keep_receiver_powered; + bool dp_skip_DID2; } wa_flags; struct link_mst_stream_allocation_table mst_stream_alloc_table; @@ -246,10 +247,18 @@ void dc_link_set_test_pattern(struct dc_link *link, const struct link_training_settings *p_link_settings, const unsigned char *p_custom_pattern, unsigned int cust_pattern_size); +uint32_t dc_link_bandwidth_kbps( + const struct dc_link *link, + const struct dc_link_settings *link_setting); + +const struct dc_link_settings *dc_link_get_link_cap( + const struct dc_link *link); bool dc_submit_i2c( struct dc *dc, uint32_t link_index, struct i2c_command *cmd); +uint32_t dc_bandwidth_in_kbps_from_timing( + const struct dc_crtc_timing *timing); #endif /* DC_LINK_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 5657cb3a2ad3..189bdab929a5 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -80,6 +80,7 @@ struct dc_stream_state { struct dc_info_packet vrr_infopacket; struct dc_info_packet vsc_infopacket; struct dc_info_packet vsp_infopacket; + struct dc_info_packet dpsdp_infopacket; struct rect src; /* composition area */ struct rect dst; /* stream addressable area */ @@ -221,6 +222,13 @@ struct dc_stream_state *dc_get_stream_at_index(struct dc *dc, uint8_t i); */ uint32_t dc_stream_get_vblank_counter(const struct dc_stream_state *stream); +/* + * Send dp sdp message. + */ +bool dc_stream_send_dp_sdp(const struct dc_stream_state *stream, + const uint8_t *custom_sdp_message, + unsigned int sdp_message_size); + /* TODO: Return parsed values rather than direct register read * This has a dependency on the caller (amdgpu_display_get_crtc_scanoutpos) * being refactored properly to be dce-specific @@ -299,6 +307,8 @@ enum surface_update_type dc_check_update_surfaces_for_stream( */ struct dc_stream_state *dc_create_stream_for_sink(struct dc_sink *dc_sink); +struct dc_stream_state *dc_copy_stream(const struct dc_stream_state *stream); + void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink); void dc_stream_retain(struct dc_stream_state *dc_stream); diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index da2009a108cf..6c2a3d9a4c2e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -103,7 +103,7 @@ struct dc_context { }; -#define DC_MAX_EDID_BUFFER_SIZE 512 +#define DC_MAX_EDID_BUFFER_SIZE 1024 #define EDID_BLOCK_SIZE 128 #define MAX_SURFACE_NUM 4 #define NUM_PIXEL_FORMATS 10 @@ -395,7 +395,7 @@ struct dc_dongle_caps { bool is_dp_hdmi_ycbcr422_converter; bool is_dp_hdmi_ycbcr420_converter; uint32_t dp_hdmi_max_bpc; - uint32_t dp_hdmi_max_pixel_clk; + uint32_t dp_hdmi_max_pixel_clk_in_khz; }; /* Scaling format */ enum scaling_transformation { @@ -550,9 +550,9 @@ struct psr_config { unsigned char psr_version; unsigned int psr_rfb_setup_time; bool psr_exit_link_training_required; - bool psr_frame_capture_indication_req; unsigned int psr_sdp_transmit_line_num_deadline; + bool allow_smu_optimizations; }; union dmcu_psr_level { @@ -654,6 +654,7 @@ struct psr_context { * continuing powerd own */ unsigned int frame_delay; + bool allow_smu_optimizations; }; struct colorspace_transform { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c index 4fe3664fb495..bd33c47183fc 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c @@ -171,24 +171,24 @@ static void submit_channel_request( (request->action == I2CAUX_TRANSACTION_ACTION_I2C_WRITE_MOT))); if (REG(AUXN_IMPCAL)) { /* clear_aux_error */ - REG_UPDATE_SEQ(AUXN_IMPCAL, AUXN_CALOUT_ERROR_AK, - 1, - 0); + REG_UPDATE_SEQ_2(AUXN_IMPCAL, + AUXN_CALOUT_ERROR_AK, 1, + AUXN_CALOUT_ERROR_AK, 0); - REG_UPDATE_SEQ(AUXP_IMPCAL, AUXP_CALOUT_ERROR_AK, - 1, - 0); + REG_UPDATE_SEQ_2(AUXP_IMPCAL, + AUXP_CALOUT_ERROR_AK, 1, + AUXP_CALOUT_ERROR_AK, 0); /* force_default_calibrate */ - REG_UPDATE_1BY1_2(AUXN_IMPCAL, + REG_UPDATE_SEQ_2(AUXN_IMPCAL, AUXN_IMPCAL_ENABLE, 1, AUXN_IMPCAL_OVERRIDE_ENABLE, 0); /* bug? why AUXN update EN and OVERRIDE_EN 1 by 1 while AUX P toggles OVERRIDE? */ - REG_UPDATE_SEQ(AUXP_IMPCAL, AUXP_IMPCAL_OVERRIDE_ENABLE, - 1, - 0); + REG_UPDATE_SEQ_2(AUXP_IMPCAL, + AUXP_IMPCAL_OVERRIDE_ENABLE, 1, + AUXP_IMPCAL_OVERRIDE_ENABLE, 0); } REG_UPDATE(AUX_INTERRUPT_CONTROL, AUX_SW_DONE_ACK, 1); @@ -270,7 +270,7 @@ static int read_channel_reply(struct dce_aux *engine, uint32_t size, if (!bytes_replied) return -1; - REG_UPDATE_1BY1_3(AUX_SW_DATA, + REG_UPDATE_SEQ_3(AUX_SW_DATA, AUX_SW_INDEX, 0, AUX_SW_AUTOINCREMENT_DISABLE, 1, AUX_SW_DATA_RW, 1); @@ -320,9 +320,10 @@ static enum aux_channel_operation_result get_channel_status( *returned_bytes = 0; /* poll to make sure that SW_DONE is asserted */ - value = REG_WAIT(AUX_SW_STATUS, AUX_SW_DONE, 1, + REG_WAIT(AUX_SW_STATUS, AUX_SW_DONE, 1, 10, aux110->timeout_period/10); + value = REG_READ(AUX_SW_STATUS); /* in case HPD is LOW, exit AUX transaction */ if ((value & AUX_SW_STATUS__AUX_SW_HPD_DISCON_MASK)) return AUX_CHANNEL_OPERATION_FAILED_HPD_DISCON; @@ -377,7 +378,6 @@ static bool acquire( struct dce_aux *engine, struct ddc *ddc) { - enum gpio_result result; if (!is_engine_available(engine)) @@ -442,12 +442,12 @@ static enum i2caux_transaction_action i2caux_action_from_payload(struct aux_payl return I2CAUX_TRANSACTION_ACTION_DP_READ; } -int dce_aux_transfer(struct ddc_service *ddc, - struct aux_payload *payload) +int dce_aux_transfer_raw(struct ddc_service *ddc, + struct aux_payload *payload, + enum aux_channel_operation_result *operation_result) { struct ddc *ddc_pin = ddc->ddc_pin; struct dce_aux *aux_engine; - enum aux_channel_operation_result operation_result; struct aux_request_transaction_data aux_req; struct aux_reply_transaction_data aux_rep; uint8_t returned_bytes = 0; @@ -458,7 +458,8 @@ int dce_aux_transfer(struct ddc_service *ddc, memset(&aux_rep, 0, sizeof(aux_rep)); aux_engine = ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en]; - acquire(aux_engine, ddc_pin); + if (!acquire(aux_engine, ddc_pin)) + return -1; if (payload->i2c_over_aux) aux_req.type = AUX_TRANSACTION_TYPE_I2C; @@ -473,28 +474,26 @@ int dce_aux_transfer(struct ddc_service *ddc, aux_req.data = payload->data; submit_channel_request(aux_engine, &aux_req); - operation_result = get_channel_status(aux_engine, &returned_bytes); - - switch (operation_result) { - case AUX_CHANNEL_OPERATION_SUCCEEDED: - res = read_channel_reply(aux_engine, payload->length, - payload->data, payload->reply, - &status); - break; - case AUX_CHANNEL_OPERATION_FAILED_HPD_DISCON: - res = 0; - break; - case AUX_CHANNEL_OPERATION_FAILED_REASON_UNKNOWN: - case AUX_CHANNEL_OPERATION_FAILED_INVALID_REPLY: - case AUX_CHANNEL_OPERATION_FAILED_TIMEOUT: + *operation_result = get_channel_status(aux_engine, &returned_bytes); + + if (*operation_result == AUX_CHANNEL_OPERATION_SUCCEEDED) { + read_channel_reply(aux_engine, payload->length, + payload->data, payload->reply, + &status); + res = returned_bytes; + } else { res = -1; - break; } + release_engine(aux_engine); return res; } -#define AUX_RETRY_MAX 7 +#define AUX_MAX_RETRIES 7 +#define AUX_MAX_DEFER_RETRIES 7 +#define AUX_MAX_I2C_DEFER_RETRIES 7 +#define AUX_MAX_INVALID_REPLY_RETRIES 2 +#define AUX_MAX_TIMEOUT_RETRIES 3 bool dce_aux_transfer_with_retries(struct ddc_service *ddc, struct aux_payload *payload) @@ -502,24 +501,85 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc, int i, ret = 0; uint8_t reply; bool payload_reply = true; + enum aux_channel_operation_result operation_result; + int aux_ack_retries = 0, + aux_defer_retries = 0, + aux_i2c_defer_retries = 0, + aux_timeout_retries = 0, + aux_invalid_reply_retries = 0; if (!payload->reply) { payload_reply = false; payload->reply = &reply; } - for (i = 0; i < AUX_RETRY_MAX; i++) { - ret = dce_aux_transfer(ddc, payload); - - if (ret >= 0) { - if (*payload->reply == 0) { - if (!payload_reply) - payload->reply = NULL; - return true; + for (i = 0; i < AUX_MAX_RETRIES; i++) { + ret = dce_aux_transfer_raw(ddc, payload, &operation_result); + switch (operation_result) { + case AUX_CHANNEL_OPERATION_SUCCEEDED: + aux_timeout_retries = 0; + aux_invalid_reply_retries = 0; + + switch (*payload->reply) { + case AUX_TRANSACTION_REPLY_AUX_ACK: + if (!payload->write && payload->length != ret) { + if (++aux_ack_retries >= AUX_MAX_RETRIES) + goto fail; + else + udelay(300); + } else + return true; + break; + + case AUX_TRANSACTION_REPLY_AUX_DEFER: + case AUX_TRANSACTION_REPLY_I2C_OVER_AUX_NACK: + case AUX_TRANSACTION_REPLY_I2C_OVER_AUX_DEFER: + if (++aux_defer_retries >= AUX_MAX_DEFER_RETRIES) + goto fail; + break; + + case AUX_TRANSACTION_REPLY_I2C_DEFER: + aux_defer_retries = 0; + if (++aux_i2c_defer_retries >= AUX_MAX_I2C_DEFER_RETRIES) + goto fail; + break; + + case AUX_TRANSACTION_REPLY_AUX_NACK: + case AUX_TRANSACTION_REPLY_HPD_DISCON: + default: + goto fail; } - } + break; + + case AUX_CHANNEL_OPERATION_FAILED_INVALID_REPLY: + if (++aux_invalid_reply_retries >= AUX_MAX_INVALID_REPLY_RETRIES) + goto fail; + else + udelay(400); + break; + + case AUX_CHANNEL_OPERATION_FAILED_TIMEOUT: + if (++aux_timeout_retries >= AUX_MAX_TIMEOUT_RETRIES) + goto fail; + else { + /* + * DP 1.4, 2.8.2: AUX Transaction Response/Reply Timeouts + * According to the DP spec there should be 3 retries total + * with a 400us wait inbetween each. Hardware already waits + * for 550us therefore no wait is required here. + */ + } + break; - udelay(1000); + case AUX_CHANNEL_OPERATION_FAILED_HPD_DISCON: + case AUX_CHANNEL_OPERATION_FAILED_REASON_UNKNOWN: + default: + goto fail; + } } + +fail: + if (!payload_reply) + payload->reply = NULL; return false; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h index e28ed6a00ff4..ce6a26d189b0 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h @@ -123,8 +123,9 @@ bool dce110_aux_engine_acquire( struct dce_aux *aux_engine, struct ddc *ddc); -int dce_aux_transfer(struct ddc_service *ddc, - struct aux_payload *cmd); +int dce_aux_transfer_raw(struct ddc_service *ddc, + struct aux_payload *cmd, + enum aux_channel_operation_result *operation_result); bool dce_aux_transfer_with_retries(struct ddc_service *ddc, struct aux_payload *cmd); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c index 6e142c2db986..963686380738 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c @@ -222,7 +222,7 @@ static enum dm_pp_clocks_state dce_get_required_clocks_state( * all required clocks */ for (i = clk_mgr_dce->max_clks_state; i >= DM_PP_CLOCKS_STATE_ULTRA_LOW; i--) - if (context->bw.dce.dispclk_khz > + if (context->bw_ctx.bw.dce.dispclk_khz > clk_mgr_dce->max_clks_by_state[i].display_clk_khz || max_pix_clk > clk_mgr_dce->max_clks_by_state[i].pixel_clk_khz) @@ -232,7 +232,7 @@ static enum dm_pp_clocks_state dce_get_required_clocks_state( if (low_req_clk > clk_mgr_dce->max_clks_state) { /* set max clock state for high phyclock, invalid on exceeding display clock */ if (clk_mgr_dce->max_clks_by_state[clk_mgr_dce->max_clks_state].display_clk_khz - < context->bw.dce.dispclk_khz) + < context->bw_ctx.bw.dce.dispclk_khz) low_req_clk = DM_PP_CLOCKS_STATE_INVALID; else low_req_clk = clk_mgr_dce->max_clks_state; @@ -610,22 +610,22 @@ static void dce11_pplib_apply_display_requirements( struct dm_pp_display_configuration *pp_display_cfg = &context->pp_display_cfg; pp_display_cfg->all_displays_in_sync = - context->bw.dce.all_displays_in_sync; + context->bw_ctx.bw.dce.all_displays_in_sync; pp_display_cfg->nb_pstate_switch_disable = - context->bw.dce.nbp_state_change_enable == false; + context->bw_ctx.bw.dce.nbp_state_change_enable == false; pp_display_cfg->cpu_cc6_disable = - context->bw.dce.cpuc_state_change_enable == false; + context->bw_ctx.bw.dce.cpuc_state_change_enable == false; pp_display_cfg->cpu_pstate_disable = - context->bw.dce.cpup_state_change_enable == false; + context->bw_ctx.bw.dce.cpup_state_change_enable == false; pp_display_cfg->cpu_pstate_separation_time = - context->bw.dce.blackout_recovery_time_us; + context->bw_ctx.bw.dce.blackout_recovery_time_us; - pp_display_cfg->min_memory_clock_khz = context->bw.dce.yclk_khz + pp_display_cfg->min_memory_clock_khz = context->bw_ctx.bw.dce.yclk_khz / MEMORY_TYPE_MULTIPLIER_CZ; pp_display_cfg->min_engine_clock_khz = determine_sclk_from_bounding_box( dc, - context->bw.dce.sclk_khz); + context->bw_ctx.bw.dce.sclk_khz); /* * As workaround for >4x4K lightup set dcfclock to min_engine_clock value. @@ -638,7 +638,7 @@ static void dce11_pplib_apply_display_requirements( pp_display_cfg->min_engine_clock_khz : 0; pp_display_cfg->min_engine_clock_deep_sleep_khz - = context->bw.dce.sclk_deep_sleep_khz; + = context->bw_ctx.bw.dce.sclk_deep_sleep_khz; pp_display_cfg->avail_mclk_switch_time_us = dce110_get_min_vblank_time_us(context); @@ -669,7 +669,7 @@ static void dce_update_clocks(struct clk_mgr *clk_mgr, { struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr); struct dm_pp_power_level_change_request level_change_req; - int patched_disp_clk = context->bw.dce.dispclk_khz; + int patched_disp_clk = context->bw_ctx.bw.dce.dispclk_khz; /*TODO: W/A for dal3 linux, investigate why this works */ if (!clk_mgr_dce->dfs_bypass_active) @@ -696,7 +696,7 @@ static void dce11_update_clocks(struct clk_mgr *clk_mgr, { struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr); struct dm_pp_power_level_change_request level_change_req; - int patched_disp_clk = context->bw.dce.dispclk_khz; + int patched_disp_clk = context->bw_ctx.bw.dce.dispclk_khz; /*TODO: W/A for dal3 linux, investigate why this works */ if (!clk_mgr_dce->dfs_bypass_active) @@ -711,7 +711,7 @@ static void dce11_update_clocks(struct clk_mgr *clk_mgr, } if (should_set_clock(safe_to_lower, patched_disp_clk, clk_mgr->clks.dispclk_khz)) { - context->bw.dce.dispclk_khz = dce_set_clock(clk_mgr, patched_disp_clk); + context->bw_ctx.bw.dce.dispclk_khz = dce_set_clock(clk_mgr, patched_disp_clk); clk_mgr->clks.dispclk_khz = patched_disp_clk; } dce11_pplib_apply_display_requirements(clk_mgr->ctx->dc, context); @@ -723,7 +723,7 @@ static void dce112_update_clocks(struct clk_mgr *clk_mgr, { struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr); struct dm_pp_power_level_change_request level_change_req; - int patched_disp_clk = context->bw.dce.dispclk_khz; + int patched_disp_clk = context->bw_ctx.bw.dce.dispclk_khz; /*TODO: W/A for dal3 linux, investigate why this works */ if (!clk_mgr_dce->dfs_bypass_active) @@ -751,7 +751,7 @@ static void dce12_update_clocks(struct clk_mgr *clk_mgr, struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr); struct dm_pp_clock_for_voltage_req clock_voltage_req = {0}; int max_pix_clk = get_max_pixel_clock_for_all_paths(context); - int patched_disp_clk = context->bw.dce.dispclk_khz; + int patched_disp_clk = context->bw_ctx.bw.dce.dispclk_khz; /*TODO: W/A for dal3 linux, investigate why this works */ if (!clk_mgr_dce->dfs_bypass_active) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index 71d5777de961..f70437aae8e0 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -978,7 +978,7 @@ static bool dce110_clock_source_power_down( } static bool get_pixel_clk_frequency_100hz( - struct clock_source *clock_source, + const struct clock_source *clock_source, unsigned int inst, unsigned int *pixel_clk_khz) { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c index c2926cf19dee..818536eea00a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c @@ -51,6 +51,9 @@ #define PSR_SET_WAITLOOP 0x31 #define MCP_INIT_DMCU 0x88 #define MCP_INIT_IRAM 0x89 +#define MCP_SYNC_PHY_LOCK 0x90 +#define MCP_SYNC_PHY_UNLOCK 0x91 +#define MCP_BL_SET_PWM_FRAC 0x6A /* Enable or disable Fractional PWM */ #define MASTER_COMM_CNTL_REG__MASTER_COMM_INTERRUPT_MASK 0x00000001L static bool dce_dmcu_init(struct dmcu *dmcu) @@ -213,9 +216,6 @@ static bool dce_dmcu_setup_psr(struct dmcu *dmcu, link->link_enc->funcs->psr_program_secondary_packet(link->link_enc, psr_context->sdpTransmitLineNumDeadline); - if (psr_context->psr_level.bits.SKIP_SMU_NOTIFICATION) - REG_UPDATE(SMU_INTERRUPT_CONTROL, DC_SMU_INT_ENABLE, 1); - /* waitDMCUReadyForCmd */ REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0, dmcu_wait_reg_ready_interval, @@ -342,9 +342,32 @@ static void dcn10_get_dmcu_version(struct dmcu *dmcu) IRAM_RD_ADDR_AUTO_INC, 0); } +static void dcn10_dmcu_enable_fractional_pwm(struct dmcu *dmcu, + uint32_t fractional_pwm) +{ + struct dce_dmcu *dmcu_dce = TO_DCE_DMCU(dmcu); + + /* Wait until microcontroller is ready to process interrupt */ + REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0, 100, 800); + + /* Set PWM fractional enable/disable */ + REG_WRITE(MASTER_COMM_DATA_REG1, fractional_pwm); + + /* Set command to enable or disable fractional PWM microcontroller */ + REG_UPDATE(MASTER_COMM_CMD_REG, MASTER_COMM_CMD_REG_BYTE0, + MCP_BL_SET_PWM_FRAC); + + /* Notify microcontroller of new command */ + REG_UPDATE(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 1); + + /* Ensure command has been executed before continuing */ + REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0, 100, 800); +} + static bool dcn10_dmcu_init(struct dmcu *dmcu) { struct dce_dmcu *dmcu_dce = TO_DCE_DMCU(dmcu); + const struct dc_config *config = &dmcu->ctx->dc->config; bool status = false; /* Definition of DC_DMCU_SCRATCH @@ -382,9 +405,14 @@ static bool dcn10_dmcu_init(struct dmcu *dmcu) if (dmcu->dmcu_state == DMCU_RUNNING) { /* Retrieve and cache the DMCU firmware version. */ dcn10_get_dmcu_version(dmcu); + + /* Initialize DMCU to use fractional PWM or not */ + dcn10_dmcu_enable_fractional_pwm(dmcu, + (config->disable_fractional_pwm == false) ? 1 : 0); status = true; - } else + } else { status = false; + } break; case DMCU_RUNNING: @@ -594,7 +622,7 @@ static bool dcn10_dmcu_setup_psr(struct dmcu *dmcu, link->link_enc->funcs->psr_program_secondary_packet(link->link_enc, psr_context->sdpTransmitLineNumDeadline); - if (psr_context->psr_level.bits.SKIP_SMU_NOTIFICATION) + if (psr_context->allow_smu_optimizations) REG_UPDATE(SMU_INTERRUPT_CONTROL, DC_SMU_INT_ENABLE, 1); /* waitDMCUReadyForCmd */ @@ -615,6 +643,7 @@ static bool dcn10_dmcu_setup_psr(struct dmcu *dmcu, psr_context->psrFrameCaptureIndicationReq; masterCmdData1.bits.aux_chan = psr_context->channel; masterCmdData1.bits.aux_repeat = psr_context->aux_repeats; + masterCmdData1.bits.allow_smu_optimizations = psr_context->allow_smu_optimizations; dm_write_reg(dmcu->ctx, REG(MASTER_COMM_DATA_REG1), masterCmdData1.u32All); @@ -635,6 +664,7 @@ static bool dcn10_dmcu_setup_psr(struct dmcu *dmcu, dm_write_reg(dmcu->ctx, REG(MASTER_COMM_DATA_REG3), masterCmdData3.u32All); + /* setDMCUParam_Cmd */ REG_UPDATE(MASTER_COMM_CMD_REG, MASTER_COMM_CMD_REG_BYTE0, PSR_SET); @@ -691,7 +721,7 @@ static bool dcn10_is_dmcu_initialized(struct dmcu *dmcu) return true; } -#endif +#endif //(CONFIG_DRM_AMD_DC_DCN1_0) static const struct dmcu_funcs dce_funcs = { .dmcu_init = dce_dmcu_init, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.h b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.h index c24c0e5ea44e..60ce56f60ae3 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.h @@ -199,16 +199,16 @@ struct dce_dmcu { ******************************************************************/ union dce_dmcu_psr_config_data_reg1 { struct { - unsigned int timehyst_frames:8; /*[7:0]*/ - unsigned int hyst_lines:7; /*[14:8]*/ - unsigned int rfb_update_auto_en:1; /*[15:15]*/ - unsigned int dp_port_num:3; /*[18:16]*/ - unsigned int dcp_sel:3; /*[21:19]*/ - unsigned int phy_type:1; /*[22:22]*/ - unsigned int frame_cap_ind:1; /*[23:23]*/ - unsigned int aux_chan:3; /*[26:24]*/ - unsigned int aux_repeat:4; /*[30:27]*/ - unsigned int reserved:1; /*[31:31]*/ + unsigned int timehyst_frames:8; /*[7:0]*/ + unsigned int hyst_lines:7; /*[14:8]*/ + unsigned int rfb_update_auto_en:1; /*[15:15]*/ + unsigned int dp_port_num:3; /*[18:16]*/ + unsigned int dcp_sel:3; /*[21:19]*/ + unsigned int phy_type:1; /*[22:22]*/ + unsigned int frame_cap_ind:1; /*[23:23]*/ + unsigned int aux_chan:3; /*[26:24]*/ + unsigned int aux_repeat:4; /*[30:27]*/ + unsigned int allow_smu_optimizations:1; /*[31:31]*/ } bits; unsigned int u32All; }; @@ -236,7 +236,7 @@ union dce_dmcu_psr_config_data_reg3 { struct { unsigned int psr_level:16; /*[15:0]*/ unsigned int link_rate:4; /*[19:16]*/ - unsigned int reserved:12; /*[31:20]*/ + unsigned int reserved:12; /*[31:20]*/ } bits; unsigned int u32All; }; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c index 40f2d6e0b122..cd26161bcc4d 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c @@ -346,6 +346,16 @@ static void release_engine( } +static bool is_engine_available(struct dce_i2c_hw *dce_i2c_hw) +{ + unsigned int arbitrate; + + REG_GET(DC_I2C_ARBITRATION, DC_I2C_REG_RW_CNTL_STATUS, &arbitrate); + if (arbitrate == DC_I2C_REG_RW_CNTL_STATUS_DMCU_ONLY) + return false; + return true; +} + struct dce_i2c_hw *acquire_i2c_hw_engine( struct resource_pool *pool, struct ddc *ddc) @@ -368,7 +378,7 @@ struct dce_i2c_hw *acquire_i2c_hw_engine( if (!dce_i2c_hw) return NULL; - if (pool->i2c_hw_buffer_in_use) + if (pool->i2c_hw_buffer_in_use || !is_engine_available(dce_i2c_hw)) return NULL; do { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h index 7f19bb439665..575500755b2e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h @@ -29,7 +29,8 @@ enum dc_i2c_status { DC_I2C_STATUS__DC_I2C_STATUS_IDLE, DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_SW, - DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_HW + DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_HW, + DC_I2C_REG_RW_CNTL_STATUS_DMCU_ONLY = 2, }; enum dc_i2c_arbitration { @@ -129,7 +130,8 @@ enum { I2C_SF(DC_I2C_DATA, DC_I2C_DATA, mask_sh),\ I2C_SF(DC_I2C_DATA, DC_I2C_INDEX, mask_sh),\ I2C_SF(DC_I2C_DATA, DC_I2C_INDEX_WRITE, mask_sh),\ - I2C_SF(MICROSECOND_TIME_BASE_DIV, XTAL_REF_DIV, mask_sh) + I2C_SF(MICROSECOND_TIME_BASE_DIV, XTAL_REF_DIV, mask_sh),\ + I2C_SF(DC_I2C_ARBITRATION, DC_I2C_REG_RW_CNTL_STATUS, mask_sh) #define I2C_COMMON_MASK_SH_LIST_DCE110(mask_sh)\ I2C_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(mask_sh),\ @@ -170,6 +172,7 @@ struct dce_i2c_shift { uint8_t DC_I2C_INDEX; uint8_t DC_I2C_INDEX_WRITE; uint8_t XTAL_REF_DIV; + uint8_t DC_I2C_REG_RW_CNTL_STATUS; }; struct dce_i2c_mask { @@ -207,6 +210,7 @@ struct dce_i2c_mask { uint32_t DC_I2C_INDEX; uint32_t DC_I2C_INDEX_WRITE; uint32_t XTAL_REF_DIV; + uint32_t DC_I2C_REG_RW_CNTL_STATUS; }; struct dce_i2c_registers { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c index 1fa2d4fd7a35..14309fe6f2e6 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c @@ -272,7 +272,8 @@ static void dce110_update_hdmi_info_packet( static void dce110_stream_encoder_dp_set_stream_attribute( struct stream_encoder *enc, struct dc_crtc_timing *crtc_timing, - enum dc_color_space output_color_space) + enum dc_color_space output_color_space, + uint32_t enable_sdp_splitting) { #if defined(CONFIG_DRM_AMD_DC_DCN1_0) uint32_t h_active_start; @@ -977,7 +978,7 @@ static void dce110_stream_encoder_dp_unblank( uint64_t m_vid_l = n_vid; - m_vid_l *= param->pixel_clk_khz; + m_vid_l *= param->timing.pix_clk_100hz / 10; m_vid_l = div_u64(m_vid_l, param->link_settings.link_rate * LINK_RATE_REF_FREQ_IN_KHZ); diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c index 23044e6723e8..e938bf9986d3 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c @@ -378,6 +378,28 @@ static const struct resource_caps res_cap = { .num_ddc = 6, }; +static const struct dc_plane_cap plane_cap = { + .type = DC_PLANE_TYPE_DCE_RGB, + + .pixel_format_support = { + .argb8888 = true, + .nv12 = false, + .fp16 = false + }, + + .max_upscale_factor = { + .argb8888 = 16000, + .nv12 = 1, + .fp16 = 1 + }, + + .max_downscale_factor = { + .argb8888 = 250, + .nv12 = 1, + .fp16 = 1 + } +}; + #define CTX ctx #define REG(reg) mm ## reg @@ -756,7 +778,8 @@ static enum dc_status build_mapped_resource( bool dce100_validate_bandwidth( struct dc *dc, - struct dc_state *context) + struct dc_state *context, + bool fast_validate) { int i; bool at_least_one_pipe = false; @@ -768,11 +791,11 @@ bool dce100_validate_bandwidth( if (at_least_one_pipe) { /* TODO implement when needed but for now hardcode max value*/ - context->bw.dce.dispclk_khz = 681000; - context->bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER_CZ; + context->bw_ctx.bw.dce.dispclk_khz = 681000; + context->bw_ctx.bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER_CZ; } else { - context->bw.dce.dispclk_khz = 0; - context->bw.dce.yclk_khz = 0; + context->bw_ctx.bw.dce.dispclk_khz = 0; + context->bw_ctx.bw.dce.yclk_khz = 0; } return true; @@ -1023,6 +1046,9 @@ static bool construct( dc->caps.max_planes = pool->base.pipe_count; + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + if (!resource_construct(num_virtual_links, dc, &pool->base, &res_create_funcs)) goto res_create_fail; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 5e4db3712eef..7ac50ab1b762 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -616,7 +616,7 @@ dce110_set_output_transfer_func(struct pipe_ctx *pipe_ctx, void dce110_update_info_frame(struct pipe_ctx *pipe_ctx) { - bool is_hdmi; + bool is_hdmi_tmds; bool is_dp; ASSERT(pipe_ctx->stream); @@ -624,13 +624,13 @@ void dce110_update_info_frame(struct pipe_ctx *pipe_ctx) if (pipe_ctx->stream_res.stream_enc == NULL) return; /* this is not root pipe */ - is_hdmi = dc_is_hdmi_signal(pipe_ctx->stream->signal); + is_hdmi_tmds = dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal); is_dp = dc_is_dp_signal(pipe_ctx->stream->signal); - if (!is_hdmi && !is_dp) + if (!is_hdmi_tmds && !is_dp) return; - if (is_hdmi) + if (is_hdmi_tmds) pipe_ctx->stream_res.stream_enc->funcs->update_hdmi_info_packets( pipe_ctx->stream_res.stream_enc, &pipe_ctx->stream_res.encoder_info_frame); @@ -935,13 +935,31 @@ void hwss_edp_backlight_control( edp_receiver_ready_T9(link); } +// Static helper function which calls the correct function +// based on pp_smu version +static void set_pme_wa_enable_by_version(struct dc *dc) +{ + struct pp_smu_funcs *pp_smu = NULL; + + if (dc->res_pool->pp_smu) + pp_smu = dc->res_pool->pp_smu; + + if (pp_smu) { + if (pp_smu->ctx.ver == PP_SMU_VER_RV && pp_smu->rv_funcs.set_pme_wa_enable) + pp_smu->rv_funcs.set_pme_wa_enable(&(pp_smu->ctx)); + } +} + void dce110_enable_audio_stream(struct pipe_ctx *pipe_ctx) { - struct dc *core_dc = pipe_ctx->stream->ctx->dc; /* notify audio driver for audio modes of monitor */ - struct pp_smu_funcs_rv *pp_smu = core_dc->res_pool->pp_smu; + struct dc *core_dc = pipe_ctx->stream->ctx->dc; + struct pp_smu_funcs *pp_smu = NULL; unsigned int i, num_audio = 1; + if (core_dc->res_pool->pp_smu) + pp_smu = core_dc->res_pool->pp_smu; + if (pipe_ctx->stream_res.audio) { for (i = 0; i < MAX_PIPES; i++) { /*current_state not updated yet*/ @@ -951,30 +969,31 @@ void dce110_enable_audio_stream(struct pipe_ctx *pipe_ctx) pipe_ctx->stream_res.audio->funcs->az_enable(pipe_ctx->stream_res.audio); - if (num_audio >= 1 && pp_smu != NULL && pp_smu->set_pme_wa_enable != NULL) + if (num_audio >= 1 && pp_smu != NULL) /*this is the first audio. apply the PME w/a in order to wake AZ from D3*/ - pp_smu->set_pme_wa_enable(&pp_smu->pp_smu); + set_pme_wa_enable_by_version(core_dc); /* un-mute audio */ /* TODO: audio should be per stream rather than per link */ pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control( - pipe_ctx->stream_res.stream_enc, false); + pipe_ctx->stream_res.stream_enc, false); } } void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx, int option) { struct dc *dc = pipe_ctx->stream->ctx->dc; + struct pp_smu_funcs *pp_smu = NULL; pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control( pipe_ctx->stream_res.stream_enc, true); if (pipe_ctx->stream_res.audio) { - struct pp_smu_funcs_rv *pp_smu = dc->res_pool->pp_smu; + if (dc->res_pool->pp_smu) + pp_smu = dc->res_pool->pp_smu; if (option != KEEP_ACQUIRED_RESOURCE || - !dc->debug.az_endpoint_mute_only) { + !dc->debug.az_endpoint_mute_only) /*only disalbe az_endpoint if power down or free*/ pipe_ctx->stream_res.audio->funcs->az_disable(pipe_ctx->stream_res.audio); - } if (dc_is_dp_signal(pipe_ctx->stream->signal)) pipe_ctx->stream_res.stream_enc->funcs->dp_audio_disable( @@ -989,9 +1008,9 @@ void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx, int option) update_audio_usage(&dc->current_state->res_ctx, dc->res_pool, pipe_ctx->stream_res.audio, false); pipe_ctx->stream_res.audio = NULL; } - if (pp_smu != NULL && pp_smu->set_pme_wa_enable != NULL) + if (pp_smu != NULL) /*this is the first audio. apply the PME w/a in order to wake AZ from D3*/ - pp_smu->set_pme_wa_enable(&pp_smu->pp_smu); + set_pme_wa_enable_by_version(dc); /* TODO: notify audio driver for if audio modes list changed * add audio mode list change flag */ @@ -1007,7 +1026,7 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx, int option) struct dc_link *link = stream->link; struct dc *dc = pipe_ctx->stream->ctx->dc; - if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) + if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal)) pipe_ctx->stream_res.stream_enc->funcs->stop_hdmi_info_packets( pipe_ctx->stream_res.stream_enc); @@ -1032,7 +1051,7 @@ void dce110_unblank_stream(struct pipe_ctx *pipe_ctx, struct dc_link *link = stream->link; /* only 3 items below are used by unblank */ - params.pixel_clk_khz = pipe_ctx->stream->timing.pix_clk_100hz / 10; + params.timing = pipe_ctx->stream->timing; params.link_settings.link_rate = link_settings->link_rate; if (dc_is_dp_signal(pipe_ctx->stream->signal)) @@ -1147,8 +1166,8 @@ static void build_audio_output( if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT || pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { audio_output->pll_info.dp_dto_source_clock_in_khz = - state->dccg->funcs->get_dp_ref_clk_frequency( - state->dccg); + state->clk_mgr->funcs->get_dp_ref_clk_frequency( + state->clk_mgr); } audio_output->pll_info.feed_back_divider = @@ -1349,7 +1368,7 @@ static enum dc_status apply_single_controller_ctx_to_hw( pipe_ctx->stream_res.tg->funcs->set_static_screen_control( pipe_ctx->stream_res.tg, event_triggers); - if (pipe_ctx->stream->signal != SIGNAL_TYPE_VIRTUAL) + if (!dc_is_virtual_signal(pipe_ctx->stream->signal)) pipe_ctx->stream_res.stream_enc->funcs->dig_connect_to_otg( pipe_ctx->stream_res.stream_enc, pipe_ctx->stream_res.tg->inst); @@ -1358,7 +1377,7 @@ static enum dc_status apply_single_controller_ctx_to_hw( pipe_ctx->stream_res.opp, COLOR_SPACE_YCBCR601, stream->timing.display_color_depth, - pipe_ctx->stream->signal); + stream->signal); pipe_ctx->stream_res.opp->funcs->opp_program_fmt( pipe_ctx->stream_res.opp, @@ -1532,6 +1551,9 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) } } + if (dc->hwss.init_pipes) + dc->hwss.init_pipes(dc, context); + if (edp_link) { /* this seems to cause blank screens on DCE8 */ if ((dc->ctx->dce_version == DCE_VERSION_8_0) || @@ -1608,18 +1630,18 @@ static void dce110_set_displaymarks( dc->bw_vbios->blackout_duration, pipe_ctx->stream); pipe_ctx->plane_res.mi->funcs->mem_input_program_display_marks( pipe_ctx->plane_res.mi, - context->bw.dce.nbp_state_change_wm_ns[num_pipes], - context->bw.dce.stutter_exit_wm_ns[num_pipes], - context->bw.dce.stutter_entry_wm_ns[num_pipes], - context->bw.dce.urgent_wm_ns[num_pipes], + context->bw_ctx.bw.dce.nbp_state_change_wm_ns[num_pipes], + context->bw_ctx.bw.dce.stutter_exit_wm_ns[num_pipes], + context->bw_ctx.bw.dce.stutter_entry_wm_ns[num_pipes], + context->bw_ctx.bw.dce.urgent_wm_ns[num_pipes], total_dest_line_time_ns); if (i == underlay_idx) { num_pipes++; pipe_ctx->plane_res.mi->funcs->mem_input_program_chroma_display_marks( pipe_ctx->plane_res.mi, - context->bw.dce.nbp_state_change_wm_ns[num_pipes], - context->bw.dce.stutter_exit_wm_ns[num_pipes], - context->bw.dce.urgent_wm_ns[num_pipes], + context->bw_ctx.bw.dce.nbp_state_change_wm_ns[num_pipes], + context->bw_ctx.bw.dce.stutter_exit_wm_ns[num_pipes], + context->bw_ctx.bw.dce.urgent_wm_ns[num_pipes], total_dest_line_time_ns); } num_pipes++; @@ -2612,7 +2634,7 @@ void dce110_set_cursor_position(struct pipe_ctx *pipe_ctx) struct mem_input *mi = pipe_ctx->plane_res.mi; struct dc_cursor_mi_param param = { .pixel_clk_khz = pipe_ctx->stream->timing.pix_clk_100hz / 10, - .ref_clk_khz = pipe_ctx->stream->ctx->dc->res_pool->ref_clock_inKhz, + .ref_clk_khz = pipe_ctx->stream->ctx->dc->res_pool->ref_clocks.xtalin_clock_inKhz, .viewport = pipe_ctx->plane_res.scl_data.viewport, .h_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.horz, .v_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.vert, diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index 7549adaa1542..dcd04e9ea76b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -392,6 +392,55 @@ static const struct resource_caps stoney_resource_cap = { .num_ddc = 3, }; +static const struct dc_plane_cap plane_cap = { + .type = DC_PLANE_TYPE_DCE_RGB, + .blends_with_below = true, + .blends_with_above = true, + .per_pixel_alpha = 1, + + .pixel_format_support = { + .argb8888 = true, + .nv12 = false, + .fp16 = false + }, + + .max_upscale_factor = { + .argb8888 = 16000, + .nv12 = 1, + .fp16 = 1 + }, + + .max_downscale_factor = { + .argb8888 = 250, + .nv12 = 1, + .fp16 = 1 + } +}; + +static const struct dc_plane_cap underlay_plane_cap = { + .type = DC_PLANE_TYPE_DCE_UNDERLAY, + .blends_with_above = true, + .per_pixel_alpha = 1, + + .pixel_format_support = { + .argb8888 = false, + .nv12 = true, + .fp16 = false + }, + + .max_upscale_factor = { + .argb8888 = 1, + .nv12 = 16000, + .fp16 = 1 + }, + + .max_downscale_factor = { + .argb8888 = 1, + .nv12 = 250, + .fp16 = 1 + } +}; + #define CTX ctx #define REG(reg) mm ## reg @@ -854,7 +903,8 @@ static enum dc_status build_mapped_resource( static bool dce110_validate_bandwidth( struct dc *dc, - struct dc_state *context) + struct dc_state *context, + bool fast_validate) { bool result = false; @@ -868,7 +918,7 @@ static bool dce110_validate_bandwidth( dc->bw_vbios, context->res_ctx.pipe_ctx, dc->res_pool->pipe_count, - &context->bw.dce)) + &context->bw_ctx.bw.dce)) result = true; if (!result) @@ -878,8 +928,8 @@ static bool dce110_validate_bandwidth( context->streams[0]->timing.v_addressable, context->streams[0]->timing.pix_clk_100hz / 10); - if (memcmp(&dc->current_state->bw.dce, - &context->bw.dce, sizeof(context->bw.dce))) { + if (memcmp(&dc->current_state->bw_ctx.bw.dce, + &context->bw_ctx.bw.dce, sizeof(context->bw_ctx.bw.dce))) { DC_LOG_BANDWIDTH_CALCS( "%s: finish,\n" @@ -893,34 +943,34 @@ static bool dce110_validate_bandwidth( "sclk: %d sclk_sleep: %d yclk: %d blackout_recovery_time_us: %d\n" , __func__, - context->bw.dce.nbp_state_change_wm_ns[0].b_mark, - context->bw.dce.nbp_state_change_wm_ns[0].a_mark, - context->bw.dce.urgent_wm_ns[0].b_mark, - context->bw.dce.urgent_wm_ns[0].a_mark, - context->bw.dce.stutter_exit_wm_ns[0].b_mark, - context->bw.dce.stutter_exit_wm_ns[0].a_mark, - context->bw.dce.nbp_state_change_wm_ns[1].b_mark, - context->bw.dce.nbp_state_change_wm_ns[1].a_mark, - context->bw.dce.urgent_wm_ns[1].b_mark, - context->bw.dce.urgent_wm_ns[1].a_mark, - context->bw.dce.stutter_exit_wm_ns[1].b_mark, - context->bw.dce.stutter_exit_wm_ns[1].a_mark, - context->bw.dce.nbp_state_change_wm_ns[2].b_mark, - context->bw.dce.nbp_state_change_wm_ns[2].a_mark, - context->bw.dce.urgent_wm_ns[2].b_mark, - context->bw.dce.urgent_wm_ns[2].a_mark, - context->bw.dce.stutter_exit_wm_ns[2].b_mark, - context->bw.dce.stutter_exit_wm_ns[2].a_mark, - context->bw.dce.stutter_mode_enable, - context->bw.dce.cpuc_state_change_enable, - context->bw.dce.cpup_state_change_enable, - context->bw.dce.nbp_state_change_enable, - context->bw.dce.all_displays_in_sync, - context->bw.dce.dispclk_khz, - context->bw.dce.sclk_khz, - context->bw.dce.sclk_deep_sleep_khz, - context->bw.dce.yclk_khz, - context->bw.dce.blackout_recovery_time_us); + context->bw_ctx.bw.dce.nbp_state_change_wm_ns[0].b_mark, + context->bw_ctx.bw.dce.nbp_state_change_wm_ns[0].a_mark, + context->bw_ctx.bw.dce.urgent_wm_ns[0].b_mark, + context->bw_ctx.bw.dce.urgent_wm_ns[0].a_mark, + context->bw_ctx.bw.dce.stutter_exit_wm_ns[0].b_mark, + context->bw_ctx.bw.dce.stutter_exit_wm_ns[0].a_mark, + context->bw_ctx.bw.dce.nbp_state_change_wm_ns[1].b_mark, + context->bw_ctx.bw.dce.nbp_state_change_wm_ns[1].a_mark, + context->bw_ctx.bw.dce.urgent_wm_ns[1].b_mark, + context->bw_ctx.bw.dce.urgent_wm_ns[1].a_mark, + context->bw_ctx.bw.dce.stutter_exit_wm_ns[1].b_mark, + context->bw_ctx.bw.dce.stutter_exit_wm_ns[1].a_mark, + context->bw_ctx.bw.dce.nbp_state_change_wm_ns[2].b_mark, + context->bw_ctx.bw.dce.nbp_state_change_wm_ns[2].a_mark, + context->bw_ctx.bw.dce.urgent_wm_ns[2].b_mark, + context->bw_ctx.bw.dce.urgent_wm_ns[2].a_mark, + context->bw_ctx.bw.dce.stutter_exit_wm_ns[2].b_mark, + context->bw_ctx.bw.dce.stutter_exit_wm_ns[2].a_mark, + context->bw_ctx.bw.dce.stutter_mode_enable, + context->bw_ctx.bw.dce.cpuc_state_change_enable, + context->bw_ctx.bw.dce.cpup_state_change_enable, + context->bw_ctx.bw.dce.nbp_state_change_enable, + context->bw_ctx.bw.dce.all_displays_in_sync, + context->bw_ctx.bw.dce.dispclk_khz, + context->bw_ctx.bw.dce.sclk_khz, + context->bw_ctx.bw.dce.sclk_deep_sleep_khz, + context->bw_ctx.bw.dce.yclk_khz, + context->bw_ctx.bw.dce.blackout_recovery_time_us); } return result; } @@ -1371,6 +1421,11 @@ static bool construct( dc->caps.max_planes = pool->base.pipe_count; + for (i = 0; i < pool->base.underlay_pipe_index; ++i) + dc->caps.planes[i] = plane_cap; + + dc->caps.planes[pool->base.underlay_pipe_index] = underlay_plane_cap; + bw_calcs_init(dc->bw_dceip, dc->bw_vbios, dc->ctx->asic_id); bw_calcs_data_update_from_pplib(dc); diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c index ea3065d63372..a480b15f6885 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c @@ -397,6 +397,28 @@ static const struct resource_caps polaris_11_resource_cap = { .num_ddc = 5, }; +static const struct dc_plane_cap plane_cap = { + .type = DC_PLANE_TYPE_DCE_RGB, + + .pixel_format_support = { + .argb8888 = true, + .nv12 = false, + .fp16 = false + }, + + .max_upscale_factor = { + .argb8888 = 16000, + .nv12 = 1, + .fp16 = 1 + }, + + .max_downscale_factor = { + .argb8888 = 250, + .nv12 = 1, + .fp16 = 1 + } +}; + #define CTX ctx #define REG(reg) mm ## reg @@ -804,7 +826,8 @@ static enum dc_status build_mapped_resource( bool dce112_validate_bandwidth( struct dc *dc, - struct dc_state *context) + struct dc_state *context, + bool fast_validate) { bool result = false; @@ -818,7 +841,7 @@ bool dce112_validate_bandwidth( dc->bw_vbios, context->res_ctx.pipe_ctx, dc->res_pool->pipe_count, - &context->bw.dce)) + &context->bw_ctx.bw.dce)) result = true; if (!result) @@ -826,8 +849,8 @@ bool dce112_validate_bandwidth( "%s: Bandwidth validation failed!", __func__); - if (memcmp(&dc->current_state->bw.dce, - &context->bw.dce, sizeof(context->bw.dce))) { + if (memcmp(&dc->current_state->bw_ctx.bw.dce, + &context->bw_ctx.bw.dce, sizeof(context->bw_ctx.bw.dce))) { DC_LOG_BANDWIDTH_CALCS( "%s: finish,\n" @@ -841,34 +864,34 @@ bool dce112_validate_bandwidth( "sclk: %d sclk_sleep: %d yclk: %d blackout_recovery_time_us: %d\n" , __func__, - context->bw.dce.nbp_state_change_wm_ns[0].b_mark, - context->bw.dce.nbp_state_change_wm_ns[0].a_mark, - context->bw.dce.urgent_wm_ns[0].b_mark, - context->bw.dce.urgent_wm_ns[0].a_mark, - context->bw.dce.stutter_exit_wm_ns[0].b_mark, - context->bw.dce.stutter_exit_wm_ns[0].a_mark, - context->bw.dce.nbp_state_change_wm_ns[1].b_mark, - context->bw.dce.nbp_state_change_wm_ns[1].a_mark, - context->bw.dce.urgent_wm_ns[1].b_mark, - context->bw.dce.urgent_wm_ns[1].a_mark, - context->bw.dce.stutter_exit_wm_ns[1].b_mark, - context->bw.dce.stutter_exit_wm_ns[1].a_mark, - context->bw.dce.nbp_state_change_wm_ns[2].b_mark, - context->bw.dce.nbp_state_change_wm_ns[2].a_mark, - context->bw.dce.urgent_wm_ns[2].b_mark, - context->bw.dce.urgent_wm_ns[2].a_mark, - context->bw.dce.stutter_exit_wm_ns[2].b_mark, - context->bw.dce.stutter_exit_wm_ns[2].a_mark, - context->bw.dce.stutter_mode_enable, - context->bw.dce.cpuc_state_change_enable, - context->bw.dce.cpup_state_change_enable, - context->bw.dce.nbp_state_change_enable, - context->bw.dce.all_displays_in_sync, - context->bw.dce.dispclk_khz, - context->bw.dce.sclk_khz, - context->bw.dce.sclk_deep_sleep_khz, - context->bw.dce.yclk_khz, - context->bw.dce.blackout_recovery_time_us); + context->bw_ctx.bw.dce.nbp_state_change_wm_ns[0].b_mark, + context->bw_ctx.bw.dce.nbp_state_change_wm_ns[0].a_mark, + context->bw_ctx.bw.dce.urgent_wm_ns[0].b_mark, + context->bw_ctx.bw.dce.urgent_wm_ns[0].a_mark, + context->bw_ctx.bw.dce.stutter_exit_wm_ns[0].b_mark, + context->bw_ctx.bw.dce.stutter_exit_wm_ns[0].a_mark, + context->bw_ctx.bw.dce.nbp_state_change_wm_ns[1].b_mark, + context->bw_ctx.bw.dce.nbp_state_change_wm_ns[1].a_mark, + context->bw_ctx.bw.dce.urgent_wm_ns[1].b_mark, + context->bw_ctx.bw.dce.urgent_wm_ns[1].a_mark, + context->bw_ctx.bw.dce.stutter_exit_wm_ns[1].b_mark, + context->bw_ctx.bw.dce.stutter_exit_wm_ns[1].a_mark, + context->bw_ctx.bw.dce.nbp_state_change_wm_ns[2].b_mark, + context->bw_ctx.bw.dce.nbp_state_change_wm_ns[2].a_mark, + context->bw_ctx.bw.dce.urgent_wm_ns[2].b_mark, + context->bw_ctx.bw.dce.urgent_wm_ns[2].a_mark, + context->bw_ctx.bw.dce.stutter_exit_wm_ns[2].b_mark, + context->bw_ctx.bw.dce.stutter_exit_wm_ns[2].a_mark, + context->bw_ctx.bw.dce.stutter_mode_enable, + context->bw_ctx.bw.dce.cpuc_state_change_enable, + context->bw_ctx.bw.dce.cpup_state_change_enable, + context->bw_ctx.bw.dce.nbp_state_change_enable, + context->bw_ctx.bw.dce.all_displays_in_sync, + context->bw_ctx.bw.dce.dispclk_khz, + context->bw_ctx.bw.dce.sclk_khz, + context->bw_ctx.bw.dce.sclk_deep_sleep_khz, + context->bw_ctx.bw.dce.yclk_khz, + context->bw_ctx.bw.dce.blackout_recovery_time_us); } return result; } @@ -887,7 +910,7 @@ enum dc_status resource_map_phy_clock_resources( return DC_ERROR_UNEXPECTED; if (dc_is_dp_signal(pipe_ctx->stream->signal) - || pipe_ctx->stream->signal == SIGNAL_TYPE_VIRTUAL) + || dc_is_virtual_signal(pipe_ctx->stream->signal)) pipe_ctx->clock_source = dc->res_pool->dp_clock_source; else @@ -1310,6 +1333,9 @@ static bool construct( dc->caps.max_planes = pool->base.pipe_count; + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + /* Create hardware sequencer */ dce112_hw_sequencer_construct(dc); diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h index 95a403396219..1f57ebc6f9b4 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h @@ -44,7 +44,8 @@ enum dc_status dce112_validate_with_context( bool dce112_validate_bandwidth( struct dc *dc, - struct dc_state *context); + struct dc_state *context, + bool fast_validate); enum dc_status dce112_add_stream_to_ctx( struct dc *dc, diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index 312a0aebf91f..6d49c7143c67 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -454,6 +454,28 @@ static const struct resource_caps res_cap = { .num_ddc = 6, }; +static const struct dc_plane_cap plane_cap = { + .type = DC_PLANE_TYPE_DCE_RGB, + + .pixel_format_support = { + .argb8888 = true, + .nv12 = false, + .fp16 = false + }, + + .max_upscale_factor = { + .argb8888 = 16000, + .nv12 = 1, + .fp16 = 1 + }, + + .max_downscale_factor = { + .argb8888 = 250, + .nv12 = 1, + .fp16 = 1 + } +}; + static const struct dc_debug_options debug_defaults = { .disable_clock_gate = true, }; @@ -1171,6 +1193,9 @@ static bool construct( dc->caps.max_planes = pool->base.pipe_count; + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + bw_calcs_init(dc->bw_dceip, dc->bw_vbios, dc->ctx->asic_id); bw_calcs_data_update_from_pplib(dc); diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c index c109ace96be9..27d0cc394963 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c @@ -387,6 +387,28 @@ static const struct resource_caps res_cap_83 = { .num_ddc = 2, }; +static const struct dc_plane_cap plane_cap = { + .type = DC_PLANE_TYPE_DCE_RGB, + + .pixel_format_support = { + .argb8888 = true, + .nv12 = false, + .fp16 = false + }, + + .max_upscale_factor = { + .argb8888 = 16000, + .nv12 = 1, + .fp16 = 1 + }, + + .max_downscale_factor = { + .argb8888 = 250, + .nv12 = 1, + .fp16 = 1 + } +}; + static const struct dce_dmcu_registers dmcu_regs = { DMCU_DCE80_REG_LIST() }; @@ -790,7 +812,8 @@ static void destruct(struct dce110_resource_pool *pool) bool dce80_validate_bandwidth( struct dc *dc, - struct dc_state *context) + struct dc_state *context, + bool fast_validate) { int i; bool at_least_one_pipe = false; @@ -802,11 +825,11 @@ bool dce80_validate_bandwidth( if (at_least_one_pipe) { /* TODO implement when needed but for now hardcode max value*/ - context->bw.dce.dispclk_khz = 681000; - context->bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER_CZ; + context->bw_ctx.bw.dce.dispclk_khz = 681000; + context->bw_ctx.bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER_CZ; } else { - context->bw.dce.dispclk_khz = 0; - context->bw.dce.yclk_khz = 0; + context->bw_ctx.bw.dce.dispclk_khz = 0; + context->bw_ctx.bw.dce.yclk_khz = 0; } return true; @@ -1032,6 +1055,10 @@ static bool dce80_construct( } dc->caps.max_planes = pool->base.pipe_count; + + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + dc->caps.disable_dp_clk_share = true; if (!resource_construct(num_virtual_links, dc, &pool->base, @@ -1237,6 +1264,10 @@ static bool dce81_construct( } dc->caps.max_planes = pool->base.pipe_count; + + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + dc->caps.disable_dp_clk_share = true; if (!resource_construct(num_virtual_links, dc, &pool->base, @@ -1438,6 +1469,10 @@ static bool dce83_construct( } dc->caps.max_planes = pool->base.pipe_count; + + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + dc->caps.disable_dp_clk_share = true; if (!resource_construct(num_virtual_links, dc, &pool->base, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c index afe8c42211cd..2b2de1d913c9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c @@ -43,23 +43,6 @@ #define DC_LOGGER \ clk_mgr->ctx->logger -void dcn1_pplib_apply_display_requirements( - struct dc *dc, - struct dc_state *context) -{ - struct dm_pp_display_configuration *pp_display_cfg = &context->pp_display_cfg; - - pp_display_cfg->min_engine_clock_khz = dc->res_pool->clk_mgr->clks.dcfclk_khz; - pp_display_cfg->min_memory_clock_khz = dc->res_pool->clk_mgr->clks.fclk_khz; - pp_display_cfg->min_engine_clock_deep_sleep_khz = dc->res_pool->clk_mgr->clks.dcfclk_deep_sleep_khz; - pp_display_cfg->min_dcfc_deep_sleep_clock_khz = dc->res_pool->clk_mgr->clks.dcfclk_deep_sleep_khz; - pp_display_cfg->min_dcfclock_khz = dc->res_pool->clk_mgr->clks.dcfclk_khz; - pp_display_cfg->disp_clk_khz = dc->res_pool->clk_mgr->clks.dispclk_khz; - dce110_fill_display_configs(context, pp_display_cfg); - - dm_pp_apply_display_requirements(dc->ctx, pp_display_cfg); -} - static int dcn1_determine_dppclk_threshold(struct clk_mgr *clk_mgr, struct dc_clocks *new_clocks) { bool request_dpp_div = new_clocks->dispclk_khz > new_clocks->dppclk_khz; @@ -167,11 +150,8 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr, { struct dc *dc = clk_mgr->ctx->dc; struct dc_debug_options *debug = &dc->debug; - struct dc_clocks *new_clocks = &context->bw.dcn.clk; - struct pp_smu_display_requirement_rv *smu_req_cur = - &dc->res_pool->pp_smu_req; - struct pp_smu_display_requirement_rv smu_req = *smu_req_cur; - struct pp_smu_funcs_rv *pp_smu = dc->res_pool->pp_smu; + struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; + struct pp_smu_funcs_rv *pp_smu = NULL; bool send_request_to_increase = false; bool send_request_to_lower = false; int display_count; @@ -179,7 +159,8 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr, bool enter_display_off = false; display_count = get_active_display_cnt(dc, context); - + if (dc->res_pool->pp_smu) + pp_smu = &dc->res_pool->pp_smu->rv_funcs; if (display_count == 0) enter_display_off = true; @@ -189,10 +170,8 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr, * if function pointer not set up, this message is * sent as part of pplib_apply_display_requirements. */ - if (pp_smu->set_display_count) + if (pp_smu && pp_smu->set_display_count) pp_smu->set_display_count(&pp_smu->pp_smu, display_count); - - smu_req.display_count = display_count; } if (new_clocks->dispclk_khz > clk_mgr->clks.dispclk_khz @@ -203,7 +182,6 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr, if (should_set_clock(safe_to_lower, new_clocks->phyclk_khz, clk_mgr->clks.phyclk_khz)) { clk_mgr->clks.phyclk_khz = new_clocks->phyclk_khz; - send_request_to_lower = true; } @@ -213,24 +191,18 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr, if (should_set_clock(safe_to_lower, new_clocks->fclk_khz, clk_mgr->clks.fclk_khz)) { clk_mgr->clks.fclk_khz = new_clocks->fclk_khz; - smu_req.hard_min_fclk_mhz = new_clocks->fclk_khz / 1000; - send_request_to_lower = true; } //DCF Clock if (should_set_clock(safe_to_lower, new_clocks->dcfclk_khz, clk_mgr->clks.dcfclk_khz)) { clk_mgr->clks.dcfclk_khz = new_clocks->dcfclk_khz; - smu_req.hard_min_dcefclk_mhz = new_clocks->dcfclk_khz / 1000; - send_request_to_lower = true; } if (should_set_clock(safe_to_lower, new_clocks->dcfclk_deep_sleep_khz, clk_mgr->clks.dcfclk_deep_sleep_khz)) { clk_mgr->clks.dcfclk_deep_sleep_khz = new_clocks->dcfclk_deep_sleep_khz; - smu_req.min_deep_sleep_dcefclk_mhz = (new_clocks->dcfclk_deep_sleep_khz + 999) / 1000; - send_request_to_lower = true; } @@ -239,17 +211,13 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr, */ if (send_request_to_increase) { /*use dcfclk to request voltage*/ - if (pp_smu->set_hard_min_fclk_by_freq && + if (pp_smu && pp_smu->set_hard_min_fclk_by_freq && pp_smu->set_hard_min_dcfclk_by_freq && pp_smu->set_min_deep_sleep_dcfclk) { - pp_smu->set_hard_min_fclk_by_freq(&pp_smu->pp_smu, smu_req.hard_min_fclk_mhz); - pp_smu->set_hard_min_dcfclk_by_freq(&pp_smu->pp_smu, smu_req.hard_min_dcefclk_mhz); - pp_smu->set_min_deep_sleep_dcfclk(&pp_smu->pp_smu, smu_req.min_deep_sleep_dcefclk_mhz); - } else { - if (pp_smu->set_display_requirement) - pp_smu->set_display_requirement(&pp_smu->pp_smu, &smu_req); - dcn1_pplib_apply_display_requirements(dc, context); + pp_smu->set_hard_min_fclk_by_freq(&pp_smu->pp_smu, new_clocks->fclk_khz / 1000); + pp_smu->set_hard_min_dcfclk_by_freq(&pp_smu->pp_smu, new_clocks->dcfclk_khz / 1000); + pp_smu->set_min_deep_sleep_dcfclk(&pp_smu->pp_smu, (new_clocks->dcfclk_deep_sleep_khz + 999) / 1000); } } @@ -259,27 +227,20 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr, || new_clocks->dispclk_khz == clk_mgr->clks.dispclk_khz) { dcn1_ramp_up_dispclk_with_dpp(clk_mgr, new_clocks); clk_mgr->clks.dispclk_khz = new_clocks->dispclk_khz; - send_request_to_lower = true; } if (!send_request_to_increase && send_request_to_lower) { /*use dcfclk to request voltage*/ - if (pp_smu->set_hard_min_fclk_by_freq && + if (pp_smu && pp_smu->set_hard_min_fclk_by_freq && pp_smu->set_hard_min_dcfclk_by_freq && pp_smu->set_min_deep_sleep_dcfclk) { - pp_smu->set_hard_min_fclk_by_freq(&pp_smu->pp_smu, smu_req.hard_min_fclk_mhz); - pp_smu->set_hard_min_dcfclk_by_freq(&pp_smu->pp_smu, smu_req.hard_min_dcefclk_mhz); - pp_smu->set_min_deep_sleep_dcfclk(&pp_smu->pp_smu, smu_req.min_deep_sleep_dcefclk_mhz); - } else { - if (pp_smu->set_display_requirement) - pp_smu->set_display_requirement(&pp_smu->pp_smu, &smu_req); - dcn1_pplib_apply_display_requirements(dc, context); + pp_smu->set_hard_min_fclk_by_freq(&pp_smu->pp_smu, new_clocks->fclk_khz / 1000); + pp_smu->set_hard_min_dcfclk_by_freq(&pp_smu->pp_smu, new_clocks->dcfclk_khz / 1000); + pp_smu->set_min_deep_sleep_dcfclk(&pp_smu->pp_smu, (new_clocks->dcfclk_deep_sleep_khz + 999) / 1000); } } - - *smu_req_cur = smu_req; } static const struct clk_mgr_funcs dcn1_funcs = { .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.h index a995eda443a3..97007cf33665 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.h @@ -34,10 +34,6 @@ struct clk_bypass { uint32_t dprefclk_bypass; }; -void dcn1_pplib_apply_display_requirements( - struct dc *dc, - struct dc_state *context); - struct clk_mgr *dcn1_clk_mgr_create(struct dc_context *ctx); #endif //__DCN10_CLK_MGR_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c index cd1ebe57ed59..6f4b24756323 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c @@ -91,13 +91,6 @@ enum dscl_mode_sel { DSCL_MODE_DSCL_BYPASS = 6 }; -enum gamut_remap_select { - GAMUT_REMAP_BYPASS = 0, - GAMUT_REMAP_COEFF, - GAMUT_REMAP_COMA_COEFF, - GAMUT_REMAP_COMB_COEFF -}; - void dpp_read_state(struct dpp *dpp_base, struct dcn_dpp_state *s) { @@ -392,6 +385,10 @@ void dpp1_cnv_setup ( default: break; } + + /* Set default color space based on format if none is given. */ + color_space = input_color_space ? input_color_space : color_space; + REG_SET(CNVC_SURFACE_PIXEL_FORMAT, 0, CNVC_SURFACE_PIXEL_FORMAT, pixel_format); REG_UPDATE(FORMAT_CONTROL, FORMAT_CONTROL__ALPHA_EN, alpha_en); @@ -403,7 +400,7 @@ void dpp1_cnv_setup ( for (i = 0; i < 12; i++) tbl_entry.regval[i] = input_csc_color_matrix.matrix[i]; - tbl_entry.color_space = input_color_space; + tbl_entry.color_space = color_space; if (color_space >= COLOR_SPACE_YCBCR601) select = INPUT_CSC_SELECT_ICSC; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c index 41f0f4c912e7..882bcc5a40f6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c @@ -88,13 +88,6 @@ enum dscl_mode_sel { DSCL_MODE_DSCL_BYPASS = 6 }; -enum gamut_remap_select { - GAMUT_REMAP_BYPASS = 0, - GAMUT_REMAP_COEFF, - GAMUT_REMAP_COMA_COEFF, - GAMUT_REMAP_COMB_COEFF -}; - static const struct dpp_input_csc_matrix dpp_input_csc_matrix[] = { {COLOR_SPACE_SRGB, {0x2000, 0, 0, 0, 0, 0x2000, 0, 0, 0, 0, 0x2000, 0} }, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c index c7642e748297..ce21a290bf3e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c @@ -406,15 +406,25 @@ void dpp1_dscl_calc_lb_num_partitions( int *num_part_y, int *num_part_c) { + int lb_memory_size, lb_memory_size_c, lb_memory_size_a, num_partitions_a, + lb_bpc, memory_line_size_y, memory_line_size_c, memory_line_size_a; + int line_size = scl_data->viewport.width < scl_data->recout.width ? scl_data->viewport.width : scl_data->recout.width; int line_size_c = scl_data->viewport_c.width < scl_data->recout.width ? scl_data->viewport_c.width : scl_data->recout.width; - int lb_bpc = dpp1_dscl_get_lb_depth_bpc(scl_data->lb_params.depth); - int memory_line_size_y = (line_size * lb_bpc + 71) / 72; /* +71 to ceil */ - int memory_line_size_c = (line_size_c * lb_bpc + 71) / 72; /* +71 to ceil */ - int memory_line_size_a = (line_size + 5) / 6; /* +5 to ceil */ - int lb_memory_size, lb_memory_size_c, lb_memory_size_a, num_partitions_a; + + if (line_size == 0) + line_size = 1; + + if (line_size_c == 0) + line_size_c = 1; + + + lb_bpc = dpp1_dscl_get_lb_depth_bpc(scl_data->lb_params.depth); + memory_line_size_y = (line_size * lb_bpc + 71) / 72; /* +71 to ceil */ + memory_line_size_c = (line_size_c * lb_bpc + 71) / 72; /* +71 to ceil */ + memory_line_size_a = (line_size + 5) / 6; /* +5 to ceil */ if (lb_config == LB_MEMORY_CONFIG_1) { lb_memory_size = 816; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c index e161ad836812..0db2a6e96fc0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c @@ -258,8 +258,9 @@ void hubbub1_wm_change_req_wa(struct hubbub *hubbub) { struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub); - REG_UPDATE_SEQ(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL, - DCHUBBUB_ARB_WATERMARK_CHANGE_REQUEST, 0, 1); + REG_UPDATE_SEQ_2(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL, + DCHUBBUB_ARB_WATERMARK_CHANGE_REQUEST, 0, + DCHUBBUB_ARB_WATERMARK_CHANGE_REQUEST, 1); } void hubbub1_program_watermarks( @@ -282,7 +283,8 @@ void hubbub1_program_watermarks( hubbub1->watermarks.a.urgent_ns = watermarks->a.urgent_ns; prog_wm_value = convert_and_clamp(watermarks->a.urgent_ns, refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, prog_wm_value); + REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, 0, + DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("URGENCY_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n", @@ -309,7 +311,8 @@ void hubbub1_program_watermarks( prog_wm_value = convert_and_clamp( watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns, refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, prog_wm_value); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, 0, + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n", watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value); @@ -322,7 +325,8 @@ void hubbub1_program_watermarks( prog_wm_value = convert_and_clamp( watermarks->a.cstate_pstate.cstate_exit_ns, refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, prog_wm_value); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, 0, + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n", watermarks->a.cstate_pstate.cstate_exit_ns, prog_wm_value); @@ -336,7 +340,8 @@ void hubbub1_program_watermarks( prog_wm_value = convert_and_clamp( watermarks->a.cstate_pstate.pstate_change_ns, refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, prog_wm_value); + REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, 0, + DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n\n", watermarks->a.cstate_pstate.pstate_change_ns, prog_wm_value); @@ -347,7 +352,8 @@ void hubbub1_program_watermarks( hubbub1->watermarks.b.urgent_ns = watermarks->b.urgent_ns; prog_wm_value = convert_and_clamp(watermarks->b.urgent_ns, refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, prog_wm_value); + REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, 0, + DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("URGENCY_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n", @@ -374,7 +380,8 @@ void hubbub1_program_watermarks( prog_wm_value = convert_and_clamp( watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns, refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, prog_wm_value); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, 0, + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n", watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value); @@ -387,7 +394,8 @@ void hubbub1_program_watermarks( prog_wm_value = convert_and_clamp( watermarks->b.cstate_pstate.cstate_exit_ns, refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, prog_wm_value); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, 0, + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n", watermarks->b.cstate_pstate.cstate_exit_ns, prog_wm_value); @@ -401,7 +409,8 @@ void hubbub1_program_watermarks( prog_wm_value = convert_and_clamp( watermarks->b.cstate_pstate.pstate_change_ns, refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, prog_wm_value); + REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, 0, + DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n\n", watermarks->b.cstate_pstate.pstate_change_ns, prog_wm_value); @@ -412,7 +421,8 @@ void hubbub1_program_watermarks( hubbub1->watermarks.c.urgent_ns = watermarks->c.urgent_ns; prog_wm_value = convert_and_clamp(watermarks->c.urgent_ns, refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C, prog_wm_value); + REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C, 0, + DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("URGENCY_WATERMARK_C calculated =%d\n" "HW register value = 0x%x\n", @@ -439,7 +449,8 @@ void hubbub1_program_watermarks( prog_wm_value = convert_and_clamp( watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns, refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, prog_wm_value); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, 0, + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_C calculated =%d\n" "HW register value = 0x%x\n", watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value); @@ -452,7 +463,8 @@ void hubbub1_program_watermarks( prog_wm_value = convert_and_clamp( watermarks->c.cstate_pstate.cstate_exit_ns, refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, prog_wm_value); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, 0, + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_C calculated =%d\n" "HW register value = 0x%x\n", watermarks->c.cstate_pstate.cstate_exit_ns, prog_wm_value); @@ -466,7 +478,8 @@ void hubbub1_program_watermarks( prog_wm_value = convert_and_clamp( watermarks->c.cstate_pstate.pstate_change_ns, refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, prog_wm_value); + REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, 0, + DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_C calculated =%d\n" "HW register value = 0x%x\n\n", watermarks->c.cstate_pstate.pstate_change_ns, prog_wm_value); @@ -477,7 +490,8 @@ void hubbub1_program_watermarks( hubbub1->watermarks.d.urgent_ns = watermarks->d.urgent_ns; prog_wm_value = convert_and_clamp(watermarks->d.urgent_ns, refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D, prog_wm_value); + REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D, 0, + DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("URGENCY_WATERMARK_D calculated =%d\n" "HW register value = 0x%x\n", @@ -504,7 +518,8 @@ void hubbub1_program_watermarks( prog_wm_value = convert_and_clamp( watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns, refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, prog_wm_value); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, 0, + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_D calculated =%d\n" "HW register value = 0x%x\n", watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value); @@ -517,7 +532,8 @@ void hubbub1_program_watermarks( prog_wm_value = convert_and_clamp( watermarks->d.cstate_pstate.cstate_exit_ns, refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, prog_wm_value); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, 0, + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_D calculated =%d\n" "HW register value = 0x%x\n", watermarks->d.cstate_pstate.cstate_exit_ns, prog_wm_value); @@ -531,7 +547,8 @@ void hubbub1_program_watermarks( prog_wm_value = convert_and_clamp( watermarks->d.cstate_pstate.pstate_change_ns, refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, prog_wm_value); + REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, 0, + DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_D calculated =%d\n" "HW register value = 0x%x\n\n", watermarks->d.cstate_pstate.pstate_change_ns, prog_wm_value); @@ -866,6 +883,7 @@ static const struct hubbub_funcs hubbub1_funcs = { .dcc_support_pixel_format = hubbub1_dcc_support_pixel_format, .get_dcc_compression_cap = hubbub1_get_dcc_compression_cap, .wm_read_state = hubbub1_wm_read_state, + .program_watermarks = hubbub1_program_watermarks, }; void hubbub1_construct(struct hubbub *hubbub, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h index 9cd4a5194154..85811b24a497 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h @@ -32,18 +32,14 @@ #define TO_DCN10_HUBBUB(hubbub)\ container_of(hubbub, struct dcn10_hubbub, base) -#define HUBHUB_REG_LIST_DCN()\ +#define HUBBUB_REG_LIST_DCN_COMMON()\ SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A),\ - SR(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A),\ SR(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A),\ SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B),\ - SR(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B),\ SR(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B),\ SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C),\ - SR(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C),\ SR(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C),\ SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D),\ - SR(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D),\ SR(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D),\ SR(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL),\ SR(DCHUBBUB_ARB_DRAM_STATE_CNTL),\ @@ -54,6 +50,12 @@ SR(DCHUBBUB_TEST_DEBUG_DATA),\ SR(DCHUBBUB_SOFT_RESET) +#define HUBBUB_VM_REG_LIST() \ + SR(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A),\ + SR(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B),\ + SR(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C),\ + SR(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D) + #define HUBBUB_SR_WATERMARK_REG_LIST()\ SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A),\ SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A),\ @@ -65,7 +67,8 @@ SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D) #define HUBBUB_REG_LIST_DCN10(id)\ - HUBHUB_REG_LIST_DCN(), \ + HUBBUB_REG_LIST_DCN_COMMON(), \ + HUBBUB_VM_REG_LIST(), \ HUBBUB_SR_WATERMARK_REG_LIST(), \ SR(DCHUBBUB_SDPIF_FB_TOP),\ SR(DCHUBBUB_SDPIF_FB_BASE),\ @@ -122,8 +125,7 @@ struct dcn_hubbub_registers { #define HUBBUB_SF(reg_name, field_name, post_fix)\ .field_name = reg_name ## __ ## field_name ## post_fix - -#define HUBBUB_MASK_SH_LIST_DCN(mask_sh)\ +#define HUBBUB_MASK_SH_LIST_DCN_COMMON(mask_sh)\ HUBBUB_SF(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_ENABLE, mask_sh), \ HUBBUB_SF(DCHUBBUB_SOFT_RESET, DCHUBBUB_GLOBAL_SOFT_RESET, mask_sh), \ HUBBUB_SF(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL, DCHUBBUB_ARB_WATERMARK_CHANGE_REQUEST, mask_sh), \ @@ -133,10 +135,29 @@ struct dcn_hubbub_registers { HUBBUB_SF(DCHUBBUB_ARB_DRAM_STATE_CNTL, DCHUBBUB_ARB_ALLOW_PSTATE_CHANGE_FORCE_VALUE, mask_sh), \ HUBBUB_SF(DCHUBBUB_ARB_DRAM_STATE_CNTL, DCHUBBUB_ARB_ALLOW_PSTATE_CHANGE_FORCE_ENABLE, mask_sh), \ HUBBUB_SF(DCHUBBUB_ARB_SAT_LEVEL, DCHUBBUB_ARB_SAT_LEVEL, mask_sh), \ - HUBBUB_SF(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, mask_sh) + HUBBUB_SF(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C, DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D, DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, mask_sh) + +#define HUBBUB_MASK_SH_LIST_STUTTER(mask_sh) \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, mask_sh) #define HUBBUB_MASK_SH_LIST_DCN10(mask_sh)\ - HUBBUB_MASK_SH_LIST_DCN(mask_sh), \ + HUBBUB_MASK_SH_LIST_DCN_COMMON(mask_sh), \ + HUBBUB_MASK_SH_LIST_STUTTER(mask_sh), \ HUBBUB_SF(DCHUBBUB_SDPIF_FB_TOP, SDPIF_FB_TOP, mask_sh), \ HUBBUB_SF(DCHUBBUB_SDPIF_FB_BASE, SDPIF_FB_BASE, mask_sh), \ HUBBUB_SF(DCHUBBUB_SDPIF_FB_OFFSET, SDPIF_FB_OFFSET, mask_sh), \ @@ -167,15 +188,35 @@ struct dcn_hubbub_registers { type FB_OFFSET;\ type AGP_BOT;\ type AGP_TOP;\ - type AGP_BASE + type AGP_BASE;\ + type DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A;\ + type DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B;\ + type DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C;\ + type DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D;\ + type DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A;\ + type DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B;\ + type DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C;\ + type DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D + +#define HUBBUB_STUTTER_REG_FIELD_LIST(type) \ + type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A;\ + type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B;\ + type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C;\ + type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D;\ + type DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A;\ + type DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B;\ + type DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C;\ + type DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D struct dcn_hubbub_shift { DCN_HUBBUB_REG_FIELD_LIST(uint8_t); + HUBBUB_STUTTER_REG_FIELD_LIST(uint8_t); }; struct dcn_hubbub_mask { DCN_HUBBUB_REG_FIELD_LIST(uint32_t); + HUBBUB_STUTTER_REG_FIELD_LIST(uint32_t); }; struct dc; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index 0ba68d41b9c3..54b219a710d8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -1178,6 +1178,10 @@ void hubp1_vtg_sel(struct hubp *hubp, uint32_t otg_inst) REG_UPDATE(DCHUBP_CNTL, HUBP_VTG_SEL, otg_inst); } +void hubp1_init(struct hubp *hubp) +{ + //do nothing +} static const struct hubp_funcs dcn10_hubp_funcs = { .hubp_program_surface_flip_and_addr = hubp1_program_surface_flip_and_addr, @@ -1201,7 +1205,7 @@ static const struct hubp_funcs dcn10_hubp_funcs = { .hubp_clear_underflow = hubp1_clear_underflow, .hubp_disable_control = hubp1_disable_control, .hubp_get_underflow_status = hubp1_get_underflow_status, - + .hubp_init = hubp1_init, }; /*****************************************/ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h index a6d6dfe00617..99d2b7e2a578 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h @@ -34,6 +34,7 @@ #define HUBP_REG_LIST_DCN(id)\ SRI(DCHUBP_CNTL, HUBP, id),\ SRI(HUBPREQ_DEBUG_DB, HUBP, id),\ + SRI(HUBPREQ_DEBUG, HUBP, id),\ SRI(DCSURF_ADDR_CONFIG, HUBP, id),\ SRI(DCSURF_TILING_CONFIG, HUBP, id),\ SRI(DCSURF_SURFACE_PITCH, HUBPREQ, id),\ @@ -138,6 +139,7 @@ #define HUBP_COMMON_REG_VARIABLE_LIST \ uint32_t DCHUBP_CNTL; \ uint32_t HUBPREQ_DEBUG_DB; \ + uint32_t HUBPREQ_DEBUG; \ uint32_t DCSURF_ADDR_CONFIG; \ uint32_t DCSURF_TILING_CONFIG; \ uint32_t DCSURF_SURFACE_PITCH; \ @@ -247,7 +249,7 @@ .field_name = reg_name ## __ ## field_name ## post_fix /* Mask/shift struct generation macro for all ASICs (including those with reduced functionality) */ -#define HUBP_MASK_SH_LIST_DCN(mask_sh)\ +#define HUBP_MASK_SH_LIST_DCN_COMMON(mask_sh)\ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_BLANK_EN, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_TTU_DISABLE, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_UNDERFLOW_STATUS, mask_sh),\ @@ -331,7 +333,6 @@ HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, META_CHUNK_SIZE, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, MIN_META_CHUNK_SIZE, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, DPTE_GROUP_SIZE, mask_sh),\ - HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, MPTE_GROUP_SIZE, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, SWATH_HEIGHT, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, PTE_ROW_HEIGHT_LINEAR, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG_C, CHUNK_SIZE_C, mask_sh),\ @@ -339,7 +340,6 @@ HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG_C, META_CHUNK_SIZE_C, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG_C, MIN_META_CHUNK_SIZE_C, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG_C, DPTE_GROUP_SIZE_C, mask_sh),\ - HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG_C, MPTE_GROUP_SIZE_C, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG_C, SWATH_HEIGHT_C, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG_C, PTE_ROW_HEIGHT_LINEAR_C, mask_sh),\ HUBP_SF(HUBPREQ0_BLANK_OFFSET_0, REFCYC_H_BLANK_END, mask_sh),\ @@ -373,6 +373,11 @@ HUBP_SF(HUBPREQ0_DCN_SURF0_TTU_CNTL1, REFCYC_PER_REQ_DELIVERY_PRE, mask_sh),\ HUBP_SF(HUBP0_HUBP_CLK_CNTL, HUBP_CLOCK_ENABLE, mask_sh) +#define HUBP_MASK_SH_LIST_DCN(mask_sh)\ + HUBP_MASK_SH_LIST_DCN_COMMON(mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, MPTE_GROUP_SIZE, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG_C, MPTE_GROUP_SIZE_C, mask_sh) + /* Mask/shift struct generation macro for ASICs with VM */ #define HUBP_MASK_SH_LIST_DCN_VM(mask_sh)\ HUBP_SF(HUBPREQ0_NOM_PARAMETERS_0, DST_Y_PER_PTE_ROW_NOM_L, mask_sh),\ @@ -595,6 +600,9 @@ type AGP_BASE;\ type AGP_BOT;\ type AGP_TOP;\ + type DCN_VM_SYSTEM_APERTURE_DEFAULT_SYSTEM;\ + type DCN_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB;\ + type DCN_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB;\ /* todo: get these from GVM instead of reading registers ourselves */\ type PAGE_DIRECTORY_ENTRY_HI32;\ type PAGE_DIRECTORY_ENTRY_LO32;\ @@ -743,4 +751,6 @@ enum cursor_pitch hubp1_get_cursor_pitch(unsigned int pitch); void hubp1_vready_workaround(struct hubp *hubp, struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest); +void hubp1_init(struct hubp *hubp); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index d1a8f1c302a9..33d311cea28c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -65,7 +65,7 @@ void print_microsec(struct dc_context *dc_ctx, struct dc_log_buffer_ctx *log_ctx, uint32_t ref_cycle) { - const uint32_t ref_clk_mhz = dc_ctx->dc->res_pool->ref_clock_inKhz / 1000; + const uint32_t ref_clk_mhz = dc_ctx->dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000; static const unsigned int frac = 1000; uint32_t us_x10 = (ref_cycle * frac) / ref_clk_mhz; @@ -345,13 +345,13 @@ void dcn10_log_hw_state(struct dc *dc, DTN_INFO("\nCALCULATED Clocks: dcfclk_khz:%d dcfclk_deep_sleep_khz:%d dispclk_khz:%d\n" "dppclk_khz:%d max_supported_dppclk_khz:%d fclk_khz:%d socclk_khz:%d\n\n", - dc->current_state->bw.dcn.clk.dcfclk_khz, - dc->current_state->bw.dcn.clk.dcfclk_deep_sleep_khz, - dc->current_state->bw.dcn.clk.dispclk_khz, - dc->current_state->bw.dcn.clk.dppclk_khz, - dc->current_state->bw.dcn.clk.max_supported_dppclk_khz, - dc->current_state->bw.dcn.clk.fclk_khz, - dc->current_state->bw.dcn.clk.socclk_khz); + dc->current_state->bw_ctx.bw.dcn.clk.dcfclk_khz, + dc->current_state->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz, + dc->current_state->bw_ctx.bw.dcn.clk.dispclk_khz, + dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz, + dc->current_state->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz, + dc->current_state->bw_ctx.bw.dcn.clk.fclk_khz, + dc->current_state->bw_ctx.bw.dcn.clk.socclk_khz); log_mpc_crc(dc, log_ctx); @@ -714,7 +714,7 @@ static enum dc_status dcn10_enable_stream_timing( return DC_OK; } -static void reset_back_end_for_pipe( +static void dcn10_reset_back_end_for_pipe( struct dc *dc, struct pipe_ctx *pipe_ctx, struct dc_state *context) @@ -889,22 +889,23 @@ void hwss1_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx) dcn10_verify_allow_pstate_change_high(dc); } -static void plane_atomic_power_down(struct dc *dc, struct pipe_ctx *pipe_ctx) +static void plane_atomic_power_down(struct dc *dc, + struct dpp *dpp, + struct hubp *hubp) { struct dce_hwseq *hws = dc->hwseq; - struct dpp *dpp = pipe_ctx->plane_res.dpp; DC_LOGGER_INIT(dc->ctx->logger); if (REG(DC_IP_REQUEST_CNTL)) { REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); dpp_pg_control(hws, dpp->inst, false); - hubp_pg_control(hws, pipe_ctx->plane_res.hubp->inst, false); + hubp_pg_control(hws, hubp->inst, false); dpp->funcs->dpp_reset(dpp); REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0); DC_LOG_DEBUG( - "Power gated front end %d\n", pipe_ctx->pipe_idx); + "Power gated front end %d\n", hubp->inst); } } @@ -931,7 +932,9 @@ static void plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) hubp->power_gated = true; dc->optimized_required = false; /* We're powering off, no need to optimize */ - plane_atomic_power_down(dc, pipe_ctx); + plane_atomic_power_down(dc, + pipe_ctx->plane_res.dpp, + pipe_ctx->plane_res.hubp); pipe_ctx->stream = NULL; memset(&pipe_ctx->stream_res, 0, sizeof(pipe_ctx->stream_res)); @@ -976,16 +979,14 @@ static void dcn10_init_pipes(struct dc *dc, struct dc_state *context) * to non-preferred front end. If pipe_ctx->stream is not NULL, * we will use the pipe, so don't disable */ - if (pipe_ctx->stream != NULL) + if (pipe_ctx->stream != NULL && can_apply_seamless_boot) continue; - if (tg->funcs->is_tg_enabled(tg)) - tg->funcs->lock(tg); - /* Blank controller using driver code instead of * command table. */ if (tg->funcs->is_tg_enabled(tg)) { + tg->funcs->lock(tg); tg->funcs->set_blank(tg, true); hwss_wait_for_blank_complete(tg); } @@ -1001,16 +1002,19 @@ static void dcn10_init_pipes(struct dc *dc, struct dc_state *context) struct dpp *dpp = dc->res_pool->dpps[i]; struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - // W/A for issue with dc_post_update_surfaces_to_stream - hubp->power_gated = true; - /* There is assumption that pipe_ctx is not mapping irregularly * to non-preferred front end. If pipe_ctx->stream is not NULL, * we will use the pipe, so don't disable */ - if (pipe_ctx->stream != NULL) + if (can_apply_seamless_boot && + pipe_ctx->stream != NULL && + pipe_ctx->stream_res.tg->funcs->is_tg_enabled( + pipe_ctx->stream_res.tg)) continue; + /* Disable on the current state so the new one isn't cleared. */ + pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + dpp->funcs->dpp_reset(dpp); pipe_ctx->stream_res.tg = tg; @@ -1108,6 +1112,25 @@ static void dcn10_init_hw(struct dc *dc) link->link_status.link_active = true; } + /* If taking control over from VBIOS, we may want to optimize our first + * mode set, so we need to skip powering down pipes until we know which + * pipes we want to use. + * Otherwise, if taking control is not possible, we need to power + * everything down. + */ + if (dcb->funcs->is_accelerated_mode(dcb) || dc->config.power_down_display_on_boot) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct hubp *hubp = dc->res_pool->hubps[i]; + struct dpp *dpp = dc->res_pool->dpps[i]; + + hubp->funcs->hubp_init(hubp); + dc->res_pool->opps[i]->mpc_tree_params.opp_id = dc->res_pool->opps[i]->inst; + plane_atomic_power_down(dc, dpp, hubp); + } + + apply_DEGVIDCN10_253_wa(dc); + } + for (i = 0; i < dc->res_pool->audio_count; i++) { struct audio *audio = dc->res_pool->audios[i]; @@ -1137,12 +1160,9 @@ static void dcn10_init_hw(struct dc *dc) enable_power_gating_plane(dc->hwseq, true); memset(&dc->res_pool->clk_mgr->clks, 0, sizeof(dc->res_pool->clk_mgr->clks)); - - if (dc->hwss.init_pipes) - dc->hwss.init_pipes(dc, dc->current_state); } -static void reset_hw_ctx_wrap( +static void dcn10_reset_hw_ctx_wrap( struct dc *dc, struct dc_state *context) { @@ -1164,10 +1184,9 @@ static void reset_hw_ctx_wrap( pipe_need_reprogram(pipe_ctx_old, pipe_ctx)) { struct clock_source *old_clk = pipe_ctx_old->clock_source; - reset_back_end_for_pipe(dc, pipe_ctx_old, dc->current_state); - if (dc->hwss.enable_stream_gating) { + dcn10_reset_back_end_for_pipe(dc, pipe_ctx_old, dc->current_state); + if (dc->hwss.enable_stream_gating) dc->hwss.enable_stream_gating(dc, pipe_ctx); - } if (old_clk) old_clk->funcs->cs_power_down(old_clk); } @@ -1836,7 +1855,7 @@ void dcn10_get_hdr_visual_confirm_color( switch (top_pipe_ctx->plane_res.scl_data.format) { case PIXEL_FORMAT_ARGB2101010: - if (top_pipe_ctx->stream->out_transfer_func->tf == TRANSFER_FUNCTION_UNITY) { + if (top_pipe_ctx->stream->out_transfer_func->tf == TRANSFER_FUNCTION_PQ) { /* HDR10, ARGB2101010 - set boarder color to red */ color->color_r_cr = color_value; } @@ -1931,7 +1950,7 @@ static void update_dpp(struct dpp *dpp, struct dc_plane_state *plane_state) plane_state->format, EXPANSION_MODE_ZERO, plane_state->input_csc_color_matrix, - COLOR_SPACE_YCBCR601_LIMITED); + plane_state->color_space); //set scale and bias registers build_prescale_params(&bns_params, plane_state); @@ -2051,7 +2070,7 @@ void update_dchubp_dpp( * divided by 2 */ if (plane_state->update_flags.bits.full_update) { - bool should_divided_by_2 = context->bw.dcn.clk.dppclk_khz <= + bool should_divided_by_2 = context->bw_ctx.bw.dcn.clk.dppclk_khz <= dc->res_pool->clk_mgr->clks.dispclk_khz / 2; dpp->funcs->dpp_dppclk_control( @@ -2120,6 +2139,9 @@ void update_dchubp_dpp( if (pipe_ctx->stream->cursor_attributes.address.quad_part != 0) { dc->hwss.set_cursor_position(pipe_ctx); dc->hwss.set_cursor_attribute(pipe_ctx); + + if (dc->hwss.set_cursor_sdr_white_level) + dc->hwss.set_cursor_sdr_white_level(pipe_ctx); } if (plane_state->update_flags.bits.full_update) { @@ -2310,6 +2332,7 @@ static void dcn10_apply_ctx_for_surface( int i; struct timing_generator *tg; bool removed_pipe[4] = { false }; + bool interdependent_update = false; struct pipe_ctx *top_pipe_to_program = find_top_pipe_for_stream(dc, context, stream); DC_LOGGER_INIT(dc->ctx->logger); @@ -2319,7 +2342,13 @@ static void dcn10_apply_ctx_for_surface( tg = top_pipe_to_program->stream_res.tg; - dcn10_pipe_control_lock(dc, top_pipe_to_program, true); + interdependent_update = top_pipe_to_program->plane_state && + top_pipe_to_program->plane_state->update_flags.bits.full_update; + + if (interdependent_update) + lock_all_pipes(dc, context, true); + else + dcn10_pipe_control_lock(dc, top_pipe_to_program, true); if (num_planes == 0) { /* OTG blank before remove all front end */ @@ -2339,15 +2368,9 @@ static void dcn10_apply_ctx_for_surface( */ if (pipe_ctx->plane_state && !old_pipe_ctx->plane_state) { if (old_pipe_ctx->stream_res.tg == tg && - old_pipe_ctx->plane_res.hubp && - old_pipe_ctx->plane_res.hubp->opp_id != 0xf) { + old_pipe_ctx->plane_res.hubp && + old_pipe_ctx->plane_res.hubp->opp_id != 0xf) dcn10_disable_plane(dc, old_pipe_ctx); - /* - * power down fe will unlock when calling reset, need - * to lock it back here. Messy, need rework. - */ - pipe_ctx->stream_res.tg->funcs->lock(pipe_ctx->stream_res.tg); - } } if ((!pipe_ctx->plane_state || @@ -2366,29 +2389,25 @@ static void dcn10_apply_ctx_for_surface( if (num_planes > 0) program_all_pipe_in_tree(dc, top_pipe_to_program, context); - dcn10_pipe_control_lock(dc, top_pipe_to_program, false); - - if (top_pipe_to_program->plane_state && - top_pipe_to_program->plane_state->update_flags.bits.full_update) + if (interdependent_update) for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - tg = pipe_ctx->stream_res.tg; /* Skip inactive pipes and ones already updated */ - if (!pipe_ctx->stream || pipe_ctx->stream == stream - || !pipe_ctx->plane_state - || !tg->funcs->is_tg_enabled(tg)) + if (!pipe_ctx->stream || pipe_ctx->stream == stream || + !pipe_ctx->plane_state || !tg->funcs->is_tg_enabled(tg)) continue; - tg->funcs->lock(tg); - pipe_ctx->plane_res.hubp->funcs->hubp_setup_interdependent( pipe_ctx->plane_res.hubp, &pipe_ctx->dlg_regs, &pipe_ctx->ttu_regs); - - tg->funcs->unlock(tg); } + if (interdependent_update) + lock_all_pipes(dc, context, false); + else + dcn10_pipe_control_lock(dc, top_pipe_to_program, false); + if (num_planes == 0) false_optc_underflow_wa(dc, stream, tg); @@ -2420,12 +2439,14 @@ static void dcn10_prepare_bandwidth( struct dc *dc, struct dc_state *context) { + struct hubbub *hubbub = dc->res_pool->hubbub; + if (dc->debug.sanity_checks) dcn10_verify_allow_pstate_change_high(dc); if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { if (context->stream_count == 0) - context->bw.dcn.clk.phyclk_khz = 0; + context->bw_ctx.bw.dcn.clk.phyclk_khz = 0; dc->res_pool->clk_mgr->funcs->update_clocks( dc->res_pool->clk_mgr, @@ -2433,9 +2454,9 @@ static void dcn10_prepare_bandwidth( false); } - hubbub1_program_watermarks(dc->res_pool->hubbub, - &context->bw.dcn.watermarks, - dc->res_pool->ref_clock_inKhz / 1000, + hubbub->funcs->program_watermarks(hubbub, + &context->bw_ctx.bw.dcn.watermarks, + dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, true); dcn10_stereo_hw_frame_pack_wa(dc, context); @@ -2450,12 +2471,14 @@ static void dcn10_optimize_bandwidth( struct dc *dc, struct dc_state *context) { + struct hubbub *hubbub = dc->res_pool->hubbub; + if (dc->debug.sanity_checks) dcn10_verify_allow_pstate_change_high(dc); if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { if (context->stream_count == 0) - context->bw.dcn.clk.phyclk_khz = 0; + context->bw_ctx.bw.dcn.clk.phyclk_khz = 0; dc->res_pool->clk_mgr->funcs->update_clocks( dc->res_pool->clk_mgr, @@ -2463,9 +2486,9 @@ static void dcn10_optimize_bandwidth( true); } - hubbub1_program_watermarks(dc->res_pool->hubbub, - &context->bw.dcn.watermarks, - dc->res_pool->ref_clock_inKhz / 1000, + hubbub->funcs->program_watermarks(hubbub, + &context->bw_ctx.bw.dcn.watermarks, + dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, true); dcn10_stereo_hw_frame_pack_wa(dc, context); @@ -2654,7 +2677,7 @@ static void dcn10_update_pending_status(struct pipe_ctx *pipe_ctx) flip_pending = pipe_ctx->plane_res.hubp->funcs->hubp_is_flip_pending( pipe_ctx->plane_res.hubp); - plane_state->status.is_flip_pending = flip_pending; + plane_state->status.is_flip_pending = plane_state->status.is_flip_pending || flip_pending; if (!flip_pending) plane_state->status.current_address = plane_state->status.requested_address; @@ -2685,16 +2708,22 @@ static void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) struct dpp *dpp = pipe_ctx->plane_res.dpp; struct dc_cursor_mi_param param = { .pixel_clk_khz = pipe_ctx->stream->timing.pix_clk_100hz / 10, - .ref_clk_khz = pipe_ctx->stream->ctx->dc->res_pool->ref_clock_inKhz, + .ref_clk_khz = pipe_ctx->stream->ctx->dc->res_pool->ref_clocks.dchub_ref_clock_inKhz, .viewport = pipe_ctx->plane_res.scl_data.viewport, .h_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.horz, .v_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.vert, .rotation = pipe_ctx->plane_state->rotation, .mirror = pipe_ctx->plane_state->horizontal_mirror }; + uint32_t x_plane = pipe_ctx->plane_state->dst_rect.x; + uint32_t y_plane = pipe_ctx->plane_state->dst_rect.y; + uint32_t x_offset = min(x_plane, pos_cpy.x); + uint32_t y_offset = min(y_plane, pos_cpy.y); - pos_cpy.x_hotspot += pipe_ctx->plane_state->dst_rect.x; - pos_cpy.y_hotspot += pipe_ctx->plane_state->dst_rect.y; + pos_cpy.x -= x_offset; + pos_cpy.y -= y_offset; + pos_cpy.x_hotspot += (x_plane - x_offset); + pos_cpy.y_hotspot += (y_plane - y_offset); if (pipe_ctx->plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE) @@ -2789,6 +2818,33 @@ int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx) return vertical_line_start; } +void lock_all_pipes(struct dc *dc, + struct dc_state *context, + bool lock) +{ + struct pipe_ctx *pipe_ctx; + struct timing_generator *tg; + int i; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe_ctx = &context->res_ctx.pipe_ctx[i]; + tg = pipe_ctx->stream_res.tg; + /* + * Only lock the top pipe's tg to prevent redundant + * (un)locking. Also skip if pipe is disabled. + */ + if (pipe_ctx->top_pipe || + !pipe_ctx->stream || !pipe_ctx->plane_state || + !tg->funcs->is_tg_enabled(tg)) + continue; + + if (lock) + tg->funcs->lock(tg); + else + tg->funcs->unlock(tg); + } +} + static void calc_vupdate_position( struct pipe_ctx *pipe_ctx, uint32_t *start_line, @@ -2882,6 +2938,29 @@ static void dcn10_setup_vupdate_interrupt(struct pipe_ctx *pipe_ctx) tg->funcs->setup_vertical_interrupt2(tg, start_line); } +static void dcn10_unblank_stream(struct pipe_ctx *pipe_ctx, + struct dc_link_settings *link_settings) +{ + struct encoder_unblank_param params = { { 0 } }; + struct dc_stream_state *stream = pipe_ctx->stream; + struct dc_link *link = stream->link; + + /* only 3 items below are used by unblank */ + params.timing = pipe_ctx->stream->timing; + + params.link_settings.link_rate = link_settings->link_rate; + + if (dc_is_dp_signal(pipe_ctx->stream->signal)) { + if (params.timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) + params.timing.pix_clk_100hz /= 2; + pipe_ctx->stream_res.stream_enc->funcs->dp_unblank(pipe_ctx->stream_res.stream_enc, ¶ms); + } + + if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) { + link->dc->hwss.edp_backlight_control(link, true); + } +} + static const struct hw_sequencer_funcs dcn10_funcs = { .program_gamut_remap = program_gamut_remap, .init_hw = dcn10_init_hw, @@ -2903,7 +2982,7 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .update_info_frame = dce110_update_info_frame, .enable_stream = dce110_enable_stream, .disable_stream = dce110_disable_stream, - .unblank_stream = dce110_unblank_stream, + .unblank_stream = dcn10_unblank_stream, .blank_stream = dce110_blank_stream, .enable_audio_stream = dce110_enable_audio_stream, .disable_audio_stream = dce110_disable_audio_stream, @@ -2913,7 +2992,7 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .pipe_control_lock = dcn10_pipe_control_lock, .prepare_bandwidth = dcn10_prepare_bandwidth, .optimize_bandwidth = dcn10_optimize_bandwidth, - .reset_hw_ctx_wrap = reset_hw_ctx_wrap, + .reset_hw_ctx_wrap = dcn10_reset_hw_ctx_wrap, .enable_stream_timing = dcn10_enable_stream_timing, .set_drr = set_drr, .get_position = get_position, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h index 6d66084df55f..4b3b27a5d23b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h @@ -83,4 +83,8 @@ struct pipe_ctx *find_top_pipe_for_stream( int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx); +void lock_all_pipes(struct dc *dc, + struct dc_state *context, + bool lock); + #endif /* __DC_HWSS_DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c index 98f41d250978..991622da9ed5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c @@ -77,7 +77,7 @@ static unsigned int dcn10_get_hubbub_state(struct dc *dc, char *pBuf, unsigned i unsigned int chars_printed = 0; unsigned int remaining_buffer = bufSize; - const uint32_t ref_clk_mhz = dc_ctx->dc->res_pool->ref_clock_inKhz / 1000; + const uint32_t ref_clk_mhz = dc_ctx->dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000; static const unsigned int frac = 1000; memset(&wm, 0, sizeof(struct dcn_hubbub_wm)); @@ -115,7 +115,7 @@ static unsigned int dcn10_get_hubp_states(struct dc *dc, char *pBuf, unsigned in unsigned int chars_printed = 0; unsigned int remaining_buffer = bufSize; - const uint32_t ref_clk_mhz = dc_ctx->dc->res_pool->ref_clock_inKhz / 1000; + const uint32_t ref_clk_mhz = dc_ctx->dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000; static const unsigned int frac = 1000; if (invarOnly) @@ -472,12 +472,12 @@ static unsigned int dcn10_get_clock_states(struct dc *dc, char *pBuf, unsigned i chars_printed = snprintf_count(pBuf, bufSize, "dcfclk,dcfclk_deep_sleep,dispclk," "dppclk,fclk,socclk\n" "%d,%d,%d,%d,%d,%d\n", - dc->current_state->bw.dcn.clk.dcfclk_khz, - dc->current_state->bw.dcn.clk.dcfclk_deep_sleep_khz, - dc->current_state->bw.dcn.clk.dispclk_khz, - dc->current_state->bw.dcn.clk.dppclk_khz, - dc->current_state->bw.dcn.clk.fclk_khz, - dc->current_state->bw.dcn.clk.socclk_khz); + dc->current_state->bw_ctx.bw.dcn.clk.dcfclk_khz, + dc->current_state->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz, + dc->current_state->bw_ctx.bw.dcn.clk.dispclk_khz, + dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz, + dc->current_state->bw_ctx.bw.dcn.clk.fclk_khz, + dc->current_state->bw_ctx.bw.dcn.clk.socclk_khz); remaining_buffer -= chars_printed; pBuf += chars_printed; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c index a9db372688ff..0126a44ba012 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c @@ -1304,7 +1304,6 @@ void dcn10_link_encoder_connect_dig_be_to_fe( #define HPD_REG_UPDATE_N(reg_name, n, ...) \ generic_reg_update_ex(CTX, \ HPD_REG(reg_name), \ - HPD_REG_READ(reg_name), \ n, __VA_ARGS__) #define HPD_REG_UPDATE(reg_name, field, val) \ @@ -1337,7 +1336,6 @@ void dcn10_link_encoder_disable_hpd(struct link_encoder *enc) #define AUX_REG_UPDATE_N(reg_name, n, ...) \ generic_reg_update_ex(CTX, \ AUX_REG(reg_name), \ - AUX_REG_READ(reg_name), \ n, __VA_ARGS__) #define AUX_REG_UPDATE(reg_name, field, val) \ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 09d74070a49b..7eccb54c421d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -516,6 +516,31 @@ static const struct resource_caps rv2_res_cap = { }; #endif +static const struct dc_plane_cap plane_cap = { + .type = DC_PLANE_TYPE_DCN_UNIVERSAL, + .blends_with_above = true, + .blends_with_below = true, + .per_pixel_alpha = true, + + .pixel_format_support = { + .argb8888 = true, + .nv12 = true, + .fp16 = true + }, + + .max_upscale_factor = { + .argb8888 = 16000, + .nv12 = 16000, + .fp16 = 1 + }, + + .max_downscale_factor = { + .argb8888 = 250, + .nv12 = 250, + .fp16 = 1 + } +}; + static const struct dc_debug_options debug_defaults_drv = { .sanity_checks = true, .disable_dmcu = true, @@ -848,14 +873,14 @@ void dcn10_clock_source_destroy(struct clock_source **clk_src) *clk_src = NULL; } -static struct pp_smu_funcs_rv *dcn10_pp_smu_create(struct dc_context *ctx) +static struct pp_smu_funcs *dcn10_pp_smu_create(struct dc_context *ctx) { - struct pp_smu_funcs_rv *pp_smu = kzalloc(sizeof(*pp_smu), GFP_KERNEL); + struct pp_smu_funcs *pp_smu = kzalloc(sizeof(*pp_smu), GFP_KERNEL); if (!pp_smu) return pp_smu; - dm_pp_get_funcs_rv(ctx, pp_smu); + dm_pp_get_funcs(ctx, pp_smu); return pp_smu; } @@ -865,10 +890,7 @@ static void destruct(struct dcn10_resource_pool *pool) for (i = 0; i < pool->base.stream_enc_count; i++) { if (pool->base.stream_enc[i] != NULL) { - /* TODO: free dcn version of stream encoder once implemented - * rather than using virtual stream encoder - */ - kfree(pool->base.stream_enc[i]); + kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); pool->base.stream_enc[i] = NULL; } } @@ -921,9 +943,6 @@ static void destruct(struct dcn10_resource_pool *pool) } } - for (i = 0; i < pool->base.stream_enc_count; i++) - kfree(pool->base.stream_enc[i]); - for (i = 0; i < pool->base.audio_count; i++) { if (pool->base.audios[i]) dce_aud_destroy(&pool->base.audios[i]); @@ -1078,7 +1097,7 @@ static struct pipe_ctx *dcn10_acquire_idle_pipe_for_layer( { struct resource_context *res_ctx = &context->res_ctx; struct pipe_ctx *head_pipe = resource_get_head_pipe_for_stream(res_ctx, stream); - struct pipe_ctx *idle_pipe = find_idle_secondary_pipe(res_ctx, pool); + struct pipe_ctx *idle_pipe = find_idle_secondary_pipe(res_ctx, pool, head_pipe); if (!head_pipe) { ASSERT(0); @@ -1143,7 +1162,7 @@ static enum dc_status dcn10_validate_global(struct dc *dc, struct dc_state *cont continue; if (context->stream_status[i].plane_count > 2) - return false; + return DC_FAIL_UNSUPPORTED_1; for (j = 0; j < context->stream_status[i].plane_count; j++) { struct dc_plane_state *plane = @@ -1351,7 +1370,7 @@ static bool construct( goto fail; } - dml_init_instance(&dc->dml, DML_PROJECT_RAVEN1); + dml_init_instance(&dc->dml, &dcn1_0_soc, &dcn1_0_ip, DML_PROJECT_RAVEN1); memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults)); memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults)); @@ -1510,6 +1529,9 @@ static bool construct( dcn10_hw_sequencer_construct(dc); dc->caps.max_planes = pool->base.pipe_count; + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + dc->cap_funcs = cap_funcs; return true; @@ -1522,7 +1544,7 @@ fail: } struct resource_pool *dcn10_create_resource_pool( - uint8_t num_virtual_links, + const struct dc_init_data *init_data, struct dc *dc) { struct dcn10_resource_pool *pool = @@ -1531,7 +1553,7 @@ struct resource_pool *dcn10_create_resource_pool( if (!pool) return NULL; - if (construct(num_virtual_links, dc, pool)) + if (construct(init_data->num_virtual_links, dc, pool)) return &pool->base; BREAK_TO_DEBUGGER(); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h index 8f71225bc61b..999c684a0b36 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h @@ -39,7 +39,7 @@ struct dcn10_resource_pool { struct resource_pool base; }; struct resource_pool *dcn10_create_resource_pool( - uint8_t num_virtual_links, + const struct dc_init_data *init_data, struct dc *dc); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c index b08254121251..8ee9f6dc1d62 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c @@ -245,7 +245,8 @@ static void enc1_update_hdmi_info_packet( void enc1_stream_encoder_dp_set_stream_attribute( struct stream_encoder *enc, struct dc_crtc_timing *crtc_timing, - enum dc_color_space output_color_space) + enum dc_color_space output_color_space, + uint32_t enable_sdp_splitting) { uint32_t h_active_start; uint32_t v_active_start; @@ -298,7 +299,6 @@ void enc1_stream_encoder_dp_set_stream_attribute( break; case PIXEL_ENCODING_YCBCR420: dp_pixel_encoding = DP_PIXEL_ENCODING_TYPE_YCBCR420; - REG_UPDATE(DP_VID_TIMING, DP_VID_N_MUL, 1); break; default: dp_pixel_encoding = DP_PIXEL_ENCODING_TYPE_RGB444; @@ -726,13 +726,19 @@ void enc1_stream_encoder_update_dp_info_packets( 3, /* packetIndex */ &info_frame->hdrsmd); + if (info_frame->dpsdp.valid) + enc1_update_generic_info_packet( + enc1, + 4,/* packetIndex */ + &info_frame->dpsdp); + /* enable/disable transmission of packet(s). * If enabled, packet transmission begins on the next frame */ REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP0_ENABLE, info_frame->vsc.valid); REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, info_frame->spd.valid); REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, info_frame->hdrsmd.valid); - + REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP4_ENABLE, info_frame->dpsdp.valid); /* This bit is the master enable bit. * When enabling secondary stream engine, @@ -797,10 +803,10 @@ void enc1_stream_encoder_dp_blank( */ REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_DIS_DEFER, 2); /* Larger delay to wait until VBLANK - use max retry of - * 10us*3000=30ms. This covers 16.6ms of typical 60 Hz mode + + * 10us*5000=50ms. This covers 41.7ms of minimum 24 Hz mode + * a little more because we may not trust delay accuracy. */ - max_retries = DP_BLANK_MAX_RETRY * 150; + max_retries = DP_BLANK_MAX_RETRY * 250; /* disable DP stream */ REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, 0); @@ -833,14 +839,19 @@ void enc1_stream_encoder_dp_unblank( if (param->link_settings.link_rate != LINK_RATE_UNKNOWN) { uint32_t n_vid = 0x8000; uint32_t m_vid; + uint32_t n_multiply = 0; + uint64_t m_vid_l = n_vid; + /* YCbCr 4:2:0 : Computed VID_M will be 2X the input rate */ + if (param->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) { + /*this param->pixel_clk_khz is half of 444 rate for 420 already*/ + n_multiply = 1; + } /* M / N = Fstream / Flink * m_vid / n_vid = pixel rate / link rate */ - uint64_t m_vid_l = n_vid; - - m_vid_l *= param->pixel_clk_khz; + m_vid_l *= param->timing.pix_clk_100hz / 10; m_vid_l = div_u64(m_vid_l, param->link_settings.link_rate * LINK_RATE_REF_FREQ_IN_KHZ); @@ -859,7 +870,9 @@ void enc1_stream_encoder_dp_unblank( REG_UPDATE(DP_VID_M, DP_VID_M, m_vid); - REG_UPDATE(DP_VID_TIMING, DP_VID_M_N_GEN_EN, 1); + REG_UPDATE_2(DP_VID_TIMING, + DP_VID_M_N_GEN_EN, 1, + DP_VID_N_MUL, n_multiply); } /* set DIG_START to 0x1 to resync FIFO */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h index b7c800e10a32..e654c2f55971 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h @@ -462,7 +462,8 @@ void enc1_update_generic_info_packet( void enc1_stream_encoder_dp_set_stream_attribute( struct stream_encoder *enc, struct dc_crtc_timing *crtc_timing, - enum dc_color_space output_color_space); + enum dc_color_space output_color_space, + uint32_t enable_sdp_splitting); void enc1_stream_encoder_hdmi_set_stream_attribute( struct stream_encoder *enc, diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h index e81b24374bcb..ccbfe9680d27 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h +++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h @@ -58,7 +58,7 @@ bool dm_helpers_dp_mst_write_payload_allocation_table( bool enable); /* - * poll pending down reply before clear payload allocation table + * poll pending down reply */ void dm_helpers_dp_mst_poll_pending_down_reply( struct dc_context *ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h index 14bed5b1fa97..4fc4208d1472 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h +++ b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h @@ -30,6 +30,8 @@ * interface to PPLIB/SMU to setup clocks and pstate requirements on SoC */ +typedef bool BOOLEAN; + enum pp_smu_ver { /* * PP_SMU_INTERFACE_X should be interpreted as the interface defined @@ -72,29 +74,6 @@ struct pp_smu_wm_range_sets { struct pp_smu_wm_set_range writer_wm_sets[MAX_WATERMARK_SETS]; }; -struct pp_smu_display_requirement_rv { - /* PPSMC_MSG_SetDisplayCount: count - * 0 triggers S0i2 optimization - */ - unsigned int display_count; - - /* PPSMC_MSG_SetHardMinFclkByFreq: mhz - * FCLK will vary with DPM, but never below requested hard min - */ - unsigned int hard_min_fclk_mhz; - - /* PPSMC_MSG_SetHardMinDcefclkByFreq: mhz - * fixed clock at requested freq, either from FCH bypass or DFS - */ - unsigned int hard_min_dcefclk_mhz; - - /* PPSMC_MSG_SetMinDeepSleepDcefclk: mhz - * when DF is in cstate, dcf clock is further divided down - * to just above given frequency - */ - unsigned int min_deep_sleep_dcefclk_mhz; -}; - struct pp_smu_funcs_rv { struct pp_smu pp_smu; @@ -137,12 +116,6 @@ struct pp_smu_funcs_rv { /* PME w/a */ void (*set_pme_wa_enable)(struct pp_smu *pp); - /* - * Legacy functions. Used for backwards comp. with existing - * PPlib code. - */ - void (*set_display_requirement)(struct pp_smu *pp, - struct pp_smu_display_requirement_rv *req); }; struct pp_smu_funcs { diff --git a/drivers/gpu/drm/amd/display/dc/dm_services.h b/drivers/gpu/drm/amd/display/dc/dm_services.h index 1961cc6d9143..b426ba02b793 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_services.h +++ b/drivers/gpu/drm/amd/display/dc/dm_services.h @@ -52,30 +52,17 @@ irq_handler_idx dm_register_interrupt( * GPU registers access * */ - +uint32_t dm_read_reg_func( + const struct dc_context *ctx, + uint32_t address, + const char *func_name); /* enable for debugging new code, this adds 50k to the driver size. */ /* #define DM_CHECK_ADDR_0 */ #define dm_read_reg(ctx, address) \ dm_read_reg_func(ctx, address, __func__) -static inline uint32_t dm_read_reg_func( - const struct dc_context *ctx, - uint32_t address, - const char *func_name) -{ - uint32_t value; -#ifdef DM_CHECK_ADDR_0 - if (address == 0) { - DC_ERR("invalid register read; address = 0\n"); - return 0; - } -#endif - value = cgs_read_register(ctx->cgs_device, address); - trace_amdgpu_dc_rreg(&ctx->perf_trace->read_count, address, value); - return value; -} #define dm_write_reg(ctx, address, value) \ dm_write_reg_func(ctx, address, value, __func__) @@ -144,10 +131,14 @@ static inline uint32_t set_reg_field_value_ex( reg_name ## __ ## reg_field ## _MASK,\ reg_name ## __ ## reg_field ## __SHIFT) -uint32_t generic_reg_update_ex(const struct dc_context *ctx, +uint32_t generic_reg_set_ex(const struct dc_context *ctx, uint32_t addr, uint32_t reg_val, int n, uint8_t shift1, uint32_t mask1, uint32_t field_value1, ...); +uint32_t generic_reg_update_ex(const struct dc_context *ctx, + uint32_t addr, int n, + uint8_t shift1, uint32_t mask1, uint32_t field_value1, ...); + #define FD(reg_field) reg_field ## __SHIFT, \ reg_field ## _MASK @@ -155,7 +146,7 @@ uint32_t generic_reg_update_ex(const struct dc_context *ctx, * return number of poll before condition is met * return 0 if condition is not meet after specified time out tries */ -unsigned int generic_reg_wait(const struct dc_context *ctx, +void generic_reg_wait(const struct dc_context *ctx, uint32_t addr, uint32_t mask, uint32_t shift, uint32_t condition_value, unsigned int delay_between_poll_us, unsigned int time_out_num_tries, const char *func_name, int line); @@ -172,11 +163,10 @@ unsigned int generic_reg_wait(const struct dc_context *ctx, #define generic_reg_update_soc15(ctx, inst_offset, reg_name, n, ...)\ generic_reg_update_ex(ctx, DCE_BASE.instance[0].segment[mm##reg_name##_BASE_IDX] + mm##reg_name + inst_offset, \ - dm_read_reg_func(ctx, mm##reg_name + DCE_BASE.instance[0].segment[mm##reg_name##_BASE_IDX] + inst_offset, __func__), \ n, __VA_ARGS__) #define generic_reg_set_soc15(ctx, inst_offset, reg_name, n, ...)\ - generic_reg_update_ex(ctx, DCE_BASE.instance[0].segment[mm##reg_name##_BASE_IDX] + mm##reg_name + inst_offset, 0, \ + generic_reg_set_ex(ctx, DCE_BASE.instance[0].segment[mm##reg_name##_BASE_IDX] + mm##reg_name + inst_offset, 0, \ n, __VA_ARGS__) #define get_reg_field_value_soc15(reg_value, block, reg_num, reg_name, reg_field)\ @@ -223,8 +213,8 @@ bool dm_pp_notify_wm_clock_changes( const struct dc_context *ctx, struct dm_pp_wm_sets_with_clock_ranges *wm_with_clock_ranges); -void dm_pp_get_funcs_rv(struct dc_context *ctx, - struct pp_smu_funcs_rv *funcs); +void dm_pp_get_funcs(struct dc_context *ctx, + struct pp_smu_funcs *funcs); /* DAL calls this function to notify PP about completion of Mode Set. * For PP it means that current DCE clocks are those which were returned diff --git a/drivers/gpu/drm/amd/display/dc/dm_services_types.h b/drivers/gpu/drm/amd/display/dc/dm_services_types.h index 77200711abbe..a3d1be20dd9d 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_services_types.h +++ b/drivers/gpu/drm/amd/display/dc/dm_services_types.h @@ -29,7 +29,7 @@ #include "os_types.h" #include "dc_types.h" -struct pp_smu_funcs_rv; +struct pp_smu_funcs; struct dm_pp_clock_range { int min_khz; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c index d303b789adfe..80ffd7d958b2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c @@ -26,40 +26,14 @@ #include "display_mode_lib.h" #include "dc_features.h" -extern const struct _vcs_dpi_ip_params_st dcn1_0_ip; -extern const struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc; - -static void set_soc_bounding_box(struct _vcs_dpi_soc_bounding_box_st *soc, enum dml_project project) -{ - switch (project) { - case DML_PROJECT_RAVEN1: - *soc = dcn1_0_soc; - break; - default: - ASSERT(0); - break; - } -} - -static void set_ip_params(struct _vcs_dpi_ip_params_st *ip, enum dml_project project) +void dml_init_instance(struct display_mode_lib *lib, + const struct _vcs_dpi_soc_bounding_box_st *soc_bb, + const struct _vcs_dpi_ip_params_st *ip_params, + enum dml_project project) { - switch (project) { - case DML_PROJECT_RAVEN1: - *ip = dcn1_0_ip; - break; - default: - ASSERT(0); - break; - } -} - -void dml_init_instance(struct display_mode_lib *lib, enum dml_project project) -{ - if (lib->project != project) { - set_soc_bounding_box(&lib->soc, project); - set_ip_params(&lib->ip, project); - lib->project = project; - } + lib->soc = *soc_bb; + lib->ip = *ip_params; + lib->project = project; } const char *dml_get_status_message(enum dm_validation_status status) diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h index a730e0209c05..1b546dba34bd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h @@ -41,7 +41,10 @@ struct display_mode_lib { struct dal_logger *logger; }; -void dml_init_instance(struct display_mode_lib *lib, enum dml_project project); +void dml_init_instance(struct display_mode_lib *lib, + const struct _vcs_dpi_soc_bounding_box_st *soc_bb, + const struct _vcs_dpi_ip_params_st *ip_params, + enum dml_project project); const char *dml_get_status_message(enum dm_validation_status status); diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index 391183e3428f..c5b791d158a7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -25,6 +25,8 @@ #ifndef __DISPLAY_MODE_STRUCTS_H__ #define __DISPLAY_MODE_STRUCTS_H__ +#define MAX_CLOCK_LIMIT_STATES 8 + typedef struct _vcs_dpi_voltage_scaling_st voltage_scaling_st; typedef struct _vcs_dpi_soc_bounding_box_st soc_bounding_box_st; typedef struct _vcs_dpi_ip_params_st ip_params_st; @@ -103,7 +105,7 @@ struct _vcs_dpi_soc_bounding_box_st { double xfc_xbuf_latency_tolerance_us; int use_urgent_burst_bw; unsigned int num_states; - struct _vcs_dpi_voltage_scaling_st clock_limits[8]; + struct _vcs_dpi_voltage_scaling_st clock_limits[MAX_CLOCK_LIMIT_STATES]; }; struct _vcs_dpi_ip_params_st { @@ -416,6 +418,7 @@ struct _vcs_dpi_display_dlg_regs_st { unsigned int refcyc_per_vm_group_flip; unsigned int refcyc_per_vm_req_vblank; unsigned int refcyc_per_vm_req_flip; + unsigned int refcyc_per_vm_dmdata; }; struct _vcs_dpi_display_ttu_regs_st { diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c index 48400d642610..e2d82aacd3bc 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c @@ -321,6 +321,9 @@ void print__dlg_regs_st(struct display_mode_lib *mode_lib, display_dlg_regs_st d dml_print( "DML_RQ_DLG_CALC: xfc_reg_remote_surface_flip_latency = 0x%0x\n", dlg_regs.xfc_reg_remote_surface_flip_latency); + dml_print( + "DML_RQ_DLG_CALC: refcyc_per_vm_dmdata = 0x%0x\n", + dlg_regs.refcyc_per_vm_dmdata); dml_print("DML_RQ_DLG_CALC: =====================================\n"); } diff --git a/drivers/gpu/drm/amd/display/dc/inc/clock_source.h b/drivers/gpu/drm/amd/display/dc/inc/clock_source.h index fe6301cb8681..1b01a9a58d14 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/clock_source.h +++ b/drivers/gpu/drm/amd/display/dc/inc/clock_source.h @@ -167,7 +167,7 @@ struct clock_source_funcs { struct pixel_clk_params *, struct pll_settings *); bool (*get_pixel_clk_frequency_100hz)( - struct clock_source *clock_source, + const struct clock_source *clock_source, unsigned int inst, unsigned int *pixel_clk_khz); }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_status.h b/drivers/gpu/drm/amd/display/dc/inc/core_status.h index 2e61a22ef4b2..8dca3b7700e5 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_status.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_status.h @@ -43,7 +43,7 @@ enum dc_status { DC_FAIL_BANDWIDTH_VALIDATE = 13, /* BW and Watermark validation */ DC_FAIL_SCALING = 14, DC_FAIL_DP_LINK_TRAINING = 15, - + DC_FAIL_UNSUPPORTED_1 = 18, DC_ERROR_UNEXPECTED = -1 }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 986ed1728644..6f5ab05d6467 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -95,10 +95,10 @@ struct resource_funcs { void (*link_init)(struct dc_link *link); struct link_encoder *(*link_enc_create)( const struct encoder_init_data *init); - bool (*validate_bandwidth)( struct dc *dc, - struct dc_state *context); + struct dc_state *context, + bool fast_validate); enum dc_status (*validate_global)( struct dc *dc, @@ -144,8 +144,7 @@ struct resource_pool { struct stream_encoder *stream_enc[MAX_PIPES * 2]; struct hubbub *hubbub; struct mpc *mpc; - struct pp_smu_funcs_rv *pp_smu; - struct pp_smu_display_requirement_rv pp_smu_req; + struct pp_smu_funcs *pp_smu; struct dce_aux *engines[MAX_PIPES]; struct dce_i2c_hw *hw_i2cs[MAX_PIPES]; struct dce_i2c_sw *sw_i2cs[MAX_PIPES]; @@ -154,7 +153,12 @@ struct resource_pool { unsigned int pipe_count; unsigned int underlay_pipe_index; unsigned int stream_enc_count; - unsigned int ref_clock_inKhz; + + struct { + unsigned int xtalin_clock_inKhz; + unsigned int dccg_ref_clock_inKhz; + unsigned int dchub_ref_clock_inKhz; + } ref_clocks; unsigned int timing_generator_count; /* @@ -262,18 +266,22 @@ struct dcn_bw_output { struct dcn_watermark_set watermarks; }; -union bw_context { +union bw_output { struct dcn_bw_output dcn; struct dce_bw_output dce; }; +struct bw_context { + union bw_output bw; + struct display_mode_lib dml; +}; /** * struct dc_state - The full description of a state requested by a user * * @streams: Stream properties * @stream_status: The planes on a given stream * @res_ctx: Persistent state of resources - * @bw: The output from bandwidth and watermark calculations + * @bw_ctx: The output from bandwidth and watermark calculations and the DML * @pp_display_cfg: PowerPlay clocks and settings * @dcn_bw_vars: non-stack memory to support bandwidth calculations * @@ -285,7 +293,7 @@ struct dc_state { struct resource_context res_ctx; - union bw_context bw; + struct bw_context bw_ctx; /* Note: these are big structures, do *not* put on stack! */ struct dm_pp_display_configuration pp_display_cfg; @@ -293,7 +301,11 @@ struct dc_state { struct dcn_bw_internal_vars dcn_bw_vars; #endif - struct clk_mgr *dccg; + struct clk_mgr *clk_mgr; + + struct { + bool full_update_needed : 1; + } commit_hints; struct kref refcount; }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h index 16fd4dc6c4dd..b1fab251c09b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h @@ -95,8 +95,9 @@ bool dal_ddc_service_query_ddc_data( uint8_t *read_buf, uint32_t read_size); -int dc_link_aux_transfer(struct ddc_service *ddc, - struct aux_payload *payload); +int dc_link_aux_transfer_raw(struct ddc_service *ddc, + struct aux_payload *payload, + enum aux_channel_operation_result *operation_result); bool dc_link_aux_transfer_with_retries(struct ddc_service *ddc, struct aux_payload *payload); diff --git a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h index ece954a40a8e..263c09630c06 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h @@ -621,7 +621,8 @@ extern const struct dcn_ip_params dcn10_ip_defaults; bool dcn_validate_bandwidth( struct dc *dc, - struct dc_state *context); + struct dc_state *context, + bool fast_validate); unsigned int dcn_find_dcfclk_suits_all( const struct dc *dc, @@ -631,5 +632,7 @@ void dcn_bw_update_from_pplib(struct dc *dc); void dcn_bw_notify_pplib_of_wm_ranges(struct dc *dc); void dcn_bw_sync_calcs_and_dml(struct dc *dc); +enum source_macro_tile_size swizzle_mode_to_macro_tile_size(enum swizzle_mode_values sw_mode); + #endif /* __DCN_CALCS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index 23a4b18e5fee..31bd6d5183ab 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -42,6 +42,8 @@ struct clk_mgr_funcs { bool safe_to_lower); int (*get_dp_ref_clk_frequency)(struct clk_mgr *clk_mgr); + + void (*init_clocks)(struct clk_mgr *clk_mgr); }; #endif /* __DAL_CLK_MGR_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h index 95a56d012626..05ee5295d2c1 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h @@ -39,6 +39,10 @@ struct dccg_funcs { void (*update_dpp_dto)(struct dccg *dccg, int dpp_inst, int req_dppclk); + void (*get_dccg_ref_freq)(struct dccg *dccg, + unsigned int xtalin_freq_inKhz, + unsigned int *dccg_ref_freq_inKhz); + void (*dccg_init)(struct dccg *dccg); }; #endif //__DAL_DCCG_H__ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h index 9d2d8e51306c..93667e8b23b3 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h @@ -73,6 +73,16 @@ struct hubbub_funcs { void (*wm_read_state)(struct hubbub *hubbub, struct dcn_hubbub_wm *wm); + + void (*get_dchub_ref_freq)(struct hubbub *hubbub, + unsigned int dccg_ref_freq_inKhz, + unsigned int *dchub_ref_freq_inKhz); + + void (*program_watermarks)( + struct hubbub *hubbub, + struct dcn_watermark_set *watermarks, + unsigned int refclk_mhz, + bool safe_to_lower); }; struct hubbub { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h index cbaa43853611..c68f0ce346c7 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h @@ -70,6 +70,8 @@ struct dmcu_funcs { void (*get_psr_wait_loop)(struct dmcu *dmcu, unsigned int *psr_wait_loop_number); bool (*is_dmcu_initialized)(struct dmcu *dmcu); + bool (*lock_phy)(struct dmcu *dmcu); + bool (*unlock_phy)(struct dmcu *dmcu); }; #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index 1cd07e94ee63..455df4999797 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -130,6 +130,7 @@ struct hubp_funcs { void (*hubp_clear_underflow)(struct hubp *hubp); void (*hubp_disable_control)(struct hubp *hubp, bool disable_hubp); unsigned int (*hubp_get_underflow_status)(struct hubp *hubp); + void (*hubp_init)(struct hubp *hubp); }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h index da85537a4488..4c8e2c6fb6db 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h @@ -146,6 +146,12 @@ struct out_csc_color_matrix { uint16_t regval[12]; }; +enum gamut_remap_select { + GAMUT_REMAP_BYPASS = 0, + GAMUT_REMAP_COEFF, + GAMUT_REMAP_COMA_COEFF, + GAMUT_REMAP_COMB_COEFF +}; enum opp_regamma { OPP_REGAMMA_BYPASS = 0, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h index 4051493557bc..49854eb73d1d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h @@ -63,11 +63,13 @@ struct encoder_info_frame { struct dc_info_packet vsc; /* HDR Static MetaData */ struct dc_info_packet hdrsmd; + /* custom sdp message */ + struct dc_info_packet dpsdp; }; struct encoder_unblank_param { struct dc_link_settings link_settings; - unsigned int pixel_clk_khz; + struct dc_crtc_timing timing; }; struct encoder_set_dp_phy_pattern_param { @@ -88,7 +90,8 @@ struct stream_encoder_funcs { void (*dp_set_stream_attribute)( struct stream_encoder *enc, struct dc_crtc_timing *crtc_timing, - enum dc_color_space output_color_space); + enum dc_color_space output_color_space, + uint32_t enable_sdp_splitting); void (*hdmi_set_stream_attribute)( struct stream_encoder *enc, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index c25f7df7b5e3..067d53caf28a 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -187,8 +187,10 @@ struct timing_generator_funcs { bool (*did_triggered_reset_occur)(struct timing_generator *tg); void (*setup_global_swap_lock)(struct timing_generator *tg, const struct dcp_gsl_params *gsl_params); + void (*setup_global_lock)(struct timing_generator *tg); void (*unlock)(struct timing_generator *tg); void (*lock)(struct timing_generator *tg); + void (*lock_global)(struct timing_generator *tg); void (*enable_reset_trigger)(struct timing_generator *tg, int source_tg_inst); void (*enable_crtc_reset)(struct timing_generator *tg, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index 7676f25216b1..33905468e2b9 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -176,6 +176,10 @@ struct hw_sequencer_funcs { struct dc *dc, struct pipe_ctx *pipe, bool lock); + void (*pipe_control_lock_global)( + struct dc *dc, + struct pipe_ctx *pipe, + bool lock); void (*blank_pixel_data)( struct dc *dc, struct pipe_ctx *pipe_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h index cf5a84b9e27c..8503d9cc4763 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h +++ b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h @@ -52,7 +52,7 @@ /* macro to set register fields. */ #define REG_SET_N(reg_name, n, initial_val, ...) \ - generic_reg_update_ex(CTX, \ + generic_reg_set_ex(CTX, \ REG(reg_name), \ initial_val, \ n, __VA_ARGS__) @@ -225,7 +225,6 @@ #define REG_UPDATE_N(reg_name, n, ...) \ generic_reg_update_ex(CTX, \ REG(reg_name), \ - REG_READ(reg_name), \ n, __VA_ARGS__) #define REG_UPDATE(reg_name, field, val) \ @@ -380,16 +379,11 @@ /* macro to update a register field to specified values in given sequences. * useful when toggling bits */ -#define REG_UPDATE_SEQ(reg, field, value1, value2) \ -{ uint32_t val = REG_UPDATE(reg, field, value1); \ - REG_SET(reg, val, field, value2); } - -/* macro to update fields in register 1 field at a time in given order */ -#define REG_UPDATE_1BY1_2(reg, f1, v1, f2, v2) \ +#define REG_UPDATE_SEQ_2(reg, f1, v1, f2, v2) \ { uint32_t val = REG_UPDATE(reg, f1, v1); \ REG_SET(reg, val, f2, v2); } -#define REG_UPDATE_1BY1_3(reg, f1, v1, f2, v2, f3, v3) \ +#define REG_UPDATE_SEQ_3(reg, f1, v1, f2, v2, f3, v3) \ { uint32_t val = REG_UPDATE(reg, f1, v1); \ val = REG_SET(reg, val, f2, v2); \ REG_SET(reg, val, f3, v3); } diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index 0086a2f1d21a..3ce0a4fc5822 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -70,11 +70,9 @@ bool resource_construct( struct resource_pool *pool, const struct resource_create_funcs *create_funcs); -struct resource_pool *dc_create_resource_pool( - struct dc *dc, - int num_virtual_links, - enum dce_version dc_version, - struct hw_asic_id asic_id); +struct resource_pool *dc_create_resource_pool(struct dc *dc, + const struct dc_init_data *init_data, + enum dce_version dc_version); void dc_destroy_resource_pool(struct dc *dc); @@ -131,7 +129,8 @@ bool resource_attach_surfaces_to_context( struct pipe_ctx *find_idle_secondary_pipe( struct resource_context *res_ctx, - const struct resource_pool *pool); + const struct resource_pool *pool, + const struct pipe_ctx *primary_pipe); bool resource_is_stream_unchanged( struct dc_state *old_context, struct dc_stream_state *stream); @@ -172,4 +171,7 @@ void update_audio_usage( unsigned int resource_pixel_format_to_bpp(enum surface_pixel_format format); +struct pipe_ctx *dc_res_get_odm_bottom_pipe(struct pipe_ctx *pipe_ctx); +bool dc_res_is_odm_head_pipe(struct pipe_ctx *pipe_ctx); + #endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c b/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c index afe0876fe6f8..86987f5e8bd5 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c @@ -81,6 +81,11 @@ static const struct irq_source_info_funcs vblank_irq_info_funcs = { .ack = NULL }; +static const struct irq_source_info_funcs vupdate_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; + #define hpd_int_entry(reg_num)\ [DC_IRQ_SOURCE_HPD1 + reg_num] = {\ .enable_reg = mmHPD ## reg_num ## _DC_HPD_INT_CONTROL,\ @@ -137,7 +142,7 @@ static const struct irq_source_info_funcs vblank_irq_info_funcs = { CRTC_V_UPDATE_INT_STATUS__CRTC_V_UPDATE_INT_CLEAR_MASK,\ .ack_value =\ CRTC_V_UPDATE_INT_STATUS__CRTC_V_UPDATE_INT_CLEAR_MASK,\ - .funcs = &vblank_irq_info_funcs\ + .funcs = &vupdate_irq_info_funcs\ } #define vblank_int_entry(reg_num)\ diff --git a/drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c b/drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c index 1ea7256ec89b..750ba0ab4106 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c @@ -84,6 +84,11 @@ static const struct irq_source_info_funcs vblank_irq_info_funcs = { .ack = NULL }; +static const struct irq_source_info_funcs vupdate_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; + #define BASE_INNER(seg) \ DCE_BASE__INST0_SEG ## seg @@ -140,7 +145,7 @@ static const struct irq_source_info_funcs vblank_irq_info_funcs = { IRQ_REG_ENTRY(CRTC, reg_num,\ CRTC_INTERRUPT_CONTROL, CRTC_V_UPDATE_INT_MSK,\ CRTC_V_UPDATE_INT_STATUS, CRTC_V_UPDATE_INT_CLEAR),\ - .funcs = &vblank_irq_info_funcs\ + .funcs = &vupdate_irq_info_funcs\ } #define vblank_int_entry(reg_num)\ diff --git a/drivers/gpu/drm/amd/display/dc/irq/dce80/irq_service_dce80.c b/drivers/gpu/drm/amd/display/dc/irq/dce80/irq_service_dce80.c index 8a2066c313fe..de218fe84a43 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dce80/irq_service_dce80.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dce80/irq_service_dce80.c @@ -84,6 +84,10 @@ static const struct irq_source_info_funcs vblank_irq_info_funcs = { .ack = NULL }; +static const struct irq_source_info_funcs vupdate_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; #define hpd_int_entry(reg_num)\ [DC_IRQ_SOURCE_INVALID + reg_num] = {\ @@ -142,7 +146,7 @@ static const struct irq_source_info_funcs vblank_irq_info_funcs = { CRTC_V_UPDATE_INT_STATUS__CRTC_V_UPDATE_INT_CLEAR_MASK,\ .ack_value =\ CRTC_V_UPDATE_INT_STATUS__CRTC_V_UPDATE_INT_CLEAR_MASK,\ - .funcs = &vblank_irq_info_funcs\ + .funcs = &vupdate_irq_info_funcs\ } #define vblank_int_entry(reg_num)\ diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c b/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c index e04ae49243f6..10ac6deff5ff 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c @@ -56,6 +56,18 @@ enum dc_irq_source to_dal_irq_source_dcn10( return DC_IRQ_SOURCE_VBLANK5; case DCN_1_0__SRCID__DC_D6_OTG_VSTARTUP: return DC_IRQ_SOURCE_VBLANK6; + case DCN_1_0__SRCID__OTG0_IHC_V_UPDATE_NO_LOCK_INTERRUPT: + return DC_IRQ_SOURCE_VUPDATE1; + case DCN_1_0__SRCID__OTG1_IHC_V_UPDATE_NO_LOCK_INTERRUPT: + return DC_IRQ_SOURCE_VUPDATE2; + case DCN_1_0__SRCID__OTG2_IHC_V_UPDATE_NO_LOCK_INTERRUPT: + return DC_IRQ_SOURCE_VUPDATE3; + case DCN_1_0__SRCID__OTG3_IHC_V_UPDATE_NO_LOCK_INTERRUPT: + return DC_IRQ_SOURCE_VUPDATE4; + case DCN_1_0__SRCID__OTG4_IHC_V_UPDATE_NO_LOCK_INTERRUPT: + return DC_IRQ_SOURCE_VUPDATE5; + case DCN_1_0__SRCID__OTG5_IHC_V_UPDATE_NO_LOCK_INTERRUPT: + return DC_IRQ_SOURCE_VUPDATE6; case DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT: return DC_IRQ_SOURCE_PFLIP1; case DCN_1_0__SRCID__HUBP1_FLIP_INTERRUPT: @@ -153,6 +165,11 @@ static const struct irq_source_info_funcs vblank_irq_info_funcs = { .ack = NULL }; +static const struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; + #define BASE_INNER(seg) \ DCE_BASE__INST0_SEG ## seg @@ -203,12 +220,15 @@ static const struct irq_source_info_funcs vblank_irq_info_funcs = { .funcs = &pflip_irq_info_funcs\ } -#define vupdate_int_entry(reg_num)\ +/* vupdate_no_lock_int_entry maps to DC_IRQ_SOURCE_VUPDATEx, to match semantic + * of DCE's DC_IRQ_SOURCE_VUPDATEx. + */ +#define vupdate_no_lock_int_entry(reg_num)\ [DC_IRQ_SOURCE_VUPDATE1 + reg_num] = {\ IRQ_REG_ENTRY(OTG, reg_num,\ - OTG_GLOBAL_SYNC_STATUS, VUPDATE_INT_EN,\ - OTG_GLOBAL_SYNC_STATUS, VUPDATE_EVENT_CLEAR),\ - .funcs = &vblank_irq_info_funcs\ + OTG_GLOBAL_SYNC_STATUS, VUPDATE_NO_LOCK_INT_EN,\ + OTG_GLOBAL_SYNC_STATUS, VUPDATE_NO_LOCK_EVENT_CLEAR),\ + .funcs = &vupdate_no_lock_irq_info_funcs\ } #define vblank_int_entry(reg_num)\ @@ -315,12 +335,12 @@ irq_source_info_dcn10[DAL_IRQ_SOURCES_NUMBER] = { dc_underflow_int_entry(6), [DC_IRQ_SOURCE_DMCU_SCP] = dummy_irq_entry(), [DC_IRQ_SOURCE_VBIOS_SW] = dummy_irq_entry(), - vupdate_int_entry(0), - vupdate_int_entry(1), - vupdate_int_entry(2), - vupdate_int_entry(3), - vupdate_int_entry(4), - vupdate_int_entry(5), + vupdate_no_lock_int_entry(0), + vupdate_no_lock_int_entry(1), + vupdate_no_lock_int_entry(2), + vupdate_no_lock_int_entry(3), + vupdate_no_lock_int_entry(4), + vupdate_no_lock_int_entry(5), vblank_int_entry(0), vblank_int_entry(1), vblank_int_entry(2), diff --git a/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c index 3dc1733eea20..fdcf9e66d852 100644 --- a/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c @@ -29,7 +29,8 @@ static void virtual_stream_encoder_dp_set_stream_attribute( struct stream_encoder *enc, struct dc_crtc_timing *crtc_timing, - enum dc_color_space output_color_space) {} + enum dc_color_space output_color_space, + uint32_t enable_sdp_splitting) {} static void virtual_stream_encoder_hdmi_set_stream_attribute( struct stream_encoder *enc, diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h b/drivers/gpu/drm/amd/display/include/fixed31_32.h index 52a73332befb..89ef9f6860e5 100644 --- a/drivers/gpu/drm/amd/display/include/fixed31_32.h +++ b/drivers/gpu/drm/amd/display/include/fixed31_32.h @@ -503,6 +503,8 @@ static inline int dc_fixpt_ceil(struct fixed31_32 arg) * fractional */ +unsigned int dc_fixpt_u4d19(struct fixed31_32 arg); + unsigned int dc_fixpt_u3d19(struct fixed31_32 arg); unsigned int dc_fixpt_u2d19(struct fixed31_32 arg); diff --git a/drivers/gpu/drm/amd/display/include/signal_types.h b/drivers/gpu/drm/amd/display/include/signal_types.h index f56d2891475f..beed70179bb5 100644 --- a/drivers/gpu/drm/amd/display/include/signal_types.h +++ b/drivers/gpu/drm/amd/display/include/signal_types.h @@ -45,6 +45,11 @@ enum signal_type { }; /* help functions for signal types manipulation */ +static inline bool dc_is_hdmi_tmds_signal(enum signal_type signal) +{ + return (signal == SIGNAL_TYPE_HDMI_TYPE_A); +} + static inline bool dc_is_hdmi_signal(enum signal_type signal) { return (signal == SIGNAL_TYPE_HDMI_TYPE_A); diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index 0fbc8fbc3541..a1055413bade 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -1854,6 +1854,8 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf, coordinates_x, axis_x, curve, MAX_HW_POINTS, tf_pts, mapUserRamp && ramp && ramp->type == GAMMA_RGB_256); + if (ramp->type == GAMMA_CUSTOM) + apply_lut_1d(ramp, MAX_HW_POINTS, tf_pts); ret = true; diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index bfd27f10879e..19b1eaebe484 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -37,6 +37,8 @@ #define RENDER_TIMES_MAX_COUNT 10 /* Threshold to exit BTR (to avoid frequent enter-exits at the lower limit) */ #define BTR_EXIT_MARGIN 2000 +/* Threshold to change BTR multiplier (to avoid frequent changes) */ +#define BTR_DRIFT_MARGIN 2000 /*Threshold to exit fixed refresh rate*/ #define FIXED_REFRESH_EXIT_MARGIN_IN_HZ 4 /* Number of consecutive frames to check before entering/exiting fixed refresh*/ @@ -48,6 +50,93 @@ struct core_freesync { struct dc *dc; }; +void setFieldWithMask(unsigned char *dest, unsigned int mask, unsigned int value) +{ + unsigned int shift = 0; + + if (!mask || !dest) + return; + + while (!((mask >> shift) & 1)) + shift++; + + //reset + *dest = *dest & ~mask; + //set + //dont let value span past mask + value = value & (mask >> shift); + //insert value + *dest = *dest | (value << shift); +} + +// VTEM Byte Offset +#define VRR_VTEM_PB0 0 +#define VRR_VTEM_PB1 1 +#define VRR_VTEM_PB2 2 +#define VRR_VTEM_PB3 3 +#define VRR_VTEM_PB4 4 +#define VRR_VTEM_PB5 5 +#define VRR_VTEM_PB6 6 + +#define VRR_VTEM_MD0 7 +#define VRR_VTEM_MD1 8 +#define VRR_VTEM_MD2 9 +#define VRR_VTEM_MD3 10 + + +// VTEM Byte Masks +//PB0 +#define MASK__VRR_VTEM_PB0__RESERVED0 0x01 +#define MASK__VRR_VTEM_PB0__SYNC 0x02 +#define MASK__VRR_VTEM_PB0__VFR 0x04 +#define MASK__VRR_VTEM_PB0__AFR 0x08 +#define MASK__VRR_VTEM_PB0__DS_TYPE 0x30 + //0: Periodic pseudo-static EM Data Set + //1: Periodic dynamic EM Data Set + //2: Unique EM Data Set + //3: Reserved +#define MASK__VRR_VTEM_PB0__END 0x40 +#define MASK__VRR_VTEM_PB0__NEW 0x80 + +//PB1 +#define MASK__VRR_VTEM_PB1__RESERVED1 0xFF + +//PB2 +#define MASK__VRR_VTEM_PB2__ORGANIZATION_ID 0xFF + //0: This is a Vendor Specific EM Data Set + //1: This EM Data Set is defined by This Specification (HDMI 2.1 r102.clean) + //2: This EM Data Set is defined by CTA-861-G + //3: This EM Data Set is defined by VESA +//PB3 +#define MASK__VRR_VTEM_PB3__DATA_SET_TAG_MSB 0xFF +//PB4 +#define MASK__VRR_VTEM_PB4__DATA_SET_TAG_LSB 0xFF +//PB5 +#define MASK__VRR_VTEM_PB5__DATA_SET_LENGTH_MSB 0xFF +//PB6 +#define MASK__VRR_VTEM_PB6__DATA_SET_LENGTH_LSB 0xFF + + + +//PB7-27 (20 bytes): +//PB7 = MD0 +#define MASK__VRR_VTEM_MD0__VRR_EN 0x01 +#define MASK__VRR_VTEM_MD0__M_CONST 0x02 +#define MASK__VRR_VTEM_MD0__RESERVED2 0x0C +#define MASK__VRR_VTEM_MD0__FVA_FACTOR_M1 0xF0 + +//MD1 +#define MASK__VRR_VTEM_MD1__BASE_VFRONT 0xFF + +//MD2 +#define MASK__VRR_VTEM_MD2__BASE_REFRESH_RATE_98 0x03 +#define MASK__VRR_VTEM_MD2__RB 0x04 +#define MASK__VRR_VTEM_MD2__RESERVED3 0xF8 + +//MD3 +#define MASK__VRR_VTEM_MD3__BASE_REFRESH_RATE_07 0xFF + + #define MOD_FREESYNC_TO_CORE(mod_freesync)\ container_of(mod_freesync, struct core_freesync, public) @@ -248,6 +337,7 @@ static void apply_below_the_range(struct core_freesync *core_freesync, unsigned int frames_to_insert = 0; unsigned int min_frame_duration_in_ns = 0; unsigned int max_render_time_in_us = in_out_vrr->max_duration_in_us; + unsigned int delta_from_mid_point_delta_in_us; min_frame_duration_in_ns = ((unsigned int) (div64_u64( (1000000000ULL * 1000000), @@ -318,10 +408,27 @@ static void apply_below_the_range(struct core_freesync *core_freesync, /* Choose number of frames to insert based on how close it * can get to the mid point of the variable range. */ - if (delta_from_mid_point_in_us_1 < delta_from_mid_point_in_us_2) + if (delta_from_mid_point_in_us_1 < delta_from_mid_point_in_us_2) { frames_to_insert = mid_point_frames_ceil; - else + delta_from_mid_point_delta_in_us = delta_from_mid_point_in_us_2 - + delta_from_mid_point_in_us_1; + } else { frames_to_insert = mid_point_frames_floor; + delta_from_mid_point_delta_in_us = delta_from_mid_point_in_us_1 - + delta_from_mid_point_in_us_2; + } + + /* Prefer current frame multiplier when BTR is enabled unless it drifts + * too far from the midpoint + */ + if (in_out_vrr->btr.frames_to_insert != 0 && + delta_from_mid_point_delta_in_us < BTR_DRIFT_MARGIN) { + if (((last_render_time_in_us / in_out_vrr->btr.frames_to_insert) < + in_out_vrr->max_duration_in_us) && + ((last_render_time_in_us / in_out_vrr->btr.frames_to_insert) > + in_out_vrr->min_duration_in_us)) + frames_to_insert = in_out_vrr->btr.frames_to_insert; + } /* Either we've calculated the number of frames to insert, * or we need to insert min duration frames @@ -330,10 +437,8 @@ static void apply_below_the_range(struct core_freesync *core_freesync, inserted_frame_duration_in_us = last_render_time_in_us / frames_to_insert; - if (inserted_frame_duration_in_us < - (1000000 / in_out_vrr->max_refresh_in_uhz)) - inserted_frame_duration_in_us = - (1000000 / in_out_vrr->max_refresh_in_uhz); + if (inserted_frame_duration_in_us < in_out_vrr->min_duration_in_us) + inserted_frame_duration_in_us = in_out_vrr->min_duration_in_us; /* Cache the calculated variables */ in_out_vrr->btr.inserted_duration_in_us = @@ -469,16 +574,14 @@ static void build_vrr_infopacket_header_vtem(enum signal_type signal, // HB0, HB1, HB2 indicates PacketType VTEMPacket infopacket->hb0 = 0x7F; infopacket->hb1 = 0xC0; - infopacket->hb2 = 0x00; - /* HB3 Bit Fields - * Reserved :1 = 0 - * Sync :1 = 0 - * VFR :1 = 1 - * Ds_Type :2 = 0 - * End :1 = 0 - * New :1 = 0 - */ - infopacket->hb3 = 0x20; + infopacket->hb2 = 0x00; //sequence_index + + setFieldWithMask(&infopacket->sb[VRR_VTEM_PB0], MASK__VRR_VTEM_PB0__VFR, 1); + setFieldWithMask(&infopacket->sb[VRR_VTEM_PB2], MASK__VRR_VTEM_PB2__ORGANIZATION_ID, 1); + setFieldWithMask(&infopacket->sb[VRR_VTEM_PB3], MASK__VRR_VTEM_PB3__DATA_SET_TAG_MSB, 0); + setFieldWithMask(&infopacket->sb[VRR_VTEM_PB4], MASK__VRR_VTEM_PB4__DATA_SET_TAG_LSB, 1); + setFieldWithMask(&infopacket->sb[VRR_VTEM_PB5], MASK__VRR_VTEM_PB5__DATA_SET_LENGTH_MSB, 0); + setFieldWithMask(&infopacket->sb[VRR_VTEM_PB6], MASK__VRR_VTEM_PB6__DATA_SET_LENGTH_LSB, 4); } static void build_vrr_infopacket_header_v1(enum signal_type signal, @@ -583,45 +686,36 @@ static void build_vrr_vtem_infopacket_data(const struct dc_stream_state *stream, const struct mod_vrr_params *vrr, struct dc_info_packet *infopacket) { - /* dc_info_packet to VtemPacket Translation of Bit-fields, - * SB[6] - * unsigned char VRR_EN :1 - * unsigned char M_CONST :1 - * unsigned char Reserved2 :2 - * unsigned char FVA_Factor_M1 :4 - * SB[7] - * unsigned char Base_Vfront :8 - * SB[8] - * unsigned char Base_Refresh_Rate_98 :2 - * unsigned char RB :1 - * unsigned char Reserved3 :5 - * SB[9] - * unsigned char Base_RefreshRate_07 :8 - */ unsigned int fieldRateInHz; if (vrr->state == VRR_STATE_ACTIVE_VARIABLE || - vrr->state == VRR_STATE_ACTIVE_FIXED){ - infopacket->sb[6] |= 0x80; //VRR_EN Bit = 1 + vrr->state == VRR_STATE_ACTIVE_FIXED) { + setFieldWithMask(&infopacket->sb[VRR_VTEM_MD0], MASK__VRR_VTEM_MD0__VRR_EN, 1); } else { - infopacket->sb[6] &= 0x7F; //VRR_EN Bit = 0 + setFieldWithMask(&infopacket->sb[VRR_VTEM_MD0], MASK__VRR_VTEM_MD0__VRR_EN, 0); } if (!stream->timing.vic) { - infopacket->sb[7] = stream->timing.v_front_porch; + setFieldWithMask(&infopacket->sb[VRR_VTEM_MD1], MASK__VRR_VTEM_MD1__BASE_VFRONT, + stream->timing.v_front_porch); + /* TODO: In dal2, we check mode flags for a reduced blanking timing. * Need a way to relay that information to this function. * if("ReducedBlanking") * { - * infopacket->sb[8] |= 0x20; //Set 3rd bit to 1 + * setFieldWithMask(&infopacket->sb[VRR_VTEM_MD2], MASK__VRR_VTEM_MD2__RB, 1; * } */ + + //TODO: DAL2 does FixPoint and rounding. Here we might need to account for that fieldRateInHz = (stream->timing.pix_clk_100hz * 100)/ - (stream->timing.h_total * stream->timing.v_total); + (stream->timing.h_total * stream->timing.v_total); - infopacket->sb[8] |= ((fieldRateInHz & 0x300) >> 2); - infopacket->sb[9] |= fieldRateInHz & 0xFF; + setFieldWithMask(&infopacket->sb[VRR_VTEM_MD2], MASK__VRR_VTEM_MD2__BASE_REFRESH_RATE_98, + fieldRateInHz >> 8); + setFieldWithMask(&infopacket->sb[VRR_VTEM_MD3], MASK__VRR_VTEM_MD3__BASE_REFRESH_RATE_07, + fieldRateInHz); } infopacket->valid = true; @@ -745,6 +839,8 @@ static void build_vrr_infopacket_vtem(const struct dc_stream_state *stream, { //VTEM info packet for HdmiVrr + memset(infopacket, 0, sizeof(struct dc_info_packet)); + //VTEM Packet is structured differently build_vrr_infopacket_header_vtem(stream->signal, infopacket); build_vrr_vtem_infopacket_data(stream, vrr, infopacket); diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 038b88221c5f..b3810b864676 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -41,9 +41,12 @@ static const unsigned char min_reduction_table[13] = { static const unsigned char max_reduction_table[13] = { 0xf5, 0xe5, 0xd9, 0xcd, 0xb1, 0xa5, 0xa5, 0x80, 0x65, 0x4d, 0x4d, 0x4d, 0x32}; -/* ABM 2.2 Min Reduction effectively disabled (100% for all configs)*/ +/* Possible ABM 2.2 Min Reduction configs from least aggressive to most aggressive + * 0 1 2 3 4 5 6 7 8 9 10 11 12 + * 100 100 100 100 100 100 100 100 100 92.2 83.1 75.3 75.3 % + */ static const unsigned char min_reduction_table_v_2_2[13] = { -0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; +0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xeb, 0xd4, 0xc0, 0xc0}; /* Possible ABM 2.2 Max Reduction configs from least aggressive to most aggressive * 0 1 2 3 4 5 6 7 8 9 10 11 12 @@ -408,9 +411,9 @@ void fill_iram_v_2_2(struct iram_table_v_2_2 *ram_table, struct dmcu_iram_parame ram_table->flags = 0x0; ram_table->deviation_gain[0] = 0xb3; - ram_table->deviation_gain[1] = 0xb3; - ram_table->deviation_gain[2] = 0xb3; - ram_table->deviation_gain[3] = 0xb3; + ram_table->deviation_gain[1] = 0xa8; + ram_table->deviation_gain[2] = 0x98; + ram_table->deviation_gain[3] = 0x68; ram_table->min_reduction[0][0] = min_reduction_table_v_2_2[abm_config[set][0]]; ram_table->min_reduction[1][0] = min_reduction_table_v_2_2[abm_config[set][0]]; @@ -505,7 +508,7 @@ void fill_iram_v_2_2(struct iram_table_v_2_2 *ram_table, struct dmcu_iram_parame ram_table->contrastFactor[0] = 0x99; ram_table->contrastFactor[1] = 0x99; - ram_table->contrastFactor[2] = 0x99; + ram_table->contrastFactor[2] = 0x90; ram_table->contrastFactor[3] = 0x80; ram_table->iir_curve[0] = 0x65; diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 470d7b89071a..574bf6e70763 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -137,6 +137,7 @@ enum DC_FEATURE_MASK { DC_FBC_MASK = 0x1, }; +enum amd_dpm_forced_level; /** * struct amd_ip_funcs - general hooks for managing amdgpu IP Blocks */ @@ -186,6 +187,8 @@ struct amd_ip_funcs { enum amd_powergating_state state); /** @get_clockgating_state: get current clockgating status */ void (*get_clockgating_state)(void *handle, u32 *flags); + /** @enable_umd_pstate: enable UMD powerstate */ + int (*enable_umd_pstate)(void *handle, enum amd_dpm_forced_level *level); }; diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_offset.h index 721c61171045..5a44e614ab7e 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_offset.h @@ -2347,6 +2347,8 @@ #define mmHUBP0_DCHUBP_VMPG_CONFIG_BASE_IDX 2 #define mmHUBP0_HUBPREQ_DEBUG_DB 0x0569 #define mmHUBP0_HUBPREQ_DEBUG_DB_BASE_IDX 2 +#define mmHUBP0_HUBPREQ_DEBUG 0x056a +#define mmHUBP0_HUBPREQ_DEBUG_BASE_IDX 2 #define mmHUBP0_HUBP_MEASURE_WIN_CTRL_DCFCLK 0x056e #define mmHUBP0_HUBP_MEASURE_WIN_CTRL_DCFCLK_BASE_IDX 2 #define mmHUBP0_HUBP_MEASURE_WIN_CTRL_DPPCLK 0x056f @@ -2631,6 +2633,8 @@ #define mmHUBP1_DCHUBP_VMPG_CONFIG_BASE_IDX 2 #define mmHUBP1_HUBPREQ_DEBUG_DB 0x062d #define mmHUBP1_HUBPREQ_DEBUG_DB_BASE_IDX 2 +#define mmHUBP1_HUBPREQ_DEBUG 0x062e +#define mmHUBP1_HUBPREQ_DEBUG_BASE_IDX 2 #define mmHUBP1_HUBP_MEASURE_WIN_CTRL_DCFCLK 0x0632 #define mmHUBP1_HUBP_MEASURE_WIN_CTRL_DCFCLK_BASE_IDX 2 #define mmHUBP1_HUBP_MEASURE_WIN_CTRL_DPPCLK 0x0633 @@ -2915,6 +2919,8 @@ #define mmHUBP2_DCHUBP_VMPG_CONFIG_BASE_IDX 2 #define mmHUBP2_HUBPREQ_DEBUG_DB 0x06f1 #define mmHUBP2_HUBPREQ_DEBUG_DB_BASE_IDX 2 +#define mmHUBP2_HUBPREQ_DEBUG 0x06f2 +#define mmHUBP2_HUBPREQ_DEBUG_BASE_IDX 2 #define mmHUBP2_HUBP_MEASURE_WIN_CTRL_DCFCLK 0x06f6 #define mmHUBP2_HUBP_MEASURE_WIN_CTRL_DCFCLK_BASE_IDX 2 #define mmHUBP2_HUBP_MEASURE_WIN_CTRL_DPPCLK 0x06f7 @@ -3199,6 +3205,8 @@ #define mmHUBP3_DCHUBP_VMPG_CONFIG_BASE_IDX 2 #define mmHUBP3_HUBPREQ_DEBUG_DB 0x07b5 #define mmHUBP3_HUBPREQ_DEBUG_DB_BASE_IDX 2 +#define mmHUBP3_HUBPREQ_DEBUG 0x07b6 +#define mmHUBP3_HUBPREQ_DEBUG_BASE_IDX 2 #define mmHUBP3_HUBP_MEASURE_WIN_CTRL_DCFCLK 0x07ba #define mmHUBP3_HUBP_MEASURE_WIN_CTRL_DCFCLK_BASE_IDX 2 #define mmHUBP3_HUBP_MEASURE_WIN_CTRL_DPPCLK 0x07bb diff --git a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_1_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_1_0_offset.h index 442ca7c471a5..6109f5ad25ad 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_1_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_1_0_offset.h @@ -141,6 +141,8 @@ #define mmUVD_GPCOM_VCPU_DATA0_BASE_IDX 1 #define mmUVD_GPCOM_VCPU_DATA1 0x03c5 #define mmUVD_GPCOM_VCPU_DATA1_BASE_IDX 1 +#define mmUVD_ENGINE_CNTL 0x03c6 +#define mmUVD_ENGINE_CNTL_BASE_IDX 1 #define mmUVD_UDEC_DBW_UV_ADDR_CONFIG 0x03d2 #define mmUVD_UDEC_DBW_UV_ADDR_CONFIG_BASE_IDX 1 #define mmUVD_UDEC_ADDR_CONFIG 0x03d3 diff --git a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_1_0_sh_mask.h index 63457f9df4c5..f84bed6eecb9 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_1_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_1_0_sh_mask.h @@ -312,6 +312,11 @@ //UVD_GPCOM_VCPU_DATA1 #define UVD_GPCOM_VCPU_DATA1__DATA1__SHIFT 0x0 #define UVD_GPCOM_VCPU_DATA1__DATA1_MASK 0xFFFFFFFFL +//UVD_ENGINE_CNTL +#define UVD_ENGINE_CNTL__ENGINE_START_MASK 0x1 +#define UVD_ENGINE_CNTL__ENGINE_START__SHIFT 0x0 +#define UVD_ENGINE_CNTL__ENGINE_START_MODE_MASK 0x2 +#define UVD_ENGINE_CNTL__ENGINE_START_MODE__SHIFT 0x1 //UVD_UDEC_DBW_UV_ADDR_CONFIG #define UVD_UDEC_DBW_UV_ADDR_CONFIG__NUM_PIPES__SHIFT 0x0 #define UVD_UDEC_DBW_UV_ADDR_CONFIG__PIPE_INTERLEAVE_SIZE__SHIFT 0x3 diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index 8eb0bb241210..d3075adb3297 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -494,6 +494,9 @@ enum atombios_firmware_capability ATOM_FIRMWARE_CAP_FIRMWARE_POSTED = 0x00000001, ATOM_FIRMWARE_CAP_GPU_VIRTUALIZATION = 0x00000002, ATOM_FIRMWARE_CAP_WMI_SUPPORT = 0x00000040, + ATOM_FIRMWARE_CAP_HWEMU_ENABLE = 0x00000080, + ATOM_FIRMWARE_CAP_HWEMU_UMC_CFG = 0x00000100, + ATOM_FIRMWARE_CAP_SRAM_ECC = 0x00000200, }; enum atom_cooling_solution_id{ @@ -528,6 +531,35 @@ struct atom_firmware_info_v3_2 { uint32_t reserved2[3]; }; +struct atom_firmware_info_v3_3 +{ + struct atom_common_table_header table_header; + uint32_t firmware_revision; + uint32_t bootup_sclk_in10khz; + uint32_t bootup_mclk_in10khz; + uint32_t firmware_capability; // enum atombios_firmware_capability + uint32_t main_call_parser_entry; /* direct address of main parser call in VBIOS binary. */ + uint32_t bios_scratch_reg_startaddr; // 1st bios scratch register dword address + uint16_t bootup_vddc_mv; + uint16_t bootup_vddci_mv; + uint16_t bootup_mvddc_mv; + uint16_t bootup_vddgfx_mv; + uint8_t mem_module_id; + uint8_t coolingsolution_id; /*0: Air cooling; 1: Liquid cooling ... */ + uint8_t reserved1[2]; + uint32_t mc_baseaddr_high; + uint32_t mc_baseaddr_low; + uint8_t board_i2c_feature_id; // enum of atom_board_i2c_feature_id_def + uint8_t board_i2c_feature_gpio_id; // i2c id find in gpio_lut data table gpio_id + uint8_t board_i2c_feature_slave_addr; + uint8_t reserved3; + uint16_t bootup_mvddq_mv; + uint16_t bootup_mvpp_mv; + uint32_t zfbstartaddrin16mb; + uint32_t pplib_pptable_id; // if pplib_pptable_id!=0, pplib get powerplay table inside driver instead of from VBIOS + uint32_t reserved2[2]; +}; + /* *************************************************************************** Data Table lcd_info structure @@ -686,6 +718,7 @@ enum atom_encoder_caps_def ATOM_ENCODER_CAP_RECORD_HBR2_EN =0x02, // DP1.2 HBR2 setting is qualified and HBR2 can be enabled ATOM_ENCODER_CAP_RECORD_HDMI6Gbps_EN =0x04, // HDMI2.0 6Gbps enable or not. ATOM_ENCODER_CAP_RECORD_HBR3_EN =0x08, // DP1.3 HBR3 is supported by board. + ATOM_ENCODER_CAP_RECORD_USB_C_TYPE =0x100, // the DP connector is a USB-C type. }; struct atom_encoder_caps_record @@ -1226,16 +1259,17 @@ struct atom_gfx_info_v2_3 { uint32_t rm21_sram_vmin_value; }; -struct atom_gfx_info_v2_4 { +struct atom_gfx_info_v2_4 +{ struct atom_common_table_header table_header; uint8_t gfxip_min_ver; uint8_t gfxip_max_ver; - uint8_t gc_num_se; - uint8_t max_tile_pipes; - uint8_t gc_num_cu_per_sh; - uint8_t gc_num_sh_per_se; - uint8_t gc_num_rb_per_se; - uint8_t gc_num_tccs; + uint8_t max_shader_engines; + uint8_t reserved; + uint8_t max_cu_per_sh; + uint8_t max_sh_per_se; + uint8_t max_backends_per_se; + uint8_t max_texture_channel_caches; uint32_t regaddr_cp_dma_src_addr; uint32_t regaddr_cp_dma_src_addr_hi; uint32_t regaddr_cp_dma_dst_addr; @@ -1780,6 +1814,56 @@ struct atom_umc_info_v3_1 uint32_t mem_refclk_10khz; }; +// umc_info.umc_config +enum atom_umc_config_def { + UMC_CONFIG__ENABLE_1KB_INTERLEAVE_MODE = 0x00000001, + UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE = 0x00000002, + UMC_CONFIG__ENABLE_HBM_LANE_REPAIR = 0x00000004, + UMC_CONFIG__ENABLE_BANK_HARVESTING = 0x00000008, + UMC_CONFIG__ENABLE_PHY_REINIT = 0x00000010, + UMC_CONFIG__DISABLE_UCODE_CHKSTATUS = 0x00000020, +}; + +struct atom_umc_info_v3_2 +{ + struct atom_common_table_header table_header; + uint32_t ucode_version; + uint32_t ucode_rom_startaddr; + uint32_t ucode_length; + uint16_t umc_reg_init_offset; + uint16_t customer_ucode_name_offset; + uint16_t mclk_ss_percentage; + uint16_t mclk_ss_rate_10hz; + uint8_t umcip_min_ver; + uint8_t umcip_max_ver; + uint8_t vram_type; //enum of atom_dgpu_vram_type + uint8_t umc_config; + uint32_t mem_refclk_10khz; + uint32_t pstate_uclk_10khz[4]; + uint16_t umcgoldenoffset; + uint16_t densitygoldenoffset; +}; + +struct atom_umc_info_v3_3 +{ + struct atom_common_table_header table_header; + uint32_t ucode_reserved; + uint32_t ucode_rom_startaddr; + uint32_t ucode_length; + uint16_t umc_reg_init_offset; + uint16_t customer_ucode_name_offset; + uint16_t mclk_ss_percentage; + uint16_t mclk_ss_rate_10hz; + uint8_t umcip_min_ver; + uint8_t umcip_max_ver; + uint8_t vram_type; //enum of atom_dgpu_vram_type + uint8_t umc_config; + uint32_t mem_refclk_10khz; + uint32_t pstate_uclk_10khz[4]; + uint16_t umcgoldenoffset; + uint16_t densitygoldenoffset; + uint32_t reserved[4]; +}; /* *************************************************************************** diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 5f3c10ebff08..b897aca9b4c9 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -85,18 +85,6 @@ enum kgd_memory_pool { KGD_POOL_FRAMEBUFFER = 3, }; -enum kgd_engine_type { - KGD_ENGINE_PFP = 1, - KGD_ENGINE_ME, - KGD_ENGINE_CE, - KGD_ENGINE_MEC1, - KGD_ENGINE_MEC2, - KGD_ENGINE_RLC, - KGD_ENGINE_SDMA1, - KGD_ENGINE_SDMA2, - KGD_ENGINE_MAX -}; - /** * enum kfd_sched_policy * @@ -230,8 +218,6 @@ struct tile_config { * @hqd_sdma_destroy: Destructs and preempts the SDMA queue assigned to that * SDMA hqd slot. * - * @get_fw_version: Returns FW versions from the header - * * @set_scratch_backing_va: Sets VA for scratch backing memory of a VMID. * Only used for no cp scheduling mode * @@ -311,8 +297,6 @@ struct kfd2kgd_calls { struct kgd_dev *kgd, uint8_t vmid); - uint16_t (*get_fw_version)(struct kgd_dev *kgd, - enum kgd_engine_type type); void (*set_scratch_backing_va)(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); int (*get_tile_config)(struct kgd_dev *kgd, struct tile_config *config); diff --git a/drivers/gpu/drm/amd/include/linux/chash.h b/drivers/gpu/drm/amd/include/linux/chash.h deleted file mode 100644 index 6dc159924ed1..000000000000 --- a/drivers/gpu/drm/amd/include/linux/chash.h +++ /dev/null @@ -1,366 +0,0 @@ -/* - * Copyright 2017 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef _LINUX_CHASH_H -#define _LINUX_CHASH_H - -#include <linux/types.h> -#include <linux/hash.h> -#include <linux/bug.h> -#include <asm/bitsperlong.h> - -#if BITS_PER_LONG == 32 -# define _CHASH_LONG_SHIFT 5 -#elif BITS_PER_LONG == 64 -# define _CHASH_LONG_SHIFT 6 -#else -# error "Unexpected BITS_PER_LONG" -#endif - -struct __chash_table { - u8 bits; - u8 key_size; - unsigned int value_size; - u32 size_mask; - unsigned long *occup_bitmap, *valid_bitmap; - union { - u32 *keys32; - u64 *keys64; - }; - u8 *values; - -#ifdef CONFIG_CHASH_STATS - u64 hits, hits_steps, hits_time_ns; - u64 miss, miss_steps, miss_time_ns; - u64 relocs, reloc_dist; -#endif -}; - -#define __CHASH_BITMAP_SIZE(bits) \ - (((1 << (bits)) + BITS_PER_LONG - 1) / BITS_PER_LONG) -#define __CHASH_ARRAY_SIZE(bits, size) \ - ((((size) << (bits)) + sizeof(long) - 1) / sizeof(long)) - -#define __CHASH_DATA_SIZE(bits, key_size, value_size) \ - (__CHASH_BITMAP_SIZE(bits) * 2 + \ - __CHASH_ARRAY_SIZE(bits, key_size) + \ - __CHASH_ARRAY_SIZE(bits, value_size)) - -#define STRUCT_CHASH_TABLE(bits, key_size, value_size) \ - struct { \ - struct __chash_table table; \ - unsigned long data \ - [__CHASH_DATA_SIZE(bits, key_size, value_size)];\ - } - -/** - * struct chash_table - Dynamically allocated closed hash table - * - * Use this struct for dynamically allocated hash tables (using - * chash_table_alloc and chash_table_free), where the size is - * determined at runtime. - */ -struct chash_table { - struct __chash_table table; - unsigned long *data; -}; - -/** - * DECLARE_CHASH_TABLE - macro to declare a closed hash table - * @table: name of the declared hash table - * @bts: Table size will be 2^bits entries - * @key_sz: Size of hash keys in bytes, 4 or 8 - * @val_sz: Size of data values in bytes, can be 0 - * - * This declares the hash table variable with a static size. - * - * The closed hash table stores key-value pairs with low memory and - * lookup overhead. In operation it performs no dynamic memory - * management. The data being stored does not require any - * list_heads. The hash table performs best with small @val_sz and as - * long as some space (about 50%) is left free in the table. But the - * table can still work reasonably efficiently even when filled up to - * about 90%. If bigger data items need to be stored and looked up, - * store the pointer to it as value in the hash table. - * - * @val_sz may be 0. This can be useful when all the stored - * information is contained in the key itself and the fact that it is - * in the hash table (or not). - */ -#define DECLARE_CHASH_TABLE(table, bts, key_sz, val_sz) \ - STRUCT_CHASH_TABLE(bts, key_sz, val_sz) table - -#ifdef CONFIG_CHASH_STATS -#define __CHASH_STATS_INIT(prefix), \ - prefix.hits = 0, \ - prefix.hits_steps = 0, \ - prefix.hits_time_ns = 0, \ - prefix.miss = 0, \ - prefix.miss_steps = 0, \ - prefix.miss_time_ns = 0, \ - prefix.relocs = 0, \ - prefix.reloc_dist = 0 -#else -#define __CHASH_STATS_INIT(prefix) -#endif - -#define __CHASH_TABLE_INIT(prefix, data, bts, key_sz, val_sz) \ - prefix.bits = (bts), \ - prefix.key_size = (key_sz), \ - prefix.value_size = (val_sz), \ - prefix.size_mask = ((1 << bts) - 1), \ - prefix.occup_bitmap = &data[0], \ - prefix.valid_bitmap = &data \ - [__CHASH_BITMAP_SIZE(bts)], \ - prefix.keys64 = (u64 *)&data \ - [__CHASH_BITMAP_SIZE(bts) * 2], \ - prefix.values = (u8 *)&data \ - [__CHASH_BITMAP_SIZE(bts) * 2 + \ - __CHASH_ARRAY_SIZE(bts, key_sz)] \ - __CHASH_STATS_INIT(prefix) - -/** - * DEFINE_CHASH_TABLE - macro to define and initialize a closed hash table - * @tbl: name of the declared hash table - * @bts: Table size will be 2^bits entries - * @key_sz: Size of hash keys in bytes, 4 or 8 - * @val_sz: Size of data values in bytes, can be 0 - * - * Note: the macro can be used for global and local hash table variables. - */ -#define DEFINE_CHASH_TABLE(tbl, bts, key_sz, val_sz) \ - DECLARE_CHASH_TABLE(tbl, bts, key_sz, val_sz) = { \ - .table = { \ - __CHASH_TABLE_INIT(, (tbl).data, bts, key_sz, val_sz) \ - }, \ - .data = {0} \ - } - -/** - * INIT_CHASH_TABLE - Initialize a hash table declared by DECLARE_CHASH_TABLE - * @tbl: name of the declared hash table - * @bts: Table size will be 2^bits entries - * @key_sz: Size of hash keys in bytes, 4 or 8 - * @val_sz: Size of data values in bytes, can be 0 - */ -#define INIT_CHASH_TABLE(tbl, bts, key_sz, val_sz) \ - __CHASH_TABLE_INIT(((tbl).table), (tbl).data, bts, key_sz, val_sz) - -int chash_table_alloc(struct chash_table *table, u8 bits, u8 key_size, - unsigned int value_size, gfp_t gfp_mask); -void chash_table_free(struct chash_table *table); - -/** - * chash_table_dump_stats - Dump statistics of a closed hash table - * @tbl: Pointer to the table structure - * - * Dumps some performance statistics of the table gathered in operation - * in the kernel log using pr_debug. If CONFIG_DYNAMIC_DEBUG is enabled, - * user must turn on messages for chash.c (file chash.c +p). - */ -#ifdef CONFIG_CHASH_STATS -#define chash_table_dump_stats(tbl) __chash_table_dump_stats(&(*tbl).table) - -void __chash_table_dump_stats(struct __chash_table *table); -#else -#define chash_table_dump_stats(tbl) -#endif - -/** - * chash_table_reset_stats - Reset statistics of a closed hash table - * @tbl: Pointer to the table structure - */ -#ifdef CONFIG_CHASH_STATS -#define chash_table_reset_stats(tbl) __chash_table_reset_stats(&(*tbl).table) - -static inline void __chash_table_reset_stats(struct __chash_table *table) -{ - (void)table __CHASH_STATS_INIT((*table)); -} -#else -#define chash_table_reset_stats(tbl) -#endif - -/** - * chash_table_copy_in - Copy a new value into the hash table - * @tbl: Pointer to the table structure - * @key: Key of the entry to add or update - * @value: Pointer to value to copy, may be NULL - * - * If @key already has an entry, its value is replaced. Otherwise a - * new entry is added. If @value is NULL, the value is left unchanged - * or uninitialized. Returns 1 if an entry already existed, 0 if a new - * entry was added or %-ENOMEM if there was no free space in the - * table. - */ -#define chash_table_copy_in(tbl, key, value) \ - __chash_table_copy_in(&(*tbl).table, key, value) - -int __chash_table_copy_in(struct __chash_table *table, u64 key, - const void *value); - -/** - * chash_table_copy_out - Copy a value out of the hash table - * @tbl: Pointer to the table structure - * @key: Key of the entry to find - * @value: Pointer to value to copy, may be NULL - * - * If @value is not NULL and the table has a non-0 value_size, the - * value at @key is copied to @value. Returns the slot index of the - * entry or %-EINVAL if @key was not found. - */ -#define chash_table_copy_out(tbl, key, value) \ - __chash_table_copy_out(&(*tbl).table, key, value, false) - -int __chash_table_copy_out(struct __chash_table *table, u64 key, - void *value, bool remove); - -/** - * chash_table_remove - Remove an entry from the hash table - * @tbl: Pointer to the table structure - * @key: Key of the entry to find - * @value: Pointer to value to copy, may be NULL - * - * If @value is not NULL and the table has a non-0 value_size, the - * value at @key is copied to @value. The entry is removed from the - * table. Returns the slot index of the removed entry or %-EINVAL if - * @key was not found. - */ -#define chash_table_remove(tbl, key, value) \ - __chash_table_copy_out(&(*tbl).table, key, value, true) - -/* - * Low level iterator API used internally by the above functions. - */ -struct chash_iter { - struct __chash_table *table; - unsigned long mask; - int slot; -}; - -/** - * CHASH_ITER_INIT - Initialize a hash table iterator - * @tbl: Pointer to hash table to iterate over - * @s: Initial slot number - */ -#define CHASH_ITER_INIT(table, s) { \ - table, \ - 1UL << ((s) & (BITS_PER_LONG - 1)), \ - s \ - } -/** - * CHASH_ITER_SET - Set hash table iterator to new slot - * @iter: Iterator - * @s: Slot number - */ -#define CHASH_ITER_SET(iter, s) \ - (iter).mask = 1UL << ((s) & (BITS_PER_LONG - 1)), \ - (iter).slot = (s) -/** - * CHASH_ITER_INC - Increment hash table iterator - * @table: Hash table to iterate over - * - * Wraps around at the end. - */ -#define CHASH_ITER_INC(iter) do { \ - (iter).mask = (iter).mask << 1 | \ - (iter).mask >> (BITS_PER_LONG - 1); \ - (iter).slot = ((iter).slot + 1) & (iter).table->size_mask; \ - } while (0) - -static inline bool chash_iter_is_valid(const struct chash_iter iter) -{ - BUG_ON((unsigned)iter.slot >= (1 << iter.table->bits)); - return !!(iter.table->valid_bitmap[iter.slot >> _CHASH_LONG_SHIFT] & - iter.mask); -} -static inline bool chash_iter_is_empty(const struct chash_iter iter) -{ - BUG_ON((unsigned)iter.slot >= (1 << iter.table->bits)); - return !(iter.table->occup_bitmap[iter.slot >> _CHASH_LONG_SHIFT] & - iter.mask); -} - -static inline void chash_iter_set_valid(const struct chash_iter iter) -{ - BUG_ON((unsigned)iter.slot >= (1 << iter.table->bits)); - iter.table->valid_bitmap[iter.slot >> _CHASH_LONG_SHIFT] |= iter.mask; - iter.table->occup_bitmap[iter.slot >> _CHASH_LONG_SHIFT] |= iter.mask; -} -static inline void chash_iter_set_invalid(const struct chash_iter iter) -{ - BUG_ON((unsigned)iter.slot >= (1 << iter.table->bits)); - iter.table->valid_bitmap[iter.slot >> _CHASH_LONG_SHIFT] &= ~iter.mask; -} -static inline void chash_iter_set_empty(const struct chash_iter iter) -{ - BUG_ON((unsigned)iter.slot >= (1 << iter.table->bits)); - iter.table->occup_bitmap[iter.slot >> _CHASH_LONG_SHIFT] &= ~iter.mask; -} - -static inline u32 chash_iter_key32(const struct chash_iter iter) -{ - BUG_ON(iter.table->key_size != 4); - BUG_ON((unsigned)iter.slot >= (1 << iter.table->bits)); - return iter.table->keys32[iter.slot]; -} -static inline u64 chash_iter_key64(const struct chash_iter iter) -{ - BUG_ON(iter.table->key_size != 8); - BUG_ON((unsigned)iter.slot >= (1 << iter.table->bits)); - return iter.table->keys64[iter.slot]; -} -static inline u64 chash_iter_key(const struct chash_iter iter) -{ - BUG_ON((unsigned)iter.slot >= (1 << iter.table->bits)); - return (iter.table->key_size == 4) ? - iter.table->keys32[iter.slot] : iter.table->keys64[iter.slot]; -} - -static inline u32 chash_iter_hash32(const struct chash_iter iter) -{ - BUG_ON(iter.table->key_size != 4); - return hash_32(chash_iter_key32(iter), iter.table->bits); -} - -static inline u32 chash_iter_hash64(const struct chash_iter iter) -{ - BUG_ON(iter.table->key_size != 8); - return hash_64(chash_iter_key64(iter), iter.table->bits); -} - -static inline u32 chash_iter_hash(const struct chash_iter iter) -{ - return (iter.table->key_size == 4) ? - hash_32(chash_iter_key32(iter), iter.table->bits) : - hash_64(chash_iter_key64(iter), iter.table->bits); -} - -static inline void *chash_iter_value(const struct chash_iter iter) -{ - BUG_ON((unsigned)iter.slot >= (1 << iter.table->bits)); - return iter.table->values + - ((unsigned long)iter.slot * iter.table->value_size); -} - -#endif /* _LINUX_CHASH_H */ diff --git a/drivers/gpu/drm/amd/lib/Kconfig b/drivers/gpu/drm/amd/lib/Kconfig deleted file mode 100644 index 776ef3434c10..000000000000 --- a/drivers/gpu/drm/amd/lib/Kconfig +++ /dev/null @@ -1,28 +0,0 @@ -menu "AMD Library routines" - -# -# Closed hash table -# -config CHASH - tristate - default DRM_AMDGPU - help - Statically sized closed hash table implementation with low - memory and CPU overhead. - -config CHASH_STATS - bool "Closed hash table performance statistics" - depends on CHASH - default n - help - Enable collection of performance statistics for closed hash tables. - -config CHASH_SELFTEST - bool "Closed hash table self test" - depends on CHASH - default n - help - Runs a selftest during module load. Several module parameters - are available to modify the behaviour of the test. - -endmenu diff --git a/drivers/gpu/drm/amd/lib/Makefile b/drivers/gpu/drm/amd/lib/Makefile deleted file mode 100644 index 690243001e1a..000000000000 --- a/drivers/gpu/drm/amd/lib/Makefile +++ /dev/null @@ -1,32 +0,0 @@ -# -# Copyright 2017 Advanced Micro Devices, Inc. -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -# -# Makefile for AMD library routines, which are used by AMD driver -# components. -# -# This is for common library routines that can be shared between AMD -# driver components or later moved to kernel/lib for sharing with -# other drivers. - -ccflags-y := -I$(src)/../include - -obj-$(CONFIG_CHASH) += chash.o diff --git a/drivers/gpu/drm/amd/lib/chash.c b/drivers/gpu/drm/amd/lib/chash.c deleted file mode 100644 index b8e45f356a1c..000000000000 --- a/drivers/gpu/drm/amd/lib/chash.c +++ /dev/null @@ -1,638 +0,0 @@ -/* - * Copyright 2017 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include <linux/types.h> -#include <linux/hash.h> -#include <linux/bug.h> -#include <linux/slab.h> -#include <linux/module.h> -#include <linux/sched/clock.h> -#include <asm/div64.h> -#include <linux/chash.h> - -/** - * chash_table_alloc - Allocate closed hash table - * @table: Pointer to the table structure - * @bits: Table size will be 2^bits entries - * @key_size: Size of hash keys in bytes, 4 or 8 - * @value_size: Size of data values in bytes, can be 0 - */ -int chash_table_alloc(struct chash_table *table, u8 bits, u8 key_size, - unsigned int value_size, gfp_t gfp_mask) -{ - if (bits > 31) - return -EINVAL; - - if (key_size != 4 && key_size != 8) - return -EINVAL; - - table->data = kcalloc(__CHASH_DATA_SIZE(bits, key_size, value_size), - sizeof(long), gfp_mask); - if (!table->data) - return -ENOMEM; - - __CHASH_TABLE_INIT(table->table, table->data, - bits, key_size, value_size); - - return 0; -} -EXPORT_SYMBOL(chash_table_alloc); - -/** - * chash_table_free - Free closed hash table - * @table: Pointer to the table structure - */ -void chash_table_free(struct chash_table *table) -{ - kfree(table->data); -} -EXPORT_SYMBOL(chash_table_free); - -#ifdef CONFIG_CHASH_STATS - -#define DIV_FRAC(nom, denom, quot, frac, frac_digits) do { \ - u64 __nom = (nom); \ - u64 __denom = (denom); \ - u64 __quot, __frac; \ - u32 __rem; \ - \ - while (__denom >> 32) { \ - __nom >>= 1; \ - __denom >>= 1; \ - } \ - __quot = __nom; \ - __rem = do_div(__quot, __denom); \ - __frac = __rem * (frac_digits) + (__denom >> 1); \ - do_div(__frac, __denom); \ - (quot) = __quot; \ - (frac) = __frac; \ - } while (0) - -void __chash_table_dump_stats(struct __chash_table *table) -{ - struct chash_iter iter = CHASH_ITER_INIT(table, 0); - u32 filled = 0, empty = 0, tombstones = 0; - u64 quot1, quot2; - u32 frac1, frac2; - - do { - if (chash_iter_is_valid(iter)) - filled++; - else if (chash_iter_is_empty(iter)) - empty++; - else - tombstones++; - CHASH_ITER_INC(iter); - } while (iter.slot); - - pr_debug("chash: key size %u, value size %u\n", - table->key_size, table->value_size); - pr_debug(" Slots total/filled/empty/tombstones: %u / %u / %u / %u\n", - 1 << table->bits, filled, empty, tombstones); - if (table->hits > 0) { - DIV_FRAC(table->hits_steps, table->hits, quot1, frac1, 1000); - DIV_FRAC(table->hits * 1000, table->hits_time_ns, - quot2, frac2, 1000); - } else { - quot1 = quot2 = 0; - frac1 = frac2 = 0; - } - pr_debug(" Hits (avg.cost, rate): %llu (%llu.%03u, %llu.%03u M/s)\n", - table->hits, quot1, frac1, quot2, frac2); - if (table->miss > 0) { - DIV_FRAC(table->miss_steps, table->miss, quot1, frac1, 1000); - DIV_FRAC(table->miss * 1000, table->miss_time_ns, - quot2, frac2, 1000); - } else { - quot1 = quot2 = 0; - frac1 = frac2 = 0; - } - pr_debug(" Misses (avg.cost, rate): %llu (%llu.%03u, %llu.%03u M/s)\n", - table->miss, quot1, frac1, quot2, frac2); - if (table->hits + table->miss > 0) { - DIV_FRAC(table->hits_steps + table->miss_steps, - table->hits + table->miss, quot1, frac1, 1000); - DIV_FRAC((table->hits + table->miss) * 1000, - (table->hits_time_ns + table->miss_time_ns), - quot2, frac2, 1000); - } else { - quot1 = quot2 = 0; - frac1 = frac2 = 0; - } - pr_debug(" Total (avg.cost, rate): %llu (%llu.%03u, %llu.%03u M/s)\n", - table->hits + table->miss, quot1, frac1, quot2, frac2); - if (table->relocs > 0) { - DIV_FRAC(table->hits + table->miss, table->relocs, - quot1, frac1, 1000); - DIV_FRAC(table->reloc_dist, table->relocs, quot2, frac2, 1000); - pr_debug(" Relocations (freq, avg.dist): %llu (1:%llu.%03u, %llu.%03u)\n", - table->relocs, quot1, frac1, quot2, frac2); - } else { - pr_debug(" No relocations\n"); - } -} -EXPORT_SYMBOL(__chash_table_dump_stats); - -#undef DIV_FRAC -#endif - -#define CHASH_INC(table, a) ((a) = ((a) + 1) & (table)->size_mask) -#define CHASH_ADD(table, a, b) (((a) + (b)) & (table)->size_mask) -#define CHASH_SUB(table, a, b) (((a) - (b)) & (table)->size_mask) -#define CHASH_IN_RANGE(table, slot, first, last) \ - (CHASH_SUB(table, slot, first) <= CHASH_SUB(table, last, first)) - -/*#define CHASH_DEBUG Uncomment this to enable verbose debug output*/ -#ifdef CHASH_DEBUG -static void chash_table_dump(struct __chash_table *table) -{ - struct chash_iter iter = CHASH_ITER_INIT(table, 0); - - do { - if ((iter.slot & 3) == 0) - pr_debug("%04x: ", iter.slot); - - if (chash_iter_is_valid(iter)) - pr_debug("[%016llx] ", chash_iter_key(iter)); - else if (chash_iter_is_empty(iter)) - pr_debug("[ <empty> ] "); - else - pr_debug("[ <tombstone> ] "); - - if ((iter.slot & 3) == 3) - pr_debug("\n"); - - CHASH_ITER_INC(iter); - } while (iter.slot); - - if ((iter.slot & 3) != 0) - pr_debug("\n"); -} - -static int chash_table_check(struct __chash_table *table) -{ - u32 hash; - struct chash_iter iter = CHASH_ITER_INIT(table, 0); - struct chash_iter cur = CHASH_ITER_INIT(table, 0); - - do { - if (!chash_iter_is_valid(iter)) { - CHASH_ITER_INC(iter); - continue; - } - - hash = chash_iter_hash(iter); - CHASH_ITER_SET(cur, hash); - while (cur.slot != iter.slot) { - if (chash_iter_is_empty(cur)) { - pr_err("Path to element at %x with hash %x broken at slot %x\n", - iter.slot, hash, cur.slot); - chash_table_dump(table); - return -EINVAL; - } - CHASH_ITER_INC(cur); - } - - CHASH_ITER_INC(iter); - } while (iter.slot); - - return 0; -} -#endif - -static void chash_iter_relocate(struct chash_iter dst, struct chash_iter src) -{ - BUG_ON(src.table == dst.table && src.slot == dst.slot); - BUG_ON(src.table->key_size != dst.table->key_size); - BUG_ON(src.table->value_size != dst.table->value_size); - - if (dst.table->key_size == 4) - dst.table->keys32[dst.slot] = src.table->keys32[src.slot]; - else - dst.table->keys64[dst.slot] = src.table->keys64[src.slot]; - - if (dst.table->value_size) - memcpy(chash_iter_value(dst), chash_iter_value(src), - dst.table->value_size); - - chash_iter_set_valid(dst); - chash_iter_set_invalid(src); - -#ifdef CONFIG_CHASH_STATS - if (src.table == dst.table) { - dst.table->relocs++; - dst.table->reloc_dist += - CHASH_SUB(dst.table, src.slot, dst.slot); - } -#endif -} - -/** - * __chash_table_find - Helper for looking up a hash table entry - * @iter: Pointer to hash table iterator - * @key: Key of the entry to find - * @for_removal: set to true if the element will be removed soon - * - * Searches for an entry in the hash table with a given key. iter must - * be initialized by the caller to point to the home position of the - * hypothetical entry, i.e. it must be initialized with the hash table - * and the key's hash as the initial slot for the search. - * - * This function also does some local clean-up to speed up future - * look-ups by relocating entries to better slots and removing - * tombstones that are no longer needed. - * - * If @for_removal is true, the function avoids relocating the entry - * that is being returned. - * - * Returns 0 if the search is successful. In this case iter is updated - * to point to the found entry. Otherwise %-EINVAL is returned and the - * iter is updated to point to the first available slot for the given - * key. If the table is full, the slot is set to -1. - */ -static int chash_table_find(struct chash_iter *iter, u64 key, - bool for_removal) -{ -#ifdef CONFIG_CHASH_STATS - u64 ts1 = local_clock(); -#endif - u32 hash = iter->slot; - struct chash_iter first_redundant = CHASH_ITER_INIT(iter->table, -1); - int first_avail = (for_removal ? -2 : -1); - - while (!chash_iter_is_valid(*iter) || chash_iter_key(*iter) != key) { - if (chash_iter_is_empty(*iter)) { - /* Found an empty slot, which ends the - * search. Clean up any preceding tombstones - * that are no longer needed because they lead - * to no-where - */ - if ((int)first_redundant.slot < 0) - goto not_found; - while (first_redundant.slot != iter->slot) { - if (!chash_iter_is_valid(first_redundant)) - chash_iter_set_empty(first_redundant); - CHASH_ITER_INC(first_redundant); - } -#ifdef CHASH_DEBUG - chash_table_check(iter->table); -#endif - goto not_found; - } else if (!chash_iter_is_valid(*iter)) { - /* Found a tombstone. Remember it as candidate - * for relocating the entry we're looking for - * or for adding a new entry with the given key - */ - if (first_avail == -1) - first_avail = iter->slot; - /* Or mark it as the start of a series of - * potentially redundant tombstones - */ - else if (first_redundant.slot == -1) - CHASH_ITER_SET(first_redundant, iter->slot); - } else if (first_redundant.slot >= 0) { - /* Found a valid, occupied slot with a - * preceding series of tombstones. Relocate it - * to a better position that no longer depends - * on those tombstones - */ - u32 cur_hash = chash_iter_hash(*iter); - - if (!CHASH_IN_RANGE(iter->table, cur_hash, - first_redundant.slot + 1, - iter->slot)) { - /* This entry has a hash at or before - * the first tombstone we found. We - * can relocate it to that tombstone - * and advance to the next tombstone - */ - chash_iter_relocate(first_redundant, *iter); - do { - CHASH_ITER_INC(first_redundant); - } while (chash_iter_is_valid(first_redundant)); - } else if (cur_hash != iter->slot) { - /* Relocate entry to its home position - * or as close as possible so it no - * longer depends on any preceding - * tombstones - */ - struct chash_iter new_iter = - CHASH_ITER_INIT(iter->table, cur_hash); - - while (new_iter.slot != iter->slot && - chash_iter_is_valid(new_iter)) - CHASH_ITER_INC(new_iter); - - if (new_iter.slot != iter->slot) - chash_iter_relocate(new_iter, *iter); - } - } - - CHASH_ITER_INC(*iter); - if (iter->slot == hash) { - iter->slot = -1; - goto not_found; - } - } - -#ifdef CONFIG_CHASH_STATS - iter->table->hits++; - iter->table->hits_steps += CHASH_SUB(iter->table, iter->slot, hash) + 1; -#endif - - if (first_avail >= 0) { - CHASH_ITER_SET(first_redundant, first_avail); - chash_iter_relocate(first_redundant, *iter); - iter->slot = first_redundant.slot; - iter->mask = first_redundant.mask; - } - -#ifdef CONFIG_CHASH_STATS - iter->table->hits_time_ns += local_clock() - ts1; -#endif - - return 0; - -not_found: -#ifdef CONFIG_CHASH_STATS - iter->table->miss++; - iter->table->miss_steps += (iter->slot < 0) ? - (1 << iter->table->bits) : - CHASH_SUB(iter->table, iter->slot, hash) + 1; -#endif - - if (first_avail >= 0) - CHASH_ITER_SET(*iter, first_avail); - -#ifdef CONFIG_CHASH_STATS - iter->table->miss_time_ns += local_clock() - ts1; -#endif - - return -EINVAL; -} - -int __chash_table_copy_in(struct __chash_table *table, u64 key, - const void *value) -{ - u32 hash = (table->key_size == 4) ? - hash_32(key, table->bits) : hash_64(key, table->bits); - struct chash_iter iter = CHASH_ITER_INIT(table, hash); - int r = chash_table_find(&iter, key, false); - - /* Found an existing entry */ - if (!r) { - if (value && table->value_size) - memcpy(chash_iter_value(iter), value, - table->value_size); - return 1; - } - - /* Is there a place to add a new entry? */ - if (iter.slot < 0) { - pr_err("Hash table overflow\n"); - return -ENOMEM; - } - - chash_iter_set_valid(iter); - - if (table->key_size == 4) - table->keys32[iter.slot] = key; - else - table->keys64[iter.slot] = key; - if (value && table->value_size) - memcpy(chash_iter_value(iter), value, table->value_size); - - return 0; -} -EXPORT_SYMBOL(__chash_table_copy_in); - -int __chash_table_copy_out(struct __chash_table *table, u64 key, - void *value, bool remove) -{ - u32 hash = (table->key_size == 4) ? - hash_32(key, table->bits) : hash_64(key, table->bits); - struct chash_iter iter = CHASH_ITER_INIT(table, hash); - int r = chash_table_find(&iter, key, remove); - - if (r < 0) - return r; - - if (value && table->value_size) - memcpy(value, chash_iter_value(iter), table->value_size); - - if (remove) - chash_iter_set_invalid(iter); - - return iter.slot; -} -EXPORT_SYMBOL(__chash_table_copy_out); - -#ifdef CONFIG_CHASH_SELFTEST -/** - * chash_self_test - Run a self-test of the hash table implementation - * @bits: Table size will be 2^bits entries - * @key_size: Size of hash keys in bytes, 4 or 8 - * @min_fill: Minimum fill level during the test - * @max_fill: Maximum fill level during the test - * @iterations: Number of test iterations - * - * The test adds and removes entries from a hash table, cycling the - * fill level between min_fill and max_fill entries. Also tests lookup - * and value retrieval. - */ -static int __init chash_self_test(u8 bits, u8 key_size, - int min_fill, int max_fill, - u64 iterations) -{ - struct chash_table table; - int ret; - u64 add_count, rmv_count; - u64 value; - - if (key_size == 4 && iterations > 0xffffffff) - return -EINVAL; - if (min_fill >= max_fill) - return -EINVAL; - - ret = chash_table_alloc(&table, bits, key_size, sizeof(u64), - GFP_KERNEL); - if (ret) { - pr_err("chash_table_alloc failed: %d\n", ret); - return ret; - } - - for (add_count = 0, rmv_count = 0; add_count < iterations; - add_count++) { - /* When we hit the max_fill level, remove entries down - * to min_fill - */ - if (add_count - rmv_count == max_fill) { - u64 find_count = rmv_count; - - /* First try to find all entries that we're - * about to remove, confirm their value, test - * writing them back a second time. - */ - for (; add_count - find_count > min_fill; - find_count++) { - ret = chash_table_copy_out(&table, find_count, - &value); - if (ret < 0) { - pr_err("chash_table_copy_out failed: %d\n", - ret); - goto out; - } - if (value != ~find_count) { - pr_err("Wrong value retrieved for key 0x%llx, expected 0x%llx got 0x%llx\n", - find_count, ~find_count, value); -#ifdef CHASH_DEBUG - chash_table_dump(&table.table); -#endif - ret = -EFAULT; - goto out; - } - ret = chash_table_copy_in(&table, find_count, - &value); - if (ret != 1) { - pr_err("copy_in second time returned %d, expected 1\n", - ret); - ret = -EFAULT; - goto out; - } - } - /* Remove them until we hit min_fill level */ - for (; add_count - rmv_count > min_fill; rmv_count++) { - ret = chash_table_remove(&table, rmv_count, - NULL); - if (ret < 0) { - pr_err("chash_table_remove failed: %d\n", - ret); - goto out; - } - } - } - - /* Add a new value */ - value = ~add_count; - ret = chash_table_copy_in(&table, add_count, &value); - if (ret != 0) { - pr_err("copy_in first time returned %d, expected 0\n", - ret); - ret = -EFAULT; - goto out; - } - } - - chash_table_dump_stats(&table); - chash_table_reset_stats(&table); - -out: - chash_table_free(&table); - return ret; -} - -static unsigned int chash_test_bits = 10; -MODULE_PARM_DESC(test_bits, - "Selftest number of hash bits ([4..20], default=10)"); -module_param_named(test_bits, chash_test_bits, uint, 0444); - -static unsigned int chash_test_keysize = 8; -MODULE_PARM_DESC(test_keysize, "Selftest keysize (4 or 8, default=8)"); -module_param_named(test_keysize, chash_test_keysize, uint, 0444); - -static unsigned int chash_test_minfill; -MODULE_PARM_DESC(test_minfill, "Selftest minimum #entries (default=50%)"); -module_param_named(test_minfill, chash_test_minfill, uint, 0444); - -static unsigned int chash_test_maxfill; -MODULE_PARM_DESC(test_maxfill, "Selftest maximum #entries (default=80%)"); -module_param_named(test_maxfill, chash_test_maxfill, uint, 0444); - -static unsigned long chash_test_iters; -MODULE_PARM_DESC(test_iters, "Selftest iterations (default=1000 x #entries)"); -module_param_named(test_iters, chash_test_iters, ulong, 0444); - -static int __init chash_init(void) -{ - int ret; - u64 ts1_ns; - - /* Skip self test on user errors */ - if (chash_test_bits < 4 || chash_test_bits > 20) { - pr_err("chash: test_bits out of range [4..20].\n"); - return 0; - } - if (chash_test_keysize != 4 && chash_test_keysize != 8) { - pr_err("chash: test_keysize invalid. Must be 4 or 8.\n"); - return 0; - } - - if (!chash_test_minfill) - chash_test_minfill = (1 << chash_test_bits) / 2; - if (!chash_test_maxfill) - chash_test_maxfill = (1 << chash_test_bits) * 4 / 5; - if (!chash_test_iters) - chash_test_iters = (1 << chash_test_bits) * 1000; - - if (chash_test_minfill >= (1 << chash_test_bits)) { - pr_err("chash: test_minfill too big. Must be < table size.\n"); - return 0; - } - if (chash_test_maxfill >= (1 << chash_test_bits)) { - pr_err("chash: test_maxfill too big. Must be < table size.\n"); - return 0; - } - if (chash_test_minfill >= chash_test_maxfill) { - pr_err("chash: test_minfill must be < test_maxfill.\n"); - return 0; - } - if (chash_test_keysize == 4 && chash_test_iters > 0xffffffff) { - pr_err("chash: test_iters must be < 4G for 4 byte keys.\n"); - return 0; - } - - ts1_ns = local_clock(); - ret = chash_self_test(chash_test_bits, chash_test_keysize, - chash_test_minfill, chash_test_maxfill, - chash_test_iters); - if (!ret) { - u64 ts_delta_us = local_clock() - ts1_ns; - u64 iters_per_second = (u64)chash_test_iters * 1000000; - - do_div(ts_delta_us, 1000); - do_div(iters_per_second, ts_delta_us); - pr_info("chash: self test took %llu us, %llu iterations/s\n", - ts_delta_us, iters_per_second); - } else { - pr_err("chash: self test failed: %d\n", ret); - } - - return ret; -} - -module_init(chash_init); - -#endif /* CONFIG_CHASH_SELFTEST */ - -MODULE_DESCRIPTION("Closed hash table"); -MODULE_LICENSE("GPL and additional rights"); diff --git a/drivers/gpu/drm/amd/powerplay/Makefile b/drivers/gpu/drm/amd/powerplay/Makefile index 231785a9e24c..ec87b3430d12 100644 --- a/drivers/gpu/drm/amd/powerplay/Makefile +++ b/drivers/gpu/drm/amd/powerplay/Makefile @@ -35,7 +35,7 @@ AMD_POWERPLAY = $(addsuffix /Makefile,$(addprefix $(FULL_AMD_PATH)/powerplay/,$( include $(AMD_POWERPLAY) -POWER_MGR = amd_powerplay.o +POWER_MGR = amd_powerplay.o amdgpu_smu.o smu_v11_0.o vega20_ppt.o AMD_PP_POWER = $(addprefix $(AMD_PP_PATH)/,$(POWER_MGR)) diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index 3f73f7cd18b9..bea1587d352d 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -53,7 +53,7 @@ static int amd_powerplay_create(struct amdgpu_device *adev) mutex_init(&hwmgr->smu_lock); hwmgr->chip_family = adev->family; hwmgr->chip_id = adev->asic_type; - hwmgr->feature_mask = adev->powerplay.pp_feature; + hwmgr->feature_mask = adev->pm.pp_feature; hwmgr->display_config = &adev->pm.pm_display_cfg; adev->powerplay.pp_handle = hwmgr; adev->powerplay.pp_funcs = &pp_dpm_funcs; @@ -1304,7 +1304,7 @@ static int pp_notify_smu_enable_pwe(void *handle) if (hwmgr->hwmgr_func->smus_notify_pwe == NULL) { pr_info_ratelimited("%s was not implemented.\n", __func__); - return -EINVAL;; + return -EINVAL; } mutex_lock(&hwmgr->smu_lock); @@ -1341,7 +1341,7 @@ static int pp_set_min_deep_sleep_dcefclk(void *handle, uint32_t clock) if (hwmgr->hwmgr_func->set_min_deep_sleep_dcefclk == NULL) { pr_debug("%s was not implemented.\n", __func__); - return -EINVAL;; + return -EINVAL; } mutex_lock(&hwmgr->smu_lock); @@ -1360,7 +1360,7 @@ static int pp_set_hard_min_dcefclk_by_freq(void *handle, uint32_t clock) if (hwmgr->hwmgr_func->set_hard_min_dcefclk_by_freq == NULL) { pr_debug("%s was not implemented.\n", __func__); - return -EINVAL;; + return -EINVAL; } mutex_lock(&hwmgr->smu_lock); @@ -1379,7 +1379,7 @@ static int pp_set_hard_min_fclk_by_freq(void *handle, uint32_t clock) if (hwmgr->hwmgr_func->set_hard_min_fclk_by_freq == NULL) { pr_debug("%s was not implemented.\n", __func__); - return -EINVAL;; + return -EINVAL; } mutex_lock(&hwmgr->smu_lock); diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c new file mode 100644 index 000000000000..c058c784180e --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -0,0 +1,1253 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "pp_debug.h" +#include <linux/firmware.h> +#include <drm/drmP.h> +#include "amdgpu.h" +#include "amdgpu_smu.h" +#include "soc15_common.h" +#include "smu_v11_0.h" +#include "atom.h" +#include "amd_pcie.h" + +int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type, + bool gate) +{ + int ret = 0; + + switch (block_type) { + case AMD_IP_BLOCK_TYPE_UVD: + ret = smu_dpm_set_uvd_enable(smu, gate); + break; + case AMD_IP_BLOCK_TYPE_VCE: + ret = smu_dpm_set_vce_enable(smu, gate); + break; + default: + break; + } + + return ret; +} + +enum amd_pm_state_type smu_get_current_power_state(struct smu_context *smu) +{ + /* not support power state */ + return POWER_STATE_TYPE_DEFAULT; +} + +int smu_get_power_num_states(struct smu_context *smu, + struct pp_states_info *state_info) +{ + if (!state_info) + return -EINVAL; + + /* not support power state */ + memset(state_info, 0, sizeof(struct pp_states_info)); + state_info->nums = 0; + + return 0; +} + +int smu_common_read_sensor(struct smu_context *smu, enum amd_pp_sensors sensor, + void *data, uint32_t *size) +{ + int ret = 0; + + switch (sensor) { + case AMDGPU_PP_SENSOR_STABLE_PSTATE_SCLK: + *((uint32_t *)data) = smu->pstate_sclk; + *size = 4; + break; + case AMDGPU_PP_SENSOR_STABLE_PSTATE_MCLK: + *((uint32_t *)data) = smu->pstate_mclk; + *size = 4; + break; + case AMDGPU_PP_SENSOR_ENABLED_SMC_FEATURES_MASK: + ret = smu_feature_get_enabled_mask(smu, (uint32_t *)data, 2); + *size = 8; + break; + default: + ret = -EINVAL; + break; + } + + if (ret) + *size = 0; + + return ret; +} + +int smu_update_table_with_arg(struct smu_context *smu, uint16_t table_id, uint16_t exarg, + void *table_data, bool drv2smu) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *table = NULL; + int ret = 0; + uint32_t table_index; + + if (!table_data || table_id >= smu_table->table_count) + return -EINVAL; + + table_index = (exarg << 16) | table_id; + + table = &smu_table->tables[table_id]; + + if (drv2smu) + memcpy(table->cpu_addr, table_data, table->size); + + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetDriverDramAddrHigh, + upper_32_bits(table->mc_address)); + if (ret) + return ret; + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetDriverDramAddrLow, + lower_32_bits(table->mc_address)); + if (ret) + return ret; + ret = smu_send_smc_msg_with_param(smu, drv2smu ? + SMU_MSG_TransferTableDram2Smu : + SMU_MSG_TransferTableSmu2Dram, + table_index); + if (ret) + return ret; + + if (!drv2smu) + memcpy(table_data, table->cpu_addr, table->size); + + return ret; +} + +bool is_support_sw_smu(struct amdgpu_device *adev) +{ + if (amdgpu_dpm != 1) + return false; + + if (adev->asic_type >= CHIP_VEGA20 && adev->asic_type != CHIP_RAVEN) + return true; + + return false; +} + +int smu_sys_get_pp_table(struct smu_context *smu, void **table) +{ + struct smu_table_context *smu_table = &smu->smu_table; + + if (!smu_table->power_play_table && !smu_table->hardcode_pptable) + return -EINVAL; + + if (smu_table->hardcode_pptable) + *table = smu_table->hardcode_pptable; + else + *table = smu_table->power_play_table; + + return smu_table->power_play_table_size; +} + +int smu_sys_set_pp_table(struct smu_context *smu, void *buf, size_t size) +{ + struct smu_table_context *smu_table = &smu->smu_table; + ATOM_COMMON_TABLE_HEADER *header = (ATOM_COMMON_TABLE_HEADER *)buf; + int ret = 0; + + if (header->usStructureSize != size) { + pr_err("pp table size not matched !\n"); + return -EIO; + } + + mutex_lock(&smu->mutex); + if (!smu_table->hardcode_pptable) + smu_table->hardcode_pptable = kzalloc(size, GFP_KERNEL); + if (!smu_table->hardcode_pptable) { + ret = -ENOMEM; + goto failed; + } + + memcpy(smu_table->hardcode_pptable, buf, size); + smu_table->power_play_table = smu_table->hardcode_pptable; + smu_table->power_play_table_size = size; + mutex_unlock(&smu->mutex); + + ret = smu_reset(smu); + if (ret) + pr_info("smu reset failed, ret = %d\n", ret); + + return ret; + +failed: + mutex_unlock(&smu->mutex); + return ret; +} + +int smu_feature_init_dpm(struct smu_context *smu) +{ + struct smu_feature *feature = &smu->smu_feature; + int ret = 0; + uint32_t unallowed_feature_mask[SMU_FEATURE_MAX/32]; + + mutex_lock(&feature->mutex); + bitmap_fill(feature->allowed, SMU_FEATURE_MAX); + mutex_unlock(&feature->mutex); + + ret = smu_get_unallowed_feature_mask(smu, unallowed_feature_mask, + SMU_FEATURE_MAX/32); + if (ret) + return ret; + + mutex_lock(&feature->mutex); + bitmap_andnot(feature->allowed, feature->allowed, + (unsigned long *)unallowed_feature_mask, + feature->feature_num); + mutex_unlock(&feature->mutex); + + return ret; +} + +int smu_feature_is_enabled(struct smu_context *smu, int feature_id) +{ + struct smu_feature *feature = &smu->smu_feature; + int ret = 0; + + WARN_ON(feature_id > feature->feature_num); + + mutex_lock(&feature->mutex); + ret = test_bit(feature_id, feature->enabled); + mutex_unlock(&feature->mutex); + + return ret; +} + +int smu_feature_set_enabled(struct smu_context *smu, int feature_id, bool enable) +{ + struct smu_feature *feature = &smu->smu_feature; + int ret = 0; + + WARN_ON(feature_id > feature->feature_num); + + mutex_lock(&feature->mutex); + ret = smu_feature_update_enable_state(smu, feature_id, enable); + if (ret) + goto failed; + + if (enable) + test_and_set_bit(feature_id, feature->enabled); + else + test_and_clear_bit(feature_id, feature->enabled); + +failed: + mutex_unlock(&feature->mutex); + + return ret; +} + +int smu_feature_is_supported(struct smu_context *smu, int feature_id) +{ + struct smu_feature *feature = &smu->smu_feature; + int ret = 0; + + WARN_ON(feature_id > feature->feature_num); + + mutex_lock(&feature->mutex); + ret = test_bit(feature_id, feature->supported); + mutex_unlock(&feature->mutex); + + return ret; +} + +int smu_feature_set_supported(struct smu_context *smu, int feature_id, + bool enable) +{ + struct smu_feature *feature = &smu->smu_feature; + int ret = 0; + + WARN_ON(feature_id > feature->feature_num); + + mutex_unlock(&feature->mutex); + if (enable) + test_and_set_bit(feature_id, feature->supported); + else + test_and_clear_bit(feature_id, feature->supported); + mutex_unlock(&feature->mutex); + + return ret; +} + +static int smu_set_funcs(struct amdgpu_device *adev) +{ + struct smu_context *smu = &adev->smu; + + switch (adev->asic_type) { + case CHIP_VEGA20: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + if (adev->pm.pp_feature & PP_OVERDRIVE_MASK) + smu->od_enabled = true; + smu_v11_0_set_smu_funcs(smu); + break; + default: + return -EINVAL; + } + + return 0; +} + +static int smu_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct smu_context *smu = &adev->smu; + + smu->adev = adev; + mutex_init(&smu->mutex); + + return smu_set_funcs(adev); +} + +static int smu_late_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct smu_context *smu = &adev->smu; + mutex_lock(&smu->mutex); + smu_handle_task(&adev->smu, + smu->smu_dpm.dpm_level, + AMD_PP_TASK_COMPLETE_INIT); + mutex_unlock(&smu->mutex); + + return 0; +} + +int smu_get_atom_data_table(struct smu_context *smu, uint32_t table, + uint16_t *size, uint8_t *frev, uint8_t *crev, + uint8_t **addr) +{ + struct amdgpu_device *adev = smu->adev; + uint16_t data_start; + + if (!amdgpu_atom_parse_data_header(adev->mode_info.atom_context, table, + size, frev, crev, &data_start)) + return -EINVAL; + + *addr = (uint8_t *)adev->mode_info.atom_context->bios + data_start; + + return 0; +} + +static int smu_initialize_pptable(struct smu_context *smu) +{ + /* TODO */ + return 0; +} + +static int smu_smc_table_sw_init(struct smu_context *smu) +{ + int ret; + + ret = smu_initialize_pptable(smu); + if (ret) { + pr_err("Failed to init smu_initialize_pptable!\n"); + return ret; + } + + /** + * Create smu_table structure, and init smc tables such as + * TABLE_PPTABLE, TABLE_WATERMARKS, TABLE_SMU_METRICS, and etc. + */ + ret = smu_init_smc_tables(smu); + if (ret) { + pr_err("Failed to init smc tables!\n"); + return ret; + } + + /** + * Create smu_power_context structure, and allocate smu_dpm_context and + * context size to fill the smu_power_context data. + */ + ret = smu_init_power(smu); + if (ret) { + pr_err("Failed to init smu_init_power!\n"); + return ret; + } + + return 0; +} + +static int smu_smc_table_sw_fini(struct smu_context *smu) +{ + int ret; + + ret = smu_fini_smc_tables(smu); + if (ret) { + pr_err("Failed to smu_fini_smc_tables!\n"); + return ret; + } + + return 0; +} + +static int smu_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct smu_context *smu = &adev->smu; + int ret; + + if (!is_support_sw_smu(adev)) + return -EINVAL; + + smu->pool_size = adev->pm.smu_prv_buffer_size; + smu->smu_feature.feature_num = SMU_FEATURE_MAX; + mutex_init(&smu->smu_feature.mutex); + bitmap_zero(smu->smu_feature.supported, SMU_FEATURE_MAX); + bitmap_zero(smu->smu_feature.enabled, SMU_FEATURE_MAX); + bitmap_zero(smu->smu_feature.allowed, SMU_FEATURE_MAX); + smu->watermarks_bitmap = 0; + smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; + smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; + + smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; + smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT] = 0; + smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D] = 1; + smu->workload_prority[PP_SMC_POWER_PROFILE_POWERSAVING] = 2; + smu->workload_prority[PP_SMC_POWER_PROFILE_VIDEO] = 3; + smu->workload_prority[PP_SMC_POWER_PROFILE_VR] = 4; + smu->workload_prority[PP_SMC_POWER_PROFILE_COMPUTE] = 5; + smu->workload_prority[PP_SMC_POWER_PROFILE_CUSTOM] = 6; + + smu->workload_setting[0] = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; + smu->workload_setting[1] = PP_SMC_POWER_PROFILE_FULLSCREEN3D; + smu->workload_setting[2] = PP_SMC_POWER_PROFILE_POWERSAVING; + smu->workload_setting[3] = PP_SMC_POWER_PROFILE_VIDEO; + smu->workload_setting[4] = PP_SMC_POWER_PROFILE_VR; + smu->workload_setting[5] = PP_SMC_POWER_PROFILE_COMPUTE; + smu->workload_setting[6] = PP_SMC_POWER_PROFILE_CUSTOM; + smu->display_config = &adev->pm.pm_display_cfg; + + smu->smu_dpm.dpm_level = AMD_DPM_FORCED_LEVEL_AUTO; + smu->smu_dpm.requested_dpm_level = AMD_DPM_FORCED_LEVEL_AUTO; + ret = smu_init_microcode(smu); + if (ret) { + pr_err("Failed to load smu firmware!\n"); + return ret; + } + + ret = smu_smc_table_sw_init(smu); + if (ret) { + pr_err("Failed to sw init smc table!\n"); + return ret; + } + + return 0; +} + +static int smu_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct smu_context *smu = &adev->smu; + int ret; + + if (!is_support_sw_smu(adev)) + return -EINVAL; + + ret = smu_smc_table_sw_fini(smu); + if (ret) { + pr_err("Failed to sw fini smc table!\n"); + return ret; + } + + ret = smu_fini_power(smu); + if (ret) { + pr_err("Failed to init smu_fini_power!\n"); + return ret; + } + + return 0; +} + +static int smu_init_fb_allocations(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *tables = smu_table->tables; + uint32_t table_count = smu_table->table_count; + uint32_t i = 0; + int32_t ret = 0; + + if (table_count <= 0) + return -EINVAL; + + for (i = 0 ; i < table_count; i++) { + if (tables[i].size == 0) + continue; + ret = amdgpu_bo_create_kernel(adev, + tables[i].size, + tables[i].align, + tables[i].domain, + &tables[i].bo, + &tables[i].mc_address, + &tables[i].cpu_addr); + if (ret) + goto failed; + } + + return 0; +failed: + for (; i > 0; i--) { + if (tables[i].size == 0) + continue; + amdgpu_bo_free_kernel(&tables[i].bo, + &tables[i].mc_address, + &tables[i].cpu_addr); + + } + return ret; +} + +static int smu_fini_fb_allocations(struct smu_context *smu) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *tables = smu_table->tables; + uint32_t table_count = smu_table->table_count; + uint32_t i = 0; + + if (table_count == 0 || tables == NULL) + return 0; + + for (i = 0 ; i < table_count; i++) { + if (tables[i].size == 0) + continue; + amdgpu_bo_free_kernel(&tables[i].bo, + &tables[i].mc_address, + &tables[i].cpu_addr); + } + + return 0; +} + +static int smu_override_pcie_parameters(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t pcie_gen = 0, pcie_width = 0, smu_pcie_arg; + int ret; + + if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4) + pcie_gen = 3; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) + pcie_gen = 2; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) + pcie_gen = 1; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1) + pcie_gen = 0; + + /* Bit 31:16: LCLK DPM level. 0 is DPM0, and 1 is DPM1 + * Bit 15:8: PCIE GEN, 0 to 3 corresponds to GEN1 to GEN4 + * Bit 7:0: PCIE lane width, 1 to 7 corresponds is x1 to x32 + */ + if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16) + pcie_width = 6; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12) + pcie_width = 5; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8) + pcie_width = 4; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4) + pcie_width = 3; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2) + pcie_width = 2; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X1) + pcie_width = 1; + + smu_pcie_arg = (1 << 16) | (pcie_gen << 8) | pcie_width; + ret = smu_send_smc_msg_with_param(smu, + SMU_MSG_OverridePcieParameters, + smu_pcie_arg); + if (ret) + pr_err("[%s] Attempt to override pcie params failed!\n", __func__); + return ret; +} + +static int smu_smc_table_hw_init(struct smu_context *smu, + bool initialize) +{ + struct amdgpu_device *adev = smu->adev; + int ret; + + if (smu_is_dpm_running(smu) && adev->in_suspend) { + pr_info("dpm has been enabled\n"); + return 0; + } + + ret = smu_init_display(smu); + if (ret) + return ret; + + if (initialize) { + ret = smu_read_pptable_from_vbios(smu); + if (ret) + return ret; + + /* get boot_values from vbios to set revision, gfxclk, and etc. */ + ret = smu_get_vbios_bootup_values(smu); + if (ret) + return ret; + + ret = smu_get_clk_info_from_vbios(smu); + if (ret) + return ret; + + /* + * check if the format_revision in vbios is up to pptable header + * version, and the structure size is not 0. + */ + ret = smu_get_clk_info_from_vbios(smu); + if (ret) + return ret; + + ret = smu_check_pptable(smu); + if (ret) + return ret; + + /* + * allocate vram bos to store smc table contents. + */ + ret = smu_init_fb_allocations(smu); + if (ret) + return ret; + + /* + * Parse pptable format and fill PPTable_t smc_pptable to + * smu_table_context structure. And read the smc_dpm_table from vbios, + * then fill it into smc_pptable. + */ + ret = smu_parse_pptable(smu); + if (ret) + return ret; + + /* + * Send msg GetDriverIfVersion to check if the return value is equal + * with DRIVER_IF_VERSION of smc header. + */ + ret = smu_check_fw_version(smu); + if (ret) + return ret; + } + + /* + * Copy pptable bo in the vram to smc with SMU MSGs such as + * SetDriverDramAddr and TransferTableDram2Smu. + */ + ret = smu_write_pptable(smu); + if (ret) + return ret; + + /* issue RunAfllBtc msg */ + ret = smu_run_afll_btc(smu); + if (ret) + return ret; + + ret = smu_feature_set_allowed_mask(smu); + if (ret) + return ret; + + ret = smu_system_features_control(smu, true); + if (ret) + return ret; + + ret = smu_override_pcie_parameters(smu); + if (ret) + return ret; + + ret = smu_notify_display_change(smu); + if (ret) + return ret; + + /* + * Set min deep sleep dce fclk with bootup value from vbios via + * SetMinDeepSleepDcefclk MSG. + */ + ret = smu_set_min_dcef_deep_sleep(smu); + if (ret) + return ret; + + /* + * Set initialized values (get from vbios) to dpm tables context such as + * gfxclk, memclk, dcefclk, and etc. And enable the DPM feature for each + * type of clks. + */ + if (initialize) { + ret = smu_populate_smc_pptable(smu); + if (ret) + return ret; + + ret = smu_init_max_sustainable_clocks(smu); + if (ret) + return ret; + } + + ret = smu_set_od8_default_settings(smu, initialize); + if (ret) + return ret; + + if (initialize) { + ret = smu_populate_umd_state_clk(smu); + if (ret) + return ret; + + ret = smu_get_power_limit(smu, &smu->default_power_limit, false); + if (ret) + return ret; + } + + /* + * Set PMSTATUSLOG table bo address with SetToolsDramAddr MSG for tools. + */ + ret = smu_set_tool_table_location(smu); + + return ret; +} + +/** + * smu_alloc_memory_pool - allocate memory pool in the system memory + * + * @smu: amdgpu_device pointer + * + * This memory pool will be used for SMC use and msg SetSystemVirtualDramAddr + * and DramLogSetDramAddr can notify it changed. + * + * Returns 0 on success, error on failure. + */ +static int smu_alloc_memory_pool(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *memory_pool = &smu_table->memory_pool; + uint64_t pool_size = smu->pool_size; + int ret = 0; + + if (pool_size == SMU_MEMORY_POOL_SIZE_ZERO) + return ret; + + memory_pool->size = pool_size; + memory_pool->align = PAGE_SIZE; + memory_pool->domain = AMDGPU_GEM_DOMAIN_GTT; + + switch (pool_size) { + case SMU_MEMORY_POOL_SIZE_256_MB: + case SMU_MEMORY_POOL_SIZE_512_MB: + case SMU_MEMORY_POOL_SIZE_1_GB: + case SMU_MEMORY_POOL_SIZE_2_GB: + ret = amdgpu_bo_create_kernel(adev, + memory_pool->size, + memory_pool->align, + memory_pool->domain, + &memory_pool->bo, + &memory_pool->mc_address, + &memory_pool->cpu_addr); + break; + default: + break; + } + + return ret; +} + +static int smu_free_memory_pool(struct smu_context *smu) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *memory_pool = &smu_table->memory_pool; + int ret = 0; + + if (memory_pool->size == SMU_MEMORY_POOL_SIZE_ZERO) + return ret; + + amdgpu_bo_free_kernel(&memory_pool->bo, + &memory_pool->mc_address, + &memory_pool->cpu_addr); + + memset(memory_pool, 0, sizeof(struct smu_table)); + + return ret; +} + +static int smu_hw_init(void *handle) +{ + int ret; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct smu_context *smu = &adev->smu; + + if (!is_support_sw_smu(adev)) + return -EINVAL; + + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + ret = smu_load_microcode(smu); + if (ret) + return ret; + } + + ret = smu_check_fw_status(smu); + if (ret) { + pr_err("SMC firmware status is not correct\n"); + return ret; + } + + mutex_lock(&smu->mutex); + + ret = smu_feature_init_dpm(smu); + if (ret) + goto failed; + + ret = smu_smc_table_hw_init(smu, true); + if (ret) + goto failed; + + ret = smu_alloc_memory_pool(smu); + if (ret) + goto failed; + + /* + * Use msg SetSystemVirtualDramAddr and DramLogSetDramAddr can notify + * pool location. + */ + ret = smu_notify_memory_pool_location(smu); + if (ret) + goto failed; + + ret = smu_start_thermal_control(smu); + if (ret) + goto failed; + + mutex_unlock(&smu->mutex); + + adev->pm.dpm_enabled = true; + + pr_info("SMU is initialized successfully!\n"); + + return 0; + +failed: + mutex_unlock(&smu->mutex); + return ret; +} + +static int smu_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct smu_context *smu = &adev->smu; + struct smu_table_context *table_context = &smu->smu_table; + int ret = 0; + + if (!is_support_sw_smu(adev)) + return -EINVAL; + + kfree(table_context->driver_pptable); + table_context->driver_pptable = NULL; + + kfree(table_context->max_sustainable_clocks); + table_context->max_sustainable_clocks = NULL; + + kfree(table_context->od_feature_capabilities); + table_context->od_feature_capabilities = NULL; + + kfree(table_context->od_settings_max); + table_context->od_settings_max = NULL; + + kfree(table_context->od_settings_min); + table_context->od_settings_min = NULL; + + kfree(table_context->overdrive_table); + table_context->overdrive_table = NULL; + + kfree(table_context->od8_settings); + table_context->od8_settings = NULL; + + ret = smu_fini_fb_allocations(smu); + if (ret) + return ret; + + ret = smu_free_memory_pool(smu); + if (ret) + return ret; + + return 0; +} + +int smu_reset(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + int ret = 0; + + ret = smu_hw_fini(adev); + if (ret) + return ret; + + ret = smu_hw_init(adev); + if (ret) + return ret; + + return ret; +} + +static int smu_suspend(void *handle) +{ + int ret; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct smu_context *smu = &adev->smu; + + if (!is_support_sw_smu(adev)) + return -EINVAL; + + ret = smu_system_features_control(smu, false); + if (ret) + return ret; + + smu->watermarks_bitmap &= ~(WATERMARKS_LOADED); + + return 0; +} + +static int smu_resume(void *handle) +{ + int ret; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct smu_context *smu = &adev->smu; + + if (!is_support_sw_smu(adev)) + return -EINVAL; + + pr_info("SMU is resuming...\n"); + + mutex_lock(&smu->mutex); + + ret = smu_smc_table_hw_init(smu, false); + if (ret) + goto failed; + + ret = smu_start_thermal_control(smu); + if (ret) + goto failed; + + mutex_unlock(&smu->mutex); + + pr_info("SMU is resumed successfully!\n"); + + return 0; +failed: + mutex_unlock(&smu->mutex); + return ret; +} + +int smu_display_configuration_change(struct smu_context *smu, + const struct amd_pp_display_configuration *display_config) +{ + int index = 0; + int num_of_active_display = 0; + + if (!is_support_sw_smu(smu->adev)) + return -EINVAL; + + if (!display_config) + return -EINVAL; + + mutex_lock(&smu->mutex); + + smu_set_deep_sleep_dcefclk(smu, + display_config->min_dcef_deep_sleep_set_clk / 100); + + for (index = 0; index < display_config->num_path_including_non_display; index++) { + if (display_config->displays[index].controller_id != 0) + num_of_active_display++; + } + + smu_set_active_display_count(smu, num_of_active_display); + + smu_store_cc6_data(smu, display_config->cpu_pstate_separation_time, + display_config->cpu_cc6_disable, + display_config->cpu_pstate_disable, + display_config->nb_pstate_switch_disable); + + mutex_unlock(&smu->mutex); + + return 0; +} + +static int smu_get_clock_info(struct smu_context *smu, + struct smu_clock_info *clk_info, + enum smu_perf_level_designation designation) +{ + int ret; + struct smu_performance_level level = {0}; + + if (!clk_info) + return -EINVAL; + + ret = smu_get_perf_level(smu, PERF_LEVEL_ACTIVITY, &level); + if (ret) + return -EINVAL; + + clk_info->min_mem_clk = level.memory_clock; + clk_info->min_eng_clk = level.core_clock; + clk_info->min_bus_bandwidth = level.non_local_mem_freq * level.non_local_mem_width; + + ret = smu_get_perf_level(smu, designation, &level); + if (ret) + return -EINVAL; + + clk_info->min_mem_clk = level.memory_clock; + clk_info->min_eng_clk = level.core_clock; + clk_info->min_bus_bandwidth = level.non_local_mem_freq * level.non_local_mem_width; + + return 0; +} + +int smu_get_current_clocks(struct smu_context *smu, + struct amd_pp_clock_info *clocks) +{ + struct amd_pp_simple_clock_info simple_clocks = {0}; + struct smu_clock_info hw_clocks; + int ret = 0; + + if (!is_support_sw_smu(smu->adev)) + return -EINVAL; + + mutex_lock(&smu->mutex); + + smu_get_dal_power_level(smu, &simple_clocks); + + if (smu->support_power_containment) + ret = smu_get_clock_info(smu, &hw_clocks, + PERF_LEVEL_POWER_CONTAINMENT); + else + ret = smu_get_clock_info(smu, &hw_clocks, PERF_LEVEL_ACTIVITY); + + if (ret) { + pr_err("Error in smu_get_clock_info\n"); + goto failed; + } + + clocks->min_engine_clock = hw_clocks.min_eng_clk; + clocks->max_engine_clock = hw_clocks.max_eng_clk; + clocks->min_memory_clock = hw_clocks.min_mem_clk; + clocks->max_memory_clock = hw_clocks.max_mem_clk; + clocks->min_bus_bandwidth = hw_clocks.min_bus_bandwidth; + clocks->max_bus_bandwidth = hw_clocks.max_bus_bandwidth; + clocks->max_engine_clock_in_sr = hw_clocks.max_eng_clk; + clocks->min_engine_clock_in_sr = hw_clocks.min_eng_clk; + + if (simple_clocks.level == 0) + clocks->max_clocks_state = PP_DAL_POWERLEVEL_7; + else + clocks->max_clocks_state = simple_clocks.level; + + if (!smu_get_current_shallow_sleep_clocks(smu, &hw_clocks)) { + clocks->max_engine_clock_in_sr = hw_clocks.max_eng_clk; + clocks->min_engine_clock_in_sr = hw_clocks.min_eng_clk; + } + +failed: + mutex_unlock(&smu->mutex); + return ret; +} + +static int smu_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + return 0; +} + +static int smu_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +static int smu_enable_umd_pstate(void *handle, + enum amd_dpm_forced_level *level) +{ + uint32_t profile_mode_mask = AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD | + AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK | + AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK | + AMD_DPM_FORCED_LEVEL_PROFILE_PEAK; + + struct smu_context *smu = (struct smu_context*)(handle); + struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); + if (!smu_dpm_ctx->dpm_context) + return -EINVAL; + + if (!(smu_dpm_ctx->dpm_level & profile_mode_mask)) { + /* enter umd pstate, save current level, disable gfx cg*/ + if (*level & profile_mode_mask) { + smu_dpm_ctx->saved_dpm_level = smu_dpm_ctx->dpm_level; + smu_dpm_ctx->enable_umd_pstate = true; + amdgpu_device_ip_set_clockgating_state(smu->adev, + AMD_IP_BLOCK_TYPE_GFX, + AMD_CG_STATE_UNGATE); + amdgpu_device_ip_set_powergating_state(smu->adev, + AMD_IP_BLOCK_TYPE_GFX, + AMD_PG_STATE_UNGATE); + } + } else { + /* exit umd pstate, restore level, enable gfx cg*/ + if (!(*level & profile_mode_mask)) { + if (*level == AMD_DPM_FORCED_LEVEL_PROFILE_EXIT) + *level = smu_dpm_ctx->saved_dpm_level; + smu_dpm_ctx->enable_umd_pstate = false; + amdgpu_device_ip_set_clockgating_state(smu->adev, + AMD_IP_BLOCK_TYPE_GFX, + AMD_CG_STATE_GATE); + amdgpu_device_ip_set_powergating_state(smu->adev, + AMD_IP_BLOCK_TYPE_GFX, + AMD_PG_STATE_GATE); + } + } + + return 0; +} + +int smu_adjust_power_state_dynamic(struct smu_context *smu, + enum amd_dpm_forced_level level, + bool skip_display_settings) +{ + int ret = 0; + int index = 0; + uint32_t sclk_mask, mclk_mask, soc_mask; + long workload; + struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); + + if (!skip_display_settings) { + ret = smu_display_config_changed(smu); + if (ret) { + pr_err("Failed to change display config!"); + return ret; + } + } + + ret = smu_apply_clocks_adjust_rules(smu); + if (ret) { + pr_err("Failed to apply clocks adjust rules!"); + return ret; + } + + if (!skip_display_settings) { + ret = smu_notify_smc_dispaly_config(smu); + if (ret) { + pr_err("Failed to notify smc display config!"); + return ret; + } + } + + if (smu_dpm_ctx->dpm_level != level) { + switch (level) { + case AMD_DPM_FORCED_LEVEL_HIGH: + ret = smu_force_dpm_limit_value(smu, true); + break; + case AMD_DPM_FORCED_LEVEL_LOW: + ret = smu_force_dpm_limit_value(smu, false); + break; + + case AMD_DPM_FORCED_LEVEL_AUTO: + ret = smu_unforce_dpm_levels(smu); + break; + + case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD: + case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK: + case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK: + case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK: + ret = smu_get_profiling_clk_mask(smu, level, + &sclk_mask, + &mclk_mask, + &soc_mask); + if (ret) + return ret; + smu_force_clk_levels(smu, PP_SCLK, 1 << sclk_mask); + smu_force_clk_levels(smu, PP_MCLK, 1 << mclk_mask); + break; + + case AMD_DPM_FORCED_LEVEL_MANUAL: + case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT: + default: + break; + } + + if (!ret) + smu_dpm_ctx->dpm_level = level; + } + + if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) { + index = fls(smu->workload_mask); + index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; + workload = smu->workload_setting[index]; + + if (smu->power_profile_mode != workload) + smu_set_power_profile_mode(smu, &workload, 0); + } + + return ret; +} + +int smu_handle_task(struct smu_context *smu, + enum amd_dpm_forced_level level, + enum amd_pp_task task_id) +{ + int ret = 0; + + switch (task_id) { + case AMD_PP_TASK_DISPLAY_CONFIG_CHANGE: + ret = smu_pre_display_config_changed(smu); + if (ret) + return ret; + ret = smu_set_cpu_power_state(smu); + if (ret) + return ret; + ret = smu_adjust_power_state_dynamic(smu, level, false); + break; + case AMD_PP_TASK_COMPLETE_INIT: + case AMD_PP_TASK_READJUST_POWER_STATE: + ret = smu_adjust_power_state_dynamic(smu, level, true); + break; + default: + break; + } + + return ret; +} + +const struct amd_ip_funcs smu_ip_funcs = { + .name = "smu", + .early_init = smu_early_init, + .late_init = smu_late_init, + .sw_init = smu_sw_init, + .sw_fini = smu_sw_fini, + .hw_init = smu_hw_init, + .hw_fini = smu_hw_fini, + .suspend = smu_suspend, + .resume = smu_resume, + .is_idle = NULL, + .check_soft_reset = NULL, + .wait_for_idle = NULL, + .soft_reset = NULL, + .set_clockgating_state = smu_set_clockgating_state, + .set_powergating_state = smu_set_powergating_state, + .enable_umd_pstate = smu_enable_umd_pstate, +}; + +const struct amdgpu_ip_block_version smu_v11_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_SMC, + .major = 11, + .minor = 0, + .rev = 0, + .funcs = &smu_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile b/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile index 0b3c6d1d52e4..cc63705920dc 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile @@ -35,7 +35,8 @@ HARDWARE_MGR = hwmgr.o processpptables.o \ vega12_thermal.o \ pp_overdriver.o smu_helper.o \ vega20_processpptables.o vega20_hwmgr.o vega20_powertune.o \ - vega20_thermal.o common_baco.o vega10_baco.o vega20_baco.o + vega20_thermal.o common_baco.o vega10_baco.o vega20_baco.o \ + vega12_baco.o smu9_baco.o AMD_PP_HWMGR = $(addprefix $(AMD_PP_PATH)/hwmgr/,$(HARDWARE_MGR)) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c index c1c51c115e57..70f7f47a2fcf 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c @@ -76,7 +76,7 @@ int phm_set_power_state(struct pp_hwmgr *hwmgr, int phm_enable_dynamic_state_management(struct pp_hwmgr *hwmgr) { struct amdgpu_device *adev = NULL; - int ret = -EINVAL;; + int ret = -EINVAL; PHM_FUNC_CHECK(hwmgr); adev = hwmgr->adev; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c index 0ad8fe4a6277..9a595f7525e6 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c @@ -35,6 +35,7 @@ #include "smu10_hwmgr.h" #include "power_state.h" #include "soc15_common.h" +#include "smu10.h" #define SMU10_MAX_DEEPSLEEP_DIVIDER_ID 5 #define SMU10_MINIMUM_ENGINE_CLOCK 800 /* 8Mhz, the low boundary of engine clock allowed on this chip */ @@ -114,11 +115,6 @@ static int smu10_initialize_dpm_defaults(struct pp_hwmgr *hwmgr) smu10_data->num_active_display = 0; smu10_data->deep_sleep_dcefclk = 0; - if (hwmgr->feature_mask & PP_GFXOFF_MASK) - smu10_data->gfx_off_controled_by_driver = true; - else - smu10_data->gfx_off_controled_by_driver = false; - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep); @@ -209,18 +205,13 @@ static int smu10_set_clock_limit(struct pp_hwmgr *hwmgr, const void *input) return 0; } -static inline uint32_t convert_10k_to_mhz(uint32_t clock) -{ - return (clock + 99) / 100; -} - static int smu10_set_min_deep_sleep_dcefclk(struct pp_hwmgr *hwmgr, uint32_t clock) { struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); if (smu10_data->need_min_deep_sleep_dcefclk && - smu10_data->deep_sleep_dcefclk != convert_10k_to_mhz(clock)) { - smu10_data->deep_sleep_dcefclk = convert_10k_to_mhz(clock); + smu10_data->deep_sleep_dcefclk != clock) { + smu10_data->deep_sleep_dcefclk = clock; smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetMinDeepSleepDcefclk, smu10_data->deep_sleep_dcefclk); @@ -233,8 +224,8 @@ static int smu10_set_hard_min_dcefclk_by_freq(struct pp_hwmgr *hwmgr, uint32_t c struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); if (smu10_data->dcf_actual_hard_min_freq && - smu10_data->dcf_actual_hard_min_freq != convert_10k_to_mhz(clock)) { - smu10_data->dcf_actual_hard_min_freq = convert_10k_to_mhz(clock); + smu10_data->dcf_actual_hard_min_freq != clock) { + smu10_data->dcf_actual_hard_min_freq = clock; smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinDcefclkByFreq, smu10_data->dcf_actual_hard_min_freq); @@ -247,8 +238,8 @@ static int smu10_set_hard_min_fclk_by_freq(struct pp_hwmgr *hwmgr, uint32_t cloc struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); if (smu10_data->f_actual_hard_min_freq && - smu10_data->f_actual_hard_min_freq != convert_10k_to_mhz(clock)) { - smu10_data->f_actual_hard_min_freq = convert_10k_to_mhz(clock); + smu10_data->f_actual_hard_min_freq != clock) { + smu10_data->f_actual_hard_min_freq = clock; smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinFclkByFreq, smu10_data->f_actual_hard_min_freq); @@ -330,9 +321,9 @@ static bool smu10_is_gfx_on(struct pp_hwmgr *hwmgr) static int smu10_disable_gfx_off(struct pp_hwmgr *hwmgr) { - struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); + struct amdgpu_device *adev = hwmgr->adev; - if (smu10_data->gfx_off_controled_by_driver) { + if (adev->pm.pp_feature & PP_GFXOFF_MASK) { smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DisableGfxOff); /* confirm gfx is back to "on" state */ @@ -350,9 +341,9 @@ static int smu10_disable_dpm_tasks(struct pp_hwmgr *hwmgr) static int smu10_enable_gfx_off(struct pp_hwmgr *hwmgr) { - struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); + struct amdgpu_device *adev = hwmgr->adev; - if (smu10_data->gfx_off_controled_by_driver) + if (adev->pm.pp_feature & PP_GFXOFF_MASK) smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableGfxOff); return 0; @@ -577,7 +568,6 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, enum amd_dpm_forced_level level) { struct smu10_hwmgr *data = hwmgr->backend; - struct amdgpu_device *adev = hwmgr->adev; uint32_t min_sclk = hwmgr->display_config->min_core_set_clock; uint32_t min_mclk = hwmgr->display_config->min_mem_set_clock/100; @@ -586,11 +576,6 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, return 0; } - /* Disable UMDPSTATE support on rv2 temporarily */ - if ((adev->asic_type == CHIP_RAVEN) && - (adev->rev_id >= 8)) - return 0; - if (min_sclk < data->gfx_min_freq_limit) min_sclk = data->gfx_min_freq_limit; @@ -1205,6 +1190,94 @@ static void smu10_powergate_vcn(struct pp_hwmgr *hwmgr, bool bgate) } } +static int conv_power_profile_to_pplib_workload(int power_profile) +{ + int pplib_workload = 0; + + switch (power_profile) { + case PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT: + pplib_workload = WORKLOAD_DEFAULT_BIT; + break; + case PP_SMC_POWER_PROFILE_FULLSCREEN3D: + pplib_workload = WORKLOAD_PPLIB_FULL_SCREEN_3D_BIT; + break; + case PP_SMC_POWER_PROFILE_POWERSAVING: + pplib_workload = WORKLOAD_PPLIB_POWER_SAVING_BIT; + break; + case PP_SMC_POWER_PROFILE_VIDEO: + pplib_workload = WORKLOAD_PPLIB_VIDEO_BIT; + break; + case PP_SMC_POWER_PROFILE_VR: + pplib_workload = WORKLOAD_PPLIB_VR_BIT; + break; + case PP_SMC_POWER_PROFILE_COMPUTE: + pplib_workload = WORKLOAD_PPLIB_COMPUTE_BIT; + break; + } + + return pplib_workload; +} + +static int smu10_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) +{ + uint32_t i, size = 0; + static const uint8_t + profile_mode_setting[6][4] = {{70, 60, 0, 0,}, + {70, 60, 1, 3,}, + {90, 60, 0, 0,}, + {70, 60, 0, 0,}, + {70, 90, 0, 0,}, + {30, 60, 0, 6,}, + }; + static const char *profile_name[6] = { + "BOOTUP_DEFAULT", + "3D_FULL_SCREEN", + "POWER_SAVING", + "VIDEO", + "VR", + "COMPUTE"}; + static const char *title[6] = {"NUM", + "MODE_NAME", + "BUSY_SET_POINT", + "FPS", + "USE_RLC_BUSY", + "MIN_ACTIVE_LEVEL"}; + + if (!buf) + return -EINVAL; + + size += sprintf(buf + size, "%s %16s %s %s %s %s\n",title[0], + title[1], title[2], title[3], title[4], title[5]); + + for (i = 0; i <= PP_SMC_POWER_PROFILE_COMPUTE; i++) + size += sprintf(buf + size, "%3d %14s%s: %14d %3d %10d %14d\n", + i, profile_name[i], (i == hwmgr->power_profile_mode) ? "*" : " ", + profile_mode_setting[i][0], profile_mode_setting[i][1], + profile_mode_setting[i][2], profile_mode_setting[i][3]); + + return size; +} + +static int smu10_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, uint32_t size) +{ + int workload_type = 0; + + if (input[size] > PP_SMC_POWER_PROFILE_COMPUTE) { + pr_err("Invalid power profile mode %ld\n", input[size]); + return -EINVAL; + } + hwmgr->power_profile_mode = input[size]; + + /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ + workload_type = + conv_power_profile_to_pplib_workload(hwmgr->power_profile_mode); + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_ActiveProcessNotify, + 1 << workload_type); + + return 0; +} + + static const struct pp_hwmgr_func smu10_hwmgr_funcs = { .backend_init = smu10_hwmgr_backend_init, .backend_fini = smu10_hwmgr_backend_fini, @@ -1246,6 +1319,8 @@ static const struct pp_hwmgr_func smu10_hwmgr_funcs = { .powergate_sdma = smu10_powergate_sdma, .set_hard_min_dcefclk_by_freq = smu10_set_hard_min_dcefclk_by_freq, .set_hard_min_fclk_by_freq = smu10_set_hard_min_fclk_by_freq, + .get_power_profile_mode = smu10_get_power_profile_mode, + .set_power_profile_mode = smu10_set_power_profile_mode, }; int smu10_init_function_pointers(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 83d3d935f3ac..048757e8f494 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -77,7 +77,7 @@ #define PCIE_BUS_CLK 10000 #define TCLK (PCIE_BUS_CLK / 10) -static const struct profile_mode_setting smu7_profiling[7] = +static struct profile_mode_setting smu7_profiling[7] = {{0, 0, 0, 0, 0, 0, 0, 0}, {1, 0, 100, 30, 1, 0, 100, 10}, {1, 10, 0, 30, 0, 0, 0, 0}, @@ -4984,17 +4984,27 @@ static int smu7_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, uint mode = input[size]; switch (mode) { case PP_SMC_POWER_PROFILE_CUSTOM: - if (size < 8) + if (size < 8 && size != 0) return -EINVAL; - - tmp.bupdate_sclk = input[0]; - tmp.sclk_up_hyst = input[1]; - tmp.sclk_down_hyst = input[2]; - tmp.sclk_activity = input[3]; - tmp.bupdate_mclk = input[4]; - tmp.mclk_up_hyst = input[5]; - tmp.mclk_down_hyst = input[6]; - tmp.mclk_activity = input[7]; + /* If only CUSTOM is passed in, use the saved values. Check + * that we actually have a CUSTOM profile by ensuring that + * the "use sclk" or the "use mclk" bits are set + */ + tmp = smu7_profiling[PP_SMC_POWER_PROFILE_CUSTOM]; + if (size == 0) { + if (tmp.bupdate_sclk == 0 && tmp.bupdate_mclk == 0) + return -EINVAL; + } else { + tmp.bupdate_sclk = input[0]; + tmp.sclk_up_hyst = input[1]; + tmp.sclk_down_hyst = input[2]; + tmp.sclk_activity = input[3]; + tmp.bupdate_mclk = input[4]; + tmp.mclk_up_hyst = input[5]; + tmp.mclk_down_hyst = input[6]; + tmp.mclk_activity = input[7]; + smu7_profiling[PP_SMC_POWER_PROFILE_CUSTOM] = tmp; + } if (!smum_update_dpm_settings(hwmgr, &tmp)) { memcpy(&data->current_profile_setting, &tmp, sizeof(struct profile_mode_setting)); hwmgr->power_profile_mode = mode; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu9_baco.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu9_baco.c new file mode 100644 index 000000000000..de0a37f7c632 --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu9_baco.c @@ -0,0 +1,66 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "soc15.h" +#include "soc15_hw_ip.h" +#include "vega10_ip_offset.h" +#include "soc15_common.h" +#include "vega10_inc.h" +#include "smu9_baco.h" + +int smu9_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); + uint32_t reg, data; + + *cap = false; + if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_BACO)) + return 0; + + WREG32(0x12074, 0xFFF0003B); + data = RREG32(0x12075); + + if (data == 0x1) { + reg = RREG32_SOC15(NBIF, 0, mmRCC_BIF_STRAP0); + + if (reg & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) + *cap = true; + } + + return 0; +} + +int smu9_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); + uint32_t reg; + + reg = RREG32_SOC15(NBIF, 0, mmBACO_CNTL); + + if (reg & BACO_CNTL__BACO_MODE_MASK) + /* gfx has already entered BACO state */ + *state = BACO_STATE_IN; + else + *state = BACO_STATE_OUT; + return 0; +} diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu9_baco.h b/drivers/gpu/drm/amd/powerplay/hwmgr/smu9_baco.h new file mode 100644 index 000000000000..84e90f801ac3 --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu9_baco.h @@ -0,0 +1,31 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __SMU9_BACO_H__ +#define __SMU9_BACO_H__ +#include "hwmgr.h" +#include "common_baco.h" + +extern int smu9_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap); +extern int smu9_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state); + +#endif diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.c index 7337be5602e4..d168af4a4d78 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.c @@ -85,48 +85,11 @@ static const struct soc15_baco_cmd_entry clean_baco_tbl[] = {CMD_WRITE, SOC15_REG_ENTRY(NBIF, 0, mmBIOS_SCRATCH_7), 0, 0, 0, 0}, }; -int vega10_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); - uint32_t reg, data; - - *cap = false; - if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_BACO)) - return 0; - - WREG32(0x12074, 0xFFF0003B); - data = RREG32(0x12075); - - if (data == 0x1) { - reg = RREG32_SOC15(NBIF, 0, mmRCC_BIF_STRAP0); - - if (reg & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) - *cap = true; - } - - return 0; -} - -int vega10_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); - uint32_t reg; - - reg = RREG32_SOC15(NBIF, 0, mmBACO_CNTL); - - if (reg & BACO_CNTL__BACO_MODE_MASK) - /* gfx has already entered BACO state */ - *state = BACO_STATE_IN; - else - *state = BACO_STATE_OUT; - return 0; -} - int vega10_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state) { enum BACO_STATE cur_state; - vega10_baco_get_state(hwmgr, &cur_state); + smu9_baco_get_state(hwmgr, &cur_state); if (cur_state == state) /* aisc already in the target state */ diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.h index f7a3ffa744b3..96d793f026a5 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.h @@ -22,11 +22,8 @@ */ #ifndef __VEGA10_BACO_H__ #define __VEGA10_BACO_H__ -#include "hwmgr.h" -#include "common_baco.h" +#include "smu9_baco.h" -extern int vega10_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap); -extern int vega10_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state); extern int vega10_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state); #endif diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 5c4f701939ea..384c37875cd0 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -1427,6 +1427,15 @@ static int vega10_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) vega10_setup_default_pcie_table(hwmgr); + /* Zero out the saved copy of the CUSTOM profile + * This will be checked when trying to set the profile + * and will require that new values be passed in + */ + data->custom_profile_mode[0] = 0; + data->custom_profile_mode[1] = 0; + data->custom_profile_mode[2] = 0; + data->custom_profile_mode[3] = 0; + /* save a copy of the default DPM table */ memcpy(&(data->golden_dpm_table), &(data->dpm_table), sizeof(struct vega10_dpm_table)); @@ -4904,16 +4913,23 @@ static int vega10_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, ui uint8_t FPS; uint8_t use_rlc_busy; uint8_t min_active_level; + uint32_t power_profile_mode = input[size]; - hwmgr->power_profile_mode = input[size]; - - smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetWorkloadMask, - 1<<hwmgr->power_profile_mode); - - if (hwmgr->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { - if (size == 0 || size > 4) + if (power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { + if (size != 0 && size != 4) return -EINVAL; + /* If size = 0 and the CUSTOM profile has been set already + * then just apply the profile. The copy stored in the hwmgr + * is zeroed out on init + */ + if (size == 0) { + if (data->custom_profile_mode[0] != 0) + goto out; + else + return -EINVAL; + } + data->custom_profile_mode[0] = busy_set_point = input[0]; data->custom_profile_mode[1] = FPS = input[1]; data->custom_profile_mode[2] = use_rlc_busy = input[2]; @@ -4924,6 +4940,11 @@ static int vega10_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, ui use_rlc_busy << 16 | min_active_level<<24); } +out: + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetWorkloadMask, + 1 << power_profile_mode); + hwmgr->power_profile_mode = power_profile_mode; + return 0; } @@ -5170,8 +5191,8 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = { .set_power_limit = vega10_set_power_limit, .odn_edit_dpm_table = vega10_odn_edit_dpm_table, .get_performance_level = vega10_get_performance_level, - .get_asic_baco_capability = vega10_baco_get_capability, - .get_asic_baco_state = vega10_baco_get_state, + .get_asic_baco_capability = smu9_baco_get_capability, + .get_asic_baco_state = smu9_baco_get_state, .set_asic_baco_state = vega10_baco_set_state, .enable_mgpu_fan_boost = vega10_enable_mgpu_fan_boost, .get_ppfeature_status = vega10_get_ppfeature_status, diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_baco.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_baco.c new file mode 100644 index 000000000000..9d8ca94a8f0c --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_baco.c @@ -0,0 +1,119 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "soc15.h" +#include "soc15_hw_ip.h" +#include "vega10_ip_offset.h" +#include "soc15_common.h" +#include "vega12_inc.h" +#include "vega12_ppsmc.h" +#include "vega12_baco.h" + +static const struct soc15_baco_cmd_entry pre_baco_tbl[] = +{ + { CMD_READMODIFYWRITE, NBIF_HWID, 0, mmBIF_DOORBELL_CNTL_BASE_IDX, mmBIF_DOORBELL_CNTL, BIF_DOORBELL_CNTL__DOORBELL_MONITOR_EN_MASK, BIF_DOORBELL_CNTL__DOORBELL_MONITOR_EN__SHIFT, 0, 0 }, + { CMD_WRITE, NBIF_HWID, 0, mmBIF_FB_EN_BASE_IDX, mmBIF_FB_EN, 0, 0, 0, 0 }, + { CMD_READMODIFYWRITE, NBIF_HWID, 0, mmRCC_BACO_CNTL_MISC_BASE_IDX, mmBACO_CNTL, BACO_CNTL__BACO_DSTATE_BYPASS_MASK, BACO_CNTL__BACO_DSTATE_BYPASS__SHIFT, 0, 1 }, + { CMD_READMODIFYWRITE, NBIF_HWID, 0, mmRCC_BACO_CNTL_MISC_BASE_IDX, mmBACO_CNTL, BACO_CNTL__BACO_RST_INTR_MASK_MASK, BACO_CNTL__BACO_RST_INTR_MASK__SHIFT, 0, 1 } +}; + +static const struct soc15_baco_cmd_entry enter_baco_tbl[] = +{ + { CMD_WAITFOR, THM_HWID, 0, mmTHM_BACO_CNTL_BASE_IDX, mmTHM_BACO_CNTL, THM_BACO_CNTL__SOC_DOMAIN_IDLE_MASK, THM_BACO_CNTL__SOC_DOMAIN_IDLE__SHIFT, 0xffffffff, 0x80000000 }, + { CMD_READMODIFYWRITE, NBIF_HWID, 0, mmRCC_BACO_CNTL_MISC_BASE_IDX, mmBACO_CNTL, BACO_CNTL__BACO_EN_MASK, BACO_CNTL__BACO_EN__SHIFT, 0, 1 }, + { CMD_READMODIFYWRITE, NBIF_HWID, 0, mmRCC_BACO_CNTL_MISC_BASE_IDX, mmBACO_CNTL, BACO_CNTL__BACO_BIF_LCLK_SWITCH_MASK, BACO_CNTL__BACO_BIF_LCLK_SWITCH__SHIFT, 0, 1 }, + { CMD_READMODIFYWRITE, NBIF_HWID, 0, mmRCC_BACO_CNTL_MISC_BASE_IDX, mmBACO_CNTL, BACO_CNTL__BACO_DUMMY_EN_MASK, BACO_CNTL__BACO_DUMMY_EN__SHIFT, 0, 1 }, + { CMD_READMODIFYWRITE, THM_HWID, 0, mmTHM_BACO_CNTL_BASE_IDX, mmTHM_BACO_CNTL, THM_BACO_CNTL__BACO_SOC_VDCI_RESET_MASK, THM_BACO_CNTL__BACO_SOC_VDCI_RESET__SHIFT, 0, 1 }, + { CMD_READMODIFYWRITE, THM_HWID, 0, mmTHM_BACO_CNTL_BASE_IDX, mmTHM_BACO_CNTL, THM_BACO_CNTL__BACO_SMNCLK_MUX_MASK, THM_BACO_CNTL__BACO_SMNCLK_MUX__SHIFT, 0, 1 }, + { CMD_READMODIFYWRITE, THM_HWID, 0, mmTHM_BACO_CNTL_BASE_IDX, mmTHM_BACO_CNTL, THM_BACO_CNTL__BACO_ISO_EN_MASK, THM_BACO_CNTL__BACO_ISO_EN__SHIFT, 0, 1 }, + { CMD_READMODIFYWRITE, THM_HWID, 0, mmTHM_BACO_CNTL_BASE_IDX, mmTHM_BACO_CNTL, THM_BACO_CNTL__BACO_AEB_ISO_EN_MASK, THM_BACO_CNTL__BACO_AEB_ISO_EN__SHIFT, 0, 1 }, + { CMD_READMODIFYWRITE, THM_HWID, 0, mmTHM_BACO_CNTL_BASE_IDX, mmTHM_BACO_CNTL, THM_BACO_CNTL__BACO_ANA_ISO_EN_MASK, THM_BACO_CNTL__BACO_ANA_ISO_EN__SHIFT, 0, 1 }, + { CMD_READMODIFYWRITE, THM_HWID, 0, mmTHM_BACO_CNTL_BASE_IDX, mmTHM_BACO_CNTL, THM_BACO_CNTL__BACO_SOC_REFCLK_OFF_MASK, THM_BACO_CNTL__BACO_SOC_REFCLK_OFF__SHIFT, 0, 1 }, + { CMD_READMODIFYWRITE, NBIF_HWID, 0, mmRCC_BACO_CNTL_MISC_BASE_IDX, mmBACO_CNTL, BACO_CNTL__BACO_POWER_OFF_MASK, BACO_CNTL__BACO_POWER_OFF__SHIFT, 0, 1 }, + { CMD_DELAY_MS, 0, 0, 0, 5, 0 }, + { CMD_READMODIFYWRITE, THM_HWID, 0, mmTHM_BACO_CNTL_BASE_IDX, mmTHM_BACO_CNTL, THM_BACO_CNTL__BACO_RESET_EN_MASK, THM_BACO_CNTL__BACO_RESET_EN__SHIFT, 0, 1 }, + { CMD_READMODIFYWRITE, THM_HWID, 0, mmTHM_BACO_CNTL_BASE_IDX, mmTHM_BACO_CNTL, THM_BACO_CNTL__BACO_PWROKRAW_CNTL_MASK, THM_BACO_CNTL__BACO_PWROKRAW_CNTL__SHIFT, 0, 0 }, + { CMD_WAITFOR, NBIF_HWID, 0, mmRCC_BACO_CNTL_MISC_BASE_IDX, mmBACO_CNTL, BACO_CNTL__BACO_MODE_MASK, BACO_CNTL__BACO_MODE__SHIFT, 0xffffffff, 0x100 } +}; + +static const struct soc15_baco_cmd_entry exit_baco_tbl[] = +{ + { CMD_READMODIFYWRITE, NBIF_HWID, 0, mmRCC_BACO_CNTL_MISC_BASE_IDX, mmBACO_CNTL, BACO_CNTL__BACO_POWER_OFF_MASK, BACO_CNTL__BACO_POWER_OFF__SHIFT, 0, 0 }, + { CMD_DELAY_MS, 0, 0, 0, 0, 0, 0, 10, 0 }, + { CMD_READMODIFYWRITE, THM_HWID, 0, mmTHM_BACO_CNTL_BASE_IDX, mmTHM_BACO_CNTL, THM_BACO_CNTL__BACO_SOC_REFCLK_OFF_MASK, THM_BACO_CNTL__BACO_SOC_REFCLK_OFF__SHIFT, 0, 0 }, + { CMD_READMODIFYWRITE, THM_HWID, 0, mmTHM_BACO_CNTL_BASE_IDX, mmTHM_BACO_CNTL, THM_BACO_CNTL__BACO_ANA_ISO_EN_MASK, THM_BACO_CNTL__BACO_ANA_ISO_EN__SHIFT, 0, 0 }, + { CMD_READMODIFYWRITE, THM_HWID, 0, mmTHM_BACO_CNTL_BASE_IDX, mmTHM_BACO_CNTL, THM_BACO_CNTL__BACO_AEB_ISO_EN_MASK, THM_BACO_CNTL__BACO_AEB_ISO_EN__SHIFT, 0, 0 }, + { CMD_READMODIFYWRITE, THM_HWID, 0, mmTHM_BACO_CNTL_BASE_IDX, mmTHM_BACO_CNTL, THM_BACO_CNTL__BACO_ISO_EN_MASK, THM_BACO_CNTL__BACO_ISO_EN__SHIFT, 0, 0 }, + { CMD_READMODIFYWRITE, THM_HWID, 0, mmTHM_BACO_CNTL_BASE_IDX, mmTHM_BACO_CNTL, THM_BACO_CNTL__BACO_PWROKRAW_CNTL_MASK, THM_BACO_CNTL__BACO_PWROKRAW_CNTL__SHIFT, 0, 1 }, + { CMD_READMODIFYWRITE, THM_HWID, 0, mmTHM_BACO_CNTL_BASE_IDX, mmTHM_BACO_CNTL, THM_BACO_CNTL__BACO_SMNCLK_MUX_MASK, THM_BACO_CNTL__BACO_SMNCLK_MUX__SHIFT, 0, 0 }, + { CMD_READMODIFYWRITE, THM_HWID, 0, mmTHM_BACO_CNTL_BASE_IDX, mmTHM_BACO_CNTL, THM_BACO_CNTL__BACO_SOC_VDCI_RESET_MASK, THM_BACO_CNTL__BACO_SOC_VDCI_RESET__SHIFT, 0, 0 }, + { CMD_READMODIFYWRITE, THM_HWID, 0, mmTHM_BACO_CNTL_BASE_IDX, mmTHM_BACO_CNTL, THM_BACO_CNTL__BACO_EXIT_MASK, THM_BACO_CNTL__BACO_EXIT__SHIFT, 0, 1 }, + { CMD_READMODIFYWRITE, THM_HWID, 0, mmTHM_BACO_CNTL_BASE_IDX, mmTHM_BACO_CNTL, THM_BACO_CNTL__BACO_RESET_EN_MASK, THM_BACO_CNTL__BACO_RESET_EN__SHIFT, 0, 0 }, + { CMD_WAITFOR, THM_HWID, 0, mmTHM_BACO_CNTL_BASE_IDX, mmTHM_BACO_CNTL, THM_BACO_CNTL__BACO_EXIT_MASK, 0, 0xffffffff, 0 }, + { CMD_READMODIFYWRITE, THM_HWID, 0, mmTHM_BACO_CNTL_BASE_IDX, mmTHM_BACO_CNTL, THM_BACO_CNTL__BACO_SB_AXI_FENCE_MASK, THM_BACO_CNTL__BACO_SB_AXI_FENCE__SHIFT, 0, 0 }, + { CMD_READMODIFYWRITE, NBIF_HWID, 0, mmRCC_BACO_CNTL_MISC_BASE_IDX, mmBACO_CNTL, BACO_CNTL__BACO_DUMMY_EN_MASK, BACO_CNTL__BACO_DUMMY_EN__SHIFT, 0, 0 }, + { CMD_READMODIFYWRITE, NBIF_HWID, 0, mmRCC_BACO_CNTL_MISC_BASE_IDX, mmBACO_CNTL, BACO_CNTL__BACO_BIF_LCLK_SWITCH_MASK, BACO_CNTL__BACO_BIF_LCLK_SWITCH__SHIFT, 0, 0 }, + { CMD_READMODIFYWRITE, NBIF_HWID, 0, mmRCC_BACO_CNTL_MISC_BASE_IDX, mmBACO_CNTL, BACO_CNTL__BACO_EN_MASK, BACO_CNTL__BACO_EN__SHIFT, 0, 0 }, + { CMD_WAITFOR, NBIF_HWID, 0, mmRCC_BACO_CNTL_MISC_BASE_IDX, mmBACO_CNTL, BACO_CNTL__BACO_MODE_MASK, 0, 0xffffffff, 0 } +}; + +static const struct soc15_baco_cmd_entry clean_baco_tbl[] = +{ + { CMD_WRITE, NBIF_HWID, 0, mmBIOS_SCRATCH_6_BASE_IDX, mmBIOS_SCRATCH_6, 0, 0, 0, 0 }, + { CMD_WRITE, NBIF_HWID, 0, mmBIOS_SCRATCH_7_BASE_IDX, mmBIOS_SCRATCH_7, 0, 0, 0, 0 } +}; + +int vega12_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state) +{ + enum BACO_STATE cur_state; + + smu9_baco_get_state(hwmgr, &cur_state); + + if (cur_state == state) + /* aisc already in the target state */ + return 0; + + if (state == BACO_STATE_IN) { + if (soc15_baco_program_registers(hwmgr, pre_baco_tbl, + ARRAY_SIZE(pre_baco_tbl))) { + if (smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_EnterBaco, 0)) + return -EINVAL; + + if (soc15_baco_program_registers(hwmgr, enter_baco_tbl, + ARRAY_SIZE(enter_baco_tbl))) + return 0; + } + } else if (state == BACO_STATE_OUT) { + /* HW requires at least 20ms between regulator off and on */ + msleep(20); + /* Execute Hardware BACO exit sequence */ + if (soc15_baco_program_registers(hwmgr, exit_baco_tbl, + ARRAY_SIZE(exit_baco_tbl))) { + if (soc15_baco_program_registers(hwmgr, clean_baco_tbl, + ARRAY_SIZE(clean_baco_tbl))) + return 0; + } + } + + return -EINVAL; +} diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_baco.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_baco.h new file mode 100644 index 000000000000..57b72e5a95ae --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_baco.h @@ -0,0 +1,29 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __VEGA12_BACO_H__ +#define __VEGA12_BACO_H__ +#include "smu9_baco.h" + +extern int vega12_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state); + +#endif diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c index bdb48e94eff6..707cd4b0357f 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c @@ -45,6 +45,7 @@ #include "ppinterrupt.h" #include "pp_overdriver.h" #include "pp_thermal.h" +#include "vega12_baco.h" static int vega12_force_clock_level(struct pp_hwmgr *hwmgr, @@ -2626,8 +2627,12 @@ static const struct pp_hwmgr_func vega12_hwmgr_funcs = { .start_thermal_controller = vega12_start_thermal_controller, .powergate_gfx = vega12_gfx_off_control, .get_performance_level = vega12_get_performance_level, + .get_asic_baco_capability = smu9_baco_get_capability, + .get_asic_baco_state = smu9_baco_get_state, + .set_asic_baco_state = vega12_baco_set_state, .get_ppfeature_status = vega12_get_ppfeature_status, .set_ppfeature_status = vega12_set_ppfeature_status, + }; int vega12_hwmgr_init(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_inc.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_inc.h index 30b278c50222..e6d9e84059e1 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_inc.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_inc.h @@ -35,5 +35,7 @@ #include "asic_reg/gc/gc_9_2_1_sh_mask.h" #include "asic_reg/nbio/nbio_6_1_offset.h" +#include "asic_reg/nbio/nbio_6_1_offset.h" +#include "asic_reg/nbio/nbio_6_1_sh_mask.h" #endif diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c index 5e8602a79b1c..df6ff9252401 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c @@ -27,6 +27,7 @@ #include "vega20_inc.h" #include "vega20_ppsmc.h" #include "vega20_baco.h" +#include "vega20_smumgr.h" @@ -101,3 +102,14 @@ int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state) return 0; } + +int vega20_baco_apply_vdci_flush_workaround(struct pp_hwmgr *hwmgr) +{ + int ret = 0; + + ret = vega20_set_pptable_driver_address(hwmgr); + if (ret) + return ret; + + return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_BacoWorkAroundFlushVDCI); +} diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.h index 51c7f8392925..f06471e712dc 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.h @@ -28,5 +28,6 @@ extern int vega20_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap); extern int vega20_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state); extern int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state); +extern int vega20_baco_apply_vdci_flush_workaround(struct pp_hwmgr *hwmgr); #endif diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 23b5b94a4939..9b9f87b84910 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -434,6 +434,7 @@ static int vega20_hwmgr_backend_init(struct pp_hwmgr *hwmgr) hwmgr->platform_descriptor.clockStep.memoryClock = 500; data->total_active_cus = adev->gfx.cu_info.number; + data->is_custom_profile_set = false; return 0; } @@ -450,6 +451,7 @@ static int vega20_init_sclk_threshold(struct pp_hwmgr *hwmgr) static int vega20_setup_asic_task(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); int ret = 0; ret = vega20_init_sclk_threshold(hwmgr); @@ -457,7 +459,15 @@ static int vega20_setup_asic_task(struct pp_hwmgr *hwmgr) "Failed to init sclk threshold!", return ret); - return 0; + if (adev->in_baco_reset) { + adev->in_baco_reset = 0; + + ret = vega20_baco_apply_vdci_flush_workaround(hwmgr); + if (ret) + pr_err("Failed to apply vega20 baco workaround!\n"); + } + + return ret; } /* @@ -3450,7 +3460,18 @@ static void vega20_power_gate_vce(struct pp_hwmgr *hwmgr, bool bgate) return ; data->vce_power_gated = bgate; - vega20_enable_disable_vce_dpm(hwmgr, !bgate); + if (bgate) { + vega20_enable_disable_vce_dpm(hwmgr, !bgate); + amdgpu_device_ip_set_powergating_state(hwmgr->adev, + AMD_IP_BLOCK_TYPE_VCE, + AMD_PG_STATE_GATE); + } else { + amdgpu_device_ip_set_powergating_state(hwmgr->adev, + AMD_IP_BLOCK_TYPE_VCE, + AMD_PG_STATE_UNGATE); + vega20_enable_disable_vce_dpm(hwmgr, !bgate); + } + } static void vega20_power_gate_uvd(struct pp_hwmgr *hwmgr, bool bgate) @@ -3826,16 +3847,19 @@ static int vega20_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, ui { DpmActivityMonitorCoeffInt_t activity_monitor; int workload_type, result = 0; + uint32_t power_profile_mode = input[size]; - hwmgr->power_profile_mode = input[size]; - - if (hwmgr->power_profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) { - pr_err("Invalid power profile mode %d\n", hwmgr->power_profile_mode); + if (power_profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) { + pr_err("Invalid power profile mode %d\n", power_profile_mode); return -EINVAL; } - if (hwmgr->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { - if (size < 10) + if (power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { + struct vega20_hwmgr *data = + (struct vega20_hwmgr *)(hwmgr->backend); + if (size == 0 && !data->is_custom_profile_set) + return -EINVAL; + if (size < 10 && size != 0) return -EINVAL; result = vega20_get_activity_monitor_coeff(hwmgr, @@ -3845,6 +3869,13 @@ static int vega20_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, ui "[SetPowerProfile] Failed to get activity monitor!", return result); + /* If size==0, then we want to apply the already-configured + * CUSTOM profile again. Just apply it, since we checked its + * validity above + */ + if (size == 0) + goto out; + switch (input[0]) { case 0: /* Gfxclk */ activity_monitor.Gfx_FPS = input[1]; @@ -3895,17 +3926,21 @@ static int vega20_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, ui result = vega20_set_activity_monitor_coeff(hwmgr, (uint8_t *)(&activity_monitor), WORKLOAD_PPLIB_CUSTOM_BIT); + data->is_custom_profile_set = true; PP_ASSERT_WITH_CODE(!result, "[SetPowerProfile] Failed to set activity monitor!", return result); } +out: /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ workload_type = - conv_power_profile_to_pplib_workload(hwmgr->power_profile_mode); + conv_power_profile_to_pplib_workload(power_profile_mode); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetWorkloadMask, 1 << workload_type); + hwmgr->power_profile_mode = power_profile_mode; + return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h index ac2a3118a0ae..2c3125f82b24 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h @@ -531,6 +531,8 @@ struct vega20_hwmgr { bool pcie_parameters_override; uint32_t pcie_gen_level1; uint32_t pcie_width_level1; + + bool is_custom_profile_set; }; #define VEGA20_DPM2_NEAR_TDP_DEC 10 diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h new file mode 100644 index 000000000000..c8b168b3413b --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -0,0 +1,770 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef __AMDGPU_SMU_H__ +#define __AMDGPU_SMU_H__ + +#include "amdgpu.h" +#include "kgd_pp_interface.h" +#include "dm_pp_interface.h" + +struct smu_hw_power_state { + unsigned int magic; +}; + +struct smu_power_state; + +enum smu_state_ui_label { + SMU_STATE_UI_LABEL_NONE, + SMU_STATE_UI_LABEL_BATTERY, + SMU_STATE_UI_TABEL_MIDDLE_LOW, + SMU_STATE_UI_LABEL_BALLANCED, + SMU_STATE_UI_LABEL_MIDDLE_HIGHT, + SMU_STATE_UI_LABEL_PERFORMANCE, + SMU_STATE_UI_LABEL_BACO, +}; + +enum smu_state_classification_flag { + SMU_STATE_CLASSIFICATION_FLAG_BOOT = 0x0001, + SMU_STATE_CLASSIFICATION_FLAG_THERMAL = 0x0002, + SMU_STATE_CLASSIFICATIN_FLAG_LIMITED_POWER_SOURCE = 0x0004, + SMU_STATE_CLASSIFICATION_FLAG_RESET = 0x0008, + SMU_STATE_CLASSIFICATION_FLAG_FORCED = 0x0010, + SMU_STATE_CLASSIFICATION_FLAG_USER_3D_PERFORMANCE = 0x0020, + SMU_STATE_CLASSIFICATION_FLAG_USER_2D_PERFORMANCE = 0x0040, + SMU_STATE_CLASSIFICATION_FLAG_3D_PERFORMANCE = 0x0080, + SMU_STATE_CLASSIFICATION_FLAG_AC_OVERDIRVER_TEMPLATE = 0x0100, + SMU_STATE_CLASSIFICATION_FLAG_UVD = 0x0200, + SMU_STATE_CLASSIFICATION_FLAG_3D_PERFORMANCE_LOW = 0x0400, + SMU_STATE_CLASSIFICATION_FLAG_ACPI = 0x0800, + SMU_STATE_CLASSIFICATION_FLAG_HD2 = 0x1000, + SMU_STATE_CLASSIFICATION_FLAG_UVD_HD = 0x2000, + SMU_STATE_CLASSIFICATION_FLAG_UVD_SD = 0x4000, + SMU_STATE_CLASSIFICATION_FLAG_USER_DC_PERFORMANCE = 0x8000, + SMU_STATE_CLASSIFICATION_FLAG_DC_OVERDIRVER_TEMPLATE = 0x10000, + SMU_STATE_CLASSIFICATION_FLAG_BACO = 0x20000, + SMU_STATE_CLASSIFICATIN_FLAG_LIMITED_POWER_SOURCE2 = 0x40000, + SMU_STATE_CLASSIFICATION_FLAG_ULV = 0x80000, + SMU_STATE_CLASSIFICATION_FLAG_UVD_MVC = 0x100000, +}; + +struct smu_state_classification_block { + enum smu_state_ui_label ui_label; + enum smu_state_classification_flag flags; + int bios_index; + bool temporary_state; + bool to_be_deleted; +}; + +struct smu_state_pcie_block { + unsigned int lanes; +}; + +enum smu_refreshrate_source { + SMU_REFRESHRATE_SOURCE_EDID, + SMU_REFRESHRATE_SOURCE_EXPLICIT +}; + +struct smu_state_display_block { + bool disable_frame_modulation; + bool limit_refreshrate; + enum smu_refreshrate_source refreshrate_source; + int explicit_refreshrate; + int edid_refreshrate_index; + bool enable_vari_bright; +}; + +struct smu_state_memroy_block { + bool dll_off; + uint8_t m3arb; + uint8_t unused[3]; +}; + +struct smu_state_software_algorithm_block { + bool disable_load_balancing; + bool enable_sleep_for_timestamps; +}; + +struct smu_temperature_range { + int min; + int max; +}; + +struct smu_state_validation_block { + bool single_display_only; + bool disallow_on_dc; + uint8_t supported_power_levels; +}; + +struct smu_uvd_clocks { + uint32_t vclk; + uint32_t dclk; +}; + +/** +* Structure to hold a SMU Power State. +*/ +struct smu_power_state { + uint32_t id; + struct list_head ordered_list; + struct list_head all_states_list; + + struct smu_state_classification_block classification; + struct smu_state_validation_block validation; + struct smu_state_pcie_block pcie; + struct smu_state_display_block display; + struct smu_state_memroy_block memory; + struct smu_temperature_range temperatures; + struct smu_state_software_algorithm_block software; + struct smu_uvd_clocks uvd_clocks; + struct smu_hw_power_state hardware; +}; + +enum smu_message_type +{ + SMU_MSG_TestMessage = 0, + SMU_MSG_GetSmuVersion, + SMU_MSG_GetDriverIfVersion, + SMU_MSG_SetAllowedFeaturesMaskLow, + SMU_MSG_SetAllowedFeaturesMaskHigh, + SMU_MSG_EnableAllSmuFeatures, + SMU_MSG_DisableAllSmuFeatures, + SMU_MSG_EnableSmuFeaturesLow, + SMU_MSG_EnableSmuFeaturesHigh, + SMU_MSG_DisableSmuFeaturesLow, + SMU_MSG_DisableSmuFeaturesHigh, + SMU_MSG_GetEnabledSmuFeaturesLow, + SMU_MSG_GetEnabledSmuFeaturesHigh, + SMU_MSG_SetWorkloadMask, + SMU_MSG_SetPptLimit, + SMU_MSG_SetDriverDramAddrHigh, + SMU_MSG_SetDriverDramAddrLow, + SMU_MSG_SetToolsDramAddrHigh, + SMU_MSG_SetToolsDramAddrLow, + SMU_MSG_TransferTableSmu2Dram, + SMU_MSG_TransferTableDram2Smu, + SMU_MSG_UseDefaultPPTable, + SMU_MSG_UseBackupPPTable, + SMU_MSG_RunBtc, + SMU_MSG_RequestI2CBus, + SMU_MSG_ReleaseI2CBus, + SMU_MSG_SetFloorSocVoltage, + SMU_MSG_SoftReset, + SMU_MSG_StartBacoMonitor, + SMU_MSG_CancelBacoMonitor, + SMU_MSG_EnterBaco, + SMU_MSG_SetSoftMinByFreq, + SMU_MSG_SetSoftMaxByFreq, + SMU_MSG_SetHardMinByFreq, + SMU_MSG_SetHardMaxByFreq, + SMU_MSG_GetMinDpmFreq, + SMU_MSG_GetMaxDpmFreq, + SMU_MSG_GetDpmFreqByIndex, + SMU_MSG_GetDpmClockFreq, + SMU_MSG_GetSsVoltageByDpm, + SMU_MSG_SetMemoryChannelConfig, + SMU_MSG_SetGeminiMode, + SMU_MSG_SetGeminiApertureHigh, + SMU_MSG_SetGeminiApertureLow, + SMU_MSG_SetMinLinkDpmByIndex, + SMU_MSG_OverridePcieParameters, + SMU_MSG_OverDriveSetPercentage, + SMU_MSG_SetMinDeepSleepDcefclk, + SMU_MSG_ReenableAcDcInterrupt, + SMU_MSG_NotifyPowerSource, + SMU_MSG_SetUclkFastSwitch, + SMU_MSG_SetUclkDownHyst, + SMU_MSG_GfxDeviceDriverReset, + SMU_MSG_GetCurrentRpm, + SMU_MSG_SetVideoFps, + SMU_MSG_SetTjMax, + SMU_MSG_SetFanTemperatureTarget, + SMU_MSG_PrepareMp1ForUnload, + SMU_MSG_DramLogSetDramAddrHigh, + SMU_MSG_DramLogSetDramAddrLow, + SMU_MSG_DramLogSetDramSize, + SMU_MSG_SetFanMaxRpm, + SMU_MSG_SetFanMinPwm, + SMU_MSG_ConfigureGfxDidt, + SMU_MSG_NumOfDisplays, + SMU_MSG_RemoveMargins, + SMU_MSG_ReadSerialNumTop32, + SMU_MSG_ReadSerialNumBottom32, + SMU_MSG_SetSystemVirtualDramAddrHigh, + SMU_MSG_SetSystemVirtualDramAddrLow, + SMU_MSG_WaflTest, + SMU_MSG_SetFclkGfxClkRatio, + SMU_MSG_AllowGfxOff, + SMU_MSG_DisallowGfxOff, + SMU_MSG_GetPptLimit, + SMU_MSG_GetDcModeMaxDpmFreq, + SMU_MSG_GetDebugData, + SMU_MSG_SetXgmiMode, + SMU_MSG_RunAfllBtc, + SMU_MSG_ExitBaco, + SMU_MSG_PrepareMp1ForReset, + SMU_MSG_PrepareMp1ForShutdown, + SMU_MSG_SetMGpuFanBoostLimitRpm, + SMU_MSG_GetAVFSVoltageByDpm, + SMU_MSG_MAX_COUNT, +}; + +enum smu_memory_pool_size +{ + SMU_MEMORY_POOL_SIZE_ZERO = 0, + SMU_MEMORY_POOL_SIZE_256_MB = 0x10000000, + SMU_MEMORY_POOL_SIZE_512_MB = 0x20000000, + SMU_MEMORY_POOL_SIZE_1_GB = 0x40000000, + SMU_MEMORY_POOL_SIZE_2_GB = 0x80000000, +}; + +#define SMU_TABLE_INIT(tables, table_id, s, a, d) \ + do { \ + tables[table_id].size = s; \ + tables[table_id].align = a; \ + tables[table_id].domain = d; \ + } while (0) + +struct smu_table { + uint64_t size; + uint32_t align; + uint8_t domain; + uint64_t mc_address; + void *cpu_addr; + struct amdgpu_bo *bo; +}; + +enum smu_perf_level_designation { + PERF_LEVEL_ACTIVITY, + PERF_LEVEL_POWER_CONTAINMENT, +}; + +struct smu_performance_level { + uint32_t core_clock; + uint32_t memory_clock; + uint32_t vddc; + uint32_t vddci; + uint32_t non_local_mem_freq; + uint32_t non_local_mem_width; +}; + +struct smu_clock_info { + uint32_t min_mem_clk; + uint32_t max_mem_clk; + uint32_t min_eng_clk; + uint32_t max_eng_clk; + uint32_t min_bus_bandwidth; + uint32_t max_bus_bandwidth; +}; + +struct smu_bios_boot_up_values +{ + uint32_t revision; + uint32_t gfxclk; + uint32_t uclk; + uint32_t socclk; + uint32_t dcefclk; + uint32_t eclk; + uint32_t vclk; + uint32_t dclk; + uint16_t vddc; + uint16_t vddci; + uint16_t mvddc; + uint16_t vdd_gfx; + uint8_t cooling_id; + uint32_t pp_table_id; +}; + +struct smu_table_context +{ + void *power_play_table; + uint32_t power_play_table_size; + void *hardcode_pptable; + + void *max_sustainable_clocks; + struct smu_bios_boot_up_values boot_values; + void *driver_pptable; + struct smu_table *tables; + uint32_t table_count; + struct smu_table memory_pool; + uint16_t software_shutdown_temp; + uint8_t thermal_controller_type; + uint16_t TDPODLimit; + + uint8_t *od_feature_capabilities; + uint32_t *od_settings_max; + uint32_t *od_settings_min; + void *overdrive_table; + void *od8_settings; + bool od_gfxclk_update; + bool od_memclk_update; +}; + +struct smu_dpm_context { + uint32_t dpm_context_size; + void *dpm_context; + void *golden_dpm_context; + bool enable_umd_pstate; + enum amd_dpm_forced_level dpm_level; + enum amd_dpm_forced_level saved_dpm_level; + enum amd_dpm_forced_level requested_dpm_level; + struct smu_power_state *dpm_request_power_state; + struct smu_power_state *dpm_current_power_state; + struct mclock_latency_table *mclk_latency_table; +}; + +struct smu_power_context { + void *power_context; + uint32_t power_context_size; +}; + + +#define SMU_FEATURE_MAX (64) +struct smu_feature +{ + uint32_t feature_num; + DECLARE_BITMAP(supported, SMU_FEATURE_MAX); + DECLARE_BITMAP(allowed, SMU_FEATURE_MAX); + DECLARE_BITMAP(enabled, SMU_FEATURE_MAX); + struct mutex mutex; +}; + +struct smu_clocks { + uint32_t engine_clock; + uint32_t memory_clock; + uint32_t bus_bandwidth; + uint32_t engine_clock_in_sr; + uint32_t dcef_clock; + uint32_t dcef_clock_in_sr; +}; + +#define MAX_REGULAR_DPM_NUM 16 +struct mclk_latency_entries { + uint32_t frequency; + uint32_t latency; +}; +struct mclock_latency_table { + uint32_t count; + struct mclk_latency_entries entries[MAX_REGULAR_DPM_NUM]; +}; + +#define WORKLOAD_POLICY_MAX 7 +struct smu_context +{ + struct amdgpu_device *adev; + + const struct smu_funcs *funcs; + const struct pptable_funcs *ppt_funcs; + struct mutex mutex; + uint64_t pool_size; + + struct smu_table_context smu_table; + struct smu_dpm_context smu_dpm; + struct smu_power_context smu_power; + struct smu_feature smu_feature; + struct amd_pp_display_configuration *display_config; + + uint32_t pstate_sclk; + uint32_t pstate_mclk; + + bool od_enabled; + uint32_t power_limit; + uint32_t default_power_limit; + + bool support_power_containment; + bool disable_watermark; + +#define WATERMARKS_EXIST (1 << 0) +#define WATERMARKS_LOADED (1 << 1) + uint32_t watermarks_bitmap; + + uint32_t workload_mask; + uint32_t workload_prority[WORKLOAD_POLICY_MAX]; + uint32_t workload_setting[WORKLOAD_POLICY_MAX]; + uint32_t power_profile_mode; + uint32_t default_power_profile_mode; + + uint32_t smc_if_version; +}; + +struct pptable_funcs { + int (*alloc_dpm_context)(struct smu_context *smu); + int (*store_powerplay_table)(struct smu_context *smu); + int (*check_powerplay_table)(struct smu_context *smu); + int (*append_powerplay_table)(struct smu_context *smu); + int (*get_smu_msg_index)(struct smu_context *smu, uint32_t index); + int (*run_afll_btc)(struct smu_context *smu); + int (*get_unallowed_feature_mask)(struct smu_context *smu, uint32_t *feature_mask, uint32_t num); + enum amd_pm_state_type (*get_current_power_state)(struct smu_context *smu); + int (*set_default_dpm_table)(struct smu_context *smu); + int (*set_power_state)(struct smu_context *smu); + int (*populate_umd_state_clk)(struct smu_context *smu); + int (*print_clk_levels)(struct smu_context *smu, enum pp_clock_type type, char *buf); + int (*force_clk_levels)(struct smu_context *smu, enum pp_clock_type type, uint32_t mask); + int (*set_default_od8_settings)(struct smu_context *smu); + int (*update_specified_od8_value)(struct smu_context *smu, + uint32_t index, + uint32_t value); + int (*get_od_percentage)(struct smu_context *smu, enum pp_clock_type type); + int (*set_od_percentage)(struct smu_context *smu, + enum pp_clock_type type, + uint32_t value); + int (*od_edit_dpm_table)(struct smu_context *smu, + enum PP_OD_DPM_TABLE_COMMAND type, + long *input, uint32_t size); + int (*get_clock_by_type_with_latency)(struct smu_context *smu, + enum amd_pp_clock_type type, + struct + pp_clock_levels_with_latency + *clocks); + int (*get_clock_by_type_with_voltage)(struct smu_context *smu, + enum amd_pp_clock_type type, + struct + pp_clock_levels_with_voltage + *clocks); + int (*get_power_profile_mode)(struct smu_context *smu, char *buf); + int (*set_power_profile_mode)(struct smu_context *smu, long *input, uint32_t size); + enum amd_dpm_forced_level (*get_performance_level)(struct smu_context *smu); + int (*force_performance_level)(struct smu_context *smu, enum amd_dpm_forced_level level); + int (*pre_display_config_changed)(struct smu_context *smu); + int (*display_config_changed)(struct smu_context *smu); + int (*apply_clocks_adjust_rules)(struct smu_context *smu); + int (*notify_smc_dispaly_config)(struct smu_context *smu); + int (*force_dpm_limit_value)(struct smu_context *smu, bool highest); + int (*unforce_dpm_levels)(struct smu_context *smu); + int (*upload_dpm_level)(struct smu_context *smu, bool max, + uint32_t feature_mask); + int (*get_profiling_clk_mask)(struct smu_context *smu, + enum amd_dpm_forced_level level, + uint32_t *sclk_mask, + uint32_t *mclk_mask, + uint32_t *soc_mask); + int (*set_cpu_power_state)(struct smu_context *smu); +}; + +struct smu_funcs +{ + int (*init_microcode)(struct smu_context *smu); + int (*init_smc_tables)(struct smu_context *smu); + int (*fini_smc_tables)(struct smu_context *smu); + int (*init_power)(struct smu_context *smu); + int (*fini_power)(struct smu_context *smu); + int (*load_microcode)(struct smu_context *smu); + int (*check_fw_status)(struct smu_context *smu); + int (*read_pptable_from_vbios)(struct smu_context *smu); + int (*get_vbios_bootup_values)(struct smu_context *smu); + int (*get_clk_info_from_vbios)(struct smu_context *smu); + int (*check_pptable)(struct smu_context *smu); + int (*parse_pptable)(struct smu_context *smu); + int (*populate_smc_pptable)(struct smu_context *smu); + int (*check_fw_version)(struct smu_context *smu); + int (*write_pptable)(struct smu_context *smu); + int (*set_min_dcef_deep_sleep)(struct smu_context *smu); + int (*set_tool_table_location)(struct smu_context *smu); + int (*notify_memory_pool_location)(struct smu_context *smu); + int (*write_watermarks_table)(struct smu_context *smu); + int (*set_last_dcef_min_deep_sleep_clk)(struct smu_context *smu); + int (*system_features_control)(struct smu_context *smu, bool en); + int (*send_smc_msg)(struct smu_context *smu, uint16_t msg); + int (*send_smc_msg_with_param)(struct smu_context *smu, uint16_t msg, uint32_t param); + int (*read_smc_arg)(struct smu_context *smu, uint32_t *arg); + int (*init_display)(struct smu_context *smu); + int (*set_allowed_mask)(struct smu_context *smu); + int (*get_enabled_mask)(struct smu_context *smu, uint32_t *feature_mask, uint32_t num); + bool (*is_dpm_running)(struct smu_context *smu); + int (*update_feature_enable_state)(struct smu_context *smu, uint32_t feature_id, bool enabled); + int (*notify_display_change)(struct smu_context *smu); + int (*get_power_limit)(struct smu_context *smu, uint32_t *limit, bool def); + int (*set_power_limit)(struct smu_context *smu, uint32_t n); + int (*get_current_clk_freq)(struct smu_context *smu, uint32_t clk_id, uint32_t *value); + int (*init_max_sustainable_clocks)(struct smu_context *smu); + int (*start_thermal_control)(struct smu_context *smu); + int (*read_sensor)(struct smu_context *smu, enum amd_pp_sensors sensor, + void *data, uint32_t *size); + int (*set_deep_sleep_dcefclk)(struct smu_context *smu, uint32_t clk); + int (*set_active_display_count)(struct smu_context *smu, uint32_t count); + int (*store_cc6_data)(struct smu_context *smu, uint32_t separation_time, + bool cc6_disable, bool pstate_disable, + bool pstate_switch_disable); + int (*get_clock_by_type)(struct smu_context *smu, + enum amd_pp_clock_type type, + struct amd_pp_clocks *clocks); + int (*get_max_high_clocks)(struct smu_context *smu, + struct amd_pp_simple_clock_info *clocks); + int (*display_clock_voltage_request)(struct smu_context *smu, struct + pp_display_clock_request + *clock_req); + int (*get_dal_power_level)(struct smu_context *smu, + struct amd_pp_simple_clock_info *clocks); + int (*get_perf_level)(struct smu_context *smu, + enum smu_perf_level_designation designation, + struct smu_performance_level *level); + int (*get_current_shallow_sleep_clocks)(struct smu_context *smu, + struct smu_clock_info *clocks); + int (*notify_smu_enable_pwe)(struct smu_context *smu); + int (*set_watermarks_for_clock_ranges)(struct smu_context *smu, + struct dm_pp_wm_sets_with_clock_ranges_soc15 *clock_ranges); + int (*set_od8_default_settings)(struct smu_context *smu, + bool initialize); + int (*conv_power_profile_to_pplib_workload)(int power_profile); + int (*get_power_profile_mode)(struct smu_context *smu, char *buf); + int (*set_power_profile_mode)(struct smu_context *smu, long *input, uint32_t size); + int (*update_od8_settings)(struct smu_context *smu, + uint32_t index, + uint32_t value); + int (*dpm_set_uvd_enable)(struct smu_context *smu, bool enable); + int (*dpm_set_vce_enable)(struct smu_context *smu, bool enable); + uint32_t (*get_sclk)(struct smu_context *smu, bool low); + uint32_t (*get_mclk)(struct smu_context *smu, bool low); + int (*get_current_rpm)(struct smu_context *smu, uint32_t *speed); + uint32_t (*get_fan_control_mode)(struct smu_context *smu); + int (*set_fan_control_mode)(struct smu_context *smu, uint32_t mode); + int (*get_fan_speed_percent)(struct smu_context *smu, uint32_t *speed); + int (*set_fan_speed_percent)(struct smu_context *smu, uint32_t speed); + int (*set_fan_speed_rpm)(struct smu_context *smu, uint32_t speed); + int (*set_xgmi_pstate)(struct smu_context *smu, uint32_t pstate); + +}; + +#define smu_init_microcode(smu) \ + ((smu)->funcs->init_microcode ? (smu)->funcs->init_microcode((smu)) : 0) +#define smu_init_smc_tables(smu) \ + ((smu)->funcs->init_smc_tables ? (smu)->funcs->init_smc_tables((smu)) : 0) +#define smu_fini_smc_tables(smu) \ + ((smu)->funcs->fini_smc_tables ? (smu)->funcs->fini_smc_tables((smu)) : 0) +#define smu_init_power(smu) \ + ((smu)->funcs->init_power ? (smu)->funcs->init_power((smu)) : 0) +#define smu_fini_power(smu) \ + ((smu)->funcs->fini_power ? (smu)->funcs->fini_power((smu)) : 0) +#define smu_load_microcode(smu) \ + ((smu)->funcs->load_microcode ? (smu)->funcs->load_microcode((smu)) : 0) +#define smu_check_fw_status(smu) \ + ((smu)->funcs->check_fw_status ? (smu)->funcs->check_fw_status((smu)) : 0) +#define smu_read_pptable_from_vbios(smu) \ + ((smu)->funcs->read_pptable_from_vbios ? (smu)->funcs->read_pptable_from_vbios((smu)) : 0) +#define smu_get_vbios_bootup_values(smu) \ + ((smu)->funcs->get_vbios_bootup_values ? (smu)->funcs->get_vbios_bootup_values((smu)) : 0) +#define smu_get_clk_info_from_vbios(smu) \ + ((smu)->funcs->get_clk_info_from_vbios ? (smu)->funcs->get_clk_info_from_vbios((smu)) : 0) +#define smu_check_pptable(smu) \ + ((smu)->funcs->check_pptable ? (smu)->funcs->check_pptable((smu)) : 0) +#define smu_parse_pptable(smu) \ + ((smu)->funcs->parse_pptable ? (smu)->funcs->parse_pptable((smu)) : 0) +#define smu_populate_smc_pptable(smu) \ + ((smu)->funcs->populate_smc_pptable ? (smu)->funcs->populate_smc_pptable((smu)) : 0) +#define smu_check_fw_version(smu) \ + ((smu)->funcs->check_fw_version ? (smu)->funcs->check_fw_version((smu)) : 0) +#define smu_write_pptable(smu) \ + ((smu)->funcs->write_pptable ? (smu)->funcs->write_pptable((smu)) : 0) +#define smu_set_min_dcef_deep_sleep(smu) \ + ((smu)->funcs->set_min_dcef_deep_sleep ? (smu)->funcs->set_min_dcef_deep_sleep((smu)) : 0) +#define smu_set_tool_table_location(smu) \ + ((smu)->funcs->set_tool_table_location ? (smu)->funcs->set_tool_table_location((smu)) : 0) +#define smu_notify_memory_pool_location(smu) \ + ((smu)->funcs->notify_memory_pool_location ? (smu)->funcs->notify_memory_pool_location((smu)) : 0) +#define smu_write_watermarks_table(smu) \ + ((smu)->funcs->write_watermarks_table ? (smu)->funcs->write_watermarks_table((smu)) : 0) +#define smu_set_last_dcef_min_deep_sleep_clk(smu) \ + ((smu)->funcs->set_last_dcef_min_deep_sleep_clk ? (smu)->funcs->set_last_dcef_min_deep_sleep_clk((smu)) : 0) +#define smu_system_features_control(smu, en) \ + ((smu)->funcs->system_features_control ? (smu)->funcs->system_features_control((smu), (en)) : 0) +#define smu_init_max_sustainable_clocks(smu) \ + ((smu)->funcs->init_max_sustainable_clocks ? (smu)->funcs->init_max_sustainable_clocks((smu)) : 0) +#define smu_set_od8_default_settings(smu, initialize) \ + ((smu)->funcs->set_od8_default_settings ? (smu)->funcs->set_od8_default_settings((smu), (initialize)) : 0) +#define smu_update_od8_settings(smu, index, value) \ + ((smu)->funcs->update_od8_settings ? (smu)->funcs->update_od8_settings((smu), (index), (value)) : 0) +#define smu_get_current_rpm(smu, speed) \ + ((smu)->funcs->get_current_rpm ? (smu)->funcs->get_current_rpm((smu), (speed)) : 0) +#define smu_set_fan_speed_rpm(smu, speed) \ + ((smu)->funcs->set_fan_speed_rpm ? (smu)->funcs->set_fan_speed_rpm((smu), (speed)) : 0) +#define smu_send_smc_msg(smu, msg) \ + ((smu)->funcs->send_smc_msg? (smu)->funcs->send_smc_msg((smu), (msg)) : 0) +#define smu_send_smc_msg_with_param(smu, msg, param) \ + ((smu)->funcs->send_smc_msg_with_param? (smu)->funcs->send_smc_msg_with_param((smu), (msg), (param)) : 0) +#define smu_read_smc_arg(smu, arg) \ + ((smu)->funcs->read_smc_arg? (smu)->funcs->read_smc_arg((smu), (arg)) : 0) +#define smu_alloc_dpm_context(smu) \ + ((smu)->ppt_funcs->alloc_dpm_context ? (smu)->ppt_funcs->alloc_dpm_context((smu)) : 0) +#define smu_init_display(smu) \ + ((smu)->funcs->init_display ? (smu)->funcs->init_display((smu)) : 0) +#define smu_feature_set_allowed_mask(smu) \ + ((smu)->funcs->set_allowed_mask? (smu)->funcs->set_allowed_mask((smu)) : 0) +#define smu_feature_get_enabled_mask(smu, mask, num) \ + ((smu)->funcs->get_enabled_mask? (smu)->funcs->get_enabled_mask((smu), (mask), (num)) : 0) +#define smu_is_dpm_running(smu) \ + ((smu)->funcs->is_dpm_running ? (smu)->funcs->is_dpm_running((smu)) : 0) +#define smu_feature_update_enable_state(smu, feature_id, enabled) \ + ((smu)->funcs->update_feature_enable_state? (smu)->funcs->update_feature_enable_state((smu), (feature_id), (enabled)) : 0) +#define smu_notify_display_change(smu) \ + ((smu)->funcs->notify_display_change? (smu)->funcs->notify_display_change((smu)) : 0) +#define smu_store_powerplay_table(smu) \ + ((smu)->ppt_funcs->store_powerplay_table ? (smu)->ppt_funcs->store_powerplay_table((smu)) : 0) +#define smu_check_powerplay_table(smu) \ + ((smu)->ppt_funcs->check_powerplay_table ? (smu)->ppt_funcs->check_powerplay_table((smu)) : 0) +#define smu_append_powerplay_table(smu) \ + ((smu)->ppt_funcs->append_powerplay_table ? (smu)->ppt_funcs->append_powerplay_table((smu)) : 0) +#define smu_set_default_dpm_table(smu) \ + ((smu)->ppt_funcs->set_default_dpm_table ? (smu)->ppt_funcs->set_default_dpm_table((smu)) : 0) +#define smu_populate_umd_state_clk(smu) \ + ((smu)->ppt_funcs->populate_umd_state_clk ? (smu)->ppt_funcs->populate_umd_state_clk((smu)) : 0) +#define smu_set_default_od8_settings(smu) \ + ((smu)->ppt_funcs->set_default_od8_settings ? (smu)->ppt_funcs->set_default_od8_settings((smu)) : 0) +#define smu_update_specified_od8_value(smu, index, value) \ + ((smu)->ppt_funcs->update_specified_od8_value ? (smu)->ppt_funcs->update_specified_od8_value((smu), (index), (value)) : 0) +#define smu_get_power_limit(smu, limit, def) \ + ((smu)->funcs->get_power_limit ? (smu)->funcs->get_power_limit((smu), (limit), (def)) : 0) +#define smu_set_power_limit(smu, limit) \ + ((smu)->funcs->set_power_limit ? (smu)->funcs->set_power_limit((smu), (limit)) : 0) +#define smu_get_current_clk_freq(smu, clk_id, value) \ + ((smu)->funcs->get_current_clk_freq? (smu)->funcs->get_current_clk_freq((smu), (clk_id), (value)) : 0) +#define smu_print_clk_levels(smu, type, buf) \ + ((smu)->ppt_funcs->print_clk_levels ? (smu)->ppt_funcs->print_clk_levels((smu), (type), (buf)) : 0) +#define smu_force_clk_levels(smu, type, level) \ + ((smu)->ppt_funcs->force_clk_levels ? (smu)->ppt_funcs->force_clk_levels((smu), (type), (level)) : 0) +#define smu_get_od_percentage(smu, type) \ + ((smu)->ppt_funcs->get_od_percentage ? (smu)->ppt_funcs->get_od_percentage((smu), (type)) : 0) +#define smu_set_od_percentage(smu, type, value) \ + ((smu)->ppt_funcs->set_od_percentage ? (smu)->ppt_funcs->set_od_percentage((smu), (type), (value)) : 0) +#define smu_od_edit_dpm_table(smu, type, input, size) \ + ((smu)->ppt_funcs->od_edit_dpm_table ? (smu)->ppt_funcs->od_edit_dpm_table((smu), (type), (input), (size)) : 0) +#define smu_start_thermal_control(smu) \ + ((smu)->funcs->start_thermal_control? (smu)->funcs->start_thermal_control((smu)) : 0) +#define smu_read_sensor(smu, sensor, data, size) \ + ((smu)->funcs->read_sensor? (smu)->funcs->read_sensor((smu), (sensor), (data), (size)) : 0) +#define smu_get_power_profile_mode(smu, buf) \ + ((smu)->funcs->get_power_profile_mode ? (smu)->funcs->get_power_profile_mode((smu), buf) : 0) +#define smu_set_power_profile_mode(smu, param, param_size) \ + ((smu)->funcs->set_power_profile_mode ? (smu)->funcs->set_power_profile_mode((smu), (param), (param_size)) : 0) +#define smu_get_performance_level(smu) \ + ((smu)->ppt_funcs->get_performance_level ? (smu)->ppt_funcs->get_performance_level((smu)) : 0) +#define smu_force_performance_level(smu, level) \ + ((smu)->ppt_funcs->force_performance_level ? (smu)->ppt_funcs->force_performance_level((smu), (level)) : 0) +#define smu_pre_display_config_changed(smu) \ + ((smu)->ppt_funcs->pre_display_config_changed ? (smu)->ppt_funcs->pre_display_config_changed((smu)) : 0) +#define smu_display_config_changed(smu) \ + ((smu)->ppt_funcs->display_config_changed ? (smu)->ppt_funcs->display_config_changed((smu)) : 0) +#define smu_apply_clocks_adjust_rules(smu) \ + ((smu)->ppt_funcs->apply_clocks_adjust_rules ? (smu)->ppt_funcs->apply_clocks_adjust_rules((smu)) : 0) +#define smu_notify_smc_dispaly_config(smu) \ + ((smu)->ppt_funcs->notify_smc_dispaly_config ? (smu)->ppt_funcs->notify_smc_dispaly_config((smu)) : 0) +#define smu_force_dpm_limit_value(smu, highest) \ + ((smu)->ppt_funcs->force_dpm_limit_value ? (smu)->ppt_funcs->force_dpm_limit_value((smu), (highest)) : 0) +#define smu_unforce_dpm_levels(smu) \ + ((smu)->ppt_funcs->unforce_dpm_levels ? (smu)->ppt_funcs->unforce_dpm_levels((smu)) : 0) +#define smu_upload_dpm_level(smu, max, feature_mask) \ + ((smu)->ppt_funcs->upload_dpm_level ? (smu)->ppt_funcs->upload_dpm_level((smu), (max), (feature_mask)) : 0) +#define smu_get_profiling_clk_mask(smu, level, sclk_mask, mclk_mask, soc_mask) \ + ((smu)->ppt_funcs->get_profiling_clk_mask ? (smu)->ppt_funcs->get_profiling_clk_mask((smu), (level), (sclk_mask), (mclk_mask), (soc_mask)) : 0) +#define smu_set_cpu_power_state(smu) \ + ((smu)->ppt_funcs->set_cpu_power_state ? (smu)->ppt_funcs->set_cpu_power_state((smu)) : 0) +#define smu_get_fan_control_mode(smu) \ + ((smu)->funcs->get_fan_control_mode ? (smu)->funcs->get_fan_control_mode((smu)) : 0) +#define smu_set_fan_control_mode(smu, value) \ + ((smu)->funcs->set_fan_control_mode ? (smu)->funcs->set_fan_control_mode((smu), (value)) : 0) +#define smu_get_fan_speed_percent(smu, speed) \ + ((smu)->funcs->get_fan_speed_percent ? (smu)->funcs->get_fan_speed_percent((smu), (speed)) : 0) +#define smu_set_fan_speed_percent(smu, speed) \ + ((smu)->funcs->set_fan_speed_percent ? (smu)->funcs->set_fan_speed_percent((smu), (speed)) : 0) + +#define smu_msg_get_index(smu, msg) \ + ((smu)->ppt_funcs? ((smu)->ppt_funcs->get_smu_msg_index? (smu)->ppt_funcs->get_smu_msg_index((smu), (msg)) : -EINVAL) : -EINVAL) +#define smu_run_afll_btc(smu) \ + ((smu)->ppt_funcs? ((smu)->ppt_funcs->run_afll_btc? (smu)->ppt_funcs->run_afll_btc((smu)) : 0) : 0) +#define smu_get_unallowed_feature_mask(smu, feature_mask, num) \ + ((smu)->ppt_funcs? ((smu)->ppt_funcs->get_unallowed_feature_mask? (smu)->ppt_funcs->get_unallowed_feature_mask((smu), (feature_mask), (num)) : 0) : 0) +#define smu_set_deep_sleep_dcefclk(smu, clk) \ + ((smu)->funcs->set_deep_sleep_dcefclk ? (smu)->funcs->set_deep_sleep_dcefclk((smu), (clk)) : 0) +#define smu_set_active_display_count(smu, count) \ + ((smu)->funcs->set_active_display_count ? (smu)->funcs->set_active_display_count((smu), (count)) : 0) +#define smu_store_cc6_data(smu, st, cc6_dis, pst_dis, pst_sw_dis) \ + ((smu)->funcs->store_cc6_data ? (smu)->funcs->store_cc6_data((smu), (st), (cc6_dis), (pst_dis), (pst_sw_dis)) : 0) +#define smu_get_clock_by_type(smu, type, clocks) \ + ((smu)->funcs->get_clock_by_type ? (smu)->funcs->get_clock_by_type((smu), (type), (clocks)) : 0) +#define smu_get_max_high_clocks(smu, clocks) \ + ((smu)->funcs->get_max_high_clocks ? (smu)->funcs->get_max_high_clocks((smu), (clocks)) : 0) +#define smu_get_clock_by_type_with_latency(smu, type, clocks) \ + ((smu)->ppt_funcs->get_clock_by_type_with_latency ? (smu)->ppt_funcs->get_clock_by_type_with_latency((smu), (type), (clocks)) : 0) +#define smu_get_clock_by_type_with_voltage(smu, type, clocks) \ + ((smu)->ppt_funcs->get_clock_by_type_with_voltage ? (smu)->ppt_funcs->get_clock_by_type_with_voltage((smu), (type), (clocks)) : 0) +#define smu_display_clock_voltage_request(smu, clock_req) \ + ((smu)->funcs->display_clock_voltage_request ? (smu)->funcs->display_clock_voltage_request((smu), (clock_req)) : 0) +#define smu_get_dal_power_level(smu, clocks) \ + ((smu)->funcs->get_dal_power_level ? (smu)->funcs->get_dal_power_level((smu), (clocks)) : 0) +#define smu_get_perf_level(smu, designation, level) \ + ((smu)->funcs->get_perf_level ? (smu)->funcs->get_perf_level((smu), (designation), (level)) : 0) +#define smu_get_current_shallow_sleep_clocks(smu, clocks) \ + ((smu)->funcs->get_current_shallow_sleep_clocks ? (smu)->funcs->get_current_shallow_sleep_clocks((smu), (clocks)) : 0) +#define smu_notify_smu_enable_pwe(smu) \ + ((smu)->funcs->notify_smu_enable_pwe ? (smu)->funcs->notify_smu_enable_pwe((smu)) : 0) +#define smu_set_watermarks_for_clock_ranges(smu, clock_ranges) \ + ((smu)->funcs->set_watermarks_for_clock_ranges ? (smu)->funcs->set_watermarks_for_clock_ranges((smu), (clock_ranges)) : 0) +#define smu_dpm_set_uvd_enable(smu, enable) \ + ((smu)->funcs->dpm_set_uvd_enable ? (smu)->funcs->dpm_set_uvd_enable((smu), (enable)) : 0) +#define smu_dpm_set_vce_enable(smu, enable) \ + ((smu)->funcs->dpm_set_vce_enable ? (smu)->funcs->dpm_set_vce_enable((smu), (enable)) : 0) +#define smu_get_sclk(smu, low) \ + ((smu)->funcs->get_sclk ? (smu)->funcs->get_sclk((smu), (low)) : 0) +#define smu_get_mclk(smu, low) \ + ((smu)->funcs->get_mclk ? (smu)->funcs->get_mclk((smu), (low)) : 0) +#define smu_set_xgmi_pstate(smu, pstate) \ + ((smu)->funcs->set_xgmi_pstate ? (smu)->funcs->set_xgmi_pstate((smu), (pstate)) : 0) + + +extern int smu_get_atom_data_table(struct smu_context *smu, uint32_t table, + uint16_t *size, uint8_t *frev, uint8_t *crev, + uint8_t **addr); + +extern const struct amd_ip_funcs smu_ip_funcs; + +extern const struct amdgpu_ip_block_version smu_v11_0_ip_block; +extern int smu_feature_init_dpm(struct smu_context *smu); + +extern int smu_feature_is_enabled(struct smu_context *smu, int feature_id); +extern int smu_feature_set_enabled(struct smu_context *smu, int feature_id, bool enable); +extern int smu_feature_is_supported(struct smu_context *smu, int feature_id); +extern int smu_feature_set_supported(struct smu_context *smu, int feature_id, bool enable); + +int smu_update_table_with_arg(struct smu_context *smu, uint16_t table_id, uint16_t exarg, + void *table_data, bool drv2smu); +#define smu_update_table(smu, table_id, table_data, drv2smu) \ + smu_update_table_with_arg((smu), (table_id), 0, (table_data), (drv2smu)) + +bool is_support_sw_smu(struct amdgpu_device *adev); +int smu_reset(struct smu_context *smu); +int smu_common_read_sensor(struct smu_context *smu, enum amd_pp_sensors sensor, + void *data, uint32_t *size); +int smu_sys_get_pp_table(struct smu_context *smu, void **table); +int smu_sys_set_pp_table(struct smu_context *smu, void *buf, size_t size); +int smu_get_power_num_states(struct smu_context *smu, struct pp_states_info *state_info); +enum amd_pm_state_type smu_get_current_power_state(struct smu_context *smu); + +/* smu to display interface */ +extern int smu_display_configuration_change(struct smu_context *smu, const + struct amd_pp_display_configuration + *display_config); +extern int smu_get_current_clocks(struct smu_context *smu, + struct amd_pp_clock_info *clocks); +extern int smu_dpm_set_power_gate(struct smu_context *smu,uint32_t block_type, bool gate); +extern int smu_handle_task(struct smu_context *smu, + enum amd_dpm_forced_level level, + enum amd_pp_task task_id); +#endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h index a2991fa2e6f8..90879e4092a3 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h +++ b/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h @@ -85,7 +85,6 @@ #define PPSMC_MSG_SetRccPfcPmeRestoreRegister 0x36 #define PPSMC_Message_Count 0x37 - typedef uint16_t PPSMC_Result; typedef int PPSMC_Msg; diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu10.h b/drivers/gpu/drm/amd/powerplay/inc/smu10.h index 9e837a5014c5..b96520528240 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu10.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu10.h @@ -136,12 +136,14 @@ #define FEATURE_CORE_CSTATES_MASK (1 << FEATURE_CORE_CSTATES_BIT) /* Workload bits */ -#define WORKLOAD_PPLIB_FULL_SCREEN_3D_BIT 0 -#define WORKLOAD_PPLIB_VIDEO_BIT 2 -#define WORKLOAD_PPLIB_VR_BIT 3 -#define WORKLOAD_PPLIB_COMPUTE_BIT 4 -#define WORKLOAD_PPLIB_CUSTOM_BIT 5 -#define WORKLOAD_PPLIB_COUNT 6 +#define WORKLOAD_DEFAULT_BIT 0 +#define WORKLOAD_PPLIB_FULL_SCREEN_3D_BIT 1 +#define WORKLOAD_PPLIB_POWER_SAVING_BIT 2 +#define WORKLOAD_PPLIB_VIDEO_BIT 3 +#define WORKLOAD_PPLIB_VR_BIT 4 +#define WORKLOAD_PPLIB_COMPUTE_BIT 5 +#define WORKLOAD_PPLIB_CUSTOM_BIT 6 +#define WORKLOAD_PPLIB_COUNT 7 typedef struct { /* MP1_EXT_SCRATCH0 */ diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h new file mode 100644 index 000000000000..aa8d81f4111e --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h @@ -0,0 +1,89 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __SMU_V11_0_H__ +#define __SMU_V11_0_H__ + +#include "amdgpu_smu.h" + +/* MP Apertures */ +#define MP0_Public 0x03800000 +#define MP0_SRAM 0x03900000 +#define MP1_Public 0x03b00000 +#define MP1_SRAM 0x03c00004 + +/* address block */ +#define smnMP1_FIRMWARE_FLAGS 0x3010024 +#define smnMP0_FW_INTF 0x30101c0 +#define smnMP1_PUB_CTRL 0x3010b14 + +struct smu_11_0_max_sustainable_clocks { + uint32_t display_clock; + uint32_t phy_clock; + uint32_t pixel_clock; + uint32_t uclock; + uint32_t dcef_clock; + uint32_t soc_clock; +}; + +struct smu_11_0_dpm_table { + uint32_t min; /* MHz */ + uint32_t max; /* MHz */ +}; + +struct smu_11_0_dpm_tables { + struct smu_11_0_dpm_table soc_table; + struct smu_11_0_dpm_table gfx_table; + struct smu_11_0_dpm_table uclk_table; + struct smu_11_0_dpm_table eclk_table; + struct smu_11_0_dpm_table vclk_table; + struct smu_11_0_dpm_table dclk_table; + struct smu_11_0_dpm_table dcef_table; + struct smu_11_0_dpm_table pixel_table; + struct smu_11_0_dpm_table display_table; + struct smu_11_0_dpm_table phy_table; + struct smu_11_0_dpm_table fclk_table; +}; + +struct smu_11_0_dpm_context { + struct smu_11_0_dpm_tables dpm_tables; + uint32_t workload_policy_mask; + uint32_t dcef_min_ds_clk; +}; + +enum smu_11_0_power_state { + SMU_11_0_POWER_STATE__D0 = 0, + SMU_11_0_POWER_STATE__D1, + SMU_11_0_POWER_STATE__D3, /* Sleep*/ + SMU_11_0_POWER_STATE__D4, /* Hibernate*/ + SMU_11_0_POWER_STATE__D5, /* Power off*/ +}; + +struct smu_11_0_power_context { + uint32_t power_source; + uint8_t in_power_limit_boost_mode; + enum smu_11_0_power_state power_state; +}; + +void smu_v11_0_set_smu_funcs(struct smu_context *smu); + +#endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_ppsmc.h new file mode 100644 index 000000000000..f466f624ad32 --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_ppsmc.h @@ -0,0 +1,128 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef SMU_V11_0_PPSMC_H +#define SMU_V11_0_PPSMC_H + +// SMU Response Codes: +#define PPSMC_Result_OK 0x1 +#define PPSMC_Result_Failed 0xFF +#define PPSMC_Result_UnknownCmd 0xFE +#define PPSMC_Result_CmdRejectedPrereq 0xFD +#define PPSMC_Result_CmdRejectedBusy 0xFC + +// Message Definitions: +// BASIC +#define PPSMC_MSG_TestMessage 0x1 +#define PPSMC_MSG_GetSmuVersion 0x2 +#define PPSMC_MSG_GetDriverIfVersion 0x3 +#define PPSMC_MSG_SetAllowedFeaturesMaskLow 0x4 +#define PPSMC_MSG_SetAllowedFeaturesMaskHigh 0x5 +#define PPSMC_MSG_EnableAllSmuFeatures 0x6 +#define PPSMC_MSG_DisableAllSmuFeatures 0x7 +#define PPSMC_MSG_EnableSmuFeaturesLow 0x8 +#define PPSMC_MSG_EnableSmuFeaturesHigh 0x9 +#define PPSMC_MSG_DisableSmuFeaturesLow 0xA +#define PPSMC_MSG_DisableSmuFeaturesHigh 0xB +#define PPSMC_MSG_GetEnabledSmuFeaturesLow 0xC +#define PPSMC_MSG_GetEnabledSmuFeaturesHigh 0xD +#define PPSMC_MSG_SetDriverDramAddrHigh 0xE +#define PPSMC_MSG_SetDriverDramAddrLow 0xF +#define PPSMC_MSG_SetToolsDramAddrHigh 0x10 +#define PPSMC_MSG_SetToolsDramAddrLow 0x11 +#define PPSMC_MSG_TransferTableSmu2Dram 0x12 +#define PPSMC_MSG_TransferTableDram2Smu 0x13 +#define PPSMC_MSG_UseDefaultPPTable 0x14 +#define PPSMC_MSG_UseBackupPPTable 0x15 +#define PPSMC_MSG_SetSystemVirtualDramAddrHigh 0x16 +#define PPSMC_MSG_SetSystemVirtualDramAddrLow 0x17 + +//BACO/BAMACO/BOMACO +#define PPSMC_MSG_EnterBaco 0x18 +#define PPSMC_MSG_ExitBaco 0x19 + +//DPM +#define PPSMC_MSG_SetSoftMinByFreq 0x1A +#define PPSMC_MSG_SetSoftMaxByFreq 0x1B +#define PPSMC_MSG_SetHardMinByFreq 0x1C +#define PPSMC_MSG_SetHardMaxByFreq 0x1D +#define PPSMC_MSG_GetMinDpmFreq 0x1E +#define PPSMC_MSG_GetMaxDpmFreq 0x1F +#define PPSMC_MSG_GetDpmFreqByIndex 0x20 +#define PPSMC_MSG_OverridePcieParameters 0x21 +#define PPSMC_MSG_SetMinDeepSleepDcefclk 0x22 +#define PPSMC_MSG_SetWorkloadMask 0x23 +#define PPSMC_MSG_SetUclkFastSwitch 0x24 +#define PPSMC_MSG_GetAvfsVoltageByDpm 0x25 +#define PPSMC_MSG_SetVideoFps 0x26 +#define PPSMC_MSG_GetDcModeMaxDpmFreq 0x27 + +//Power Gating +#define PPSMC_MSG_AllowGfxOff 0x28 +#define PPSMC_MSG_DisallowGfxOff 0x29 +#define PPSMC_MSG_PowerUpVcn 0x2A +#define PPSMC_MSG_PowerDownVcn 0x2B +#define PPSMC_MSG_PowerUpJpeg 0x2C +#define PPSMC_MSG_PowerDownJpeg 0x2D +//reserve 0x2A to 0x2F for PG harvesting TBD + +//I2C Interface +#define PPSMC_RequestI2cTransaction 0x30 + +//Resets +#define PPSMC_MSG_SoftReset 0x31 //FIXME Need confirmation from driver +#define PPSMC_MSG_PrepareMp1ForUnload 0x32 +#define PPSMC_MSG_PrepareMp1ForReset 0x33 +#define PPSMC_MSG_PrepareMp1ForShutdown 0x34 + +//ACDC Power Source +#define PPSMC_MSG_SetPptLimit 0x35 +#define PPSMC_MSG_GetPptLimit 0x36 +#define PPSMC_MSG_ReenableAcDcInterrupt 0x37 +#define PPSMC_MSG_NotifyPowerSource 0x38 +//#define PPSMC_MSG_GfxDeviceDriverReset 0x39 //FIXME mode1 and 2 resets will go directly go PSP + +//BTC +#define PPSMC_MSG_RunBtc 0x3A + +//Debug +#define PPSMC_MSG_DramLogSetDramAddrHigh 0x3B +#define PPSMC_MSG_DramLogSetDramAddrLow 0x3C +#define PPSMC_MSG_DramLogSetDramSize 0x3D +#define PPSMC_MSG_GetDebugData 0x3E + +//Others +#define PPSMC_MSG_ConfigureGfxDidt 0x3F +#define PPSMC_MSG_NumOfDisplays 0x40 + +#define PPSMC_MSG_SetMemoryChannelConfig 0x41 +#define PPSMC_MSG_SetGeminiMode 0x42 +#define PPSMC_MSG_SetGeminiApertureHigh 0x43 +#define PPSMC_MSG_SetGeminiApertureLow 0x44 + +#define PPSMC_Message_Count 0x45 + +typedef uint32_t PPSMC_Result; +typedef uint32_t PPSMC_Msg; + +#endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h new file mode 100644 index 000000000000..92c65b80bde2 --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h @@ -0,0 +1,147 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef SMU_11_0_PPTABLE_H +#define SMU_11_0_PPTABLE_H + + +#define SMU_11_0_TABLE_FORMAT_REVISION 12 + +//// POWERPLAYTABLE::ulPlatformCaps +#define SMU_11_0_PP_PLATFORM_CAP_POWERPLAY 0x1 +#define SMU_11_0_PP_PLATFORM_CAP_SBIOSPOWERSOURCE 0x2 +#define SMU_11_0_PP_PLATFORM_CAP_HARDWAREDC 0x4 +#define SMU_11_0_PP_PLATFORM_CAP_BACO 0x8 +#define SMU_11_0_PP_PLATFORM_CAP_MACO 0x10 +#define SMU_11_0_PP_PLATFORM_CAP_SHADOWPSTATE 0x20 + +// SMU_11_0_PP_THERMALCONTROLLER - Thermal Controller Type +#define SMU_11_0_PP_THERMALCONTROLLER_NONE 0 + +#define SMU_11_0_PP_OVERDRIVE_VERSION 0x0800 +#define SMU_11_0_PP_POWERSAVINGCLOCK_VERSION 0x0100 + +enum SMU_11_0_ODFEATURE_ID { + SMU_11_0_ODFEATURE_GFXCLK_LIMITS = 1 << 0, //GFXCLK Limit feature + SMU_11_0_ODFEATURE_GFXCLK_CURVE = 1 << 1, //GFXCLK Curve feature + SMU_11_0_ODFEATURE_UCLK_MAX = 1 << 2, //UCLK Limit feature + SMU_11_0_ODFEATURE_POWER_LIMIT = 1 << 3, //Power Limit feature + SMU_11_0_ODFEATURE_FAN_ACOUSTIC_LIMIT = 1 << 4, //Fan Acoustic RPM feature + SMU_11_0_ODFEATURE_FAN_SPEED_MIN = 1 << 5, //Minimum Fan Speed feature + SMU_11_0_ODFEATURE_TEMPERATURE_FAN = 1 << 6, //Fan Target Temperature Limit feature + SMU_11_0_ODFEATURE_TEMPERATURE_SYSTEM = 1 << 7, //Operating Temperature Limit feature + SMU_11_0_ODFEATURE_MEMORY_TIMING_TUNE = 1 << 8, //AC Timing Tuning feature + SMU_11_0_ODFEATURE_FAN_ZERO_RPM_CONTROL = 1 << 9, //Zero RPM feature + SMU_11_0_ODFEATURE_AUTO_UV_ENGINE = 1 << 10, //Auto Under Volt GFXCLK feature + SMU_11_0_ODFEATURE_AUTO_OC_ENGINE = 1 << 11, //Auto Over Clock GFXCLK feature + SMU_11_0_ODFEATURE_AUTO_OC_MEMORY = 1 << 12, //Auto Over Clock MCLK feature + SMU_11_0_ODFEATURE_FAN_CURVE = 1 << 13, //VICTOR TODO + SMU_11_0_ODFEATURE_COUNT = 14, +}; +#define SMU_11_0_MAX_ODFEATURE 32 //Maximum Number of OD Features + +enum SMU_11_0_ODSETTING_ID { + SMU_11_0_ODSETTING_GFXCLKFMAX = 0, + SMU_11_0_ODSETTING_GFXCLKFMIN, + SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P1, + SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P1, + SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P2, + SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P2, + SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P3, + SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P3, + SMU_11_0_ODSETTING_UCLKFMAX, + SMU_11_0_ODSETTING_POWERPERCENTAGE, + SMU_11_0_ODSETTING_FANRPMMIN, + SMU_11_0_ODSETTING_FANRPMACOUSTICLIMIT, + SMU_11_0_ODSETTING_FANTARGETTEMPERATURE, + SMU_11_0_ODSETTING_OPERATINGTEMPMAX, + SMU_11_0_ODSETTING_ACTIMING, + SMU_11_0_ODSETTING_FAN_ZERO_RPM_CONTROL, + SMU_11_0_ODSETTING_AUTOUVENGINE, + SMU_11_0_ODSETTING_AUTOOCENGINE, + SMU_11_0_ODSETTING_AUTOOCMEMORY, + SMU_11_0_ODSETTING_COUNT, +}; +#define SMU_11_0_MAX_ODSETTING 32 //Maximum Number of ODSettings + +struct smu_11_0_overdrive_table +{ + uint8_t revision; //Revision = SMU_11_0_PP_OVERDRIVE_VERSION + uint8_t reserve[3]; //Zero filled field reserved for future use + uint32_t feature_count; //Total number of supported features + uint32_t setting_count; //Total number of supported settings + uint8_t cap[SMU_11_0_MAX_ODFEATURE]; //OD feature support flags + uint32_t max[SMU_11_0_MAX_ODSETTING]; //default maximum settings + uint32_t min[SMU_11_0_MAX_ODSETTING]; //default minimum settings +} __attribute__((packed)); + +enum SMU_11_0_PPCLOCK_ID { + SMU_11_0_PPCLOCK_GFXCLK = 0, + SMU_11_0_PPCLOCK_VCLK, + SMU_11_0_PPCLOCK_DCLK, + SMU_11_0_PPCLOCK_ECLK, + SMU_11_0_PPCLOCK_SOCCLK, + SMU_11_0_PPCLOCK_UCLK, + SMU_11_0_PPCLOCK_DCEFCLK, + SMU_11_0_PPCLOCK_DISPCLK, + SMU_11_0_PPCLOCK_PIXCLK, + SMU_11_0_PPCLOCK_PHYCLK, + SMU_11_0_PPCLOCK_COUNT, +}; +#define SMU_11_0_MAX_PPCLOCK 16 //Maximum Number of PP Clocks + +struct smu_11_0_power_saving_clock_table +{ + uint8_t revision; //Revision = SMU_11_0_PP_POWERSAVINGCLOCK_VERSION + uint8_t reserve[3]; //Zero filled field reserved for future use + uint32_t count; //power_saving_clock_count = SMU_11_0_PPCLOCK_COUNT + uint32_t max[SMU_11_0_MAX_PPCLOCK]; //PowerSavingClock Mode Clock Maximum array In MHz + uint32_t min[SMU_11_0_MAX_PPCLOCK]; //PowerSavingClock Mode Clock Minimum array In MHz +} __attribute__((packed)); + +struct smu_11_0_powerplay_table +{ + struct atom_common_table_header header; + uint8_t table_revision; + uint32_t table_size; //Driver portion table size. The offset to smc_pptable including header size + uint32_t golden_pp_id; + uint32_t golden_revision; + uint16_t format_id; + uint32_t platform_caps; //POWERPLAYABLE::ulPlatformCaps + + uint8_t thermal_controller_type; //one of SMU_11_0_PP_THERMALCONTROLLER + + uint16_t small_power_limit1; + uint16_t small_power_limit2; + uint16_t boost_power_limit; + uint16_t od_turbo_power_limit; //Power limit setting for Turbo mode in Performance UI Tuning. + uint16_t od_power_save_power_limit; //Power limit setting for PowerSave/Optimal mode in Performance UI Tuning. + uint16_t software_shutdown_temp; + + uint16_t reserve[6]; //Zero filled field reserved for future use + + struct smu_11_0_power_saving_clock_table power_saving_clock; + struct smu_11_0_overdrive_table overdrive_table; + + PPTable_t smc_pptable; //PPTable_t in smu11_driver_if.h +} __attribute__((packed)); + +#endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/vega20_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/vega20_ppsmc.h index 4f63a736ea0e..a0883038f3c3 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/vega20_ppsmc.h +++ b/drivers/gpu/drm/amd/powerplay/inc/vega20_ppsmc.h @@ -119,7 +119,8 @@ #define PPSMC_MSG_PrepareMp1ForShutdown 0x5A #define PPSMC_MSG_SetMGpuFanBoostLimitRpm 0x5D #define PPSMC_MSG_GetAVFSVoltageByDpm 0x5F -#define PPSMC_Message_Count 0x60 +#define PPSMC_MSG_BacoWorkAroundFlushVDCI 0x60 +#define PPSMC_Message_Count 0x61 typedef uint32_t PPSMC_Result; typedef uint32_t PPSMC_Msg; diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c new file mode 100644 index 000000000000..92903a4cc4d8 --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -0,0 +1,1977 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "pp_debug.h" +#include <linux/firmware.h> +#include "amdgpu.h" +#include "amdgpu_smu.h" +#include "atomfirmware.h" +#include "amdgpu_atomfirmware.h" +#include "smu_v11_0.h" +#include "smu11_driver_if.h" +#include "soc15_common.h" +#include "atom.h" +#include "vega20_ppt.h" +#include "pp_thermal.h" + +#include "asic_reg/thm/thm_11_0_2_offset.h" +#include "asic_reg/thm/thm_11_0_2_sh_mask.h" +#include "asic_reg/mp/mp_9_0_offset.h" +#include "asic_reg/mp/mp_9_0_sh_mask.h" +#include "asic_reg/nbio/nbio_7_4_offset.h" +#include "asic_reg/smuio/smuio_9_0_offset.h" +#include "asic_reg/smuio/smuio_9_0_sh_mask.h" + +MODULE_FIRMWARE("amdgpu/vega20_smc.bin"); + +#define SMU11_TOOL_SIZE 0x19000 +#define SMU11_THERMAL_MINIMUM_ALERT_TEMP 0 +#define SMU11_THERMAL_MAXIMUM_ALERT_TEMP 255 + +#define SMU11_TEMPERATURE_UNITS_PER_CENTIGRADES 1000 +#define SMU11_VOLTAGE_SCALE 4 + +#define SMC_DPM_FEATURE (FEATURE_DPM_PREFETCHER_MASK | \ + FEATURE_DPM_GFXCLK_MASK | \ + FEATURE_DPM_UCLK_MASK | \ + FEATURE_DPM_SOCCLK_MASK | \ + FEATURE_DPM_UVD_MASK | \ + FEATURE_DPM_VCE_MASK | \ + FEATURE_DPM_MP0CLK_MASK | \ + FEATURE_DPM_LINK_MASK | \ + FEATURE_DPM_DCEFCLK_MASK) + +static int smu_v11_0_send_msg_without_waiting(struct smu_context *smu, + uint16_t msg) +{ + struct amdgpu_device *adev = smu->adev; + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66, msg); + return 0; +} + +static int smu_v11_0_read_arg(struct smu_context *smu, uint32_t *arg) +{ + struct amdgpu_device *adev = smu->adev; + + *arg = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82); + return 0; +} + +static int smu_v11_0_wait_for_response(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t cur_value, i; + + for (i = 0; i < adev->usec_timeout; i++) { + cur_value = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90); + if ((cur_value & MP1_C2PMSG_90__CONTENT_MASK) != 0) + break; + udelay(1); + } + + /* timeout means wrong logic */ + if (i == adev->usec_timeout) + return -ETIME; + + return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90) == 0x1 ? 0 : -EIO; +} + +static int smu_v11_0_send_msg(struct smu_context *smu, uint16_t msg) +{ + struct amdgpu_device *adev = smu->adev; + int ret = 0, index = 0; + + index = smu_msg_get_index(smu, msg); + if (index < 0) + return index; + + smu_v11_0_wait_for_response(smu); + + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0); + + smu_v11_0_send_msg_without_waiting(smu, (uint16_t)index); + + ret = smu_v11_0_wait_for_response(smu); + + if (ret) + pr_err("Failed to send message 0x%x, response 0x%x\n", index, + ret); + + return ret; + +} + +static int +smu_v11_0_send_msg_with_param(struct smu_context *smu, uint16_t msg, + uint32_t param) +{ + + struct amdgpu_device *adev = smu->adev; + int ret = 0, index = 0; + + index = smu_msg_get_index(smu, msg); + if (index < 0) + return index; + + ret = smu_v11_0_wait_for_response(smu); + if (ret) + pr_err("Failed to send message 0x%x, response 0x%x, param 0x%x\n", + index, ret, param); + + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0); + + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82, param); + + smu_v11_0_send_msg_without_waiting(smu, (uint16_t)index); + + ret = smu_v11_0_wait_for_response(smu); + if (ret) + pr_err("Failed to send message 0x%x, response 0x%x param 0x%x\n", + index, ret, param); + + return ret; +} + +static int smu_v11_0_init_microcode(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + const char *chip_name; + char fw_name[30]; + int err = 0; + const struct smc_firmware_header_v1_0 *hdr; + const struct common_firmware_header *header; + struct amdgpu_firmware_info *ucode = NULL; + + switch (adev->asic_type) { + case CHIP_VEGA20: + chip_name = "vega20"; + break; + default: + BUG(); + } + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_smc.bin", chip_name); + + err = request_firmware(&adev->pm.fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->pm.fw); + if (err) + goto out; + + hdr = (const struct smc_firmware_header_v1_0 *) adev->pm.fw->data; + amdgpu_ucode_print_smc_hdr(&hdr->header); + adev->pm.fw_version = le32_to_cpu(hdr->header.ucode_version); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_SMC]; + ucode->ucode_id = AMDGPU_UCODE_ID_SMC; + ucode->fw = adev->pm.fw; + header = (const struct common_firmware_header *)ucode->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + } + +out: + if (err) { + DRM_ERROR("smu_v11_0: Failed to load firmware \"%s\"\n", + fw_name); + release_firmware(adev->pm.fw); + adev->pm.fw = NULL; + } + return err; +} + +static int smu_v11_0_load_microcode(struct smu_context *smu) +{ + return 0; +} + +static int smu_v11_0_check_fw_status(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t mp1_fw_flags; + + mp1_fw_flags = RREG32_PCIE(MP1_Public | + (smnMP1_FIRMWARE_FLAGS & 0xffffffff)); + + if ((mp1_fw_flags & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) >> + MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED__SHIFT) + return 0; + + return -EIO; +} + +static int smu_v11_0_check_fw_version(struct smu_context *smu) +{ + uint32_t smu_version = 0xff; + int ret = 0; + + ret = smu_send_smc_msg(smu, SMU_MSG_GetDriverIfVersion); + if (ret) + goto err; + + ret = smu_read_smc_arg(smu, &smu_version); + if (ret) + goto err; + + if (smu_version != smu->smc_if_version) + ret = -EINVAL; +err: + return ret; +} + +static int smu_v11_0_read_pptable_from_vbios(struct smu_context *smu) +{ + int ret, index; + uint16_t size; + uint8_t frev, crev; + void *table; + + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + powerplayinfo); + + ret = smu_get_atom_data_table(smu, index, &size, &frev, &crev, + (uint8_t **)&table); + if (ret) + return ret; + + if (!smu->smu_table.power_play_table) + smu->smu_table.power_play_table = table; + if (!smu->smu_table.power_play_table_size) + smu->smu_table.power_play_table_size = size; + + return 0; +} + +static int smu_v11_0_init_dpm_context(struct smu_context *smu) +{ + struct smu_dpm_context *smu_dpm = &smu->smu_dpm; + + if (smu_dpm->dpm_context || smu_dpm->dpm_context_size != 0) + return -EINVAL; + + return smu_alloc_dpm_context(smu); +} + +static int smu_v11_0_fini_dpm_context(struct smu_context *smu) +{ + struct smu_dpm_context *smu_dpm = &smu->smu_dpm; + + if (!smu_dpm->dpm_context || smu_dpm->dpm_context_size == 0) + return -EINVAL; + + kfree(smu_dpm->dpm_context); + kfree(smu_dpm->golden_dpm_context); + kfree(smu_dpm->dpm_current_power_state); + kfree(smu_dpm->dpm_request_power_state); + smu_dpm->dpm_context = NULL; + smu_dpm->golden_dpm_context = NULL; + smu_dpm->dpm_context_size = 0; + smu_dpm->dpm_current_power_state = NULL; + smu_dpm->dpm_request_power_state = NULL; + + return 0; +} + +static int smu_v11_0_init_smc_tables(struct smu_context *smu) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *tables = NULL; + int ret = 0; + + if (smu_table->tables || smu_table->table_count != 0) + return -EINVAL; + + tables = kcalloc(TABLE_COUNT, sizeof(struct smu_table), GFP_KERNEL); + if (!tables) + return -ENOMEM; + + smu_table->tables = tables; + smu_table->table_count = TABLE_COUNT; + + SMU_TABLE_INIT(tables, TABLE_PPTABLE, sizeof(PPTable_t), + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + SMU_TABLE_INIT(tables, TABLE_WATERMARKS, sizeof(Watermarks_t), + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + SMU_TABLE_INIT(tables, TABLE_SMU_METRICS, sizeof(SmuMetrics_t), + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + SMU_TABLE_INIT(tables, TABLE_OVERDRIVE, sizeof(OverDriveTable_t), + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + SMU_TABLE_INIT(tables, TABLE_PMSTATUSLOG, SMU11_TOOL_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM); + SMU_TABLE_INIT(tables, TABLE_ACTIVITY_MONITOR_COEFF, + sizeof(DpmActivityMonitorCoeffInt_t), + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM); + + ret = smu_v11_0_init_dpm_context(smu); + if (ret) + return ret; + + return 0; +} + +static int smu_v11_0_fini_smc_tables(struct smu_context *smu) +{ + struct smu_table_context *smu_table = &smu->smu_table; + int ret = 0; + + if (!smu_table->tables || smu_table->table_count == 0) + return -EINVAL; + + kfree(smu_table->tables); + smu_table->tables = NULL; + smu_table->table_count = 0; + + ret = smu_v11_0_fini_dpm_context(smu); + if (ret) + return ret; + return 0; +} + +static int smu_v11_0_init_power(struct smu_context *smu) +{ + struct smu_power_context *smu_power = &smu->smu_power; + + if (smu_power->power_context || smu_power->power_context_size != 0) + return -EINVAL; + + smu_power->power_context = kzalloc(sizeof(struct smu_11_0_dpm_context), + GFP_KERNEL); + if (!smu_power->power_context) + return -ENOMEM; + smu_power->power_context_size = sizeof(struct smu_11_0_dpm_context); + + return 0; +} + +static int smu_v11_0_fini_power(struct smu_context *smu) +{ + struct smu_power_context *smu_power = &smu->smu_power; + + if (!smu_power->power_context || smu_power->power_context_size == 0) + return -EINVAL; + + kfree(smu_power->power_context); + smu_power->power_context = NULL; + smu_power->power_context_size = 0; + + return 0; +} + +int smu_v11_0_get_vbios_bootup_values(struct smu_context *smu) +{ + int ret, index; + uint16_t size; + uint8_t frev, crev; + struct atom_common_table_header *header; + struct atom_firmware_info_v3_3 *v_3_3; + struct atom_firmware_info_v3_1 *v_3_1; + + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + firmwareinfo); + + ret = smu_get_atom_data_table(smu, index, &size, &frev, &crev, + (uint8_t **)&header); + if (ret) + return ret; + + if (header->format_revision != 3) { + pr_err("unknown atom_firmware_info version! for smu11\n"); + return -EINVAL; + } + + switch (header->content_revision) { + case 0: + case 1: + case 2: + v_3_1 = (struct atom_firmware_info_v3_1 *)header; + smu->smu_table.boot_values.revision = v_3_1->firmware_revision; + smu->smu_table.boot_values.gfxclk = v_3_1->bootup_sclk_in10khz; + smu->smu_table.boot_values.uclk = v_3_1->bootup_mclk_in10khz; + smu->smu_table.boot_values.socclk = 0; + smu->smu_table.boot_values.dcefclk = 0; + smu->smu_table.boot_values.vddc = v_3_1->bootup_vddc_mv; + smu->smu_table.boot_values.vddci = v_3_1->bootup_vddci_mv; + smu->smu_table.boot_values.mvddc = v_3_1->bootup_mvddc_mv; + smu->smu_table.boot_values.vdd_gfx = v_3_1->bootup_vddgfx_mv; + smu->smu_table.boot_values.cooling_id = v_3_1->coolingsolution_id; + smu->smu_table.boot_values.pp_table_id = 0; + break; + case 3: + default: + v_3_3 = (struct atom_firmware_info_v3_3 *)header; + smu->smu_table.boot_values.revision = v_3_3->firmware_revision; + smu->smu_table.boot_values.gfxclk = v_3_3->bootup_sclk_in10khz; + smu->smu_table.boot_values.uclk = v_3_3->bootup_mclk_in10khz; + smu->smu_table.boot_values.socclk = 0; + smu->smu_table.boot_values.dcefclk = 0; + smu->smu_table.boot_values.vddc = v_3_3->bootup_vddc_mv; + smu->smu_table.boot_values.vddci = v_3_3->bootup_vddci_mv; + smu->smu_table.boot_values.mvddc = v_3_3->bootup_mvddc_mv; + smu->smu_table.boot_values.vdd_gfx = v_3_3->bootup_vddgfx_mv; + smu->smu_table.boot_values.cooling_id = v_3_3->coolingsolution_id; + smu->smu_table.boot_values.pp_table_id = v_3_3->pplib_pptable_id; + } + + return 0; +} + +static int smu_v11_0_get_clk_info_from_vbios(struct smu_context *smu) +{ + int ret, index; + struct amdgpu_device *adev = smu->adev; + struct atom_get_smu_clock_info_parameters_v3_1 input = {0}; + struct atom_get_smu_clock_info_output_parameters_v3_1 *output; + + input.clk_id = SMU11_SYSPLL0_SOCCLK_ID; + input.command = GET_SMU_CLOCK_INFO_V3_1_GET_CLOCK_FREQ; + index = get_index_into_master_table(atom_master_list_of_command_functions_v2_1, + getsmuclockinfo); + + ret = amdgpu_atom_execute_table(adev->mode_info.atom_context, index, + (uint32_t *)&input); + if (ret) + return -EINVAL; + + output = (struct atom_get_smu_clock_info_output_parameters_v3_1 *)&input; + smu->smu_table.boot_values.socclk = le32_to_cpu(output->atom_smu_outputclkfreq.smu_clock_freq_hz) / 10000; + + memset(&input, 0, sizeof(input)); + input.clk_id = SMU11_SYSPLL0_DCEFCLK_ID; + input.command = GET_SMU_CLOCK_INFO_V3_1_GET_CLOCK_FREQ; + index = get_index_into_master_table(atom_master_list_of_command_functions_v2_1, + getsmuclockinfo); + + ret = amdgpu_atom_execute_table(adev->mode_info.atom_context, index, + (uint32_t *)&input); + if (ret) + return -EINVAL; + + output = (struct atom_get_smu_clock_info_output_parameters_v3_1 *)&input; + smu->smu_table.boot_values.dcefclk = le32_to_cpu(output->atom_smu_outputclkfreq.smu_clock_freq_hz) / 10000; + + memset(&input, 0, sizeof(input)); + input.clk_id = SMU11_SYSPLL0_ECLK_ID; + input.command = GET_SMU_CLOCK_INFO_V3_1_GET_CLOCK_FREQ; + index = get_index_into_master_table(atom_master_list_of_command_functions_v2_1, + getsmuclockinfo); + + ret = amdgpu_atom_execute_table(adev->mode_info.atom_context, index, + (uint32_t *)&input); + if (ret) + return -EINVAL; + + output = (struct atom_get_smu_clock_info_output_parameters_v3_1 *)&input; + smu->smu_table.boot_values.eclk = le32_to_cpu(output->atom_smu_outputclkfreq.smu_clock_freq_hz) / 10000; + + memset(&input, 0, sizeof(input)); + input.clk_id = SMU11_SYSPLL0_VCLK_ID; + input.command = GET_SMU_CLOCK_INFO_V3_1_GET_CLOCK_FREQ; + index = get_index_into_master_table(atom_master_list_of_command_functions_v2_1, + getsmuclockinfo); + + ret = amdgpu_atom_execute_table(adev->mode_info.atom_context, index, + (uint32_t *)&input); + if (ret) + return -EINVAL; + + output = (struct atom_get_smu_clock_info_output_parameters_v3_1 *)&input; + smu->smu_table.boot_values.vclk = le32_to_cpu(output->atom_smu_outputclkfreq.smu_clock_freq_hz) / 10000; + + memset(&input, 0, sizeof(input)); + input.clk_id = SMU11_SYSPLL0_DCLK_ID; + input.command = GET_SMU_CLOCK_INFO_V3_1_GET_CLOCK_FREQ; + index = get_index_into_master_table(atom_master_list_of_command_functions_v2_1, + getsmuclockinfo); + + ret = amdgpu_atom_execute_table(adev->mode_info.atom_context, index, + (uint32_t *)&input); + if (ret) + return -EINVAL; + + output = (struct atom_get_smu_clock_info_output_parameters_v3_1 *)&input; + smu->smu_table.boot_values.dclk = le32_to_cpu(output->atom_smu_outputclkfreq.smu_clock_freq_hz) / 10000; + + return 0; +} + +static int smu_v11_0_notify_memory_pool_location(struct smu_context *smu) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *memory_pool = &smu_table->memory_pool; + int ret = 0; + uint64_t address; + uint32_t address_low, address_high; + + if (memory_pool->size == 0 || memory_pool->cpu_addr == NULL) + return ret; + + address = (uintptr_t)memory_pool->cpu_addr; + address_high = (uint32_t)upper_32_bits(address); + address_low = (uint32_t)lower_32_bits(address); + + ret = smu_send_smc_msg_with_param(smu, + SMU_MSG_SetSystemVirtualDramAddrHigh, + address_high); + if (ret) + return ret; + ret = smu_send_smc_msg_with_param(smu, + SMU_MSG_SetSystemVirtualDramAddrLow, + address_low); + if (ret) + return ret; + + address = memory_pool->mc_address; + address_high = (uint32_t)upper_32_bits(address); + address_low = (uint32_t)lower_32_bits(address); + + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_DramLogSetDramAddrHigh, + address_high); + if (ret) + return ret; + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_DramLogSetDramAddrLow, + address_low); + if (ret) + return ret; + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_DramLogSetDramSize, + (uint32_t)memory_pool->size); + if (ret) + return ret; + + return ret; +} + +static int smu_v11_0_check_pptable(struct smu_context *smu) +{ + int ret; + + ret = smu_check_powerplay_table(smu); + return ret; +} + +static int smu_v11_0_parse_pptable(struct smu_context *smu) +{ + int ret; + + struct smu_table_context *table_context = &smu->smu_table; + + if (table_context->driver_pptable) + return -EINVAL; + + table_context->driver_pptable = kzalloc(sizeof(PPTable_t), GFP_KERNEL); + + if (!table_context->driver_pptable) + return -ENOMEM; + + ret = smu_store_powerplay_table(smu); + if (ret) + return -EINVAL; + + ret = smu_append_powerplay_table(smu); + + return ret; +} + +static int smu_v11_0_populate_smc_pptable(struct smu_context *smu) +{ + int ret; + + ret = smu_set_default_dpm_table(smu); + + return ret; +} + +static int smu_v11_0_write_pptable(struct smu_context *smu) +{ + struct smu_table_context *table_context = &smu->smu_table; + int ret = 0; + + ret = smu_update_table(smu, TABLE_PPTABLE, table_context->driver_pptable, true); + + return ret; +} + +static int smu_v11_0_write_watermarks_table(struct smu_context *smu) +{ + return smu_update_table(smu, TABLE_WATERMARKS, + smu->smu_table.tables[TABLE_WATERMARKS].cpu_addr, true); +} + +static int smu_v11_0_set_deep_sleep_dcefclk(struct smu_context *smu, uint32_t clk) +{ + int ret; + + ret = smu_send_smc_msg_with_param(smu, + SMU_MSG_SetMinDeepSleepDcefclk, clk); + if (ret) + pr_err("SMU11 attempt to set divider for DCEFCLK Failed!"); + + return ret; +} + +static int smu_v11_0_set_min_dcef_deep_sleep(struct smu_context *smu) +{ + struct smu_table_context *table_context = &smu->smu_table; + + if (!table_context) + return -EINVAL; + + return smu_set_deep_sleep_dcefclk(smu, + table_context->boot_values.dcefclk / 100); +} + +static int smu_v11_0_set_tool_table_location(struct smu_context *smu) +{ + int ret = 0; + struct smu_table *tool_table = &smu->smu_table.tables[TABLE_PMSTATUSLOG]; + + if (tool_table->mc_address) { + ret = smu_send_smc_msg_with_param(smu, + SMU_MSG_SetToolsDramAddrHigh, + upper_32_bits(tool_table->mc_address)); + if (!ret) + ret = smu_send_smc_msg_with_param(smu, + SMU_MSG_SetToolsDramAddrLow, + lower_32_bits(tool_table->mc_address)); + } + + return ret; +} + +static int smu_v11_0_init_display(struct smu_context *smu) +{ + int ret = 0; + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_NumOfDisplays, 0); + return ret; +} + +static int smu_v11_0_update_feature_enable_state(struct smu_context *smu, uint32_t feature_id, bool enabled) +{ + uint32_t feature_low = 0, feature_high = 0; + int ret = 0; + + if (feature_id >= 0 && feature_id < 31) + feature_low = (1 << feature_id); + else if (feature_id > 31 && feature_id < 63) + feature_high = (1 << feature_id); + else + return -EINVAL; + + if (enabled) { + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_EnableSmuFeaturesLow, + feature_low); + if (ret) + return ret; + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_EnableSmuFeaturesHigh, + feature_high); + if (ret) + return ret; + + } else { + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_DisableSmuFeaturesLow, + feature_low); + if (ret) + return ret; + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_DisableSmuFeaturesHigh, + feature_high); + if (ret) + return ret; + + } + + return ret; +} + +static int smu_v11_0_set_allowed_mask(struct smu_context *smu) +{ + struct smu_feature *feature = &smu->smu_feature; + int ret = 0; + uint32_t feature_mask[2]; + + mutex_lock(&feature->mutex); + if (bitmap_empty(feature->allowed, SMU_FEATURE_MAX) || feature->feature_num < 64) + goto failed; + + bitmap_copy((unsigned long *)feature_mask, feature->allowed, 64); + + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetAllowedFeaturesMaskHigh, + feature_mask[1]); + if (ret) + goto failed; + + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetAllowedFeaturesMaskLow, + feature_mask[0]); + if (ret) + goto failed; + +failed: + mutex_unlock(&feature->mutex); + return ret; +} + +static int smu_v11_0_get_enabled_mask(struct smu_context *smu, + uint32_t *feature_mask, uint32_t num) +{ + uint32_t feature_mask_high = 0, feature_mask_low = 0; + int ret = 0; + + if (!feature_mask || num < 2) + return -EINVAL; + + ret = smu_send_smc_msg(smu, SMU_MSG_GetEnabledSmuFeaturesHigh); + if (ret) + return ret; + ret = smu_read_smc_arg(smu, &feature_mask_high); + if (ret) + return ret; + + ret = smu_send_smc_msg(smu, SMU_MSG_GetEnabledSmuFeaturesLow); + if (ret) + return ret; + ret = smu_read_smc_arg(smu, &feature_mask_low); + if (ret) + return ret; + + feature_mask[0] = feature_mask_low; + feature_mask[1] = feature_mask_high; + + return ret; +} + +static bool smu_v11_0_is_dpm_running(struct smu_context *smu) +{ + int ret = 0; + uint32_t feature_mask[2]; + unsigned long feature_enabled; + ret = smu_v11_0_get_enabled_mask(smu, feature_mask, 2); + feature_enabled = (unsigned long)((uint64_t)feature_mask[0] | + ((uint64_t)feature_mask[1] << 32)); + return !!(feature_enabled & SMC_DPM_FEATURE); +} + +static int smu_v11_0_system_features_control(struct smu_context *smu, + bool en) +{ + struct smu_feature *feature = &smu->smu_feature; + uint32_t feature_mask[2]; + int ret = 0; + + ret = smu_send_smc_msg(smu, (en ? SMU_MSG_EnableAllSmuFeatures : + SMU_MSG_DisableAllSmuFeatures)); + if (ret) + return ret; + ret = smu_feature_get_enabled_mask(smu, feature_mask, 2); + if (ret) + return ret; + + bitmap_copy(feature->enabled, (unsigned long *)&feature_mask, + feature->feature_num); + bitmap_copy(feature->supported, (unsigned long *)&feature_mask, + feature->feature_num); + + return ret; +} + +static int smu_v11_0_notify_display_change(struct smu_context *smu) +{ + int ret = 0; + + if (smu_feature_is_enabled(smu, FEATURE_DPM_UCLK_BIT)) + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetUclkFastSwitch, 1); + + return ret; +} + +static int +smu_v11_0_get_max_sustainable_clock(struct smu_context *smu, uint32_t *clock, + PPCLK_e clock_select) +{ + int ret = 0; + + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_GetDcModeMaxDpmFreq, + clock_select << 16); + if (ret) { + pr_err("[GetMaxSustainableClock] Failed to get max DC clock from SMC!"); + return ret; + } + + ret = smu_read_smc_arg(smu, clock); + if (ret) + return ret; + + if (*clock != 0) + return 0; + + /* if DC limit is zero, return AC limit */ + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_GetMaxDpmFreq, + clock_select << 16); + if (ret) { + pr_err("[GetMaxSustainableClock] failed to get max AC clock from SMC!"); + return ret; + } + + ret = smu_read_smc_arg(smu, clock); + + return ret; +} + +static int smu_v11_0_init_max_sustainable_clocks(struct smu_context *smu) +{ + struct smu_11_0_max_sustainable_clocks *max_sustainable_clocks; + int ret = 0; + + max_sustainable_clocks = kzalloc(sizeof(struct smu_11_0_max_sustainable_clocks), + GFP_KERNEL); + smu->smu_table.max_sustainable_clocks = (void *)max_sustainable_clocks; + + max_sustainable_clocks->uclock = smu->smu_table.boot_values.uclk / 100; + max_sustainable_clocks->soc_clock = smu->smu_table.boot_values.socclk / 100; + max_sustainable_clocks->dcef_clock = smu->smu_table.boot_values.dcefclk / 100; + max_sustainable_clocks->display_clock = 0xFFFFFFFF; + max_sustainable_clocks->phy_clock = 0xFFFFFFFF; + max_sustainable_clocks->pixel_clock = 0xFFFFFFFF; + + if (smu_feature_is_enabled(smu, FEATURE_DPM_UCLK_BIT)) { + ret = smu_v11_0_get_max_sustainable_clock(smu, + &(max_sustainable_clocks->uclock), + PPCLK_UCLK); + if (ret) { + pr_err("[%s] failed to get max UCLK from SMC!", + __func__); + return ret; + } + } + + if (smu_feature_is_enabled(smu, FEATURE_DPM_SOCCLK_BIT)) { + ret = smu_v11_0_get_max_sustainable_clock(smu, + &(max_sustainable_clocks->soc_clock), + PPCLK_SOCCLK); + if (ret) { + pr_err("[%s] failed to get max SOCCLK from SMC!", + __func__); + return ret; + } + } + + if (smu_feature_is_enabled(smu, FEATURE_DPM_DCEFCLK_BIT)) { + ret = smu_v11_0_get_max_sustainable_clock(smu, + &(max_sustainable_clocks->dcef_clock), + PPCLK_DCEFCLK); + if (ret) { + pr_err("[%s] failed to get max DCEFCLK from SMC!", + __func__); + return ret; + } + + ret = smu_v11_0_get_max_sustainable_clock(smu, + &(max_sustainable_clocks->display_clock), + PPCLK_DISPCLK); + if (ret) { + pr_err("[%s] failed to get max DISPCLK from SMC!", + __func__); + return ret; + } + ret = smu_v11_0_get_max_sustainable_clock(smu, + &(max_sustainable_clocks->phy_clock), + PPCLK_PHYCLK); + if (ret) { + pr_err("[%s] failed to get max PHYCLK from SMC!", + __func__); + return ret; + } + ret = smu_v11_0_get_max_sustainable_clock(smu, + &(max_sustainable_clocks->pixel_clock), + PPCLK_PIXCLK); + if (ret) { + pr_err("[%s] failed to get max PIXCLK from SMC!", + __func__); + return ret; + } + } + + if (max_sustainable_clocks->soc_clock < max_sustainable_clocks->uclock) + max_sustainable_clocks->uclock = max_sustainable_clocks->soc_clock; + + return 0; +} + +static int smu_v11_0_get_power_limit(struct smu_context *smu, + uint32_t *limit, + bool get_default) +{ + int ret = 0; + + if (get_default) { + mutex_lock(&smu->mutex); + *limit = smu->default_power_limit; + if (smu->od_enabled) { + *limit *= (100 + smu->smu_table.TDPODLimit); + *limit /= 100; + } + mutex_unlock(&smu->mutex); + } else { + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_GetPptLimit, + POWER_SOURCE_AC << 16); + if (ret) { + pr_err("[%s] get PPT limit failed!", __func__); + return ret; + } + smu_read_smc_arg(smu, limit); + smu->power_limit = *limit; + } + + return ret; +} + +static int smu_v11_0_set_power_limit(struct smu_context *smu, uint32_t n) +{ + uint32_t max_power_limit; + int ret = 0; + + if (n == 0) + n = smu->default_power_limit; + + max_power_limit = smu->default_power_limit; + + if (smu->od_enabled) { + max_power_limit *= (100 + smu->smu_table.TDPODLimit); + max_power_limit /= 100; + } + + if (smu_feature_is_enabled(smu, FEATURE_PPT_BIT)) + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetPptLimit, n); + if (ret) { + pr_err("[%s] Set power limit Failed!", __func__); + return ret; + } + + return ret; +} + +static int smu_v11_0_get_current_clk_freq(struct smu_context *smu, uint32_t clk_id, uint32_t *value) +{ + int ret = 0; + uint32_t freq; + + if (clk_id >= PPCLK_COUNT || !value) + return -EINVAL; + + ret = smu_send_smc_msg_with_param(smu, + SMU_MSG_GetDpmClockFreq, (clk_id << 16)); + if (ret) + return ret; + + ret = smu_read_smc_arg(smu, &freq); + if (ret) + return ret; + + freq *= 100; + *value = freq; + + return ret; +} + +static int smu_v11_0_get_thermal_range(struct smu_context *smu, + struct PP_TemperatureRange *range) +{ + memcpy(range, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange)); + + range->max = smu->smu_table.software_shutdown_temp * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + + return 0; +} + +static int smu_v11_0_set_thermal_range(struct smu_context *smu, + struct PP_TemperatureRange *range) +{ + struct amdgpu_device *adev = smu->adev; + int low = SMU11_THERMAL_MINIMUM_ALERT_TEMP * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + int high = SMU11_THERMAL_MAXIMUM_ALERT_TEMP * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + uint32_t val; + + if (low < range->min) + low = range->min; + if (high > range->max) + high = range->max; + + if (low > high) + return -EINVAL; + + val = RREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, MAX_IH_CREDIT, 5); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_IH_HW_ENA, 1); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, (high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES)); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, (low / PP_TEMPERATURE_UNITS_PER_CENTIGRADES)); + val = val & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK); + + WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL, val); + + return 0; +} + +static int smu_v11_0_enable_thermal_alert(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t val = 0; + + val |= (1 << THM_THERMAL_INT_ENA__THERM_INTH_CLR__SHIFT); + val |= (1 << THM_THERMAL_INT_ENA__THERM_INTL_CLR__SHIFT); + val |= (1 << THM_THERMAL_INT_ENA__THERM_TRIGGER_CLR__SHIFT); + + WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, val); + + return 0; +} + +static int smu_v11_0_set_thermal_fan_table(struct smu_context *smu) +{ + int ret; + struct smu_table_context *table_context = &smu->smu_table; + PPTable_t *pptable = table_context->driver_pptable; + + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetFanTemperatureTarget, + (uint32_t)pptable->FanTargetTemperature); + + return ret; +} + +static int smu_v11_0_start_thermal_control(struct smu_context *smu) +{ + int ret = 0; + struct PP_TemperatureRange range; + struct amdgpu_device *adev = smu->adev; + + smu_v11_0_get_thermal_range(smu, &range); + + if (smu->smu_table.thermal_controller_type) { + ret = smu_v11_0_set_thermal_range(smu, &range); + if (ret) + return ret; + + ret = smu_v11_0_enable_thermal_alert(smu); + if (ret) + return ret; + ret = smu_v11_0_set_thermal_fan_table(smu); + if (ret) + return ret; + } + + adev->pm.dpm.thermal.min_temp = range.min; + adev->pm.dpm.thermal.max_temp = range.max; + + return ret; +} + +static int smu_v11_0_get_current_activity_percent(struct smu_context *smu, + uint32_t *value) +{ + int ret = 0; + SmuMetrics_t metrics; + + if (!value) + return -EINVAL; + + ret = smu_update_table(smu, TABLE_SMU_METRICS, (void *)&metrics, false); + if (ret) + return ret; + + *value = metrics.AverageGfxActivity; + + return 0; +} + +static int smu_v11_0_thermal_get_temperature(struct smu_context *smu, uint32_t *value) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t temp = 0; + + if (!value) + return -EINVAL; + + temp = RREG32_SOC15(THM, 0, mmCG_MULT_THERMAL_STATUS); + temp = (temp & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >> + CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT; + + temp = temp & 0x1ff; + temp *= SMU11_TEMPERATURE_UNITS_PER_CENTIGRADES; + + *value = temp; + + return 0; +} + +static int smu_v11_0_get_gpu_power(struct smu_context *smu, uint32_t *value) +{ + int ret = 0; + SmuMetrics_t metrics; + + if (!value) + return -EINVAL; + + ret = smu_update_table(smu, TABLE_SMU_METRICS, (void *)&metrics, false); + if (ret) + return ret; + + *value = metrics.CurrSocketPower << 8; + + return 0; +} + +static uint16_t convert_to_vddc(uint8_t vid) +{ + return (uint16_t) ((6200 - (vid * 25)) / SMU11_VOLTAGE_SCALE); +} + +static int smu_v11_0_get_gfx_vdd(struct smu_context *smu, uint32_t *value) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t vdd = 0, val_vid = 0; + + if (!value) + return -EINVAL; + val_vid = (RREG32_SOC15(SMUIO, 0, mmSMUSVI0_TEL_PLANE0) & + SMUSVI0_TEL_PLANE0__SVI0_PLANE0_VDDCOR_MASK) >> + SMUSVI0_TEL_PLANE0__SVI0_PLANE0_VDDCOR__SHIFT; + + vdd = (uint32_t)convert_to_vddc((uint8_t)val_vid); + + *value = vdd; + + return 0; + +} + +static int smu_v11_0_read_sensor(struct smu_context *smu, + enum amd_pp_sensors sensor, + void *data, uint32_t *size) +{ + struct smu_table_context *table_context = &smu->smu_table; + PPTable_t *pptable = table_context->driver_pptable; + int ret = 0; + switch (sensor) { + case AMDGPU_PP_SENSOR_GPU_LOAD: + ret = smu_v11_0_get_current_activity_percent(smu, + (uint32_t *)data); + *size = 4; + break; + case AMDGPU_PP_SENSOR_GFX_MCLK: + ret = smu_get_current_clk_freq(smu, PPCLK_UCLK, (uint32_t *)data); + *size = 4; + break; + case AMDGPU_PP_SENSOR_GFX_SCLK: + ret = smu_get_current_clk_freq(smu, PPCLK_GFXCLK, (uint32_t *)data); + *size = 4; + break; + case AMDGPU_PP_SENSOR_GPU_TEMP: + ret = smu_v11_0_thermal_get_temperature(smu, (uint32_t *)data); + *size = 4; + break; + case AMDGPU_PP_SENSOR_GPU_POWER: + ret = smu_v11_0_get_gpu_power(smu, (uint32_t *)data); + *size = 4; + break; + case AMDGPU_PP_SENSOR_VDDGFX: + ret = smu_v11_0_get_gfx_vdd(smu, (uint32_t *)data); + *size = 4; + break; + case AMDGPU_PP_SENSOR_UVD_POWER: + *(uint32_t *)data = smu_feature_is_enabled(smu, FEATURE_DPM_UVD_BIT) ? 1 : 0; + *size = 4; + break; + case AMDGPU_PP_SENSOR_VCE_POWER: + *(uint32_t *)data = smu_feature_is_enabled(smu, FEATURE_DPM_VCE_BIT) ? 1 : 0; + *size = 4; + break; + case AMDGPU_PP_SENSOR_MIN_FAN_RPM: + *(uint32_t *)data = 0; + *size = 4; + break; + case AMDGPU_PP_SENSOR_MAX_FAN_RPM: + *(uint32_t *)data = pptable->FanMaximumRpm; + *size = 4; + break; + default: + ret = smu_common_read_sensor(smu, sensor, data, size); + break; + } + + if (ret) + *size = 0; + + return ret; +} + +static int +smu_v11_0_display_clock_voltage_request(struct smu_context *smu, + struct pp_display_clock_request + *clock_req) +{ + enum amd_pp_clock_type clk_type = clock_req->clock_type; + int ret = 0; + PPCLK_e clk_select = 0; + uint32_t clk_freq = clock_req->clock_freq_in_khz / 1000; + + if (smu_feature_is_enabled(smu, FEATURE_DPM_DCEFCLK_BIT)) { + switch (clk_type) { + case amd_pp_dcef_clock: + clk_select = PPCLK_DCEFCLK; + break; + case amd_pp_disp_clock: + clk_select = PPCLK_DISPCLK; + break; + case amd_pp_pixel_clock: + clk_select = PPCLK_PIXCLK; + break; + case amd_pp_phy_clock: + clk_select = PPCLK_PHYCLK; + break; + default: + pr_info("[%s] Invalid Clock Type!", __func__); + ret = -EINVAL; + break; + } + + if (ret) + goto failed; + + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinByFreq, + (clk_select << 16) | clk_freq); + } + +failed: + return ret; +} + +static int smu_v11_0_set_watermarks_table(struct smu_context *smu, + Watermarks_t *table, struct + dm_pp_wm_sets_with_clock_ranges_soc15 + *clock_ranges) +{ + int i; + + if (!table || !clock_ranges) + return -EINVAL; + + if (clock_ranges->num_wm_dmif_sets > 4 || + clock_ranges->num_wm_mcif_sets > 4) + return -EINVAL; + + for (i = 0; i < clock_ranges->num_wm_dmif_sets; i++) { + table->WatermarkRow[1][i].MinClock = + cpu_to_le16((uint16_t) + (clock_ranges->wm_dmif_clocks_ranges[i].wm_min_dcfclk_clk_in_khz / + 1000)); + table->WatermarkRow[1][i].MaxClock = + cpu_to_le16((uint16_t) + (clock_ranges->wm_dmif_clocks_ranges[i].wm_max_dcfclk_clk_in_khz / + 1000)); + table->WatermarkRow[1][i].MinUclk = + cpu_to_le16((uint16_t) + (clock_ranges->wm_dmif_clocks_ranges[i].wm_min_mem_clk_in_khz / + 1000)); + table->WatermarkRow[1][i].MaxUclk = + cpu_to_le16((uint16_t) + (clock_ranges->wm_dmif_clocks_ranges[i].wm_max_mem_clk_in_khz / + 1000)); + table->WatermarkRow[1][i].WmSetting = (uint8_t) + clock_ranges->wm_dmif_clocks_ranges[i].wm_set_id; + } + + for (i = 0; i < clock_ranges->num_wm_mcif_sets; i++) { + table->WatermarkRow[0][i].MinClock = + cpu_to_le16((uint16_t) + (clock_ranges->wm_mcif_clocks_ranges[i].wm_min_socclk_clk_in_khz / + 1000)); + table->WatermarkRow[0][i].MaxClock = + cpu_to_le16((uint16_t) + (clock_ranges->wm_mcif_clocks_ranges[i].wm_max_socclk_clk_in_khz / + 1000)); + table->WatermarkRow[0][i].MinUclk = + cpu_to_le16((uint16_t) + (clock_ranges->wm_mcif_clocks_ranges[i].wm_min_mem_clk_in_khz / + 1000)); + table->WatermarkRow[0][i].MaxUclk = + cpu_to_le16((uint16_t) + (clock_ranges->wm_mcif_clocks_ranges[i].wm_max_mem_clk_in_khz / + 1000)); + table->WatermarkRow[0][i].WmSetting = (uint8_t) + clock_ranges->wm_mcif_clocks_ranges[i].wm_set_id; + } + + return 0; +} + +static int +smu_v11_0_set_watermarks_for_clock_ranges(struct smu_context *smu, struct + dm_pp_wm_sets_with_clock_ranges_soc15 + *clock_ranges) +{ + int ret = 0; + struct smu_table *watermarks = &smu->smu_table.tables[TABLE_WATERMARKS]; + Watermarks_t *table = watermarks->cpu_addr; + + if (!smu->disable_watermark && + smu_feature_is_enabled(smu, FEATURE_DPM_DCEFCLK_BIT) && + smu_feature_is_enabled(smu, FEATURE_DPM_SOCCLK_BIT)) { + smu_v11_0_set_watermarks_table(smu, table, clock_ranges); + smu->watermarks_bitmap |= WATERMARKS_EXIST; + smu->watermarks_bitmap &= ~WATERMARKS_LOADED; + } + + return ret; +} + +static int smu_v11_0_get_clock_ranges(struct smu_context *smu, + uint32_t *clock, + PPCLK_e clock_select, + bool max) +{ + int ret; + *clock = 0; + if (max) { + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_GetMaxDpmFreq, + (clock_select << 16)); + if (ret) { + pr_err("[GetClockRanges] Failed to get max clock from SMC!\n"); + return ret; + } + smu_read_smc_arg(smu, clock); + } else { + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_GetMinDpmFreq, + (clock_select << 16)); + if (ret) { + pr_err("[GetClockRanges] Failed to get min clock from SMC!\n"); + return ret; + } + smu_read_smc_arg(smu, clock); + } + + return 0; +} + +static uint32_t smu_v11_0_dpm_get_sclk(struct smu_context *smu, bool low) +{ + uint32_t gfx_clk; + int ret; + + if (!smu_feature_is_enabled(smu, FEATURE_DPM_GFXCLK_BIT)) { + pr_err("[GetSclks]: gfxclk dpm not enabled!\n"); + return -EPERM; + } + + if (low) { + ret = smu_v11_0_get_clock_ranges(smu, &gfx_clk, PPCLK_GFXCLK, false); + if (ret) { + pr_err("[GetSclks]: fail to get min PPCLK_GFXCLK\n"); + return ret; + } + } else { + ret = smu_v11_0_get_clock_ranges(smu, &gfx_clk, PPCLK_GFXCLK, true); + if (ret) { + pr_err("[GetSclks]: fail to get max PPCLK_GFXCLK\n"); + return ret; + } + } + + return (gfx_clk * 100); +} + +static uint32_t smu_v11_0_dpm_get_mclk(struct smu_context *smu, bool low) +{ + uint32_t mem_clk; + int ret; + + if (!smu_feature_is_enabled(smu, FEATURE_DPM_UCLK_BIT)) { + pr_err("[GetMclks]: memclk dpm not enabled!\n"); + return -EPERM; + } + + if (low) { + ret = smu_v11_0_get_clock_ranges(smu, &mem_clk, PPCLK_UCLK, false); + if (ret) { + pr_err("[GetMclks]: fail to get min PPCLK_UCLK\n"); + return ret; + } + } else { + ret = smu_v11_0_get_clock_ranges(smu, &mem_clk, PPCLK_GFXCLK, true); + if (ret) { + pr_err("[GetMclks]: fail to get max PPCLK_UCLK\n"); + return ret; + } + } + + return (mem_clk * 100); +} + +static int smu_v11_0_set_od8_default_settings(struct smu_context *smu, + bool initialize) +{ + struct smu_table_context *table_context = &smu->smu_table; + int ret; + + if (initialize) { + if (table_context->overdrive_table) + return -EINVAL; + + table_context->overdrive_table = kzalloc(sizeof(OverDriveTable_t), GFP_KERNEL); + + if (!table_context->overdrive_table) + return -ENOMEM; + + ret = smu_update_table(smu, TABLE_OVERDRIVE, table_context->overdrive_table, false); + if (ret) { + pr_err("Failed to export over drive table!\n"); + return ret; + } + + smu_set_default_od8_settings(smu); + } + + ret = smu_update_table(smu, TABLE_OVERDRIVE, table_context->overdrive_table, true); + if (ret) { + pr_err("Failed to import over drive table!\n"); + return ret; + } + + return 0; +} + +static int smu_v11_0_conv_power_profile_to_pplib_workload(int power_profile) +{ + int pplib_workload = 0; + + switch (power_profile) { + case PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT: + pplib_workload = WORKLOAD_DEFAULT_BIT; + break; + case PP_SMC_POWER_PROFILE_FULLSCREEN3D: + pplib_workload = WORKLOAD_PPLIB_FULL_SCREEN_3D_BIT; + break; + case PP_SMC_POWER_PROFILE_POWERSAVING: + pplib_workload = WORKLOAD_PPLIB_POWER_SAVING_BIT; + break; + case PP_SMC_POWER_PROFILE_VIDEO: + pplib_workload = WORKLOAD_PPLIB_VIDEO_BIT; + break; + case PP_SMC_POWER_PROFILE_VR: + pplib_workload = WORKLOAD_PPLIB_VR_BIT; + break; + case PP_SMC_POWER_PROFILE_COMPUTE: + pplib_workload = WORKLOAD_PPLIB_COMPUTE_BIT; + break; + case PP_SMC_POWER_PROFILE_CUSTOM: + pplib_workload = WORKLOAD_PPLIB_CUSTOM_BIT; + break; + } + + return pplib_workload; +} + +static int smu_v11_0_get_power_profile_mode(struct smu_context *smu, char *buf) +{ + DpmActivityMonitorCoeffInt_t activity_monitor; + uint32_t i, size = 0; + uint16_t workload_type = 0; + static const char *profile_name[] = { + "BOOTUP_DEFAULT", + "3D_FULL_SCREEN", + "POWER_SAVING", + "VIDEO", + "VR", + "COMPUTE", + "CUSTOM"}; + static const char *title[] = { + "PROFILE_INDEX(NAME)", + "CLOCK_TYPE(NAME)", + "FPS", + "UseRlcBusy", + "MinActiveFreqType", + "MinActiveFreq", + "BoosterFreqType", + "BoosterFreq", + "PD_Data_limit_c", + "PD_Data_error_coeff", + "PD_Data_error_rate_coeff"}; + int result = 0; + + if (!buf) + return -EINVAL; + + size += sprintf(buf + size, "%16s %s %s %s %s %s %s %s %s %s %s\n", + title[0], title[1], title[2], title[3], title[4], title[5], + title[6], title[7], title[8], title[9], title[10]); + + for (i = 0; i <= PP_SMC_POWER_PROFILE_CUSTOM; i++) { + /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ + workload_type = smu_v11_0_conv_power_profile_to_pplib_workload(i); + result = smu_update_table_with_arg(smu, TABLE_ACTIVITY_MONITOR_COEFF, + workload_type, &activity_monitor, false); + if (result) { + pr_err("[%s] Failed to get activity monitor!", __func__); + return result; + } + + size += sprintf(buf + size, "%2d %14s%s:\n", + i, profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); + + size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", + " ", + 0, + "GFXCLK", + activity_monitor.Gfx_FPS, + activity_monitor.Gfx_UseRlcBusy, + activity_monitor.Gfx_MinActiveFreqType, + activity_monitor.Gfx_MinActiveFreq, + activity_monitor.Gfx_BoosterFreqType, + activity_monitor.Gfx_BoosterFreq, + activity_monitor.Gfx_PD_Data_limit_c, + activity_monitor.Gfx_PD_Data_error_coeff, + activity_monitor.Gfx_PD_Data_error_rate_coeff); + + size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", + " ", + 1, + "SOCCLK", + activity_monitor.Soc_FPS, + activity_monitor.Soc_UseRlcBusy, + activity_monitor.Soc_MinActiveFreqType, + activity_monitor.Soc_MinActiveFreq, + activity_monitor.Soc_BoosterFreqType, + activity_monitor.Soc_BoosterFreq, + activity_monitor.Soc_PD_Data_limit_c, + activity_monitor.Soc_PD_Data_error_coeff, + activity_monitor.Soc_PD_Data_error_rate_coeff); + + size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", + " ", + 2, + "UCLK", + activity_monitor.Mem_FPS, + activity_monitor.Mem_UseRlcBusy, + activity_monitor.Mem_MinActiveFreqType, + activity_monitor.Mem_MinActiveFreq, + activity_monitor.Mem_BoosterFreqType, + activity_monitor.Mem_BoosterFreq, + activity_monitor.Mem_PD_Data_limit_c, + activity_monitor.Mem_PD_Data_error_coeff, + activity_monitor.Mem_PD_Data_error_rate_coeff); + + size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", + " ", + 3, + "FCLK", + activity_monitor.Fclk_FPS, + activity_monitor.Fclk_UseRlcBusy, + activity_monitor.Fclk_MinActiveFreqType, + activity_monitor.Fclk_MinActiveFreq, + activity_monitor.Fclk_BoosterFreqType, + activity_monitor.Fclk_BoosterFreq, + activity_monitor.Fclk_PD_Data_limit_c, + activity_monitor.Fclk_PD_Data_error_coeff, + activity_monitor.Fclk_PD_Data_error_rate_coeff); + } + + return size; +} + +static int smu_v11_0_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size) +{ + DpmActivityMonitorCoeffInt_t activity_monitor; + int workload_type = 0, ret = 0; + + smu->power_profile_mode = input[size]; + + if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) { + pr_err("Invalid power profile mode %d\n", smu->power_profile_mode); + return -EINVAL; + } + + if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { + ret = smu_update_table_with_arg(smu, TABLE_ACTIVITY_MONITOR_COEFF, + WORKLOAD_PPLIB_CUSTOM_BIT, &activity_monitor, false); + if (ret) { + pr_err("[%s] Failed to get activity monitor!", __func__); + return ret; + } + + switch (input[0]) { + case 0: /* Gfxclk */ + activity_monitor.Gfx_FPS = input[1]; + activity_monitor.Gfx_UseRlcBusy = input[2]; + activity_monitor.Gfx_MinActiveFreqType = input[3]; + activity_monitor.Gfx_MinActiveFreq = input[4]; + activity_monitor.Gfx_BoosterFreqType = input[5]; + activity_monitor.Gfx_BoosterFreq = input[6]; + activity_monitor.Gfx_PD_Data_limit_c = input[7]; + activity_monitor.Gfx_PD_Data_error_coeff = input[8]; + activity_monitor.Gfx_PD_Data_error_rate_coeff = input[9]; + break; + case 1: /* Socclk */ + activity_monitor.Soc_FPS = input[1]; + activity_monitor.Soc_UseRlcBusy = input[2]; + activity_monitor.Soc_MinActiveFreqType = input[3]; + activity_monitor.Soc_MinActiveFreq = input[4]; + activity_monitor.Soc_BoosterFreqType = input[5]; + activity_monitor.Soc_BoosterFreq = input[6]; + activity_monitor.Soc_PD_Data_limit_c = input[7]; + activity_monitor.Soc_PD_Data_error_coeff = input[8]; + activity_monitor.Soc_PD_Data_error_rate_coeff = input[9]; + break; + case 2: /* Uclk */ + activity_monitor.Mem_FPS = input[1]; + activity_monitor.Mem_UseRlcBusy = input[2]; + activity_monitor.Mem_MinActiveFreqType = input[3]; + activity_monitor.Mem_MinActiveFreq = input[4]; + activity_monitor.Mem_BoosterFreqType = input[5]; + activity_monitor.Mem_BoosterFreq = input[6]; + activity_monitor.Mem_PD_Data_limit_c = input[7]; + activity_monitor.Mem_PD_Data_error_coeff = input[8]; + activity_monitor.Mem_PD_Data_error_rate_coeff = input[9]; + break; + case 3: /* Fclk */ + activity_monitor.Fclk_FPS = input[1]; + activity_monitor.Fclk_UseRlcBusy = input[2]; + activity_monitor.Fclk_MinActiveFreqType = input[3]; + activity_monitor.Fclk_MinActiveFreq = input[4]; + activity_monitor.Fclk_BoosterFreqType = input[5]; + activity_monitor.Fclk_BoosterFreq = input[6]; + activity_monitor.Fclk_PD_Data_limit_c = input[7]; + activity_monitor.Fclk_PD_Data_error_coeff = input[8]; + activity_monitor.Fclk_PD_Data_error_rate_coeff = input[9]; + break; + } + + ret = smu_update_table_with_arg(smu, TABLE_ACTIVITY_MONITOR_COEFF, + WORKLOAD_PPLIB_COMPUTE_BIT, &activity_monitor, true); + if (ret) { + pr_err("[%s] Failed to set activity monitor!", __func__); + return ret; + } + } + + /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ + workload_type = + smu_v11_0_conv_power_profile_to_pplib_workload(smu->power_profile_mode); + smu_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, + 1 << workload_type); + + return ret; +} + +static int smu_v11_0_update_od8_settings(struct smu_context *smu, + uint32_t index, + uint32_t value) +{ + struct smu_table_context *table_context = &smu->smu_table; + int ret; + + ret = smu_update_table(smu, TABLE_OVERDRIVE, + table_context->overdrive_table, false); + if (ret) { + pr_err("Failed to export over drive table!\n"); + return ret; + } + + smu_update_specified_od8_value(smu, index, value); + + ret = smu_update_table(smu, TABLE_OVERDRIVE, + table_context->overdrive_table, true); + if (ret) { + pr_err("Failed to import over drive table!\n"); + return ret; + } + + return 0; +} + +static int smu_v11_0_dpm_set_uvd_enable(struct smu_context *smu, bool enable) +{ + if (!smu_feature_is_supported(smu, FEATURE_DPM_VCE_BIT)) + return 0; + + if (enable == smu_feature_is_enabled(smu, FEATURE_DPM_VCE_BIT)) + return 0; + + return smu_feature_set_enabled(smu, FEATURE_DPM_VCE_BIT, enable); +} + +static int smu_v11_0_dpm_set_vce_enable(struct smu_context *smu, bool enable) +{ + if (!smu_feature_is_supported(smu, FEATURE_DPM_UVD_BIT)) + return 0; + + if (enable == smu_feature_is_enabled(smu, FEATURE_DPM_UVD_BIT)) + return 0; + + return smu_feature_set_enabled(smu, FEATURE_DPM_UVD_BIT, enable); +} + +static int smu_v11_0_get_current_rpm(struct smu_context *smu, + uint32_t *current_rpm) +{ + int ret; + + ret = smu_send_smc_msg(smu, SMU_MSG_GetCurrentRpm); + + if (ret) { + pr_err("Attempt to get current RPM from SMC Failed!\n"); + return ret; + } + + smu_read_smc_arg(smu, current_rpm); + + return 0; +} + +static uint32_t +smu_v11_0_get_fan_control_mode(struct smu_context *smu) +{ + if (!smu_feature_is_enabled(smu, FEATURE_FAN_CONTROL_BIT)) + return AMD_FAN_CTRL_MANUAL; + else + return AMD_FAN_CTRL_AUTO; +} + +static int +smu_v11_0_get_fan_speed_percent(struct smu_context *smu, + uint32_t *speed) +{ + int ret = 0; + uint32_t percent = 0; + uint32_t current_rpm; + PPTable_t *pptable = smu->smu_table.driver_pptable; + + ret = smu_v11_0_get_current_rpm(smu, ¤t_rpm); + percent = current_rpm * 100 / pptable->FanMaximumRpm; + *speed = percent > 100 ? 100 : percent; + + return ret; +} + +static int +smu_v11_0_smc_fan_control(struct smu_context *smu, bool start) +{ + int ret = 0; + + if (smu_feature_is_supported(smu, FEATURE_FAN_CONTROL_BIT)) + return 0; + + ret = smu_feature_set_enabled(smu, FEATURE_FAN_CONTROL_BIT, start); + if (ret) + pr_err("[%s]%s smc FAN CONTROL feature failed!", + __func__, (start ? "Start" : "Stop")); + + return ret; +} + +static int +smu_v11_0_set_fan_static_mode(struct smu_context *smu, uint32_t mode) +{ + struct amdgpu_device *adev = smu->adev; + + WREG32_SOC15(THM, 0, mmCG_FDO_CTRL2, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2), + CG_FDO_CTRL2, TMIN, 0)); + WREG32_SOC15(THM, 0, mmCG_FDO_CTRL2, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2), + CG_FDO_CTRL2, FDO_PWM_MODE, mode)); + + return 0; +} + +static int +smu_v11_0_set_fan_speed_percent(struct smu_context *smu, uint32_t speed) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t duty100; + uint32_t duty; + uint64_t tmp64; + bool stop = 0; + + if (speed > 100) + speed = 100; + + if (smu_v11_0_smc_fan_control(smu, stop)) + return -EINVAL; + duty100 = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL1), + CG_FDO_CTRL1, FMAX_DUTY100); + if (!duty100) + return -EINVAL; + + tmp64 = (uint64_t)speed * duty100; + do_div(tmp64, 100); + duty = (uint32_t)tmp64; + + WREG32_SOC15(THM, 0, mmCG_FDO_CTRL0, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL0), + CG_FDO_CTRL0, FDO_STATIC_DUTY, duty)); + + return smu_v11_0_set_fan_static_mode(smu, FDO_PWM_MODE_STATIC); +} + +static int +smu_v11_0_set_fan_control_mode(struct smu_context *smu, + uint32_t mode) +{ + int ret = 0; + bool start = 1; + bool stop = 0; + + switch (mode) { + case AMD_FAN_CTRL_NONE: + ret = smu_v11_0_set_fan_speed_percent(smu, 100); + break; + case AMD_FAN_CTRL_MANUAL: + ret = smu_v11_0_smc_fan_control(smu, stop); + break; + case AMD_FAN_CTRL_AUTO: + ret = smu_v11_0_smc_fan_control(smu, start); + break; + default: + break; + } + + if (ret) { + pr_err("[%s]Set fan control mode failed!", __func__); + return -EINVAL; + } + + return ret; +} + +static int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu, + uint32_t speed) +{ + struct amdgpu_device *adev = smu->adev; + int ret; + uint32_t tach_period, crystal_clock_freq; + bool stop = 0; + + if (!speed) + return -EINVAL; + + mutex_lock(&(smu->mutex)); + ret = smu_v11_0_smc_fan_control(smu, stop); + if (ret) + goto set_fan_speed_rpm_failed; + + crystal_clock_freq = amdgpu_asic_get_xclk(adev); + tach_period = 60 * crystal_clock_freq * 10000 / (8 * speed); + WREG32_SOC15(THM, 0, mmCG_TACH_CTRL, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_TACH_CTRL), + CG_TACH_CTRL, TARGET_PERIOD, + tach_period)); + + ret = smu_v11_0_set_fan_static_mode(smu, FDO_PWM_MODE_STATIC_RPM); + +set_fan_speed_rpm_failed: + mutex_unlock(&(smu->mutex)); + return ret; +} + +static int smu_v11_0_set_xgmi_pstate(struct smu_context *smu, + uint32_t pstate) +{ + int ret = 0; + mutex_lock(&(smu->mutex)); + ret = smu_send_smc_msg_with_param(smu, + SMU_MSG_SetXgmiMode, + pstate ? XGMI_STATE_D0 : XGMI_STATE_D3); + mutex_unlock(&(smu->mutex)); + return ret; +} + +static const struct smu_funcs smu_v11_0_funcs = { + .init_microcode = smu_v11_0_init_microcode, + .load_microcode = smu_v11_0_load_microcode, + .check_fw_status = smu_v11_0_check_fw_status, + .check_fw_version = smu_v11_0_check_fw_version, + .send_smc_msg = smu_v11_0_send_msg, + .send_smc_msg_with_param = smu_v11_0_send_msg_with_param, + .read_smc_arg = smu_v11_0_read_arg, + .read_pptable_from_vbios = smu_v11_0_read_pptable_from_vbios, + .init_smc_tables = smu_v11_0_init_smc_tables, + .fini_smc_tables = smu_v11_0_fini_smc_tables, + .init_power = smu_v11_0_init_power, + .fini_power = smu_v11_0_fini_power, + .get_vbios_bootup_values = smu_v11_0_get_vbios_bootup_values, + .get_clk_info_from_vbios = smu_v11_0_get_clk_info_from_vbios, + .notify_memory_pool_location = smu_v11_0_notify_memory_pool_location, + .check_pptable = smu_v11_0_check_pptable, + .parse_pptable = smu_v11_0_parse_pptable, + .populate_smc_pptable = smu_v11_0_populate_smc_pptable, + .write_pptable = smu_v11_0_write_pptable, + .write_watermarks_table = smu_v11_0_write_watermarks_table, + .set_min_dcef_deep_sleep = smu_v11_0_set_min_dcef_deep_sleep, + .set_tool_table_location = smu_v11_0_set_tool_table_location, + .init_display = smu_v11_0_init_display, + .set_allowed_mask = smu_v11_0_set_allowed_mask, + .get_enabled_mask = smu_v11_0_get_enabled_mask, + .is_dpm_running = smu_v11_0_is_dpm_running, + .system_features_control = smu_v11_0_system_features_control, + .update_feature_enable_state = smu_v11_0_update_feature_enable_state, + .notify_display_change = smu_v11_0_notify_display_change, + .get_power_limit = smu_v11_0_get_power_limit, + .set_power_limit = smu_v11_0_set_power_limit, + .get_current_clk_freq = smu_v11_0_get_current_clk_freq, + .init_max_sustainable_clocks = smu_v11_0_init_max_sustainable_clocks, + .start_thermal_control = smu_v11_0_start_thermal_control, + .read_sensor = smu_v11_0_read_sensor, + .set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk, + .display_clock_voltage_request = smu_v11_0_display_clock_voltage_request, + .set_watermarks_for_clock_ranges = smu_v11_0_set_watermarks_for_clock_ranges, + .get_sclk = smu_v11_0_dpm_get_sclk, + .get_mclk = smu_v11_0_dpm_get_mclk, + .set_od8_default_settings = smu_v11_0_set_od8_default_settings, + .conv_power_profile_to_pplib_workload = smu_v11_0_conv_power_profile_to_pplib_workload, + .get_power_profile_mode = smu_v11_0_get_power_profile_mode, + .set_power_profile_mode = smu_v11_0_set_power_profile_mode, + .update_od8_settings = smu_v11_0_update_od8_settings, + .dpm_set_uvd_enable = smu_v11_0_dpm_set_uvd_enable, + .dpm_set_vce_enable = smu_v11_0_dpm_set_vce_enable, + .get_current_rpm = smu_v11_0_get_current_rpm, + .get_fan_control_mode = smu_v11_0_get_fan_control_mode, + .set_fan_control_mode = smu_v11_0_set_fan_control_mode, + .get_fan_speed_percent = smu_v11_0_get_fan_speed_percent, + .set_fan_speed_percent = smu_v11_0_set_fan_speed_percent, + .set_fan_speed_rpm = smu_v11_0_set_fan_speed_rpm, + .set_xgmi_pstate = smu_v11_0_set_xgmi_pstate, +}; + +void smu_v11_0_set_smu_funcs(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + + smu->funcs = &smu_v11_0_funcs; + switch (adev->asic_type) { + case CHIP_VEGA20: + vega20_set_ppt_funcs(smu); + break; + default: + pr_warn("Unknown asic for smu11\n"); + } +} diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c index d111dd4e03d7..6d11076a79ba 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c @@ -212,6 +212,10 @@ static int smu10_start_smu(struct pp_hwmgr *hwmgr) hwmgr->smu_version = smu10_read_arg_from_smc(hwmgr); adev->pm.fw_version = hwmgr->smu_version >> 8; + if (adev->rev_id < 0x8 && adev->pdev->device != 0x15d8 && + adev->pm.fw_version < 0x1e45) + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + if (smu10_verify_smc_interface(hwmgr)) return -EINVAL; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c index ba00744c3413..f301a73f6df1 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c @@ -367,6 +367,26 @@ static int vega20_set_tools_address(struct pp_hwmgr *hwmgr) return ret; } +int vega20_set_pptable_driver_address(struct pp_hwmgr *hwmgr) +{ + struct vega20_smumgr *priv = + (struct vega20_smumgr *)(hwmgr->smu_backend); + int ret = 0; + + PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetDriverDramAddrHigh, + upper_32_bits(priv->smu_tables.entry[TABLE_PPTABLE].mc_addr))) == 0, + "[SetPPtabeDriverAddress] Attempt to Set Dram Addr High Failed!", + return ret); + PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetDriverDramAddrLow, + lower_32_bits(priv->smu_tables.entry[TABLE_PPTABLE].mc_addr))) == 0, + "[SetPPtabeDriverAddress] Attempt to Set Dram Addr Low Failed!", + return ret); + + return ret; +} + static int vega20_smu_init(struct pp_hwmgr *hwmgr) { struct vega20_smumgr *priv; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.h index 77349c3f0162..ec953ab13e87 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.h @@ -55,6 +55,7 @@ int vega20_set_activity_monitor_coeff(struct pp_hwmgr *hwmgr, uint8_t *table, uint16_t workload_type); int vega20_get_activity_monitor_coeff(struct pp_hwmgr *hwmgr, uint8_t *table, uint16_t workload_type); +int vega20_set_pptable_driver_address(struct pp_hwmgr *hwmgr); #endif diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c new file mode 100644 index 000000000000..8fafcbdb1dfd --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c @@ -0,0 +1,2413 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "pp_debug.h" +#include <linux/firmware.h> +#include "amdgpu.h" +#include "amdgpu_smu.h" +#include "atomfirmware.h" +#include "amdgpu_atomfirmware.h" +#include "smu_v11_0.h" +#include "smu11_driver_if.h" +#include "soc15_common.h" +#include "atom.h" +#include "power_state.h" +#include "vega20_ppt.h" +#include "vega20_pptable.h" +#include "vega20_ppsmc.h" +#include "nbio/nbio_7_4_sh_mask.h" + +#define smnPCIE_LC_SPEED_CNTL 0x11140290 +#define smnPCIE_LC_LINK_WIDTH_CNTL 0x11140288 + +#define MSG_MAP(msg) \ + [SMU_MSG_##msg] = PPSMC_MSG_##msg + +static int vega20_message_map[SMU_MSG_MAX_COUNT] = { + MSG_MAP(TestMessage), + MSG_MAP(GetSmuVersion), + MSG_MAP(GetDriverIfVersion), + MSG_MAP(SetAllowedFeaturesMaskLow), + MSG_MAP(SetAllowedFeaturesMaskHigh), + MSG_MAP(EnableAllSmuFeatures), + MSG_MAP(DisableAllSmuFeatures), + MSG_MAP(EnableSmuFeaturesLow), + MSG_MAP(EnableSmuFeaturesHigh), + MSG_MAP(DisableSmuFeaturesLow), + MSG_MAP(DisableSmuFeaturesHigh), + MSG_MAP(GetEnabledSmuFeaturesLow), + MSG_MAP(GetEnabledSmuFeaturesHigh), + MSG_MAP(SetWorkloadMask), + MSG_MAP(SetPptLimit), + MSG_MAP(SetDriverDramAddrHigh), + MSG_MAP(SetDriverDramAddrLow), + MSG_MAP(SetToolsDramAddrHigh), + MSG_MAP(SetToolsDramAddrLow), + MSG_MAP(TransferTableSmu2Dram), + MSG_MAP(TransferTableDram2Smu), + MSG_MAP(UseDefaultPPTable), + MSG_MAP(UseBackupPPTable), + MSG_MAP(RunBtc), + MSG_MAP(RequestI2CBus), + MSG_MAP(ReleaseI2CBus), + MSG_MAP(SetFloorSocVoltage), + MSG_MAP(SoftReset), + MSG_MAP(StartBacoMonitor), + MSG_MAP(CancelBacoMonitor), + MSG_MAP(EnterBaco), + MSG_MAP(SetSoftMinByFreq), + MSG_MAP(SetSoftMaxByFreq), + MSG_MAP(SetHardMinByFreq), + MSG_MAP(SetHardMaxByFreq), + MSG_MAP(GetMinDpmFreq), + MSG_MAP(GetMaxDpmFreq), + MSG_MAP(GetDpmFreqByIndex), + MSG_MAP(GetDpmClockFreq), + MSG_MAP(GetSsVoltageByDpm), + MSG_MAP(SetMemoryChannelConfig), + MSG_MAP(SetGeminiMode), + MSG_MAP(SetGeminiApertureHigh), + MSG_MAP(SetGeminiApertureLow), + MSG_MAP(SetMinLinkDpmByIndex), + MSG_MAP(OverridePcieParameters), + MSG_MAP(OverDriveSetPercentage), + MSG_MAP(SetMinDeepSleepDcefclk), + MSG_MAP(ReenableAcDcInterrupt), + MSG_MAP(NotifyPowerSource), + MSG_MAP(SetUclkFastSwitch), + MSG_MAP(SetUclkDownHyst), + MSG_MAP(GetCurrentRpm), + MSG_MAP(SetVideoFps), + MSG_MAP(SetTjMax), + MSG_MAP(SetFanTemperatureTarget), + MSG_MAP(PrepareMp1ForUnload), + MSG_MAP(DramLogSetDramAddrHigh), + MSG_MAP(DramLogSetDramAddrLow), + MSG_MAP(DramLogSetDramSize), + MSG_MAP(SetFanMaxRpm), + MSG_MAP(SetFanMinPwm), + MSG_MAP(ConfigureGfxDidt), + MSG_MAP(NumOfDisplays), + MSG_MAP(RemoveMargins), + MSG_MAP(ReadSerialNumTop32), + MSG_MAP(ReadSerialNumBottom32), + MSG_MAP(SetSystemVirtualDramAddrHigh), + MSG_MAP(SetSystemVirtualDramAddrLow), + MSG_MAP(WaflTest), + MSG_MAP(SetFclkGfxClkRatio), + MSG_MAP(AllowGfxOff), + MSG_MAP(DisallowGfxOff), + MSG_MAP(GetPptLimit), + MSG_MAP(GetDcModeMaxDpmFreq), + MSG_MAP(GetDebugData), + MSG_MAP(SetXgmiMode), + MSG_MAP(RunAfllBtc), + MSG_MAP(ExitBaco), + MSG_MAP(PrepareMp1ForReset), + MSG_MAP(PrepareMp1ForShutdown), + MSG_MAP(SetMGpuFanBoostLimitRpm), + MSG_MAP(GetAVFSVoltageByDpm), +}; + +static int vega20_get_smu_msg_index(struct smu_context *smc, uint32_t index) +{ + int val; + + if (index >= SMU_MSG_MAX_COUNT) + return -EINVAL; + + val = vega20_message_map[index]; + if (val > PPSMC_Message_Count) + return -EINVAL; + + return val; +} + +static int vega20_allocate_dpm_context(struct smu_context *smu) +{ + struct smu_dpm_context *smu_dpm = &smu->smu_dpm; + + if (smu_dpm->dpm_context) + return -EINVAL; + + smu_dpm->dpm_context = kzalloc(sizeof(struct vega20_dpm_table), + GFP_KERNEL); + if (!smu_dpm->dpm_context) + return -ENOMEM; + + if (smu_dpm->golden_dpm_context) + return -EINVAL; + + smu_dpm->golden_dpm_context = kzalloc(sizeof(struct vega20_dpm_table), + GFP_KERNEL); + if (!smu_dpm->golden_dpm_context) + return -ENOMEM; + + smu_dpm->dpm_context_size = sizeof(struct vega20_dpm_table); + + smu_dpm->dpm_current_power_state = kzalloc(sizeof(struct smu_power_state), + GFP_KERNEL); + if (!smu_dpm->dpm_current_power_state) + return -ENOMEM; + + smu_dpm->dpm_request_power_state = kzalloc(sizeof(struct smu_power_state), + GFP_KERNEL); + if (!smu_dpm->dpm_request_power_state) + return -ENOMEM; + + return 0; +} + +static int vega20_setup_od8_information(struct smu_context *smu) +{ + ATOM_Vega20_POWERPLAYTABLE *powerplay_table = NULL; + struct smu_table_context *table_context = &smu->smu_table; + + uint32_t od_feature_count, od_feature_array_size, + od_setting_count, od_setting_array_size; + + if (!table_context->power_play_table) + return -EINVAL; + + powerplay_table = table_context->power_play_table; + + if (powerplay_table->OverDrive8Table.ucODTableRevision == 1) { + /* Setup correct ODFeatureCount, and store ODFeatureArray from + * powerplay table to od_feature_capabilities */ + od_feature_count = + (le32_to_cpu(powerplay_table->OverDrive8Table.ODFeatureCount) > + ATOM_VEGA20_ODFEATURE_COUNT) ? + ATOM_VEGA20_ODFEATURE_COUNT : + le32_to_cpu(powerplay_table->OverDrive8Table.ODFeatureCount); + + od_feature_array_size = sizeof(uint8_t) * od_feature_count; + + if (table_context->od_feature_capabilities) + return -EINVAL; + + table_context->od_feature_capabilities = kmemdup(&powerplay_table->OverDrive8Table.ODFeatureCapabilities, + od_feature_array_size, + GFP_KERNEL); + if (!table_context->od_feature_capabilities) + return -ENOMEM; + + /* Setup correct ODSettingCount, and store ODSettingArray from + * powerplay table to od_settings_max and od_setting_min */ + od_setting_count = + (le32_to_cpu(powerplay_table->OverDrive8Table.ODSettingCount) > + ATOM_VEGA20_ODSETTING_COUNT) ? + ATOM_VEGA20_ODSETTING_COUNT : + le32_to_cpu(powerplay_table->OverDrive8Table.ODSettingCount); + + od_setting_array_size = sizeof(uint32_t) * od_setting_count; + + if (table_context->od_settings_max) + return -EINVAL; + + table_context->od_settings_max = kmemdup(&powerplay_table->OverDrive8Table.ODSettingsMax, + od_setting_array_size, + GFP_KERNEL); + + if (!table_context->od_settings_max) { + kfree(table_context->od_feature_capabilities); + table_context->od_feature_capabilities = NULL; + return -ENOMEM; + } + + if (table_context->od_settings_min) + return -EINVAL; + + table_context->od_settings_min = kmemdup(&powerplay_table->OverDrive8Table.ODSettingsMin, + od_setting_array_size, + GFP_KERNEL); + + if (!table_context->od_settings_min) { + kfree(table_context->od_feature_capabilities); + table_context->od_feature_capabilities = NULL; + kfree(table_context->od_settings_max); + table_context->od_settings_max = NULL; + return -ENOMEM; + } + } + + return 0; +} + +static int vega20_store_powerplay_table(struct smu_context *smu) +{ + ATOM_Vega20_POWERPLAYTABLE *powerplay_table = NULL; + struct smu_table_context *table_context = &smu->smu_table; + int ret; + + if (!table_context->power_play_table) + return -EINVAL; + + powerplay_table = table_context->power_play_table; + + memcpy(table_context->driver_pptable, &powerplay_table->smcPPTable, + sizeof(PPTable_t)); + + table_context->software_shutdown_temp = powerplay_table->usSoftwareShutdownTemp; + table_context->thermal_controller_type = powerplay_table->ucThermalControllerType; + table_context->TDPODLimit = le32_to_cpu(powerplay_table->OverDrive8Table.ODSettingsMax[ATOM_VEGA20_ODSETTING_POWERPERCENTAGE]); + + ret = vega20_setup_od8_information(smu); + + return ret; +} + +static int vega20_append_powerplay_table(struct smu_context *smu) +{ + struct smu_table_context *table_context = &smu->smu_table; + PPTable_t *smc_pptable = table_context->driver_pptable; + struct atom_smc_dpm_info_v4_4 *smc_dpm_table; + int index, i, ret; + + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + smc_dpm_info); + + ret = smu_get_atom_data_table(smu, index, NULL, NULL, NULL, + (uint8_t **)&smc_dpm_table); + if (ret) + return ret; + + smc_pptable->MaxVoltageStepGfx = smc_dpm_table->maxvoltagestepgfx; + smc_pptable->MaxVoltageStepSoc = smc_dpm_table->maxvoltagestepsoc; + + smc_pptable->VddGfxVrMapping = smc_dpm_table->vddgfxvrmapping; + smc_pptable->VddSocVrMapping = smc_dpm_table->vddsocvrmapping; + smc_pptable->VddMem0VrMapping = smc_dpm_table->vddmem0vrmapping; + smc_pptable->VddMem1VrMapping = smc_dpm_table->vddmem1vrmapping; + + smc_pptable->GfxUlvPhaseSheddingMask = smc_dpm_table->gfxulvphasesheddingmask; + smc_pptable->SocUlvPhaseSheddingMask = smc_dpm_table->soculvphasesheddingmask; + smc_pptable->ExternalSensorPresent = smc_dpm_table->externalsensorpresent; + + smc_pptable->GfxMaxCurrent = smc_dpm_table->gfxmaxcurrent; + smc_pptable->GfxOffset = smc_dpm_table->gfxoffset; + smc_pptable->Padding_TelemetryGfx = smc_dpm_table->padding_telemetrygfx; + + smc_pptable->SocMaxCurrent = smc_dpm_table->socmaxcurrent; + smc_pptable->SocOffset = smc_dpm_table->socoffset; + smc_pptable->Padding_TelemetrySoc = smc_dpm_table->padding_telemetrysoc; + + smc_pptable->Mem0MaxCurrent = smc_dpm_table->mem0maxcurrent; + smc_pptable->Mem0Offset = smc_dpm_table->mem0offset; + smc_pptable->Padding_TelemetryMem0 = smc_dpm_table->padding_telemetrymem0; + + smc_pptable->Mem1MaxCurrent = smc_dpm_table->mem1maxcurrent; + smc_pptable->Mem1Offset = smc_dpm_table->mem1offset; + smc_pptable->Padding_TelemetryMem1 = smc_dpm_table->padding_telemetrymem1; + + smc_pptable->AcDcGpio = smc_dpm_table->acdcgpio; + smc_pptable->AcDcPolarity = smc_dpm_table->acdcpolarity; + smc_pptable->VR0HotGpio = smc_dpm_table->vr0hotgpio; + smc_pptable->VR0HotPolarity = smc_dpm_table->vr0hotpolarity; + + smc_pptable->VR1HotGpio = smc_dpm_table->vr1hotgpio; + smc_pptable->VR1HotPolarity = smc_dpm_table->vr1hotpolarity; + smc_pptable->Padding1 = smc_dpm_table->padding1; + smc_pptable->Padding2 = smc_dpm_table->padding2; + + smc_pptable->LedPin0 = smc_dpm_table->ledpin0; + smc_pptable->LedPin1 = smc_dpm_table->ledpin1; + smc_pptable->LedPin2 = smc_dpm_table->ledpin2; + + smc_pptable->PllGfxclkSpreadEnabled = smc_dpm_table->pllgfxclkspreadenabled; + smc_pptable->PllGfxclkSpreadPercent = smc_dpm_table->pllgfxclkspreadpercent; + smc_pptable->PllGfxclkSpreadFreq = smc_dpm_table->pllgfxclkspreadfreq; + + smc_pptable->UclkSpreadEnabled = 0; + smc_pptable->UclkSpreadPercent = smc_dpm_table->uclkspreadpercent; + smc_pptable->UclkSpreadFreq = smc_dpm_table->uclkspreadfreq; + + smc_pptable->FclkSpreadEnabled = smc_dpm_table->fclkspreadenabled; + smc_pptable->FclkSpreadPercent = smc_dpm_table->fclkspreadpercent; + smc_pptable->FclkSpreadFreq = smc_dpm_table->fclkspreadfreq; + + smc_pptable->FllGfxclkSpreadEnabled = smc_dpm_table->fllgfxclkspreadenabled; + smc_pptable->FllGfxclkSpreadPercent = smc_dpm_table->fllgfxclkspreadpercent; + smc_pptable->FllGfxclkSpreadFreq = smc_dpm_table->fllgfxclkspreadfreq; + + for (i = 0; i < I2C_CONTROLLER_NAME_COUNT; i++) { + smc_pptable->I2cControllers[i].Enabled = + smc_dpm_table->i2ccontrollers[i].enabled; + smc_pptable->I2cControllers[i].SlaveAddress = + smc_dpm_table->i2ccontrollers[i].slaveaddress; + smc_pptable->I2cControllers[i].ControllerPort = + smc_dpm_table->i2ccontrollers[i].controllerport; + smc_pptable->I2cControllers[i].ThermalThrottler = + smc_dpm_table->i2ccontrollers[i].thermalthrottler; + smc_pptable->I2cControllers[i].I2cProtocol = + smc_dpm_table->i2ccontrollers[i].i2cprotocol; + smc_pptable->I2cControllers[i].I2cSpeed = + smc_dpm_table->i2ccontrollers[i].i2cspeed; + } + + return 0; +} + +static int vega20_check_powerplay_table(struct smu_context *smu) +{ + ATOM_Vega20_POWERPLAYTABLE *powerplay_table = NULL; + struct smu_table_context *table_context = &smu->smu_table; + + powerplay_table = table_context->power_play_table; + + if (powerplay_table->sHeader.format_revision < ATOM_VEGA20_TABLE_REVISION_VEGA20) { + pr_err("Unsupported PPTable format!"); + return -EINVAL; + } + + if (!powerplay_table->sHeader.structuresize) { + pr_err("Invalid PowerPlay Table!"); + return -EINVAL; + } + + return 0; +} + +static int vega20_run_btc_afll(struct smu_context *smu) +{ + return smu_send_smc_msg(smu, SMU_MSG_RunAfllBtc); +} + +static int +vega20_get_unallowed_feature_mask(struct smu_context *smu, + uint32_t *feature_mask, uint32_t num) +{ + if (num > 2) + return -EINVAL; + + feature_mask[0] = 0xE0041C00; + feature_mask[1] = 0xFFFFFFFE; /* bit32~bit63 is Unsupported */ + + return 0; +} + +static enum +amd_pm_state_type vega20_get_current_power_state(struct smu_context *smu) +{ + enum amd_pm_state_type pm_type; + struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); + + if (!smu_dpm_ctx->dpm_context || + !smu_dpm_ctx->dpm_current_power_state) + return -EINVAL; + + mutex_lock(&(smu->mutex)); + switch (smu_dpm_ctx->dpm_current_power_state->classification.ui_label) { + case SMU_STATE_UI_LABEL_BATTERY: + pm_type = POWER_STATE_TYPE_BATTERY; + break; + case SMU_STATE_UI_LABEL_BALLANCED: + pm_type = POWER_STATE_TYPE_BALANCED; + break; + case SMU_STATE_UI_LABEL_PERFORMANCE: + pm_type = POWER_STATE_TYPE_PERFORMANCE; + break; + default: + if (smu_dpm_ctx->dpm_current_power_state->classification.flags & SMU_STATE_CLASSIFICATION_FLAG_BOOT) + pm_type = POWER_STATE_TYPE_INTERNAL_BOOT; + else + pm_type = POWER_STATE_TYPE_DEFAULT; + break; + } + mutex_unlock(&(smu->mutex)); + + return pm_type; +} + +static int +vega20_set_single_dpm_table(struct smu_context *smu, + struct vega20_single_dpm_table *single_dpm_table, + PPCLK_e clk_id) +{ + int ret = 0; + uint32_t i, num_of_levels = 0, clk; + + ret = smu_send_smc_msg_with_param(smu, + SMU_MSG_GetDpmFreqByIndex, + (clk_id << 16 | 0xFF)); + if (ret) { + pr_err("[GetNumOfDpmLevel] failed to get dpm levels!"); + return ret; + } + + smu_read_smc_arg(smu, &num_of_levels); + if (!num_of_levels) { + pr_err("[GetNumOfDpmLevel] number of clk levels is invalid!"); + return -EINVAL; + } + + single_dpm_table->count = num_of_levels; + + for (i = 0; i < num_of_levels; i++) { + ret = smu_send_smc_msg_with_param(smu, + SMU_MSG_GetDpmFreqByIndex, + (clk_id << 16 | i)); + if (ret) { + pr_err("[GetDpmFreqByIndex] failed to get dpm freq by index!"); + return ret; + } + smu_read_smc_arg(smu, &clk); + if (!clk) { + pr_err("[GetDpmFreqByIndex] clk value is invalid!"); + return -EINVAL; + } + single_dpm_table->dpm_levels[i].value = clk; + single_dpm_table->dpm_levels[i].enabled = true; + } + return 0; +} + +static void vega20_init_single_dpm_state(struct vega20_dpm_state *dpm_state) +{ + dpm_state->soft_min_level = 0x0; + dpm_state->soft_max_level = 0xffff; + dpm_state->hard_min_level = 0x0; + dpm_state->hard_max_level = 0xffff; +} + +static int vega20_set_default_dpm_table(struct smu_context *smu) +{ + int ret; + + struct smu_dpm_context *smu_dpm = &smu->smu_dpm; + struct vega20_dpm_table *dpm_table = NULL; + struct vega20_single_dpm_table *single_dpm_table; + + dpm_table = smu_dpm->dpm_context; + + /* socclk */ + single_dpm_table = &(dpm_table->soc_table); + + if (smu_feature_is_enabled(smu, FEATURE_DPM_SOCCLK_BIT)) { + ret = vega20_set_single_dpm_table(smu, single_dpm_table, + PPCLK_SOCCLK); + if (ret) { + pr_err("[SetupDefaultDpmTable] failed to get socclk dpm levels!"); + return ret; + } + } else { + single_dpm_table->count = 1; + single_dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.socclk / 100; + } + vega20_init_single_dpm_state(&(single_dpm_table->dpm_state)); + + /* gfxclk */ + single_dpm_table = &(dpm_table->gfx_table); + + if (smu_feature_is_enabled(smu, FEATURE_DPM_GFXCLK_BIT)) { + ret = vega20_set_single_dpm_table(smu, single_dpm_table, + PPCLK_GFXCLK); + if (ret) { + pr_err("[SetupDefaultDpmTable] failed to get gfxclk dpm levels!"); + return ret; + } + } else { + single_dpm_table->count = 1; + single_dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.gfxclk / 100; + } + vega20_init_single_dpm_state(&(single_dpm_table->dpm_state)); + + /* memclk */ + single_dpm_table = &(dpm_table->mem_table); + + if (smu_feature_is_enabled(smu, FEATURE_DPM_UCLK_BIT)) { + ret = vega20_set_single_dpm_table(smu, single_dpm_table, + PPCLK_UCLK); + if (ret) { + pr_err("[SetupDefaultDpmTable] failed to get memclk dpm levels!"); + return ret; + } + } else { + single_dpm_table->count = 1; + single_dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.uclk / 100; + } + vega20_init_single_dpm_state(&(single_dpm_table->dpm_state)); + + /* eclk */ + single_dpm_table = &(dpm_table->eclk_table); + + if (smu_feature_is_enabled(smu, FEATURE_DPM_VCE_BIT)) { + ret = vega20_set_single_dpm_table(smu, single_dpm_table, PPCLK_ECLK); + if (ret) { + pr_err("[SetupDefaultDpmTable] failed to get eclk dpm levels!"); + return ret; + } + } else { + single_dpm_table->count = 1; + single_dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.eclk / 100; + } + vega20_init_single_dpm_state(&(single_dpm_table->dpm_state)); + + /* vclk */ + single_dpm_table = &(dpm_table->vclk_table); + + if (smu_feature_is_enabled(smu, FEATURE_DPM_UVD_BIT)) { + ret = vega20_set_single_dpm_table(smu, single_dpm_table, PPCLK_VCLK); + if (ret) { + pr_err("[SetupDefaultDpmTable] failed to get vclk dpm levels!"); + return ret; + } + } else { + single_dpm_table->count = 1; + single_dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.vclk / 100; + } + vega20_init_single_dpm_state(&(single_dpm_table->dpm_state)); + + /* dclk */ + single_dpm_table = &(dpm_table->dclk_table); + + if (smu_feature_is_enabled(smu, FEATURE_DPM_UVD_BIT)) { + ret = vega20_set_single_dpm_table(smu, single_dpm_table, PPCLK_DCLK); + if (ret) { + pr_err("[SetupDefaultDpmTable] failed to get dclk dpm levels!"); + return ret; + } + } else { + single_dpm_table->count = 1; + single_dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.dclk / 100; + } + vega20_init_single_dpm_state(&(single_dpm_table->dpm_state)); + + /* dcefclk */ + single_dpm_table = &(dpm_table->dcef_table); + + if (smu_feature_is_enabled(smu, FEATURE_DPM_DCEFCLK_BIT)) { + ret = vega20_set_single_dpm_table(smu, single_dpm_table, + PPCLK_DCEFCLK); + if (ret) { + pr_err("[SetupDefaultDpmTable] failed to get dcefclk dpm levels!"); + return ret; + } + } else { + single_dpm_table->count = 1; + single_dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.dcefclk / 100; + } + vega20_init_single_dpm_state(&(single_dpm_table->dpm_state)); + + /* pixclk */ + single_dpm_table = &(dpm_table->pixel_table); + + if (smu_feature_is_enabled(smu, FEATURE_DPM_DCEFCLK_BIT)) { + ret = vega20_set_single_dpm_table(smu, single_dpm_table, + PPCLK_PIXCLK); + if (ret) { + pr_err("[SetupDefaultDpmTable] failed to get pixclk dpm levels!"); + return ret; + } + } else { + single_dpm_table->count = 0; + } + vega20_init_single_dpm_state(&(single_dpm_table->dpm_state)); + + /* dispclk */ + single_dpm_table = &(dpm_table->display_table); + + if (smu_feature_is_enabled(smu, FEATURE_DPM_DCEFCLK_BIT)) { + ret = vega20_set_single_dpm_table(smu, single_dpm_table, + PPCLK_DISPCLK); + if (ret) { + pr_err("[SetupDefaultDpmTable] failed to get dispclk dpm levels!"); + return ret; + } + } else { + single_dpm_table->count = 0; + } + vega20_init_single_dpm_state(&(single_dpm_table->dpm_state)); + + /* phyclk */ + single_dpm_table = &(dpm_table->phy_table); + + if (smu_feature_is_enabled(smu, FEATURE_DPM_DCEFCLK_BIT)) { + ret = vega20_set_single_dpm_table(smu, single_dpm_table, + PPCLK_PHYCLK); + if (ret) { + pr_err("[SetupDefaultDpmTable] failed to get phyclk dpm levels!"); + return ret; + } + } else { + single_dpm_table->count = 0; + } + vega20_init_single_dpm_state(&(single_dpm_table->dpm_state)); + + /* fclk */ + single_dpm_table = &(dpm_table->fclk_table); + + if (smu_feature_is_enabled(smu,FEATURE_DPM_FCLK_BIT)) { + ret = vega20_set_single_dpm_table(smu, single_dpm_table, + PPCLK_FCLK); + if (ret) { + pr_err("[SetupDefaultDpmTable] failed to get fclk dpm levels!"); + return ret; + } + } else { + single_dpm_table->count = 0; + } + vega20_init_single_dpm_state(&(single_dpm_table->dpm_state)); + + memcpy(smu_dpm->golden_dpm_context, dpm_table, + sizeof(struct vega20_dpm_table)); + + return 0; +} + +static int vega20_populate_umd_state_clk(struct smu_context *smu) +{ + struct smu_dpm_context *smu_dpm = &smu->smu_dpm; + struct vega20_dpm_table *dpm_table = NULL; + struct vega20_single_dpm_table *gfx_table = NULL; + struct vega20_single_dpm_table *mem_table = NULL; + + dpm_table = smu_dpm->dpm_context; + gfx_table = &(dpm_table->gfx_table); + mem_table = &(dpm_table->mem_table); + + smu->pstate_sclk = gfx_table->dpm_levels[0].value; + smu->pstate_mclk = mem_table->dpm_levels[0].value; + + if (gfx_table->count > VEGA20_UMD_PSTATE_GFXCLK_LEVEL && + mem_table->count > VEGA20_UMD_PSTATE_MCLK_LEVEL) { + smu->pstate_sclk = gfx_table->dpm_levels[VEGA20_UMD_PSTATE_GFXCLK_LEVEL].value; + smu->pstate_mclk = mem_table->dpm_levels[VEGA20_UMD_PSTATE_MCLK_LEVEL].value; + } + + smu->pstate_sclk = smu->pstate_sclk * 100; + smu->pstate_mclk = smu->pstate_mclk * 100; + + return 0; +} + +static int vega20_get_clk_table(struct smu_context *smu, + struct pp_clock_levels_with_latency *clocks, + struct vega20_single_dpm_table *dpm_table) +{ + int i, count; + + count = (dpm_table->count > MAX_NUM_CLOCKS) ? MAX_NUM_CLOCKS : dpm_table->count; + clocks->num_levels = count; + + for (i = 0; i < count; i++) { + clocks->data[i].clocks_in_khz = + dpm_table->dpm_levels[i].value * 1000; + clocks->data[i].latency_in_us = 0; + } + + return 0; +} + +static int vega20_print_clk_levels(struct smu_context *smu, + enum pp_clock_type type, char *buf) +{ + int i, now, size = 0; + int ret = 0; + uint32_t gen_speed, lane_width; + struct amdgpu_device *adev = smu->adev; + struct pp_clock_levels_with_latency clocks; + struct vega20_single_dpm_table *single_dpm_table; + struct smu_table_context *table_context = &smu->smu_table; + struct smu_dpm_context *smu_dpm = &smu->smu_dpm; + struct vega20_dpm_table *dpm_table = NULL; + struct vega20_od8_settings *od8_settings = + (struct vega20_od8_settings *)table_context->od8_settings; + OverDriveTable_t *od_table = + (OverDriveTable_t *)(table_context->overdrive_table); + PPTable_t *pptable = (PPTable_t *)table_context->driver_pptable; + + dpm_table = smu_dpm->dpm_context; + + switch (type) { + case PP_SCLK: + ret = smu_get_current_clk_freq(smu, PPCLK_GFXCLK, &now); + if (ret) { + pr_err("Attempt to get current gfx clk Failed!"); + return ret; + } + + single_dpm_table = &(dpm_table->gfx_table); + ret = vega20_get_clk_table(smu, &clocks, single_dpm_table); + if (ret) { + pr_err("Attempt to get gfx clk levels Failed!"); + return ret; + } + + for (i = 0; i < clocks.num_levels; i++) + size += sprintf(buf + size, "%d: %uMhz %s\n", i, + clocks.data[i].clocks_in_khz / 1000, + (clocks.data[i].clocks_in_khz == now * 10) + ? "*" : ""); + break; + + case PP_MCLK: + ret = smu_get_current_clk_freq(smu, PPCLK_UCLK, &now); + if (ret) { + pr_err("Attempt to get current mclk Failed!"); + return ret; + } + + single_dpm_table = &(dpm_table->mem_table); + ret = vega20_get_clk_table(smu, &clocks, single_dpm_table); + if (ret) { + pr_err("Attempt to get memory clk levels Failed!"); + return ret; + } + + for (i = 0; i < clocks.num_levels; i++) + size += sprintf(buf + size, "%d: %uMhz %s\n", + i, clocks.data[i].clocks_in_khz / 1000, + (clocks.data[i].clocks_in_khz == now * 10) + ? "*" : ""); + break; + + case PP_SOCCLK: + ret = smu_get_current_clk_freq(smu, PPCLK_SOCCLK, &now); + if (ret) { + pr_err("Attempt to get current socclk Failed!"); + return ret; + } + + single_dpm_table = &(dpm_table->soc_table); + ret = vega20_get_clk_table(smu, &clocks, single_dpm_table); + if (ret) { + pr_err("Attempt to get socclk levels Failed!"); + return ret; + } + + for (i = 0; i < clocks.num_levels; i++) + size += sprintf(buf + size, "%d: %uMhz %s\n", + i, clocks.data[i].clocks_in_khz / 1000, + (clocks.data[i].clocks_in_khz == now * 10) + ? "*" : ""); + break; + + case PP_FCLK: + ret = smu_get_current_clk_freq(smu, PPCLK_FCLK, &now); + if (ret) { + pr_err("Attempt to get current fclk Failed!"); + return ret; + } + + single_dpm_table = &(dpm_table->fclk_table); + for (i = 0; i < single_dpm_table->count; i++) + size += sprintf(buf + size, "%d: %uMhz %s\n", + i, single_dpm_table->dpm_levels[i].value, + (single_dpm_table->dpm_levels[i].value == now / 100) + ? "*" : ""); + break; + + case PP_DCEFCLK: + ret = smu_get_current_clk_freq(smu, PPCLK_DCEFCLK, &now); + if (ret) { + pr_err("Attempt to get current dcefclk Failed!"); + return ret; + } + + single_dpm_table = &(dpm_table->dcef_table); + ret = vega20_get_clk_table(smu, &clocks, single_dpm_table); + if (ret) { + pr_err("Attempt to get dcefclk levels Failed!"); + return ret; + } + + for (i = 0; i < clocks.num_levels; i++) + size += sprintf(buf + size, "%d: %uMhz %s\n", + i, clocks.data[i].clocks_in_khz / 1000, + (clocks.data[i].clocks_in_khz == now * 10) ? "*" : ""); + break; + + case PP_PCIE: + gen_speed = (RREG32_PCIE(smnPCIE_LC_SPEED_CNTL) & + PSWUSP0_PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK) + >> PSWUSP0_PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT; + lane_width = (RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL) & + PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK) + >> PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT; + for (i = 0; i < NUM_LINK_LEVELS; i++) + size += sprintf(buf + size, "%d: %s %s %dMhz %s\n", i, + (pptable->PcieGenSpeed[i] == 0) ? "2.5GT/s," : + (pptable->PcieGenSpeed[i] == 1) ? "5.0GT/s," : + (pptable->PcieGenSpeed[i] == 2) ? "8.0GT/s," : + (pptable->PcieGenSpeed[i] == 3) ? "16.0GT/s," : "", + (pptable->PcieLaneCount[i] == 1) ? "x1" : + (pptable->PcieLaneCount[i] == 2) ? "x2" : + (pptable->PcieLaneCount[i] == 3) ? "x4" : + (pptable->PcieLaneCount[i] == 4) ? "x8" : + (pptable->PcieLaneCount[i] == 5) ? "x12" : + (pptable->PcieLaneCount[i] == 6) ? "x16" : "", + pptable->LclkFreq[i], + (gen_speed == pptable->PcieGenSpeed[i]) && + (lane_width == pptable->PcieLaneCount[i]) ? + "*" : ""); + break; + + case OD_SCLK: + if (od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FMIN].feature_id && + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FMAX].feature_id) { + size = sprintf(buf, "%s:\n", "OD_SCLK"); + size += sprintf(buf + size, "0: %10uMhz\n", + od_table->GfxclkFmin); + size += sprintf(buf + size, "1: %10uMhz\n", + od_table->GfxclkFmax); + } + + break; + + case OD_MCLK: + if (od8_settings->od8_settings_array[OD8_SETTING_UCLK_FMAX].feature_id) { + size = sprintf(buf, "%s:\n", "OD_MCLK"); + size += sprintf(buf + size, "1: %10uMhz\n", + od_table->UclkFmax); + } + + break; + + case OD_VDDC_CURVE: + if (od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FREQ1].feature_id && + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FREQ2].feature_id && + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FREQ3].feature_id && + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_VOLTAGE1].feature_id && + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_VOLTAGE2].feature_id && + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_VOLTAGE3].feature_id) { + size = sprintf(buf, "%s:\n", "OD_VDDC_CURVE"); + size += sprintf(buf + size, "0: %10uMhz %10dmV\n", + od_table->GfxclkFreq1, + od_table->GfxclkVolt1 / VOLTAGE_SCALE); + size += sprintf(buf + size, "1: %10uMhz %10dmV\n", + od_table->GfxclkFreq2, + od_table->GfxclkVolt2 / VOLTAGE_SCALE); + size += sprintf(buf + size, "2: %10uMhz %10dmV\n", + od_table->GfxclkFreq3, + od_table->GfxclkVolt3 / VOLTAGE_SCALE); + } + + break; + + case OD_RANGE: + size = sprintf(buf, "%s:\n", "OD_RANGE"); + + if (od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FMIN].feature_id && + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FMAX].feature_id) { + size += sprintf(buf + size, "SCLK: %7uMhz %10uMhz\n", + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FMIN].min_value, + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FMAX].max_value); + } + + if (od8_settings->od8_settings_array[OD8_SETTING_UCLK_FMAX].feature_id) { + single_dpm_table = &(dpm_table->mem_table); + ret = vega20_get_clk_table(smu, &clocks, single_dpm_table); + if (ret) { + pr_err("Attempt to get memory clk levels Failed!"); + return ret; + } + + size += sprintf(buf + size, "MCLK: %7uMhz %10uMhz\n", + clocks.data[0].clocks_in_khz / 1000, + od8_settings->od8_settings_array[OD8_SETTING_UCLK_FMAX].max_value); + } + + if (od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FREQ1].feature_id && + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FREQ2].feature_id && + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FREQ3].feature_id && + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_VOLTAGE1].feature_id && + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_VOLTAGE2].feature_id && + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_VOLTAGE3].feature_id) { + size += sprintf(buf + size, "VDDC_CURVE_SCLK[0]: %7uMhz %10uMhz\n", + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FREQ1].min_value, + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FREQ1].max_value); + size += sprintf(buf + size, "VDDC_CURVE_VOLT[0]: %7dmV %11dmV\n", + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_VOLTAGE1].min_value, + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_VOLTAGE1].max_value); + size += sprintf(buf + size, "VDDC_CURVE_SCLK[1]: %7uMhz %10uMhz\n", + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FREQ2].min_value, + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FREQ2].max_value); + size += sprintf(buf + size, "VDDC_CURVE_VOLT[1]: %7dmV %11dmV\n", + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_VOLTAGE2].min_value, + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_VOLTAGE2].max_value); + size += sprintf(buf + size, "VDDC_CURVE_SCLK[2]: %7uMhz %10uMhz\n", + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FREQ3].min_value, + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FREQ3].max_value); + size += sprintf(buf + size, "VDDC_CURVE_VOLT[2]: %7dmV %11dmV\n", + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_VOLTAGE3].min_value, + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_VOLTAGE3].max_value); + } + + break; + + default: + break; + } + return size; +} + +static int vega20_upload_dpm_level(struct smu_context *smu, bool max, + uint32_t feature_mask) +{ + struct vega20_dpm_table *dpm_table; + struct vega20_single_dpm_table *single_dpm_table; + uint32_t freq; + int ret = 0; + + dpm_table = smu->smu_dpm.dpm_context; + + if (smu_feature_is_enabled(smu, FEATURE_DPM_GFXCLK_BIT) && + (feature_mask & FEATURE_DPM_GFXCLK_MASK)) { + single_dpm_table = &(dpm_table->gfx_table); + freq = max ? single_dpm_table->dpm_state.soft_max_level : + single_dpm_table->dpm_state.soft_min_level; + ret = smu_send_smc_msg_with_param(smu, + (max ? SMU_MSG_SetSoftMaxByFreq : SMU_MSG_SetSoftMinByFreq), + (PPCLK_GFXCLK << 16) | (freq & 0xffff)); + if (ret) { + pr_err("Failed to set soft %s gfxclk !\n", + max ? "max" : "min"); + return ret; + } + } + + if (smu_feature_is_enabled(smu, FEATURE_DPM_UCLK_BIT) && + (feature_mask & FEATURE_DPM_UCLK_MASK)) { + single_dpm_table = &(dpm_table->mem_table); + freq = max ? single_dpm_table->dpm_state.soft_max_level : + single_dpm_table->dpm_state.soft_min_level; + ret = smu_send_smc_msg_with_param(smu, + (max ? SMU_MSG_SetSoftMaxByFreq : SMU_MSG_SetSoftMinByFreq), + (PPCLK_UCLK << 16) | (freq & 0xffff)); + if (ret) { + pr_err("Failed to set soft %s memclk !\n", + max ? "max" : "min"); + return ret; + } + } + + if (smu_feature_is_enabled(smu, FEATURE_DPM_SOCCLK_BIT) && + (feature_mask & FEATURE_DPM_SOCCLK_MASK)) { + single_dpm_table = &(dpm_table->soc_table); + freq = max ? single_dpm_table->dpm_state.soft_max_level : + single_dpm_table->dpm_state.soft_min_level; + ret = smu_send_smc_msg_with_param(smu, + (max ? SMU_MSG_SetSoftMaxByFreq : SMU_MSG_SetSoftMinByFreq), + (PPCLK_SOCCLK << 16) | (freq & 0xffff)); + if (ret) { + pr_err("Failed to set soft %s socclk !\n", + max ? "max" : "min"); + return ret; + } + } + + if (smu_feature_is_enabled(smu, FEATURE_DPM_FCLK_BIT) && + (feature_mask & FEATURE_DPM_FCLK_MASK)) { + single_dpm_table = &(dpm_table->fclk_table); + freq = max ? single_dpm_table->dpm_state.soft_max_level : + single_dpm_table->dpm_state.soft_min_level; + ret = smu_send_smc_msg_with_param(smu, + (max ? SMU_MSG_SetSoftMaxByFreq : SMU_MSG_SetSoftMinByFreq), + (PPCLK_FCLK << 16) | (freq & 0xffff)); + if (ret) { + pr_err("Failed to set soft %s fclk !\n", + max ? "max" : "min"); + return ret; + } + } + + if (smu_feature_is_enabled(smu, FEATURE_DPM_DCEFCLK_BIT) && + (feature_mask & FEATURE_DPM_DCEFCLK_MASK)) { + single_dpm_table = &(dpm_table->dcef_table); + freq = single_dpm_table->dpm_state.hard_min_level; + if (!max) { + ret = smu_send_smc_msg_with_param(smu, + SMU_MSG_SetHardMinByFreq, + (PPCLK_DCEFCLK << 16) | (freq & 0xffff)); + if (ret) { + pr_err("Failed to set hard min dcefclk !\n"); + return ret; + } + } + } + + return ret; +} + +static int vega20_force_clk_levels(struct smu_context *smu, + enum pp_clock_type type, uint32_t mask) +{ + struct vega20_dpm_table *dpm_table; + struct vega20_single_dpm_table *single_dpm_table; + uint32_t soft_min_level, soft_max_level, hard_min_level; + struct smu_dpm_context *smu_dpm = &smu->smu_dpm; + int ret = 0; + + if (smu_dpm->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) { + pr_info("force clock level is for dpm manual mode only.\n"); + return -EINVAL; + } + + mutex_lock(&(smu->mutex)); + + soft_min_level = mask ? (ffs(mask) - 1) : 0; + soft_max_level = mask ? (fls(mask) - 1) : 0; + + dpm_table = smu->smu_dpm.dpm_context; + + switch (type) { + case PP_SCLK: + single_dpm_table = &(dpm_table->gfx_table); + + if (soft_max_level >= single_dpm_table->count) { + pr_err("Clock level specified %d is over max allowed %d\n", + soft_max_level, single_dpm_table->count - 1); + ret = -EINVAL; + break; + } + + single_dpm_table->dpm_state.soft_min_level = + single_dpm_table->dpm_levels[soft_min_level].value; + single_dpm_table->dpm_state.soft_max_level = + single_dpm_table->dpm_levels[soft_max_level].value; + + ret = vega20_upload_dpm_level(smu, false, FEATURE_DPM_GFXCLK_MASK); + if (ret) { + pr_err("Failed to upload boot level to lowest!\n"); + break; + } + + ret = vega20_upload_dpm_level(smu, true, FEATURE_DPM_GFXCLK_MASK); + if (ret) + pr_err("Failed to upload dpm max level to highest!\n"); + + break; + + case PP_MCLK: + single_dpm_table = &(dpm_table->mem_table); + + if (soft_max_level >= single_dpm_table->count) { + pr_err("Clock level specified %d is over max allowed %d\n", + soft_max_level, single_dpm_table->count - 1); + ret = -EINVAL; + break; + } + + single_dpm_table->dpm_state.soft_min_level = + single_dpm_table->dpm_levels[soft_min_level].value; + single_dpm_table->dpm_state.soft_max_level = + single_dpm_table->dpm_levels[soft_max_level].value; + + ret = vega20_upload_dpm_level(smu, false, FEATURE_DPM_UCLK_MASK); + if (ret) { + pr_err("Failed to upload boot level to lowest!\n"); + break; + } + + ret = vega20_upload_dpm_level(smu, true, FEATURE_DPM_UCLK_MASK); + if (ret) + pr_err("Failed to upload dpm max level to highest!\n"); + + break; + + case PP_SOCCLK: + single_dpm_table = &(dpm_table->soc_table); + + if (soft_max_level >= single_dpm_table->count) { + pr_err("Clock level specified %d is over max allowed %d\n", + soft_max_level, single_dpm_table->count - 1); + ret = -EINVAL; + break; + } + + single_dpm_table->dpm_state.soft_min_level = + single_dpm_table->dpm_levels[soft_min_level].value; + single_dpm_table->dpm_state.soft_max_level = + single_dpm_table->dpm_levels[soft_max_level].value; + + ret = vega20_upload_dpm_level(smu, false, FEATURE_DPM_SOCCLK_MASK); + if (ret) { + pr_err("Failed to upload boot level to lowest!\n"); + break; + } + + ret = vega20_upload_dpm_level(smu, true, FEATURE_DPM_SOCCLK_MASK); + if (ret) + pr_err("Failed to upload dpm max level to highest!\n"); + + break; + + case PP_FCLK: + single_dpm_table = &(dpm_table->fclk_table); + + if (soft_max_level >= single_dpm_table->count) { + pr_err("Clock level specified %d is over max allowed %d\n", + soft_max_level, single_dpm_table->count - 1); + ret = -EINVAL; + break; + } + + single_dpm_table->dpm_state.soft_min_level = + single_dpm_table->dpm_levels[soft_min_level].value; + single_dpm_table->dpm_state.soft_max_level = + single_dpm_table->dpm_levels[soft_max_level].value; + + ret = vega20_upload_dpm_level(smu, false, FEATURE_DPM_FCLK_MASK); + if (ret) { + pr_err("Failed to upload boot level to lowest!\n"); + break; + } + + ret = vega20_upload_dpm_level(smu, true, FEATURE_DPM_FCLK_MASK); + if (ret) + pr_err("Failed to upload dpm max level to highest!\n"); + + break; + + case PP_DCEFCLK: + hard_min_level = soft_min_level; + single_dpm_table = &(dpm_table->dcef_table); + + if (hard_min_level >= single_dpm_table->count) { + pr_err("Clock level specified %d is over max allowed %d\n", + hard_min_level, single_dpm_table->count - 1); + ret = -EINVAL; + break; + } + + single_dpm_table->dpm_state.hard_min_level = + single_dpm_table->dpm_levels[hard_min_level].value; + + ret = vega20_upload_dpm_level(smu, false, FEATURE_DPM_DCEFCLK_MASK); + if (ret) + pr_err("Failed to upload boot level to lowest!\n"); + + break; + + case PP_PCIE: + if (soft_min_level >= NUM_LINK_LEVELS || + soft_max_level >= NUM_LINK_LEVELS) { + ret = -EINVAL; + break; + } + + ret = smu_send_smc_msg_with_param(smu, + SMU_MSG_SetMinLinkDpmByIndex, soft_min_level); + if (ret) + pr_err("Failed to set min link dpm level!\n"); + + break; + + default: + break; + } + + mutex_unlock(&(smu->mutex)); + return ret; +} + +static int vega20_get_clock_by_type_with_latency(struct smu_context *smu, + enum amd_pp_clock_type type, + struct pp_clock_levels_with_latency *clocks) +{ + int ret; + struct vega20_single_dpm_table *single_dpm_table; + struct smu_dpm_context *smu_dpm = &smu->smu_dpm; + struct vega20_dpm_table *dpm_table = NULL; + + dpm_table = smu_dpm->dpm_context; + + mutex_lock(&smu->mutex); + + switch (type) { + case amd_pp_sys_clock: + single_dpm_table = &(dpm_table->gfx_table); + ret = vega20_get_clk_table(smu, clocks, single_dpm_table); + break; + case amd_pp_mem_clock: + single_dpm_table = &(dpm_table->mem_table); + ret = vega20_get_clk_table(smu, clocks, single_dpm_table); + break; + case amd_pp_dcef_clock: + single_dpm_table = &(dpm_table->dcef_table); + ret = vega20_get_clk_table(smu, clocks, single_dpm_table); + break; + case amd_pp_soc_clock: + single_dpm_table = &(dpm_table->soc_table); + ret = vega20_get_clk_table(smu, clocks, single_dpm_table); + break; + default: + ret = -EINVAL; + } + + mutex_unlock(&smu->mutex); + return ret; +} + +static int vega20_overdrive_get_gfx_clk_base_voltage(struct smu_context *smu, + uint32_t *voltage, + uint32_t freq) +{ + int ret; + + ret = smu_send_smc_msg_with_param(smu, + SMU_MSG_GetAVFSVoltageByDpm, + ((AVFS_CURVE << 24) | (OD8_HOTCURVE_TEMPERATURE << 16) | freq)); + if (ret) { + pr_err("[GetBaseVoltage] failed to get GFXCLK AVFS voltage from SMU!"); + return ret; + } + + smu_read_smc_arg(smu, voltage); + *voltage = *voltage / VOLTAGE_SCALE; + + return 0; +} + +static int vega20_set_default_od8_setttings(struct smu_context *smu) +{ + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTable_t *od_table = (OverDriveTable_t *)(table_context->overdrive_table); + struct vega20_od8_settings *od8_settings = NULL; + PPTable_t *smc_pptable = table_context->driver_pptable; + int i, ret; + + if (table_context->od8_settings) + return -EINVAL; + + table_context->od8_settings = kzalloc(sizeof(struct vega20_od8_settings), GFP_KERNEL); + + if (!table_context->od8_settings) + return -ENOMEM; + + memset(table_context->od8_settings, 0, sizeof(struct vega20_od8_settings)); + od8_settings = (struct vega20_od8_settings *)table_context->od8_settings; + + if (smu_feature_is_enabled(smu, FEATURE_DPM_SOCCLK_BIT)) { + if (table_context->od_feature_capabilities[ATOM_VEGA20_ODFEATURE_GFXCLK_LIMITS] && + table_context->od_settings_max[OD8_SETTING_GFXCLK_FMAX] > 0 && + table_context->od_settings_min[OD8_SETTING_GFXCLK_FMIN] > 0 && + (table_context->od_settings_max[OD8_SETTING_GFXCLK_FMAX] >= + table_context->od_settings_min[OD8_SETTING_GFXCLK_FMIN])) { + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FMIN].feature_id = + OD8_GFXCLK_LIMITS; + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FMAX].feature_id = + OD8_GFXCLK_LIMITS; + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FMIN].default_value = + od_table->GfxclkFmin; + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FMAX].default_value = + od_table->GfxclkFmax; + } + + if (table_context->od_feature_capabilities[ATOM_VEGA20_ODFEATURE_GFXCLK_CURVE] && + (table_context->od_settings_min[OD8_SETTING_GFXCLK_VOLTAGE1] >= + smc_pptable->MinVoltageGfx / VOLTAGE_SCALE) && + (table_context->od_settings_max[OD8_SETTING_GFXCLK_VOLTAGE3] <= + smc_pptable->MaxVoltageGfx / VOLTAGE_SCALE) && + (table_context->od_settings_min[OD8_SETTING_GFXCLK_VOLTAGE1] <= + table_context->od_settings_max[OD8_SETTING_GFXCLK_VOLTAGE3])) { + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FREQ1].feature_id = + OD8_GFXCLK_CURVE; + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_VOLTAGE1].feature_id = + OD8_GFXCLK_CURVE; + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FREQ2].feature_id = + OD8_GFXCLK_CURVE; + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_VOLTAGE2].feature_id = + OD8_GFXCLK_CURVE; + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FREQ3].feature_id = + OD8_GFXCLK_CURVE; + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_VOLTAGE3].feature_id = + OD8_GFXCLK_CURVE; + + od_table->GfxclkFreq1 = od_table->GfxclkFmin; + od_table->GfxclkFreq2 = (od_table->GfxclkFmin + od_table->GfxclkFmax) / 2; + od_table->GfxclkFreq3 = od_table->GfxclkFmax; + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FREQ1].default_value = + od_table->GfxclkFreq1; + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FREQ2].default_value = + od_table->GfxclkFreq2; + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FREQ3].default_value = + od_table->GfxclkFreq3; + + ret = vega20_overdrive_get_gfx_clk_base_voltage(smu, + &od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_VOLTAGE1].default_value, + od_table->GfxclkFreq1); + if (ret) + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_VOLTAGE1].default_value = 0; + od_table->GfxclkVolt1 = + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_VOLTAGE1].default_value + * VOLTAGE_SCALE; + ret = vega20_overdrive_get_gfx_clk_base_voltage(smu, + &od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_VOLTAGE2].default_value, + od_table->GfxclkFreq2); + if (ret) + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_VOLTAGE2].default_value = 0; + od_table->GfxclkVolt2 = + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_VOLTAGE2].default_value + * VOLTAGE_SCALE; + ret = vega20_overdrive_get_gfx_clk_base_voltage(smu, + &od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_VOLTAGE3].default_value, + od_table->GfxclkFreq3); + if (ret) + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_VOLTAGE3].default_value = 0; + od_table->GfxclkVolt3 = + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_VOLTAGE3].default_value + * VOLTAGE_SCALE; + } + } + + if (smu_feature_is_enabled(smu, FEATURE_DPM_UCLK_BIT)) { + if (table_context->od_feature_capabilities[ATOM_VEGA20_ODFEATURE_UCLK_MAX] && + table_context->od_settings_min[OD8_SETTING_UCLK_FMAX] > 0 && + table_context->od_settings_max[OD8_SETTING_UCLK_FMAX] > 0 && + (table_context->od_settings_max[OD8_SETTING_UCLK_FMAX] >= + table_context->od_settings_min[OD8_SETTING_UCLK_FMAX])) { + od8_settings->od8_settings_array[OD8_SETTING_UCLK_FMAX].feature_id = + OD8_UCLK_MAX; + od8_settings->od8_settings_array[OD8_SETTING_UCLK_FMAX].default_value = + od_table->UclkFmax; + } + } + + if (table_context->od_feature_capabilities[ATOM_VEGA20_ODFEATURE_POWER_LIMIT] && + table_context->od_settings_min[OD8_SETTING_POWER_PERCENTAGE] > 0 && + table_context->od_settings_min[OD8_SETTING_POWER_PERCENTAGE] <= 100 && + table_context->od_settings_max[OD8_SETTING_POWER_PERCENTAGE] > 0 && + table_context->od_settings_max[OD8_SETTING_POWER_PERCENTAGE] <= 100) { + od8_settings->od8_settings_array[OD8_SETTING_POWER_PERCENTAGE].feature_id = + OD8_POWER_LIMIT; + od8_settings->od8_settings_array[OD8_SETTING_POWER_PERCENTAGE].default_value = + od_table->OverDrivePct; + } + + if (smu_feature_is_enabled(smu, FEATURE_FAN_CONTROL_BIT)) { + if (table_context->od_feature_capabilities[ATOM_VEGA20_ODFEATURE_FAN_ACOUSTIC_LIMIT] && + table_context->od_settings_min[OD8_SETTING_FAN_ACOUSTIC_LIMIT] > 0 && + table_context->od_settings_max[OD8_SETTING_FAN_ACOUSTIC_LIMIT] > 0 && + (table_context->od_settings_max[OD8_SETTING_FAN_ACOUSTIC_LIMIT] >= + table_context->od_settings_min[OD8_SETTING_FAN_ACOUSTIC_LIMIT])) { + od8_settings->od8_settings_array[OD8_SETTING_FAN_ACOUSTIC_LIMIT].feature_id = + OD8_ACOUSTIC_LIMIT_SCLK; + od8_settings->od8_settings_array[OD8_SETTING_FAN_ACOUSTIC_LIMIT].default_value = + od_table->FanMaximumRpm; + } + + if (table_context->od_feature_capabilities[ATOM_VEGA20_ODFEATURE_FAN_SPEED_MIN] && + table_context->od_settings_min[OD8_SETTING_FAN_MIN_SPEED] > 0 && + table_context->od_settings_max[OD8_SETTING_FAN_MIN_SPEED] > 0 && + (table_context->od_settings_max[OD8_SETTING_FAN_MIN_SPEED] >= + table_context->od_settings_min[OD8_SETTING_FAN_MIN_SPEED])) { + od8_settings->od8_settings_array[OD8_SETTING_FAN_MIN_SPEED].feature_id = + OD8_FAN_SPEED_MIN; + od8_settings->od8_settings_array[OD8_SETTING_FAN_MIN_SPEED].default_value = + od_table->FanMinimumPwm * smc_pptable->FanMaximumRpm / 100; + } + } + + if (smu_feature_is_enabled(smu, FEATURE_THERMAL_BIT)) { + if (table_context->od_feature_capabilities[ATOM_VEGA20_ODFEATURE_TEMPERATURE_FAN] && + table_context->od_settings_min[OD8_SETTING_FAN_TARGET_TEMP] > 0 && + table_context->od_settings_max[OD8_SETTING_FAN_TARGET_TEMP] > 0 && + (table_context->od_settings_max[OD8_SETTING_FAN_TARGET_TEMP] >= + table_context->od_settings_min[OD8_SETTING_FAN_TARGET_TEMP])) { + od8_settings->od8_settings_array[OD8_SETTING_FAN_TARGET_TEMP].feature_id = + OD8_TEMPERATURE_FAN; + od8_settings->od8_settings_array[OD8_SETTING_FAN_TARGET_TEMP].default_value = + od_table->FanTargetTemperature; + } + + if (table_context->od_feature_capabilities[ATOM_VEGA20_ODFEATURE_TEMPERATURE_SYSTEM] && + table_context->od_settings_min[OD8_SETTING_OPERATING_TEMP_MAX] > 0 && + table_context->od_settings_max[OD8_SETTING_OPERATING_TEMP_MAX] > 0 && + (table_context->od_settings_max[OD8_SETTING_OPERATING_TEMP_MAX] >= + table_context->od_settings_min[OD8_SETTING_OPERATING_TEMP_MAX])) { + od8_settings->od8_settings_array[OD8_SETTING_OPERATING_TEMP_MAX].feature_id = + OD8_TEMPERATURE_SYSTEM; + od8_settings->od8_settings_array[OD8_SETTING_OPERATING_TEMP_MAX].default_value = + od_table->MaxOpTemp; + } + } + + for (i = 0; i < OD8_SETTING_COUNT; i++) { + if (od8_settings->od8_settings_array[i].feature_id) { + od8_settings->od8_settings_array[i].min_value = + table_context->od_settings_min[i]; + od8_settings->od8_settings_array[i].max_value = + table_context->od_settings_max[i]; + od8_settings->od8_settings_array[i].current_value = + od8_settings->od8_settings_array[i].default_value; + } else { + od8_settings->od8_settings_array[i].min_value = 0; + od8_settings->od8_settings_array[i].max_value = 0; + od8_settings->od8_settings_array[i].current_value = 0; + } + } + + return 0; +} + +static int vega20_get_od_percentage(struct smu_context *smu, + enum pp_clock_type type) +{ + struct smu_dpm_context *smu_dpm = &smu->smu_dpm; + struct vega20_dpm_table *dpm_table = NULL; + struct vega20_dpm_table *golden_table = NULL; + struct vega20_single_dpm_table *single_dpm_table; + struct vega20_single_dpm_table *golden_dpm_table; + int value, golden_value; + + dpm_table = smu_dpm->dpm_context; + golden_table = smu_dpm->golden_dpm_context; + + switch (type) { + case OD_SCLK: + single_dpm_table = &(dpm_table->gfx_table); + golden_dpm_table = &(golden_table->gfx_table); + break; + case OD_MCLK: + single_dpm_table = &(dpm_table->mem_table); + golden_dpm_table = &(golden_table->mem_table); + break; + default: + return -EINVAL; + break; + } + + value = single_dpm_table->dpm_levels[single_dpm_table->count - 1].value; + golden_value = golden_dpm_table->dpm_levels[golden_dpm_table->count - 1].value; + + value -= golden_value; + value = DIV_ROUND_UP(value * 100, golden_value); + + return value; +} + +static int +vega20_get_profiling_clk_mask(struct smu_context *smu, + enum amd_dpm_forced_level level, + uint32_t *sclk_mask, + uint32_t *mclk_mask, + uint32_t *soc_mask) +{ + struct vega20_dpm_table *dpm_table = (struct vega20_dpm_table *)smu->smu_dpm.dpm_context; + struct vega20_single_dpm_table *gfx_dpm_table; + struct vega20_single_dpm_table *mem_dpm_table; + struct vega20_single_dpm_table *soc_dpm_table; + + if (!smu->smu_dpm.dpm_context) + return -EINVAL; + + gfx_dpm_table = &dpm_table->gfx_table; + mem_dpm_table = &dpm_table->mem_table; + soc_dpm_table = &dpm_table->soc_table; + + *sclk_mask = 0; + *mclk_mask = 0; + *soc_mask = 0; + + if (gfx_dpm_table->count > VEGA20_UMD_PSTATE_GFXCLK_LEVEL && + mem_dpm_table->count > VEGA20_UMD_PSTATE_MCLK_LEVEL && + soc_dpm_table->count > VEGA20_UMD_PSTATE_SOCCLK_LEVEL) { + *sclk_mask = VEGA20_UMD_PSTATE_GFXCLK_LEVEL; + *mclk_mask = VEGA20_UMD_PSTATE_MCLK_LEVEL; + *soc_mask = VEGA20_UMD_PSTATE_SOCCLK_LEVEL; + } + + if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK) { + *sclk_mask = 0; + } else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK) { + *mclk_mask = 0; + } else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) { + *sclk_mask = gfx_dpm_table->count - 1; + *mclk_mask = mem_dpm_table->count - 1; + *soc_mask = soc_dpm_table->count - 1; + } + + return 0; +} + +static int +vega20_set_uclk_to_highest_dpm_level(struct smu_context *smu, + struct vega20_single_dpm_table *dpm_table) +{ + int ret = 0; + struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); + if (!smu_dpm_ctx->dpm_context) + return -EINVAL; + + if (smu_feature_is_enabled(smu, FEATURE_DPM_UCLK_BIT)) { + if (dpm_table->count <= 0) { + pr_err("[%s] Dpm table has no entry!", __func__); + return -EINVAL; + } + + if (dpm_table->count > NUM_UCLK_DPM_LEVELS) { + pr_err("[%s] Dpm table has too many entries!", __func__); + return -EINVAL; + } + + dpm_table->dpm_state.hard_min_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + ret = smu_send_smc_msg_with_param(smu, + SMU_MSG_SetHardMinByFreq, + (PPCLK_UCLK << 16) | dpm_table->dpm_state.hard_min_level); + if (ret) { + pr_err("[%s] Set hard min uclk failed!", __func__); + return ret; + } + } + + return ret; +} + +static int vega20_pre_display_config_changed(struct smu_context *smu) +{ + int ret = 0; + struct vega20_dpm_table *dpm_table = smu->smu_dpm.dpm_context; + + if (!smu->smu_dpm.dpm_context) + return -EINVAL; + + smu_send_smc_msg_with_param(smu, SMU_MSG_NumOfDisplays, 0); + ret = vega20_set_uclk_to_highest_dpm_level(smu, + &dpm_table->mem_table); + if (ret) + pr_err("Failed to set uclk to highest dpm level"); + return ret; +} + +static int vega20_display_config_changed(struct smu_context *smu) +{ + int ret = 0; + + if (!smu->funcs) + return -EINVAL; + + if (!smu->smu_dpm.dpm_context || + !smu->smu_table.tables || + !smu->smu_table.tables[TABLE_WATERMARKS].cpu_addr) + return -EINVAL; + + if ((smu->watermarks_bitmap & WATERMARKS_EXIST) && + !(smu->watermarks_bitmap & WATERMARKS_LOADED)) { + ret = smu->funcs->write_watermarks_table(smu); + if (ret) { + pr_err("Failed to update WMTABLE!"); + return ret; + } + smu->watermarks_bitmap |= WATERMARKS_LOADED; + } + + if ((smu->watermarks_bitmap & WATERMARKS_EXIST) && + smu_feature_is_supported(smu, FEATURE_DPM_DCEFCLK_BIT) && + smu_feature_is_supported(smu, FEATURE_DPM_SOCCLK_BIT)) { + smu_send_smc_msg_with_param(smu, + SMU_MSG_NumOfDisplays, + smu->display_config->num_display); + } + + return ret; +} + +static int vega20_apply_clocks_adjust_rules(struct smu_context *smu) +{ + struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); + struct vega20_dpm_table *dpm_ctx = (struct vega20_dpm_table *)(smu_dpm_ctx->dpm_context); + struct vega20_single_dpm_table *dpm_table; + bool vblank_too_short = false; + bool disable_mclk_switching; + uint32_t i, latency; + + disable_mclk_switching = ((1 < smu->display_config->num_display) && + !smu->display_config->multi_monitor_in_sync) || vblank_too_short; + latency = smu->display_config->dce_tolerable_mclk_in_active_latency; + + /* gfxclk */ + dpm_table = &(dpm_ctx->gfx_table); + dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[0].value; + dpm_table->dpm_state.soft_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + dpm_table->dpm_state.hard_min_level = dpm_table->dpm_levels[0].value; + dpm_table->dpm_state.hard_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + + if (VEGA20_UMD_PSTATE_GFXCLK_LEVEL < dpm_table->count) { + dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[VEGA20_UMD_PSTATE_GFXCLK_LEVEL].value; + dpm_table->dpm_state.soft_max_level = dpm_table->dpm_levels[VEGA20_UMD_PSTATE_GFXCLK_LEVEL].value; + } + + if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK) { + dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[0].value; + dpm_table->dpm_state.soft_max_level = dpm_table->dpm_levels[0].value; + } + + if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) { + dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + dpm_table->dpm_state.soft_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + } + + /* memclk */ + dpm_table = &(dpm_ctx->mem_table); + dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[0].value; + dpm_table->dpm_state.soft_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + dpm_table->dpm_state.hard_min_level = dpm_table->dpm_levels[0].value; + dpm_table->dpm_state.hard_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + + if (VEGA20_UMD_PSTATE_MCLK_LEVEL < dpm_table->count) { + dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[VEGA20_UMD_PSTATE_MCLK_LEVEL].value; + dpm_table->dpm_state.soft_max_level = dpm_table->dpm_levels[VEGA20_UMD_PSTATE_MCLK_LEVEL].value; + } + + if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK) { + dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[0].value; + dpm_table->dpm_state.soft_max_level = dpm_table->dpm_levels[0].value; + } + + if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) { + dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + dpm_table->dpm_state.soft_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + } + + /* honour DAL's UCLK Hardmin */ + if (dpm_table->dpm_state.hard_min_level < (smu->display_config->min_mem_set_clock / 100)) + dpm_table->dpm_state.hard_min_level = smu->display_config->min_mem_set_clock / 100; + + /* Hardmin is dependent on displayconfig */ + if (disable_mclk_switching) { + dpm_table->dpm_state.hard_min_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + for (i = 0; i < smu_dpm_ctx->mclk_latency_table->count - 1; i++) { + if (smu_dpm_ctx->mclk_latency_table->entries[i].latency <= latency) { + if (dpm_table->dpm_levels[i].value >= (smu->display_config->min_mem_set_clock / 100)) { + dpm_table->dpm_state.hard_min_level = dpm_table->dpm_levels[i].value; + break; + } + } + } + } + + if (smu->display_config->nb_pstate_switch_disable) + dpm_table->dpm_state.hard_min_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + + /* vclk */ + dpm_table = &(dpm_ctx->vclk_table); + dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[0].value; + dpm_table->dpm_state.soft_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + dpm_table->dpm_state.hard_min_level = dpm_table->dpm_levels[0].value; + dpm_table->dpm_state.hard_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + + if (VEGA20_UMD_PSTATE_UVDCLK_LEVEL < dpm_table->count) { + dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[VEGA20_UMD_PSTATE_UVDCLK_LEVEL].value; + dpm_table->dpm_state.soft_max_level = dpm_table->dpm_levels[VEGA20_UMD_PSTATE_UVDCLK_LEVEL].value; + } + + if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) { + dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + dpm_table->dpm_state.soft_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + } + + /* dclk */ + dpm_table = &(dpm_ctx->dclk_table); + dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[0].value; + dpm_table->dpm_state.soft_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + dpm_table->dpm_state.hard_min_level = dpm_table->dpm_levels[0].value; + dpm_table->dpm_state.hard_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + + if (VEGA20_UMD_PSTATE_UVDCLK_LEVEL < dpm_table->count) { + dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[VEGA20_UMD_PSTATE_UVDCLK_LEVEL].value; + dpm_table->dpm_state.soft_max_level = dpm_table->dpm_levels[VEGA20_UMD_PSTATE_UVDCLK_LEVEL].value; + } + + if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) { + dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + dpm_table->dpm_state.soft_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + } + + /* socclk */ + dpm_table = &(dpm_ctx->soc_table); + dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[0].value; + dpm_table->dpm_state.soft_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + dpm_table->dpm_state.hard_min_level = dpm_table->dpm_levels[0].value; + dpm_table->dpm_state.hard_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + + if (VEGA20_UMD_PSTATE_SOCCLK_LEVEL < dpm_table->count) { + dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[VEGA20_UMD_PSTATE_SOCCLK_LEVEL].value; + dpm_table->dpm_state.soft_max_level = dpm_table->dpm_levels[VEGA20_UMD_PSTATE_SOCCLK_LEVEL].value; + } + + if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) { + dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + dpm_table->dpm_state.soft_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + } + + /* eclk */ + dpm_table = &(dpm_ctx->eclk_table); + dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[0].value; + dpm_table->dpm_state.soft_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + dpm_table->dpm_state.hard_min_level = dpm_table->dpm_levels[0].value; + dpm_table->dpm_state.hard_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + + if (VEGA20_UMD_PSTATE_VCEMCLK_LEVEL < dpm_table->count) { + dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[VEGA20_UMD_PSTATE_VCEMCLK_LEVEL].value; + dpm_table->dpm_state.soft_max_level = dpm_table->dpm_levels[VEGA20_UMD_PSTATE_VCEMCLK_LEVEL].value; + } + + if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) { + dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + dpm_table->dpm_state.soft_max_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + } + return 0; +} + +static int +vega20_notify_smc_dispaly_config(struct smu_context *smu) +{ + struct vega20_dpm_table *dpm_table = smu->smu_dpm.dpm_context; + struct vega20_single_dpm_table *memtable = &dpm_table->mem_table; + struct smu_clocks min_clocks = {0}; + struct pp_display_clock_request clock_req; + int ret = 0; + + min_clocks.dcef_clock = smu->display_config->min_dcef_set_clk; + min_clocks.dcef_clock_in_sr = smu->display_config->min_dcef_deep_sleep_set_clk; + min_clocks.memory_clock = smu->display_config->min_mem_set_clock; + + if (smu_feature_is_supported(smu, FEATURE_DPM_DCEFCLK_BIT)) { + clock_req.clock_type = amd_pp_dcef_clock; + clock_req.clock_freq_in_khz = min_clocks.dcef_clock * 10; + if (!smu->funcs->display_clock_voltage_request(smu, &clock_req)) { + if (smu_feature_is_supported(smu, FEATURE_DS_DCEFCLK_BIT)) { + ret = smu_send_smc_msg_with_param(smu, + SMU_MSG_SetMinDeepSleepDcefclk, + min_clocks.dcef_clock_in_sr/100); + if (ret) { + pr_err("Attempt to set divider for DCEFCLK Failed!"); + return ret; + } + } + } else { + pr_info("Attempt to set Hard Min for DCEFCLK Failed!"); + } + } + + if (smu_feature_is_enabled(smu, FEATURE_DPM_UCLK_BIT)) { + memtable->dpm_state.hard_min_level = min_clocks.memory_clock/100; + ret = smu_send_smc_msg_with_param(smu, + SMU_MSG_SetHardMinByFreq, + (PPCLK_UCLK << 16) | memtable->dpm_state.hard_min_level); + if (ret) { + pr_err("[%s] Set hard min uclk failed!", __func__); + return ret; + } + } + + return 0; +} + +static uint32_t vega20_find_lowest_dpm_level(struct vega20_single_dpm_table *table) +{ + uint32_t i; + + for (i = 0; i < table->count; i++) { + if (table->dpm_levels[i].enabled) + break; + } + if (i >= table->count) { + i = 0; + table->dpm_levels[i].enabled = true; + } + + return i; +} + +static uint32_t vega20_find_highest_dpm_level(struct vega20_single_dpm_table *table) +{ + int i = 0; + + if (!table) { + pr_err("[%s] DPM Table does not exist!", __func__); + return 0; + } + if (table->count <= 0) { + pr_err("[%s] DPM Table has no entry!", __func__); + return 0; + } + if (table->count > MAX_REGULAR_DPM_NUMBER) { + pr_err("[%s] DPM Table has too many entries!", __func__); + return MAX_REGULAR_DPM_NUMBER - 1; + } + + for (i = table->count - 1; i >= 0; i--) { + if (table->dpm_levels[i].enabled) + break; + } + if (i < 0) { + i = 0; + table->dpm_levels[i].enabled = true; + } + + return i; +} + +static int vega20_force_dpm_limit_value(struct smu_context *smu, bool highest) +{ + uint32_t soft_level; + int ret = 0; + struct vega20_dpm_table *dpm_table = + (struct vega20_dpm_table *)smu->smu_dpm.dpm_context; + + if (highest) + soft_level = vega20_find_highest_dpm_level(&(dpm_table->gfx_table)); + else + soft_level = vega20_find_lowest_dpm_level(&(dpm_table->gfx_table)); + + dpm_table->gfx_table.dpm_state.soft_min_level = + dpm_table->gfx_table.dpm_state.soft_max_level = + dpm_table->gfx_table.dpm_levels[soft_level].value; + + if (highest) + soft_level = vega20_find_highest_dpm_level(&(dpm_table->mem_table)); + else + soft_level = vega20_find_lowest_dpm_level(&(dpm_table->mem_table)); + + dpm_table->mem_table.dpm_state.soft_min_level = + dpm_table->mem_table.dpm_state.soft_max_level = + dpm_table->mem_table.dpm_levels[soft_level].value; + + if (highest) + soft_level = vega20_find_highest_dpm_level(&(dpm_table->soc_table)); + else + soft_level = vega20_find_lowest_dpm_level(&(dpm_table->soc_table)); + + dpm_table->soc_table.dpm_state.soft_min_level = + dpm_table->soc_table.dpm_state.soft_max_level = + dpm_table->soc_table.dpm_levels[soft_level].value; + + ret = vega20_upload_dpm_level(smu, false, 0xFFFFFFFF); + if (ret) { + pr_err("Failed to upload boot level to %s!\n", + highest ? "highest" : "lowest"); + return ret; + } + + ret = vega20_upload_dpm_level(smu, true, 0xFFFFFFFF); + if (ret) { + pr_err("Failed to upload dpm max level to %s!\n!", + highest ? "highest" : "lowest"); + return ret; + } + + return ret; +} + +static int vega20_unforce_dpm_levels(struct smu_context *smu) +{ + uint32_t soft_min_level, soft_max_level; + int ret = 0; + struct vega20_dpm_table *dpm_table = + (struct vega20_dpm_table *)smu->smu_dpm.dpm_context; + + soft_min_level = vega20_find_lowest_dpm_level(&(dpm_table->gfx_table)); + soft_max_level = vega20_find_highest_dpm_level(&(dpm_table->gfx_table)); + dpm_table->gfx_table.dpm_state.soft_min_level = + dpm_table->gfx_table.dpm_levels[soft_min_level].value; + dpm_table->gfx_table.dpm_state.soft_max_level = + dpm_table->gfx_table.dpm_levels[soft_max_level].value; + + soft_min_level = vega20_find_lowest_dpm_level(&(dpm_table->mem_table)); + soft_max_level = vega20_find_highest_dpm_level(&(dpm_table->mem_table)); + dpm_table->mem_table.dpm_state.soft_min_level = + dpm_table->gfx_table.dpm_levels[soft_min_level].value; + dpm_table->mem_table.dpm_state.soft_max_level = + dpm_table->gfx_table.dpm_levels[soft_max_level].value; + + soft_min_level = vega20_find_lowest_dpm_level(&(dpm_table->soc_table)); + soft_max_level = vega20_find_highest_dpm_level(&(dpm_table->soc_table)); + dpm_table->soc_table.dpm_state.soft_min_level = + dpm_table->soc_table.dpm_levels[soft_min_level].value; + dpm_table->soc_table.dpm_state.soft_max_level = + dpm_table->soc_table.dpm_levels[soft_max_level].value; + + ret = smu_upload_dpm_level(smu, false, 0xFFFFFFFF); + if (ret) { + pr_err("Failed to upload DPM Bootup Levels!"); + return ret; + } + + ret = smu_upload_dpm_level(smu, true, 0xFFFFFFFF); + if (ret) { + pr_err("Failed to upload DPM Max Levels!"); + return ret; + } + + return ret; +} + +static enum amd_dpm_forced_level vega20_get_performance_level(struct smu_context *smu) +{ + struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); + if (!smu_dpm_ctx->dpm_context) + return -EINVAL; + + if (smu_dpm_ctx->dpm_level != smu_dpm_ctx->saved_dpm_level) { + mutex_lock(&(smu->mutex)); + smu_dpm_ctx->saved_dpm_level = smu_dpm_ctx->dpm_level; + mutex_unlock(&(smu->mutex)); + } + return smu_dpm_ctx->dpm_level; +} + +static int +vega20_force_performance_level(struct smu_context *smu, enum amd_dpm_forced_level level) +{ + int ret = 0; + int i; + struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); + + if (!smu_dpm_ctx->dpm_context) + return -EINVAL; + + for (i = 0; i < smu->adev->num_ip_blocks; i++) { + if (smu->adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) + break; + } + + mutex_lock(&smu->mutex); + + smu->adev->ip_blocks[i].version->funcs->enable_umd_pstate(smu, &level); + ret = smu_handle_task(smu, level, + AMD_PP_TASK_READJUST_POWER_STATE); + + mutex_unlock(&smu->mutex); + + return ret; +} + +static int vega20_update_specified_od8_value(struct smu_context *smu, + uint32_t index, + uint32_t value) +{ + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTable_t *od_table = + (OverDriveTable_t *)(table_context->overdrive_table); + struct vega20_od8_settings *od8_settings = + (struct vega20_od8_settings *)table_context->od8_settings; + + switch (index) { + case OD8_SETTING_GFXCLK_FMIN: + od_table->GfxclkFmin = (uint16_t)value; + break; + + case OD8_SETTING_GFXCLK_FMAX: + if (value < od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FMAX].min_value || + value > od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FMAX].max_value) + return -EINVAL; + od_table->GfxclkFmax = (uint16_t)value; + break; + + case OD8_SETTING_GFXCLK_FREQ1: + od_table->GfxclkFreq1 = (uint16_t)value; + break; + + case OD8_SETTING_GFXCLK_VOLTAGE1: + od_table->GfxclkVolt1 = (uint16_t)value; + break; + + case OD8_SETTING_GFXCLK_FREQ2: + od_table->GfxclkFreq2 = (uint16_t)value; + break; + + case OD8_SETTING_GFXCLK_VOLTAGE2: + od_table->GfxclkVolt2 = (uint16_t)value; + break; + + case OD8_SETTING_GFXCLK_FREQ3: + od_table->GfxclkFreq3 = (uint16_t)value; + break; + + case OD8_SETTING_GFXCLK_VOLTAGE3: + od_table->GfxclkVolt3 = (uint16_t)value; + break; + + case OD8_SETTING_UCLK_FMAX: + if (value < od8_settings->od8_settings_array[OD8_SETTING_UCLK_FMAX].min_value || + value > od8_settings->od8_settings_array[OD8_SETTING_UCLK_FMAX].max_value) + return -EINVAL; + od_table->UclkFmax = (uint16_t)value; + break; + + case OD8_SETTING_POWER_PERCENTAGE: + od_table->OverDrivePct = (int16_t)value; + break; + + case OD8_SETTING_FAN_ACOUSTIC_LIMIT: + od_table->FanMaximumRpm = (uint16_t)value; + break; + + case OD8_SETTING_FAN_MIN_SPEED: + od_table->FanMinimumPwm = (uint16_t)value; + break; + + case OD8_SETTING_FAN_TARGET_TEMP: + od_table->FanTargetTemperature = (uint16_t)value; + break; + + case OD8_SETTING_OPERATING_TEMP_MAX: + od_table->MaxOpTemp = (uint16_t)value; + break; + } + + return 0; +} + +static int vega20_set_od_percentage(struct smu_context *smu, + enum pp_clock_type type, + uint32_t value) +{ + struct smu_dpm_context *smu_dpm = &smu->smu_dpm; + struct vega20_dpm_table *dpm_table = NULL; + struct vega20_dpm_table *golden_table = NULL; + struct vega20_single_dpm_table *single_dpm_table; + struct vega20_single_dpm_table *golden_dpm_table; + uint32_t od_clk, index; + int ret = 0; + int feature_enabled; + PPCLK_e clk_id; + + mutex_lock(&(smu->mutex)); + + dpm_table = smu_dpm->dpm_context; + golden_table = smu_dpm->golden_dpm_context; + + switch (type) { + case OD_SCLK: + single_dpm_table = &(dpm_table->gfx_table); + golden_dpm_table = &(golden_table->gfx_table); + feature_enabled = smu_feature_is_enabled(smu, FEATURE_DPM_GFXCLK_BIT); + clk_id = PPCLK_GFXCLK; + index = OD8_SETTING_GFXCLK_FMAX; + break; + case OD_MCLK: + single_dpm_table = &(dpm_table->mem_table); + golden_dpm_table = &(golden_table->mem_table); + feature_enabled = smu_feature_is_enabled(smu, FEATURE_DPM_UCLK_BIT); + clk_id = PPCLK_UCLK; + index = OD8_SETTING_UCLK_FMAX; + break; + default: + ret = -EINVAL; + break; + } + + if (ret) + goto set_od_failed; + + od_clk = golden_dpm_table->dpm_levels[golden_dpm_table->count - 1].value * value; + od_clk /= 100; + od_clk += golden_dpm_table->dpm_levels[golden_dpm_table->count - 1].value; + + ret = smu_update_od8_settings(smu, index, od_clk); + if (ret) { + pr_err("[Setoverdrive] failed to set od clk!\n"); + goto set_od_failed; + } + + if (feature_enabled) { + ret = vega20_set_single_dpm_table(smu, single_dpm_table, + clk_id); + if (ret) { + pr_err("[Setoverdrive] failed to refresh dpm table!\n"); + goto set_od_failed; + } + } else { + single_dpm_table->count = 1; + single_dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.gfxclk / 100; + } + + ret = smu_handle_task(smu, smu_dpm->dpm_level, + AMD_PP_TASK_READJUST_POWER_STATE); + +set_od_failed: + mutex_unlock(&(smu->mutex)); + + return ret; +} + +static int vega20_odn_edit_dpm_table(struct smu_context *smu, + enum PP_OD_DPM_TABLE_COMMAND type, + long *input, uint32_t size) +{ + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTable_t *od_table = + (OverDriveTable_t *)(table_context->overdrive_table); + struct smu_dpm_context *smu_dpm = &smu->smu_dpm; + struct vega20_dpm_table *dpm_table = NULL; + struct vega20_single_dpm_table *single_dpm_table; + struct vega20_od8_settings *od8_settings = + (struct vega20_od8_settings *)table_context->od8_settings; + struct pp_clock_levels_with_latency clocks; + int32_t input_index, input_clk, input_vol, i; + int od8_id; + int ret = 0; + + dpm_table = smu_dpm->dpm_context; + + if (!input) { + pr_warn("NULL user input for clock and voltage\n"); + return -EINVAL; + } + + switch (type) { + case PP_OD_EDIT_SCLK_VDDC_TABLE: + if (!(od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FMIN].feature_id && + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FMAX].feature_id)) { + pr_info("Sclk min/max frequency overdrive not supported\n"); + return -EOPNOTSUPP; + } + + for (i = 0; i < size; i += 2) { + if (i + 2 > size) { + pr_info("invalid number of input parameters %d\n", size); + return -EINVAL; + } + + input_index = input[i]; + input_clk = input[i + 1]; + + if (input_index != 0 && input_index != 1) { + pr_info("Invalid index %d\n", input_index); + pr_info("Support min/max sclk frequency settingonly which index by 0/1\n"); + return -EINVAL; + } + + if (input_clk < od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FMIN].min_value || + input_clk > od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FMAX].max_value) { + pr_info("clock freq %d is not within allowed range [%d - %d]\n", + input_clk, + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FMIN].min_value, + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FMAX].max_value); + return -EINVAL; + } + + if (input_index == 0 && od_table->GfxclkFmin != input_clk) { + od_table->GfxclkFmin = input_clk; + table_context->od_gfxclk_update = true; + } else if (input_index == 1 && od_table->GfxclkFmax != input_clk) { + od_table->GfxclkFmax = input_clk; + table_context->od_gfxclk_update = true; + } + } + + break; + + case PP_OD_EDIT_MCLK_VDDC_TABLE: + if (!od8_settings->od8_settings_array[OD8_SETTING_UCLK_FMAX].feature_id) { + pr_info("Mclk max frequency overdrive not supported\n"); + return -EOPNOTSUPP; + } + + single_dpm_table = &(dpm_table->mem_table); + ret = vega20_get_clk_table(smu, &clocks, single_dpm_table); + if (ret) { + pr_err("Attempt to get memory clk levels Failed!"); + return ret; + } + + for (i = 0; i < size; i += 2) { + if (i + 2 > size) { + pr_info("invalid number of input parameters %d\n", + size); + return -EINVAL; + } + + input_index = input[i]; + input_clk = input[i + 1]; + + if (input_index != 1) { + pr_info("Invalid index %d\n", input_index); + pr_info("Support max Mclk frequency setting only which index by 1\n"); + return -EINVAL; + } + + if (input_clk < clocks.data[0].clocks_in_khz / 1000 || + input_clk > od8_settings->od8_settings_array[OD8_SETTING_UCLK_FMAX].max_value) { + pr_info("clock freq %d is not within allowed range [%d - %d]\n", + input_clk, + clocks.data[0].clocks_in_khz / 1000, + od8_settings->od8_settings_array[OD8_SETTING_UCLK_FMAX].max_value); + return -EINVAL; + } + + if (input_index == 1 && od_table->UclkFmax != input_clk) { + table_context->od_gfxclk_update = true; + od_table->UclkFmax = input_clk; + } + } + + break; + + case PP_OD_EDIT_VDDC_CURVE: + if (!(od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FREQ1].feature_id && + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FREQ2].feature_id && + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_FREQ3].feature_id && + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_VOLTAGE1].feature_id && + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_VOLTAGE2].feature_id && + od8_settings->od8_settings_array[OD8_SETTING_GFXCLK_VOLTAGE3].feature_id)) { + pr_info("Voltage curve calibrate not supported\n"); + return -EOPNOTSUPP; + } + + for (i = 0; i < size; i += 3) { + if (i + 3 > size) { + pr_info("invalid number of input parameters %d\n", + size); + return -EINVAL; + } + + input_index = input[i]; + input_clk = input[i + 1]; + input_vol = input[i + 2]; + + if (input_index > 2) { + pr_info("Setting for point %d is not supported\n", + input_index + 1); + pr_info("Three supported points index by 0, 1, 2\n"); + return -EINVAL; + } + + od8_id = OD8_SETTING_GFXCLK_FREQ1 + 2 * input_index; + if (input_clk < od8_settings->od8_settings_array[od8_id].min_value || + input_clk > od8_settings->od8_settings_array[od8_id].max_value) { + pr_info("clock freq %d is not within allowed range [%d - %d]\n", + input_clk, + od8_settings->od8_settings_array[od8_id].min_value, + od8_settings->od8_settings_array[od8_id].max_value); + return -EINVAL; + } + + od8_id = OD8_SETTING_GFXCLK_VOLTAGE1 + 2 * input_index; + if (input_vol < od8_settings->od8_settings_array[od8_id].min_value || + input_vol > od8_settings->od8_settings_array[od8_id].max_value) { + pr_info("clock voltage %d is not within allowed range [%d- %d]\n", + input_vol, + od8_settings->od8_settings_array[od8_id].min_value, + od8_settings->od8_settings_array[od8_id].max_value); + return -EINVAL; + } + + switch (input_index) { + case 0: + od_table->GfxclkFreq1 = input_clk; + od_table->GfxclkVolt1 = input_vol * VOLTAGE_SCALE; + break; + case 1: + od_table->GfxclkFreq2 = input_clk; + od_table->GfxclkVolt2 = input_vol * VOLTAGE_SCALE; + break; + case 2: + od_table->GfxclkFreq3 = input_clk; + od_table->GfxclkVolt3 = input_vol * VOLTAGE_SCALE; + break; + } + } + + break; + + case PP_OD_RESTORE_DEFAULT_TABLE: + ret = smu_update_table(smu, TABLE_OVERDRIVE, table_context->overdrive_table, false); + if (ret) { + pr_err("Failed to export over drive table!\n"); + return ret; + } + + break; + + case PP_OD_COMMIT_DPM_TABLE: + ret = smu_update_table(smu, TABLE_OVERDRIVE, table_context->overdrive_table, true); + if (ret) { + pr_err("Failed to import over drive table!\n"); + return ret; + } + + /* retrieve updated gfxclk table */ + if (table_context->od_gfxclk_update) { + table_context->od_gfxclk_update = false; + single_dpm_table = &(dpm_table->gfx_table); + + if (smu_feature_is_enabled(smu, FEATURE_DPM_GFXCLK_BIT)) { + ret = vega20_set_single_dpm_table(smu, single_dpm_table, + PPCLK_GFXCLK); + if (ret) { + pr_err("[Setoverdrive] failed to refresh dpm table!\n"); + return ret; + } + } else { + single_dpm_table->count = 1; + single_dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.gfxclk / 100; + } + } + + break; + + default: + return -EINVAL; + } + + if (type == PP_OD_COMMIT_DPM_TABLE) { + mutex_lock(&(smu->mutex)); + ret = smu_handle_task(smu, smu_dpm->dpm_level, + AMD_PP_TASK_READJUST_POWER_STATE); + mutex_unlock(&(smu->mutex)); + } + + return ret; +} + +static const struct pptable_funcs vega20_ppt_funcs = { + .alloc_dpm_context = vega20_allocate_dpm_context, + .store_powerplay_table = vega20_store_powerplay_table, + .check_powerplay_table = vega20_check_powerplay_table, + .append_powerplay_table = vega20_append_powerplay_table, + .get_smu_msg_index = vega20_get_smu_msg_index, + .run_afll_btc = vega20_run_btc_afll, + .get_unallowed_feature_mask = vega20_get_unallowed_feature_mask, + .get_current_power_state = vega20_get_current_power_state, + .set_default_dpm_table = vega20_set_default_dpm_table, + .set_power_state = NULL, + .populate_umd_state_clk = vega20_populate_umd_state_clk, + .print_clk_levels = vega20_print_clk_levels, + .force_clk_levels = vega20_force_clk_levels, + .get_clock_by_type_with_latency = vega20_get_clock_by_type_with_latency, + .set_default_od8_settings = vega20_set_default_od8_setttings, + .get_od_percentage = vega20_get_od_percentage, + .get_performance_level = vega20_get_performance_level, + .force_performance_level = vega20_force_performance_level, + .update_specified_od8_value = vega20_update_specified_od8_value, + .set_od_percentage = vega20_set_od_percentage, + .od_edit_dpm_table = vega20_odn_edit_dpm_table, + .pre_display_config_changed = vega20_pre_display_config_changed, + .display_config_changed = vega20_display_config_changed, + .apply_clocks_adjust_rules = vega20_apply_clocks_adjust_rules, + .notify_smc_dispaly_config = vega20_notify_smc_dispaly_config, + .force_dpm_limit_value = vega20_force_dpm_limit_value, + .unforce_dpm_levels = vega20_unforce_dpm_levels, + .upload_dpm_level = vega20_upload_dpm_level, + .get_profiling_clk_mask = vega20_get_profiling_clk_mask, +}; + +void vega20_set_ppt_funcs(struct smu_context *smu) +{ + smu->ppt_funcs = &vega20_ppt_funcs; + smu->smc_if_version = SMU11_DRIVER_IF_VERSION; +} diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.h b/drivers/gpu/drm/amd/powerplay/vega20_ppt.h new file mode 100644 index 000000000000..5a0d2af63173 --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.h @@ -0,0 +1,129 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __VEGA20_PPT_H__ +#define __VEGA20_PPT_H__ + +#define VEGA20_UMD_PSTATE_GFXCLK_LEVEL 0x3 +#define VEGA20_UMD_PSTATE_SOCCLK_LEVEL 0x3 +#define VEGA20_UMD_PSTATE_MCLK_LEVEL 0x2 +#define VEGA20_UMD_PSTATE_UVDCLK_LEVEL 0x3 +#define VEGA20_UMD_PSTATE_VCEMCLK_LEVEL 0x3 + +#define MAX_REGULAR_DPM_NUMBER 16 +#define MAX_PCIE_CONF 2 + +#define VOLTAGE_SCALE 4 +#define AVFS_CURVE 0 +#define OD8_HOTCURVE_TEMPERATURE 85 + +struct vega20_dpm_level { + bool enabled; + uint32_t value; + uint32_t param1; +}; + +struct vega20_dpm_state { + uint32_t soft_min_level; + uint32_t soft_max_level; + uint32_t hard_min_level; + uint32_t hard_max_level; +}; + +struct vega20_single_dpm_table { + uint32_t count; + struct vega20_dpm_state dpm_state; + struct vega20_dpm_level dpm_levels[MAX_REGULAR_DPM_NUMBER]; +}; + +struct vega20_pcie_table { + uint16_t count; + uint8_t pcie_gen[MAX_PCIE_CONF]; + uint8_t pcie_lane[MAX_PCIE_CONF]; + uint32_t lclk[MAX_PCIE_CONF]; +}; + +struct vega20_dpm_table { + struct vega20_single_dpm_table soc_table; + struct vega20_single_dpm_table gfx_table; + struct vega20_single_dpm_table mem_table; + struct vega20_single_dpm_table eclk_table; + struct vega20_single_dpm_table vclk_table; + struct vega20_single_dpm_table dclk_table; + struct vega20_single_dpm_table dcef_table; + struct vega20_single_dpm_table pixel_table; + struct vega20_single_dpm_table display_table; + struct vega20_single_dpm_table phy_table; + struct vega20_single_dpm_table fclk_table; + struct vega20_pcie_table pcie_table; +}; + +enum OD8_FEATURE_ID +{ + OD8_GFXCLK_LIMITS = 1 << 0, + OD8_GFXCLK_CURVE = 1 << 1, + OD8_UCLK_MAX = 1 << 2, + OD8_POWER_LIMIT = 1 << 3, + OD8_ACOUSTIC_LIMIT_SCLK = 1 << 4, //FanMaximumRpm + OD8_FAN_SPEED_MIN = 1 << 5, //FanMinimumPwm + OD8_TEMPERATURE_FAN = 1 << 6, //FanTargetTemperature + OD8_TEMPERATURE_SYSTEM = 1 << 7, //MaxOpTemp + OD8_MEMORY_TIMING_TUNE = 1 << 8, + OD8_FAN_ZERO_RPM_CONTROL = 1 << 9 +}; + +enum OD8_SETTING_ID +{ + OD8_SETTING_GFXCLK_FMIN = 0, + OD8_SETTING_GFXCLK_FMAX, + OD8_SETTING_GFXCLK_FREQ1, + OD8_SETTING_GFXCLK_VOLTAGE1, + OD8_SETTING_GFXCLK_FREQ2, + OD8_SETTING_GFXCLK_VOLTAGE2, + OD8_SETTING_GFXCLK_FREQ3, + OD8_SETTING_GFXCLK_VOLTAGE3, + OD8_SETTING_UCLK_FMAX, + OD8_SETTING_POWER_PERCENTAGE, + OD8_SETTING_FAN_ACOUSTIC_LIMIT, + OD8_SETTING_FAN_MIN_SPEED, + OD8_SETTING_FAN_TARGET_TEMP, + OD8_SETTING_OPERATING_TEMP_MAX, + OD8_SETTING_AC_TIMING, + OD8_SETTING_FAN_ZERO_RPM_CONTROL, + OD8_SETTING_COUNT +}; + +struct vega20_od8_single_setting { + uint32_t feature_id; + int32_t min_value; + int32_t max_value; + int32_t current_value; + int32_t default_value; +}; + +struct vega20_od8_settings { + struct vega20_od8_single_setting od8_settings_array[OD8_SETTING_COUNT]; +}; + +extern void vega20_set_ppt_funcs(struct smu_context *smu); + +#endif |