summaryrefslogtreecommitdiff
path: root/drivers/gpu
diff options
context:
space:
mode:
authorTakashi Iwai <tiwai@suse.de>2022-08-24 08:03:46 +0200
committerTakashi Iwai <tiwai@suse.de>2022-08-24 08:04:56 +0200
commit384c687fb4ad0774aaae58ed7f43cc738aa57a73 (patch)
treeda35994e3876cdcb9f7d144c6e372b0a942db02d /drivers/gpu
parent18afcf90d8807fef66d1fd428eeb2b407df90fa8 (diff)
parent0db78532ff144a52757d636a26803099d78f431e (diff)
downloadlwn-384c687fb4ad0774aaae58ed7f43cc738aa57a73.tar.gz
lwn-384c687fb4ad0774aaae58ed7f43cc738aa57a73.zip
Merge branch 'topic/memalloc-cleanup' into for-next
ALSA: Drop hackish GFP giveaway for CONTINUOUS pages This is a series of cleanup patches for dropping the current hackish way of passing the GFP_* flags for CONTINOUS and VMALLOC memory allocations. There are only three users for this legacy feature, and all of them seem superfluous. And, if any driver requires the memory restriction in future, it can now pass the proper device pointer for specifying the DMA mask. Link: https://lore.kernel.org/r/20220823115740.14123-1-tiwai@suse.de Signed-off-by: Takashi Iwai <tiwai@suse.de>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/aldebaran.c45
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v3_0.c42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v10_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v11_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v6_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v8_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c39
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c150
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v6_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_ih.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v12_0.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v13_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc21.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega10_ih.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega20_ih.c7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.c24
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.c17
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.h1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c11
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c15
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/conversion.c21
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/conversion.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c200
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c41
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_resource.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c15
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_link.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn314/Makefile25
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c42
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c341
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c40
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/Makefile4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c376
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h40
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c47
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c12
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c812
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h106
-rw-r--r--drivers/gpu/drm/amd/display/include/dal_asic_id.h6
-rw-r--r--drivers/gpu/drm/amd/display/include/logger_types.h4
-rw-r--r--drivers/gpu/drm/amd/display/modules/freesync/freesync.c15
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h4
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c1
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c21
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c17
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c3
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c14
-rw-r--r--drivers/gpu/drm/bridge/lvds-codec.c2
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object.c16
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_types.h3
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pages.c25
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c77
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.h12
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.h3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_types.h18
-rw-r--r--drivers/gpu/drm/i915/gt/intel_migrate.c23
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ppgtt.c8
-rw-r--r--drivers/gpu/drm/i915/gt/intel_region_lmem.c4
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h4
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c33
-rw-r--r--drivers/gpu/drm/i915/i915_vma.h1
-rw-r--r--drivers/gpu/drm/i915/i915_vma_resource.c5
-rw-r--r--drivers/gpu/drm/i915/i915_vma_resource.h6
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-kms.c2
-rw-r--r--drivers/gpu/drm/meson/meson_drv.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/base.c22
-rw-r--r--drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c10
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo.c2
129 files changed, 1974 insertions, 1249 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
index c6cc493a5486..2b97b8a96fb4 100644
--- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
@@ -148,30 +148,22 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
struct amdgpu_reset_context *reset_context)
{
struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ struct list_head *reset_device_list = reset_context->reset_device_list;
struct amdgpu_device *tmp_adev = NULL;
- struct list_head reset_device_list;
int r = 0;
dev_dbg(adev->dev, "aldebaran perform hw reset\n");
+
+ if (reset_device_list == NULL)
+ return -EINVAL;
+
if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) &&
reset_context->hive == NULL) {
/* Wrong context, return error */
return -EINVAL;
}
- INIT_LIST_HEAD(&reset_device_list);
- if (reset_context->hive) {
- list_for_each_entry (tmp_adev,
- &reset_context->hive->device_list,
- gmc.xgmi.head)
- list_add_tail(&tmp_adev->reset_list,
- &reset_device_list);
- } else {
- list_add_tail(&reset_context->reset_req_dev->reset_list,
- &reset_device_list);
- }
-
- list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
mutex_lock(&tmp_adev->reset_cntl->reset_lock);
tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_MODE2;
}
@@ -179,7 +171,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
* Mode2 reset doesn't need any sync between nodes in XGMI hive, instead launch
* them together so that they can be completed asynchronously on multiple nodes
*/
- list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
/* For XGMI run all resets in parallel to speed up the process */
if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
if (!queue_work(system_unbound_wq,
@@ -197,7 +189,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
/* For XGMI wait for all resets to complete before proceed */
if (!r) {
- list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
flush_work(&tmp_adev->reset_cntl->reset_work);
r = tmp_adev->asic_reset_res;
@@ -207,7 +199,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
}
}
- list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
mutex_unlock(&tmp_adev->reset_cntl->reset_lock);
tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_NONE;
}
@@ -339,10 +331,13 @@ static int
aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
struct amdgpu_reset_context *reset_context)
{
+ struct list_head *reset_device_list = reset_context->reset_device_list;
struct amdgpu_device *tmp_adev = NULL;
- struct list_head reset_device_list;
int r;
+ if (reset_device_list == NULL)
+ return -EINVAL;
+
if (reset_context->reset_req_dev->ip_versions[MP1_HWIP][0] ==
IP_VERSION(13, 0, 2) &&
reset_context->hive == NULL) {
@@ -350,19 +345,7 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
return -EINVAL;
}
- INIT_LIST_HEAD(&reset_device_list);
- if (reset_context->hive) {
- list_for_each_entry (tmp_adev,
- &reset_context->hive->device_list,
- gmc.xgmi.head)
- list_add_tail(&tmp_adev->reset_list,
- &reset_device_list);
- } else {
- list_add_tail(&reset_context->reset_req_dev->reset_list,
- &reset_device_list);
- }
-
- list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
dev_info(tmp_adev->dev,
"GPU reset succeeded, trying to resume\n");
r = aldebaran_mode2_restore_ip(tmp_adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e146810c700b..d597e2656c47 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -317,7 +317,7 @@ enum amdgpu_kiq_irq {
AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,
AMDGPU_CP_KIQ_IRQ_LAST
};
-
+#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */
#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
#define MAX_KIQ_REG_TRY 1000
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 3c09dcc0986e..647220a8762d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -96,6 +96,7 @@ struct amdgpu_amdkfd_fence {
struct amdgpu_kfd_dev {
struct kfd_dev *dev;
uint64_t vram_used;
+ uint64_t vram_used_aligned;
bool init_complete;
struct work_struct reset_work;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a699134a1e8c..cbd593f7d553 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -40,10 +40,10 @@
#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
/*
- * Align VRAM allocations to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB
+ * Align VRAM availability to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB
* BO chunk
*/
-#define VRAM_ALLOCATION_ALIGN (1 << 21)
+#define VRAM_AVAILABLITY_ALIGN (1 << 21)
/* Impose limit on how much memory KFD can use */
static struct {
@@ -149,7 +149,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
* to avoid fragmentation caused by 4K allocations in the tail
* 2M BO chunk.
*/
- vram_needed = ALIGN(size, VRAM_ALLOCATION_ALIGN);
+ vram_needed = size;
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
system_mem_needed = size;
} else if (!(alloc_flag &
@@ -182,8 +182,10 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
*/
WARN_ONCE(vram_needed && !adev,
"adev reference can't be null when vram is used");
- if (adev)
+ if (adev) {
adev->kfd.vram_used += vram_needed;
+ adev->kfd.vram_used_aligned += ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
+ }
kfd_mem_limit.system_mem_used += system_mem_needed;
kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
@@ -203,8 +205,10 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
WARN_ONCE(!adev,
"adev reference can't be null when alloc mem flags vram is set");
- if (adev)
- adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN);
+ if (adev) {
+ adev->kfd.vram_used -= size;
+ adev->kfd.vram_used_aligned -= ALIGN(size, VRAM_AVAILABLITY_ALIGN);
+ }
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
kfd_mem_limit.system_mem_used -= size;
} else if (!(alloc_flag &
@@ -1608,15 +1612,14 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev)
uint64_t reserved_for_pt =
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
size_t available;
-
spin_lock(&kfd_mem_limit.mem_limit_lock);
available = adev->gmc.real_vram_size
- - adev->kfd.vram_used
+ - adev->kfd.vram_used_aligned
- atomic64_read(&adev->vram_pin_size)
- reserved_for_pt;
spin_unlock(&kfd_mem_limit.mem_limit_lock);
- return ALIGN_DOWN(available, VRAM_ALLOCATION_ALIGN);
+ return ALIGN_DOWN(available, VRAM_AVAILABLITY_ALIGN);
}
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index fd8f3731758e..b81b77a9efa6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -314,7 +314,7 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
mem_channel_number = vram_info->v30.channel_num;
mem_channel_width = vram_info->v30.channel_width;
if (vram_width)
- *vram_width = mem_channel_number * mem_channel_width;
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
break;
default:
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index d8f1335bc68f..b7bae833c804 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -837,16 +837,12 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
continue;
r = amdgpu_vm_bo_update(adev, bo_va, false);
- if (r) {
- mutex_unlock(&p->bo_list->bo_list_mutex);
+ if (r)
return r;
- }
r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
- if (r) {
- mutex_unlock(&p->bo_list->bo_list_mutex);
+ if (r)
return r;
- }
}
r = amdgpu_vm_handle_moved(adev, vm);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index e2eec985adb3..cb00c7d6f50b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1705,7 +1705,7 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f,
{
struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
char reg_offset[11];
- uint32_t *new, *tmp = NULL;
+ uint32_t *new = NULL, *tmp = NULL;
int ret, i = 0, len = 0;
do {
@@ -1747,7 +1747,8 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f,
ret = size;
error_free:
- kfree(tmp);
+ if (tmp != new)
+ kfree(tmp);
kfree(new);
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index c4a6fe3070b6..e8a0b19b7398 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4742,6 +4742,8 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
reset_list);
amdgpu_reset_reg_dumps(tmp_adev);
+
+ reset_context->reset_device_list = device_list_handle;
r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
/* If reset handler not implemented, continue; otherwise return */
if (r == -ENOSYS)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 5071b96be982..b1099ee79c50 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -272,10 +272,6 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
/* Signal all jobs not yet scheduled */
for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
struct drm_sched_rq *rq = &sched->sched_rq[i];
-
- if (!rq)
- continue;
-
spin_lock(&rq->lock);
list_for_each_entry(s_entity, &rq->entities, list) {
while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index 9e55a5d7a825..ffda1560c648 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -37,6 +37,7 @@ struct amdgpu_reset_context {
struct amdgpu_device *reset_req_dev;
struct amdgpu_job *job;
struct amdgpu_hive_info *hive;
+ struct list_head *reset_device_list;
unsigned long flags;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 3b4c19412625..134575a3893c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -637,6 +637,8 @@ struct amdgpu_ttm_tt {
#endif
};
+#define ttm_to_amdgpu_ttm_tt(ptr) container_of(ptr, struct amdgpu_ttm_tt, ttm)
+
#ifdef CONFIG_DRM_AMDGPU_USERPTR
/*
* amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
@@ -648,7 +650,7 @@ struct amdgpu_ttm_tt {
int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
{
struct ttm_tt *ttm = bo->tbo.ttm;
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
unsigned long start = gtt->userptr;
struct vm_area_struct *vma;
struct mm_struct *mm;
@@ -702,7 +704,7 @@ out_unlock:
*/
bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
bool r = false;
if (!gtt || !gtt->userptr)
@@ -751,7 +753,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev,
struct ttm_tt *ttm)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
enum dma_data_direction direction = write ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
@@ -788,7 +790,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
struct ttm_tt *ttm)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
enum dma_data_direction direction = write ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
@@ -822,7 +824,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
{
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
struct ttm_tt *ttm = tbo->ttm;
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
if (amdgpu_bo_encrypted(abo))
flags |= AMDGPU_PTE_TMZ;
@@ -860,7 +862,7 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
struct ttm_resource *bo_mem)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- struct amdgpu_ttm_tt *gtt = (void*)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
uint64_t flags;
int r;
@@ -927,7 +929,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct ttm_operation_ctx ctx = { false, false };
- struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(bo->ttm);
struct ttm_placement placement;
struct ttm_place placements;
struct ttm_resource *tmp;
@@ -998,7 +1000,7 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
struct ttm_tt *ttm)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
/* if the pages have userptr pinning then clear that first */
if (gtt->userptr) {
@@ -1025,7 +1027,7 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev,
struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
if (gtt->usertask)
put_task_struct(gtt->usertask);
@@ -1079,7 +1081,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
struct ttm_operation_ctx *ctx)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
pgoff_t i;
int ret;
@@ -1113,7 +1115,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
struct amdgpu_device *adev;
pgoff_t i;
@@ -1182,7 +1184,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
/* Set TTM_TT_FLAG_EXTERNAL before populate but after create. */
bo->ttm->page_flags |= TTM_TT_FLAG_EXTERNAL;
- gtt = (void *)bo->ttm;
+ gtt = ttm_to_amdgpu_ttm_tt(bo->ttm);
gtt->userptr = addr;
gtt->userflags = flags;
@@ -1199,7 +1201,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
*/
struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
if (gtt == NULL)
return NULL;
@@ -1218,7 +1220,7 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
unsigned long end, unsigned long *userptr)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
unsigned long size;
if (gtt == NULL || !gtt->userptr)
@@ -1241,7 +1243,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
*/
bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
if (gtt == NULL || !gtt->userptr)
return false;
@@ -1254,7 +1256,7 @@ bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
*/
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
if (gtt == NULL)
return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
index 108e8e8a1a36..576849e95296 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
@@ -496,8 +496,7 @@ static int amdgpu_vkms_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_height = YRES_MAX;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- /* disable prefer shadow for now due to hibernation issues */
- adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base;
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
index 33a8a7365aef..f0e235f98afb 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
@@ -28,13 +28,44 @@
#include "navi10_enum.h"
#include "soc15_common.h"
+#define regATHUB_MISC_CNTL_V3_0_1 0x00d7
+#define regATHUB_MISC_CNTL_V3_0_1_BASE_IDX 0
+
+
+static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ switch (adev->ip_versions[ATHUB_HWIP][0]) {
+ case IP_VERSION(3, 0, 1):
+ data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1);
+ break;
+ default:
+ data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
+ break;
+ }
+ return data;
+}
+
+static void athub_v3_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data)
+{
+ switch (adev->ip_versions[ATHUB_HWIP][0]) {
+ case IP_VERSION(3, 0, 1):
+ WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1, data);
+ break;
+ default:
+ WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);
+ break;
+ }
+}
+
static void
athub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
bool enable)
{
uint32_t def, data;
- def = data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
+ def = data = athub_v3_0_get_cg_cntl(adev);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_MGCG))
data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
@@ -42,7 +73,7 @@ athub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
if (def != data)
- WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);
+ athub_v3_0_set_cg_cntl(adev, data);
}
static void
@@ -51,7 +82,7 @@ athub_v3_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
{
uint32_t def, data;
- def = data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
+ def = data = athub_v3_0_get_cg_cntl(adev);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_LS))
data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
@@ -59,7 +90,7 @@ athub_v3_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
if (def != data)
- WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);
+ athub_v3_0_set_cg_cntl(adev, data);
}
int athub_v3_0_set_clockgating(struct amdgpu_device *adev,
@@ -70,6 +101,7 @@ int athub_v3_0_set_clockgating(struct amdgpu_device *adev,
switch (adev->ip_versions[ATHUB_HWIP][0]) {
case IP_VERSION(3, 0, 0):
+ case IP_VERSION(3, 0, 1):
case IP_VERSION(3, 0, 2):
athub_v3_0_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
@@ -88,7 +120,7 @@ void athub_v3_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
int data;
/* AMD_CG_SUPPORT_ATHUB_MGCG */
- data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
+ data = athub_v3_0_get_cg_cntl(adev);
if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
*flags |= AMD_CG_SUPPORT_ATHUB_MGCG;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 9c964cd3b5d4..288fce7dc0ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -2796,8 +2796,7 @@ static int dce_v10_0_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- /* disable prefer shadow for now due to hibernation issues */
- adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index e0ad9f27dc3f..cbe5250b31cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -2914,8 +2914,7 @@ static int dce_v11_0_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- /* disable prefer shadow for now due to hibernation issues */
- adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 77f5e998a120..b1c44fab074f 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -2673,8 +2673,7 @@ static int dce_v6_0_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_width = 16384;
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- /* disable prefer shadow for now due to hibernation issues */
- adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 802e5c753271..a22b45c92792 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -2693,8 +2693,11 @@ static int dce_v8_0_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- /* disable prefer shadow for now due to hibernation issues */
- adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+ if (adev->asic_type == CHIP_HAWAII)
+ /* disable prefer shadow for now due to hibernation issues */
+ adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+ else
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index fafbad3cf08d..a2a4dc1844c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4846,7 +4846,7 @@ static int gfx_v10_0_sw_init(void *handle)
case IP_VERSION(10, 3, 3):
case IP_VERSION(10, 3, 7):
adev->gfx.me.num_me = 1;
- adev->gfx.me.num_pipe_per_me = 2;
+ adev->gfx.me.num_pipe_per_me = 1;
adev->gfx.me.num_queue_per_pipe = 1;
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 6fd71cb10e54..158d87e6805d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -53,6 +53,7 @@
#define GFX11_MEC_HPD_SIZE 2048
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
+#define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1 0x1388
#define regCGTT_WD_CLK_CTRL 0x5086
#define regCGTT_WD_CLK_CTRL_BASE_IDX 1
@@ -5279,6 +5280,38 @@ static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
.update_spm_vmid = gfx_v11_0_update_spm_vmid,
};
+static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
+{
+ u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
+
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
+ data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+ else
+ data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+
+ WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data);
+
+ // Program RLC_PG_DELAY3 for CGPG hysteresis
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(11, 0, 1):
+ WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
+{
+ amdgpu_gfx_rlc_enter_safe_mode(adev);
+
+ gfx_v11_cntl_power_gating(adev, enable);
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev);
+}
+
static int gfx_v11_0_set_powergating_state(void *handle,
enum amd_powergating_state state)
{
@@ -5293,6 +5326,11 @@ static int gfx_v11_0_set_powergating_state(void *handle,
case IP_VERSION(11, 0, 2):
amdgpu_gfx_off_ctrl(adev, enable);
break;
+ case IP_VERSION(11, 0, 1):
+ gfx_v11_cntl_pg(adev, enable);
+ /* TODO: Enable this when GFXOFF is ready */
+ // amdgpu_gfx_off_ctrl(adev, enable);
+ break;
default:
break;
}
@@ -5310,6 +5348,7 @@ static int gfx_v11_0_set_clockgating_state(void *handle,
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
gfx_v11_0_update_gfx_clock_gating(adev,
state == AMD_CG_STATE_GATE);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 9ae8cdaa033e..f513e2c2e964 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -419,6 +419,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
uint32_t seq;
uint16_t queried_pasid;
bool ret;
+ u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
@@ -437,7 +438,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
amdgpu_ring_commit(ring);
spin_unlock(&adev->gfx.kiq.ring_lock);
- r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+ r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
if (r < 1) {
dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
return -ETIME;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 22761a3bb818..4603653916f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -896,6 +896,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
uint32_t seq;
uint16_t queried_pasid;
bool ret;
+ u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
@@ -935,7 +936,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
amdgpu_ring_commit(ring);
spin_unlock(&adev->gfx.kiq.ring_lock);
- r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+ r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
if (r < 1) {
dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
up_read(&adev->reset_domain->sem);
@@ -1624,12 +1625,15 @@ static int gmc_v9_0_sw_init(void *handle)
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47);
else
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+ adev->gmc.translate_further = adev->vm_manager.num_level > 1;
break;
case IP_VERSION(9, 4, 1):
adev->num_vmhubs = 3;
/* Keep the vm size same with Vega20 */
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ adev->gmc.translate_further = adev->vm_manager.num_level > 1;
break;
default:
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
index 39a696cd45b5..29c3484ae1f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
@@ -40,6 +40,156 @@ static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev,
0);
}
+static void hdp_v5_2_update_mem_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t hdp_clk_cntl;
+ uint32_t hdp_mem_pwr_cntl;
+
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD)))
+ return;
+
+ hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL);
+ hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
+
+ /* Before doing clock/power mode switch, forced on MEM clock */
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ ATOMIC_MEM_CLK_SOFT_OVERRIDE, 1);
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ RC_MEM_CLK_SOFT_OVERRIDE, 1);
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+
+ /* disable clock and power gating before any changing */
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_CTRL_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_LS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_DS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_SD_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_CTRL_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_SD_EN, 0);
+ WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+
+ /* Already disabled above. The actions below are for "enabled" only */
+ if (enable) {
+ /* only one clock gating mode (LS/DS/SD) can be enabled */
+ if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_SD_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_SD_EN, 1);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_LS_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 1);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_DS_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 1);
+ }
+
+ /* confirmed that ATOMIC/RC_MEM_POWER_CTRL_EN have to be set for SRAM LS/DS/SD */
+ if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD)) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_CTRL_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_CTRL_EN, 1);
+ WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+ }
+ }
+
+ /* disable MEM clock override after clock/power mode changing */
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ ATOMIC_MEM_CLK_SOFT_OVERRIDE, 0);
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ RC_MEM_CLK_SOFT_OVERRIDE, 0);
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+}
+
+static void hdp_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t hdp_clk_cntl;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG))
+ return;
+
+ hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL);
+
+ if (enable) {
+ hdp_clk_cntl &=
+ ~(uint32_t)
+ (HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK);
+ } else {
+ hdp_clk_cntl |= HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK;
+ }
+
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+}
+
+static void hdp_v5_2_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ uint32_t tmp;
+
+ /* AMD_CG_SUPPORT_HDP_MGCG */
+ tmp = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL);
+ if (!(tmp & (HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK)))
+ *flags |= AMD_CG_SUPPORT_HDP_MGCG;
+
+ /* AMD_CG_SUPPORT_HDP_LS/DS/SD */
+ tmp = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
+ if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_LS;
+ else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_DS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_DS;
+ else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_SD_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_SD;
+}
+
+static void hdp_v5_2_update_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ hdp_v5_2_update_mem_power_gating(adev, enable);
+ hdp_v5_2_update_medium_grain_clock_gating(adev, enable);
+}
+
const struct amdgpu_hdp_funcs hdp_v5_2_funcs = {
.flush_hdp = hdp_v5_2_flush_hdp,
+ .update_clock_gating = hdp_v5_2_update_clock_gating,
+ .get_clock_gating_state = hdp_v5_2_get_clockgating_state,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
index 92dc60a9d209..085e613f3646 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
@@ -727,6 +727,7 @@ static const struct amd_ip_funcs ih_v6_0_ip_funcs = {
static const struct amdgpu_ih_funcs ih_v6_0_funcs = {
.get_wptr = ih_v6_0_get_wptr,
.decode_iv = amdgpu_ih_decode_iv_helper,
+ .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
.set_rptr = ih_v6_0_set_rptr
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
index cac72ced94c8..e8058edc1d10 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
@@ -518,18 +518,41 @@ static u64 mmhub_v3_0_1_get_mc_fb_offset(struct amdgpu_device *adev)
static void mmhub_v3_0_1_update_medium_grain_clock_gating(struct amdgpu_device *adev,
bool enable)
{
- //TODO
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ if (enable)
+ data |= MM_ATC_L2_MISC_CG__ENABLE_MASK;
+ else
+ data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
}
static void mmhub_v3_0_1_update_medium_grain_light_sleep(struct amdgpu_device *adev,
bool enable)
{
- //TODO
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ if (enable)
+ data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+ else
+ data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
}
static int mmhub_v3_0_1_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state)
{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
mmhub_v3_0_1_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
mmhub_v3_0_1_update_medium_grain_light_sleep(adev,
@@ -539,7 +562,20 @@ static int mmhub_v3_0_1_set_clockgating(struct amdgpu_device *adev,
static void mmhub_v3_0_1_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
- //TODO
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ /* AMD_CG_SUPPORT_MC_MGCG */
+ if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_MGCG;
+
+ /* AMD_CG_SUPPORT_MC_LS */
+ if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_LS;
}
const struct amdgpu_mmhub_funcs mmhub_v3_0_1_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index 4b5396d3e60f..eec13cb5bf75 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -409,9 +409,11 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev,
u32 wptr, tmp;
struct amdgpu_ih_regs *ih_regs;
- if (ih == &adev->irq.ih) {
+ if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) {
/* Only ring0 supports writeback. On other rings fall back
* to register-based code with overflow checking below.
+ * ih_soft ring doesn't have any backing hardware registers,
+ * update wptr and return.
*/
wptr = le32_to_cpu(*ih->wptr_cpu);
@@ -483,6 +485,9 @@ static void navi10_ih_set_rptr(struct amdgpu_device *adev,
{
struct amdgpu_ih_regs *ih_regs;
+ if (ih == &adev->irq.ih_soft)
+ return;
+
if (ih->use_doorbell) {
/* XXX check if swapping is necessary on BE */
*ih->rptr_cpu = ih->rptr;
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
index a2588200ea58..0b2ac418e4ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
@@ -101,6 +101,16 @@ static int psp_v12_0_init_microcode(struct psp_context *psp)
adev->psp.dtm_context.context.bin_desc.start_addr =
(uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr +
le32_to_cpu(ta_hdr->dtm.offset_bytes);
+
+ if (adev->apu_flags & AMD_APU_IS_RENOIR) {
+ adev->psp.securedisplay_context.context.bin_desc.fw_version =
+ le32_to_cpu(ta_hdr->securedisplay.fw_version);
+ adev->psp.securedisplay_context.context.bin_desc.size_bytes =
+ le32_to_cpu(ta_hdr->securedisplay.size_bytes);
+ adev->psp.securedisplay_context.context.bin_desc.start_addr =
+ (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr +
+ le32_to_cpu(ta_hdr->securedisplay.offset_bytes);
+ }
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index 726a5bba40b2..a75a286e1ecf 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -20,7 +20,6 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
-#include <linux/dev_printk.h>
#include <drm/drm_drv.h>
#include <linux/vmalloc.h>
#include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 52816de5e17b..1ff7fc7bb340 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -546,8 +546,10 @@ static int soc21_common_early_init(void *handle)
case IP_VERSION(11, 0, 0):
adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG |
AMD_CG_SUPPORT_GFX_CGLS |
+#if 0
AMD_CG_SUPPORT_GFX_3D_CGCG |
AMD_CG_SUPPORT_GFX_3D_CGLS |
+#endif
AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_REPEATER_FGCG |
AMD_CG_SUPPORT_GFX_FGCG |
@@ -575,7 +577,9 @@ static int soc21_common_early_init(void *handle)
AMD_CG_SUPPORT_VCN_MGCG |
AMD_CG_SUPPORT_JPEG_MGCG |
AMD_CG_SUPPORT_ATHUB_MGCG |
- AMD_CG_SUPPORT_ATHUB_LS;
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_HDP_SD;
adev->pg_flags =
AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_VCN_DPG |
@@ -586,9 +590,23 @@ static int soc21_common_early_init(void *handle)
break;
case IP_VERSION(11, 0, 1):
adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
AMD_CG_SUPPORT_VCN_MGCG |
AMD_CG_SUPPORT_JPEG_MGCG;
adev->pg_flags =
+ AMD_PG_SUPPORT_GFX_PG |
AMD_PG_SUPPORT_JPEG;
adev->external_rev_id = adev->rev_id + 0x1;
break;
@@ -683,6 +701,7 @@ static int soc21_common_set_clockgating_state(void *handle,
switch (adev->ip_versions[NBIO_HWIP][0]) {
case IP_VERSION(4, 3, 0):
+ case IP_VERSION(4, 3, 1):
adev->nbio.funcs->update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
adev->nbio.funcs->update_medium_grain_light_sleep(adev,
@@ -690,6 +709,10 @@ static int soc21_common_set_clockgating_state(void *handle,
adev->hdp.funcs->update_clock_gating(adev,
state == AMD_CG_STATE_GATE);
break;
+ case IP_VERSION(7, 7, 0):
+ adev->hdp.funcs->update_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
default:
break;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index ca14c3ef742e..fb2d74f30448 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -1115,7 +1115,7 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
*
* Stop VCN block with dpg mode
*/
-static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+static void vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
{
uint32_t tmp;
@@ -1133,7 +1133,6 @@ static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
/* disable dynamic power gating mode */
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
- return 0;
}
/**
@@ -1154,7 +1153,7 @@ static int vcn_v4_0_stop(struct amdgpu_device *adev)
fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- r = vcn_v4_0_stop_dpg_mode(adev, i);
+ vcn_v4_0_stop_dpg_mode(adev, i);
continue;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index cdd599a08125..03b7066471f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -334,9 +334,11 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev,
u32 wptr, tmp;
struct amdgpu_ih_regs *ih_regs;
- if (ih == &adev->irq.ih) {
+ if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) {
/* Only ring0 supports writeback. On other rings fall back
* to register-based code with overflow checking below.
+ * ih_soft ring doesn't have any backing hardware registers,
+ * update wptr and return.
*/
wptr = le32_to_cpu(*ih->wptr_cpu);
@@ -409,6 +411,9 @@ static void vega10_ih_set_rptr(struct amdgpu_device *adev,
{
struct amdgpu_ih_regs *ih_regs;
+ if (ih == &adev->irq.ih_soft)
+ return;
+
if (ih->use_doorbell) {
/* XXX check if swapping is necessary on BE */
*ih->rptr_cpu = ih->rptr;
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
index 3b4eb8285943..2022ffbb8dba 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
@@ -385,9 +385,11 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev,
u32 wptr, tmp;
struct amdgpu_ih_regs *ih_regs;
- if (ih == &adev->irq.ih) {
+ if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) {
/* Only ring0 supports writeback. On other rings fall back
* to register-based code with overflow checking below.
+ * ih_soft ring doesn't have any backing hardware registers,
+ * update wptr and return.
*/
wptr = le32_to_cpu(*ih->wptr_cpu);
@@ -461,6 +463,9 @@ static void vega20_ih_set_rptr(struct amdgpu_device *adev,
{
struct amdgpu_ih_regs *ih_regs;
+ if (ih == &adev->irq.ih_soft)
+ return;
+
if (ih->use_doorbell) {
/* XXX check if swapping is necessary on BE */
*ih->rptr_cpu = ih->rptr;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 2b3d8bc8f0aa..dc774ddf3445 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -874,7 +874,7 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
err = kfd_wait_on_events(p, args->num_events,
(void __user *)args->events_ptr,
(args->wait_for_all != 0),
- args->timeout, &args->wait_result);
+ &args->timeout, &args->wait_result);
return err;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index f5853835f03a..357298e69495 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -102,13 +102,18 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
switch (sdma_version) {
case IP_VERSION(6, 0, 0):
- case IP_VERSION(6, 0, 1):
case IP_VERSION(6, 0, 2):
/* Reserve 1 for paging and 1 for gfx */
kfd->device_info.num_reserved_sdma_queues_per_engine = 2;
/* BIT(0)=engine-0 queue-0; BIT(1)=engine-1 queue-0; BIT(2)=engine-0 queue-1; ... */
kfd->device_info.reserved_sdma_queues_bitmap = 0xFULL;
break;
+ case IP_VERSION(6, 0, 1):
+ /* Reserve 1 for paging and 1 for gfx */
+ kfd->device_info.num_reserved_sdma_queues_per_engine = 2;
+ /* BIT(0)=engine-0 queue-0; BIT(1)=engine-0 queue-1; ... */
+ kfd->device_info.reserved_sdma_queues_bitmap = 0x3ULL;
+ break;
default:
break;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 3942a56c28bb..83e3ce9f6049 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -894,7 +894,8 @@ static long user_timeout_to_jiffies(uint32_t user_timeout_ms)
return msecs_to_jiffies(user_timeout_ms) + 1;
}
-static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters)
+static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters,
+ bool undo_auto_reset)
{
uint32_t i;
@@ -903,6 +904,9 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters)
spin_lock(&waiters[i].event->lock);
remove_wait_queue(&waiters[i].event->wq,
&waiters[i].wait);
+ if (undo_auto_reset && waiters[i].activated &&
+ waiters[i].event && waiters[i].event->auto_reset)
+ set_event(waiters[i].event);
spin_unlock(&waiters[i].event->lock);
}
@@ -911,7 +915,7 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters)
int kfd_wait_on_events(struct kfd_process *p,
uint32_t num_events, void __user *data,
- bool all, uint32_t user_timeout_ms,
+ bool all, uint32_t *user_timeout_ms,
uint32_t *wait_result)
{
struct kfd_event_data __user *events =
@@ -920,7 +924,7 @@ int kfd_wait_on_events(struct kfd_process *p,
int ret = 0;
struct kfd_event_waiter *event_waiters = NULL;
- long timeout = user_timeout_to_jiffies(user_timeout_ms);
+ long timeout = user_timeout_to_jiffies(*user_timeout_ms);
event_waiters = alloc_event_waiters(num_events);
if (!event_waiters) {
@@ -970,15 +974,11 @@ int kfd_wait_on_events(struct kfd_process *p,
}
if (signal_pending(current)) {
- /*
- * This is wrong when a nonzero, non-infinite timeout
- * is specified. We need to use
- * ERESTARTSYS_RESTARTBLOCK, but struct restart_block
- * contains a union with data for each user and it's
- * in generic kernel code that I don't want to
- * touch yet.
- */
ret = -ERESTARTSYS;
+ if (*user_timeout_ms != KFD_EVENT_TIMEOUT_IMMEDIATE &&
+ *user_timeout_ms != KFD_EVENT_TIMEOUT_INFINITE)
+ *user_timeout_ms = jiffies_to_msecs(
+ max(0l, timeout-1));
break;
}
@@ -1019,7 +1019,7 @@ int kfd_wait_on_events(struct kfd_process *p,
event_waiters, events);
out_unlock:
- free_waiters(num_events, event_waiters);
+ free_waiters(num_events, event_waiters, ret == -ERESTARTSYS);
mutex_unlock(&p->event_mutex);
out:
if (ret)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index d03a3b9c9c5d..bf610e3b683b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1317,7 +1317,7 @@ void kfd_event_free_process(struct kfd_process *p);
int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma);
int kfd_wait_on_events(struct kfd_process *p,
uint32_t num_events, void __user *data,
- bool all, uint32_t user_timeout_ms,
+ bool all, uint32_t *user_timeout_ms,
uint32_t *wait_result);
void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
uint32_t valid_id_bits);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index a67ba8879a56..11074cc8c333 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -541,7 +541,6 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
kfree(svm_bo);
return -ESRCH;
}
- svm_bo->svms = prange->svms;
svm_bo->eviction_fence =
amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
mm,
@@ -3273,7 +3272,6 @@ int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)
static void svm_range_evict_svm_bo_worker(struct work_struct *work)
{
struct svm_range_bo *svm_bo;
- struct kfd_process *p;
struct mm_struct *mm;
int r = 0;
@@ -3281,13 +3279,12 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
if (!svm_bo_ref_unless_zero(svm_bo))
return; /* svm_bo was freed while eviction was pending */
- /* svm_range_bo_release destroys this worker thread. So during
- * the lifetime of this thread, kfd_process and mm will be valid.
- */
- p = container_of(svm_bo->svms, struct kfd_process, svms);
- mm = p->mm;
- if (!mm)
+ if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
+ mm = svm_bo->eviction_fence->mm;
+ } else {
+ svm_range_bo_unref(svm_bo);
return;
+ }
mmap_read_lock(mm);
spin_lock(&svm_bo->list_lock);
@@ -3305,8 +3302,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
mutex_lock(&prange->migrate_mutex);
do {
- r = svm_migrate_vram_to_ram(prange,
- svm_bo->eviction_fence->mm,
+ r = svm_migrate_vram_to_ram(prange, mm,
KFD_MIGRATE_TRIGGER_TTM_EVICTION);
} while (!r && prange->actual_loc && --retries);
@@ -3324,6 +3320,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
}
spin_unlock(&svm_bo->list_lock);
mmap_read_unlock(mm);
+ mmput(mm);
dma_fence_signal(&svm_bo->eviction_fence->base);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 9156b041ef17..cfac13ad06ef 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -46,7 +46,6 @@ struct svm_range_bo {
spinlock_t list_lock;
struct amdgpu_amdkfd_fence *eviction_fence;
struct work_struct eviction_work;
- struct svm_range_list *svms;
uint32_t evicting;
struct work_struct release_work;
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 25990bec600d..3f0a4a415907 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1392,8 +1392,8 @@ static int kfd_build_p2p_node_entry(struct kfd_topology_device *dev,
static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int gpu_node)
{
+ struct kfd_iolink_properties *gpu_link, *tmp_link, *cpu_link;
struct kfd_iolink_properties *props = NULL, *props2 = NULL;
- struct kfd_iolink_properties *gpu_link, *cpu_link;
struct kfd_topology_device *cpu_dev;
int ret = 0;
int i, num_cpu;
@@ -1416,16 +1416,19 @@ static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int g
continue;
/* find CPU <--> CPU links */
+ cpu_link = NULL;
cpu_dev = kfd_topology_device_by_proximity_domain(i);
if (cpu_dev) {
- list_for_each_entry(cpu_link,
+ list_for_each_entry(tmp_link,
&cpu_dev->io_link_props, list) {
- if (cpu_link->node_to == gpu_link->node_to)
+ if (tmp_link->node_to == gpu_link->node_to) {
+ cpu_link = tmp_link;
break;
+ }
}
}
- if (cpu_link->node_to != gpu_link->node_to)
+ if (!cpu_link)
return -ENOMEM;
/* CPU <--> CPU <--> GPU, GPU node*/
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 8660d93cc405..5140d9c2bf3b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -3825,8 +3825,11 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- /* disable prefer shadow for now due to hibernation issues */
- adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+ if (adev->asic_type == CHIP_HAWAII)
+ /* disable prefer shadow for now due to hibernation issues */
+ adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+ else
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
/* indicates support for immediate flip */
adev_to_drm(adev)->mode_config.async_page_flip = true;
@@ -4135,6 +4138,7 @@ static void register_backlight_device(struct amdgpu_display_manager *dm,
}
}
+static void amdgpu_set_panel_orientation(struct drm_connector *connector);
/*
* In this architecture, the association
@@ -4326,6 +4330,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
adev_to_drm(adev)->vblank_disable_immediate = false;
}
}
+ amdgpu_set_panel_orientation(&aconnector->base);
}
/* Software is initialized. Now we can register interrupt handlers. */
@@ -6684,6 +6689,10 @@ static void amdgpu_set_panel_orientation(struct drm_connector *connector)
connector->connector_type != DRM_MODE_CONNECTOR_LVDS)
return;
+ mutex_lock(&connector->dev->mode_config.mutex);
+ amdgpu_dm_connector_get_modes(connector);
+ mutex_unlock(&connector->dev->mode_config.mutex);
+
encoder = amdgpu_dm_connector_to_encoder(connector);
if (!encoder)
return;
@@ -6728,8 +6737,6 @@ static void amdgpu_dm_connector_ddc_get_modes(struct drm_connector *connector,
* restored here.
*/
amdgpu_dm_update_freesync_caps(connector, edid);
-
- amdgpu_set_panel_orientation(connector);
} else {
amdgpu_dm_connector->num_modes = 0;
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index b841b8b0a9d8..fca7cf9dbaee 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -660,7 +660,7 @@ static int get_plane_modifiers(struct amdgpu_device *adev, unsigned int plane_ty
add_gfx10_1_modifiers(adev, mods, &size, &capacity);
break;
case AMDGPU_FAMILY_GC_11_0_0:
- case AMDGPU_FAMILY_GC_11_0_2:
+ case AMDGPU_FAMILY_GC_11_0_1:
add_gfx11_modifiers(adev, mods, &size, &capacity);
break;
}
@@ -1412,7 +1412,7 @@ static bool dm_plane_format_mod_supported(struct drm_plane *plane,
}
break;
case AMDGPU_FAMILY_GC_11_0_0:
- case AMDGPU_FAMILY_GC_11_0_2:
+ case AMDGPU_FAMILY_GC_11_0_1:
switch (AMD_FMT_MOD_GET(TILE, modifier)) {
case AMD_FMT_MOD_TILE_GFX11_256K_R_X:
case AMD_FMT_MOD_TILE_GFX9_64K_R_X:
diff --git a/drivers/gpu/drm/amd/display/dc/basics/conversion.c b/drivers/gpu/drm/amd/display/dc/basics/conversion.c
index 6767fab55c26..352e9afb85c6 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/conversion.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/conversion.c
@@ -100,3 +100,24 @@ void convert_float_matrix(
matrix[i] = (uint16_t)reg_value;
}
}
+
+static uint32_t find_gcd(uint32_t a, uint32_t b)
+{
+ uint32_t remainder = 0;
+ while (b != 0) {
+ remainder = a % b;
+ a = b;
+ b = remainder;
+ }
+ return a;
+}
+
+void reduce_fraction(uint32_t num, uint32_t den,
+ uint32_t *out_num, uint32_t *out_den)
+{
+ uint32_t gcd = 0;
+
+ gcd = find_gcd(num, den);
+ *out_num = num / gcd;
+ *out_den = den / gcd;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/basics/conversion.h b/drivers/gpu/drm/amd/display/dc/basics/conversion.h
index ade785c4fdc7..81da4e6f7a1a 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/conversion.h
+++ b/drivers/gpu/drm/amd/display/dc/basics/conversion.h
@@ -38,6 +38,9 @@ void convert_float_matrix(
struct fixed31_32 *flt,
uint32_t buffer_size);
+void reduce_fraction(uint32_t num, uint32_t den,
+ uint32_t *out_num, uint32_t *out_den);
+
static inline unsigned int log_2(unsigned int num)
{
return ilog2(num);
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
index 4c76091fd1f2..f276abb63bcd 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
@@ -337,7 +337,7 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p
break;
}
- case AMDGPU_FAMILY_GC_11_0_2: {
+ case AMDGPU_FAMILY_GC_11_0_1: {
struct clk_mgr_dcn314 *clk_mgr = kzalloc(sizeof(*clk_mgr), GFP_KERNEL);
if (clk_mgr == NULL) {
@@ -397,7 +397,7 @@ void dc_destroy_clk_mgr(struct clk_mgr *clk_mgr_base)
dcn32_clk_mgr_destroy(clk_mgr);
break;
- case AMDGPU_FAMILY_GC_11_0_2:
+ case AMDGPU_FAMILY_GC_11_0_1:
dcn314_clk_mgr_destroy(clk_mgr);
break;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
index 0202dc682682..ca6dfd2d7561 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
@@ -24,10 +24,9 @@
*/
#include "dccg.h"
-#include "clk_mgr_internal.h"
+#include "rn_clk_mgr.h"
#include "dcn20/dcn20_clk_mgr.h"
-#include "rn_clk_mgr.h"
#include "dml/dcn20/dcn20_fpu.h"
#include "dce100/dce_clk_mgr.h"
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h
index 2e088c5171b2..f1319957e400 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h
@@ -28,6 +28,7 @@
#include "clk_mgr.h"
#include "dm_pp_smu.h"
+#include "clk_mgr_internal.h"
extern struct wm_table ddr4_wm_table_gs;
extern struct wm_table lpddr4_wm_table_gs;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
index ee99974b3b62..beb025cd3dc2 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
@@ -307,16 +307,6 @@ static void dcn314_enable_pme_wa(struct clk_mgr *clk_mgr_base)
dcn314_smu_enable_pme_wa(clk_mgr);
}
-void dcn314_init_clocks(struct clk_mgr *clk_mgr)
-{
- memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks));
- // Assumption is that boot state always supports pstate
- clk_mgr->clks.p_state_change_support = true;
- clk_mgr->clks.prev_p_state_change_support = true;
- clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN;
- clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN;
-}
-
bool dcn314_are_clock_states_equal(struct dc_clocks *a,
struct dc_clocks *b)
{
@@ -425,7 +415,7 @@ static struct wm_table lpddr5_wm_table = {
}
};
-static DpmClocks_t dummy_clocks;
+static DpmClocks314_t dummy_clocks;
static struct dcn314_watermarks dummy_wms = { 0 };
@@ -510,7 +500,7 @@ static void dcn314_notify_wm_ranges(struct clk_mgr *clk_mgr_base)
static void dcn314_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr,
struct dcn314_smu_dpm_clks *smu_dpm_clks)
{
- DpmClocks_t *table = smu_dpm_clks->dpm_clks;
+ DpmClocks314_t *table = smu_dpm_clks->dpm_clks;
if (!clk_mgr->smu_ver)
return;
@@ -527,6 +517,26 @@ static void dcn314_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr,
dcn314_smu_transfer_dpm_table_smu_2_dram(clk_mgr);
}
+static inline bool is_valid_clock_value(uint32_t clock_value)
+{
+ return clock_value > 1 && clock_value < 100000;
+}
+
+static unsigned int convert_wck_ratio(uint8_t wck_ratio)
+{
+ switch (wck_ratio) {
+ case WCK_RATIO_1_2:
+ return 2;
+
+ case WCK_RATIO_1_4:
+ return 4;
+
+ default:
+ break;
+ }
+ return 1;
+}
+
static uint32_t find_max_clk_value(const uint32_t clocks[], uint32_t num_clocks)
{
uint32_t max = 0;
@@ -540,89 +550,127 @@ static uint32_t find_max_clk_value(const uint32_t clocks[], uint32_t num_clocks)
return max;
}
-static unsigned int find_clk_for_voltage(
- const DpmClocks_t *clock_table,
- const uint32_t clocks[],
- unsigned int voltage)
-{
- int i;
- int max_voltage = 0;
- int clock = 0;
-
- for (i = 0; i < NUM_SOC_VOLTAGE_LEVELS; i++) {
- if (clock_table->SocVoltage[i] == voltage) {
- return clocks[i];
- } else if (clock_table->SocVoltage[i] >= max_voltage &&
- clock_table->SocVoltage[i] < voltage) {
- max_voltage = clock_table->SocVoltage[i];
- clock = clocks[i];
- }
- }
-
- ASSERT(clock);
- return clock;
-}
-
static void dcn314_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk_mgr,
struct integrated_info *bios_info,
- const DpmClocks_t *clock_table)
+ const DpmClocks314_t *clock_table)
{
- int i, j;
struct clk_bw_params *bw_params = clk_mgr->base.bw_params;
- uint32_t max_dispclk = 0, max_dppclk = 0;
-
- j = -1;
-
- ASSERT(NUM_DF_PSTATE_LEVELS <= MAX_NUM_DPM_LVL);
-
- /* Find lowest DPM, FCLK is filled in reverse order*/
+ struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1];
+ uint32_t max_pstate = 0, max_fclk = 0, min_pstate = 0, max_dispclk = 0, max_dppclk = 0;
+ int i;
- for (i = NUM_DF_PSTATE_LEVELS - 1; i >= 0; i--) {
- if (clock_table->DfPstateTable[i].FClk != 0) {
- j = i;
- break;
+ /* Find highest valid fclk pstate */
+ for (i = 0; i < clock_table->NumDfPstatesEnabled; i++) {
+ if (is_valid_clock_value(clock_table->DfPstateTable[i].FClk) &&
+ clock_table->DfPstateTable[i].FClk > max_fclk) {
+ max_fclk = clock_table->DfPstateTable[i].FClk;
+ max_pstate = i;
}
}
- if (j == -1) {
- /* clock table is all 0s, just use our own hardcode */
- ASSERT(0);
- return;
- }
-
- bw_params->clk_table.num_entries = j + 1;
+ /* We expect the table to contain at least one valid fclk entry. */
+ ASSERT(is_valid_clock_value(max_fclk));
- /* dispclk and dppclk can be max at any voltage, same number of levels for both */
+ /* Dispclk and dppclk can be max at any voltage, same number of levels for both */
if (clock_table->NumDispClkLevelsEnabled <= NUM_DISPCLK_DPM_LEVELS &&
clock_table->NumDispClkLevelsEnabled <= NUM_DPPCLK_DPM_LEVELS) {
max_dispclk = find_max_clk_value(clock_table->DispClocks, clock_table->NumDispClkLevelsEnabled);
max_dppclk = find_max_clk_value(clock_table->DppClocks, clock_table->NumDispClkLevelsEnabled);
} else {
+ /* Invalid number of entries in the table from PMFW. */
ASSERT(0);
}
- for (i = 0; i < bw_params->clk_table.num_entries; i++, j--) {
- bw_params->clk_table.entries[i].fclk_mhz = clock_table->DfPstateTable[j].FClk;
- bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[j].MemClk;
- bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[j].Voltage;
- switch (clock_table->DfPstateTable[j].WckRatio) {
- case WCK_RATIO_1_2:
- bw_params->clk_table.entries[i].wck_ratio = 2;
- break;
- case WCK_RATIO_1_4:
- bw_params->clk_table.entries[i].wck_ratio = 4;
- break;
- default:
- bw_params->clk_table.entries[i].wck_ratio = 1;
+ /* Base the clock table on dcfclk, need at least one entry regardless of pmfw table */
+ for (i = 0; i < clock_table->NumDcfClkLevelsEnabled; i++) {
+ uint32_t min_fclk = clock_table->DfPstateTable[0].FClk;
+ int j;
+
+ for (j = 1; j < clock_table->NumDfPstatesEnabled; j++) {
+ if (is_valid_clock_value(clock_table->DfPstateTable[j].FClk) &&
+ clock_table->DfPstateTable[j].FClk < min_fclk &&
+ clock_table->DfPstateTable[j].Voltage <= clock_table->SocVoltage[i]) {
+ min_fclk = clock_table->DfPstateTable[j].FClk;
+ min_pstate = j;
+ }
}
- bw_params->clk_table.entries[i].dcfclk_mhz = find_clk_for_voltage(clock_table, clock_table->DcfClocks, clock_table->DfPstateTable[j].Voltage);
- bw_params->clk_table.entries[i].socclk_mhz = find_clk_for_voltage(clock_table, clock_table->SocClocks, clock_table->DfPstateTable[j].Voltage);
+
+ /* First search defaults for the clocks we don't read using closest lower or equal default dcfclk */
+ for (j = bw_params->clk_table.num_entries - 1; j > 0; j--)
+ if (bw_params->clk_table.entries[j].dcfclk_mhz <= clock_table->DcfClocks[i])
+ break;
+
+ bw_params->clk_table.entries[i].phyclk_mhz = bw_params->clk_table.entries[j].phyclk_mhz;
+ bw_params->clk_table.entries[i].phyclk_d18_mhz = bw_params->clk_table.entries[j].phyclk_d18_mhz;
+ bw_params->clk_table.entries[i].dtbclk_mhz = bw_params->clk_table.entries[j].dtbclk_mhz;
+
+ /* Now update clocks we do read */
+ bw_params->clk_table.entries[i].fclk_mhz = min_fclk;
+ bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[min_pstate].MemClk;
+ bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[min_pstate].Voltage;
+ bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[i];
+ bw_params->clk_table.entries[i].socclk_mhz = clock_table->SocClocks[i];
bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk;
bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk;
+ bw_params->clk_table.entries[i].wck_ratio = convert_wck_ratio(
+ clock_table->DfPstateTable[min_pstate].WckRatio);
+ };
+
+ /* Make sure to include at least one entry at highest pstate */
+ if (max_pstate != min_pstate || i == 0) {
+ if (i > MAX_NUM_DPM_LVL - 1)
+ i = MAX_NUM_DPM_LVL - 1;
+
+ bw_params->clk_table.entries[i].fclk_mhz = max_fclk;
+ bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[max_pstate].MemClk;
+ bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[max_pstate].Voltage;
+ bw_params->clk_table.entries[i].dcfclk_mhz = find_max_clk_value(clock_table->DcfClocks, NUM_DCFCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].socclk_mhz = find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk;
+ bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk;
+ bw_params->clk_table.entries[i].wck_ratio = convert_wck_ratio(
+ clock_table->DfPstateTable[max_pstate].WckRatio);
+ i++;
}
+ bw_params->clk_table.num_entries = i--;
+
+ /* Make sure all highest clocks are included*/
+ bw_params->clk_table.entries[i].socclk_mhz = find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].dispclk_mhz = find_max_clk_value(clock_table->DispClocks, NUM_DISPCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].dppclk_mhz = find_max_clk_value(clock_table->DppClocks, NUM_DPPCLK_DPM_LEVELS);
+ ASSERT(clock_table->DcfClocks[i] == find_max_clk_value(clock_table->DcfClocks, NUM_DCFCLK_DPM_LEVELS));
+ bw_params->clk_table.entries[i].phyclk_mhz = def_max.phyclk_mhz;
+ bw_params->clk_table.entries[i].phyclk_d18_mhz = def_max.phyclk_d18_mhz;
+ bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz;
+ /*
+ * Set any 0 clocks to max default setting. Not an issue for
+ * power since we aren't doing switching in such case anyway
+ */
+ for (i = 0; i < bw_params->clk_table.num_entries; i++) {
+ if (!bw_params->clk_table.entries[i].fclk_mhz) {
+ bw_params->clk_table.entries[i].fclk_mhz = def_max.fclk_mhz;
+ bw_params->clk_table.entries[i].memclk_mhz = def_max.memclk_mhz;
+ bw_params->clk_table.entries[i].voltage = def_max.voltage;
+ }
+ if (!bw_params->clk_table.entries[i].dcfclk_mhz)
+ bw_params->clk_table.entries[i].dcfclk_mhz = def_max.dcfclk_mhz;
+ if (!bw_params->clk_table.entries[i].socclk_mhz)
+ bw_params->clk_table.entries[i].socclk_mhz = def_max.socclk_mhz;
+ if (!bw_params->clk_table.entries[i].dispclk_mhz)
+ bw_params->clk_table.entries[i].dispclk_mhz = def_max.dispclk_mhz;
+ if (!bw_params->clk_table.entries[i].dppclk_mhz)
+ bw_params->clk_table.entries[i].dppclk_mhz = def_max.dppclk_mhz;
+ if (!bw_params->clk_table.entries[i].phyclk_mhz)
+ bw_params->clk_table.entries[i].phyclk_mhz = def_max.phyclk_mhz;
+ if (!bw_params->clk_table.entries[i].phyclk_d18_mhz)
+ bw_params->clk_table.entries[i].phyclk_d18_mhz = def_max.phyclk_d18_mhz;
+ if (!bw_params->clk_table.entries[i].dtbclk_mhz)
+ bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz;
+ }
+ ASSERT(bw_params->clk_table.entries[i-1].dcfclk_mhz);
bw_params->vram_type = bios_info->memory_type;
- bw_params->num_channels = bios_info->ma_channel_number;
+ bw_params->num_channels = bios_info->ma_channel_number ? bios_info->ma_channel_number : 4;
for (i = 0; i < WM_SET_COUNT; i++) {
bw_params->wm_table.entries[i].wm_inst = i;
@@ -641,7 +689,7 @@ static struct clk_mgr_funcs dcn314_funcs = {
.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
.get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz,
.update_clocks = dcn314_update_clocks,
- .init_clocks = dcn314_init_clocks,
+ .init_clocks = dcn31_init_clocks,
.enable_pme_wa = dcn314_enable_pme_wa,
.are_clock_states_equal = dcn314_are_clock_states_equal,
.notify_wm_ranges = dcn314_notify_wm_ranges
@@ -681,10 +729,10 @@ void dcn314_clk_mgr_construct(
}
ASSERT(clk_mgr->smu_wm_set.wm_set);
- smu_dpm_clks.dpm_clks = (DpmClocks_t *)dm_helpers_allocate_gpu_mem(
+ smu_dpm_clks.dpm_clks = (DpmClocks314_t *)dm_helpers_allocate_gpu_mem(
clk_mgr->base.base.ctx,
DC_MEM_ALLOC_TYPE_FRAME_BUFFER,
- sizeof(DpmClocks_t),
+ sizeof(DpmClocks314_t),
&smu_dpm_clks.mc_address.quad_part);
if (smu_dpm_clks.dpm_clks == NULL) {
@@ -729,7 +777,7 @@ void dcn314_clk_mgr_construct(
if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) {
dcn314_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks);
- if (ctx->dc_bios && ctx->dc_bios->integrated_info) {
+ if (ctx->dc_bios && ctx->dc_bios->integrated_info && ctx->dc->config.use_default_clock_table == false) {
dcn314_clk_mgr_helper_populate_bw_params(
&clk_mgr->base,
ctx->dc_bios->integrated_info,
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
index c695a4498c50..171f84340eb2 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
@@ -42,7 +42,7 @@ struct clk_mgr_dcn314 {
bool dcn314_are_clock_states_equal(struct dc_clocks *a,
struct dc_clocks *b);
-void dcn314_init_clocks(struct clk_mgr *clk_mgr);
+
void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
struct dc_state *context,
bool safe_to_lower);
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h
index a7958dc96581..047d19ea919c 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h
@@ -36,6 +36,37 @@ typedef enum {
WCK_RATIO_MAX
} WCK_RATIO_e;
+typedef struct {
+ uint32_t FClk;
+ uint32_t MemClk;
+ uint32_t Voltage;
+ uint8_t WckRatio;
+ uint8_t Spare[3];
+} DfPstateTable314_t;
+
+//Freq in MHz
+//Voltage in milli volts with 2 fractional bits
+typedef struct {
+ uint32_t DcfClocks[NUM_DCFCLK_DPM_LEVELS];
+ uint32_t DispClocks[NUM_DISPCLK_DPM_LEVELS];
+ uint32_t DppClocks[NUM_DPPCLK_DPM_LEVELS];
+ uint32_t SocClocks[NUM_SOCCLK_DPM_LEVELS];
+ uint32_t VClocks[NUM_VCN_DPM_LEVELS];
+ uint32_t DClocks[NUM_VCN_DPM_LEVELS];
+ uint32_t SocVoltage[NUM_SOC_VOLTAGE_LEVELS];
+ DfPstateTable314_t DfPstateTable[NUM_DF_PSTATE_LEVELS];
+
+ uint8_t NumDcfClkLevelsEnabled;
+ uint8_t NumDispClkLevelsEnabled; //Applies to both Dispclk and Dppclk
+ uint8_t NumSocClkLevelsEnabled;
+ uint8_t VcnClkLevelsEnabled; //Applies to both Vclk and Dclk
+ uint8_t NumDfPstatesEnabled;
+ uint8_t spare[3];
+
+ uint32_t MinGfxClk;
+ uint32_t MaxGfxClk;
+} DpmClocks314_t;
+
struct dcn314_watermarks {
// Watermarks
WatermarkRowGeneric_t WatermarkRow[WM_COUNT][NUM_WM_RANGES];
@@ -43,7 +74,7 @@ struct dcn314_watermarks {
};
struct dcn314_smu_dpm_clks {
- DpmClocks_t *dpm_clks;
+ DpmClocks314_t *dpm_clks;
union large_integer mc_address;
};
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index e42f44fc1c08..aeecca68dea7 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1074,8 +1074,15 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
struct dc_stream_state *old_stream =
dc->current_state->res_ctx.pipe_ctx[i].stream;
bool should_disable = true;
- bool pipe_split_change =
- context->res_ctx.pipe_ctx[i].top_pipe != dc->current_state->res_ctx.pipe_ctx[i].top_pipe;
+ bool pipe_split_change = false;
+
+ if ((context->res_ctx.pipe_ctx[i].top_pipe) &&
+ (dc->current_state->res_ctx.pipe_ctx[i].top_pipe))
+ pipe_split_change = context->res_ctx.pipe_ctx[i].top_pipe->pipe_idx !=
+ dc->current_state->res_ctx.pipe_ctx[i].top_pipe->pipe_idx;
+ else
+ pipe_split_change = context->res_ctx.pipe_ctx[i].top_pipe !=
+ dc->current_state->res_ctx.pipe_ctx[i].top_pipe;
for (j = 0; j < context->stream_count; j++) {
if (old_stream == context->streams[j]) {
@@ -3229,7 +3236,7 @@ static void commit_planes_for_stream(struct dc *dc,
odm_pipe->ttu_regs.min_ttu_vblank = MAX_TTU;
}
- if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) {
+ if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed)
if (top_pipe_to_program &&
top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) {
if (should_use_dmub_lock(stream->link)) {
@@ -3247,7 +3254,6 @@ static void commit_planes_for_stream(struct dc *dc,
top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable(
top_pipe_to_program->stream_res.tg);
}
- }
if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
if (dc->hwss.subvp_pipe_control_lock)
@@ -3466,7 +3472,7 @@ static void commit_planes_for_stream(struct dc *dc,
dc->hwss.pipe_control_lock(dc, top_pipe_to_program, false);
}
- if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) {
+ if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed)
if (top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) {
top_pipe_to_program->stream_res.tg->funcs->wait_for_state(
top_pipe_to_program->stream_res.tg,
@@ -3493,21 +3499,19 @@ static void commit_planes_for_stream(struct dc *dc,
top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_disable(
top_pipe_to_program->stream_res.tg);
}
- }
- if (update_type != UPDATE_TYPE_FAST) {
+ if (update_type != UPDATE_TYPE_FAST)
dc->hwss.post_unlock_program_front_end(dc, context);
- /* Since phantom pipe programming is moved to post_unlock_program_front_end,
- * move the SubVP lock to after the phantom pipes have been setup
- */
- if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
- if (dc->hwss.subvp_pipe_control_lock)
- dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, NULL, subvp_prev_use);
- } else {
- if (dc->hwss.subvp_pipe_control_lock)
- dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, top_pipe_to_program, subvp_prev_use);
- }
+ /* Since phantom pipe programming is moved to post_unlock_program_front_end,
+ * move the SubVP lock to after the phantom pipes have been setup
+ */
+ if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
+ if (dc->hwss.subvp_pipe_control_lock)
+ dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, NULL, subvp_prev_use);
+ } else {
+ if (dc->hwss.subvp_pipe_control_lock)
+ dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, top_pipe_to_program, subvp_prev_use);
}
// Fire manual trigger only when bottom plane is flipped
@@ -4292,7 +4296,7 @@ bool dc_is_dmub_outbox_supported(struct dc *dc)
!dc->debug.dpia_debug.bits.disable_dpia)
return true;
- if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_2 &&
+ if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_1 &&
!dc->debug.dpia_debug.bits.disable_dpia)
return true;
@@ -4340,6 +4344,7 @@ void dc_enable_dmub_outbox(struct dc *dc)
struct dc_context *dc_ctx = dc->ctx;
dmub_enable_outbox_notification(dc_ctx->dmub_srv);
+ DC_LOG_DC("%s: dmub outbox notifications enabled\n", __func__);
}
/**
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 9e51338441d0..66d2ae7aacf5 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -3372,7 +3372,7 @@ bool dc_link_setup_psr(struct dc_link *link,
switch(link->ctx->asic_id.chip_family) {
case FAMILY_YELLOW_CARP:
case AMDGPU_FAMILY_GC_10_3_6:
- case AMDGPU_FAMILY_GC_11_0_2:
+ case AMDGPU_FAMILY_GC_11_0_1:
if(!dc->debug.disable_z10)
psr_context->psr_level.bits.SKIP_CRTC_DISABLE = false;
break;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index ffc0f1c0ea93..7dbab15bfa68 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -169,7 +169,7 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id)
if (ASICREV_IS_GC_11_0_2(asic_id.hw_internal_rev))
dc_version = DCN_VERSION_3_21;
break;
- case AMDGPU_FAMILY_GC_11_0_2:
+ case AMDGPU_FAMILY_GC_11_0_1:
dc_version = DCN_VERSION_3_14;
break;
default:
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 8e1e40083ec8..5908b60db313 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -47,7 +47,7 @@ struct aux_payload;
struct set_config_cmd_payload;
struct dmub_notification;
-#define DC_VER "3.2.196"
+#define DC_VER "3.2.198"
#define MAX_SURFACES 3
#define MAX_PLANES 6
@@ -213,6 +213,7 @@ struct dc_caps {
uint32_t cache_num_ways;
uint16_t subvp_fw_processing_delay_us;
uint16_t subvp_prefetch_end_to_mall_start_us;
+ uint8_t subvp_swath_height_margin_lines; // subvp start line must be aligned to 2 x swath height
uint16_t subvp_pstate_allow_width_us;
uint16_t subvp_vertical_int_margin_us;
bool seamless_odm;
@@ -352,6 +353,7 @@ struct dc_config {
bool use_pipe_ctx_sync_logic;
bool ignore_dpref_ss;
bool enable_mipi_converter_optimization;
+ bool use_default_clock_table;
};
enum visual_confirm {
@@ -609,6 +611,7 @@ struct dc_bounding_box_overrides {
int percent_of_ideal_drambw;
int dram_clock_change_latency_ns;
int dummy_clock_change_latency_ns;
+ int fclk_clock_change_latency_ns;
/* This forces a hard min on the DCFCLK we use
* for DML. Unlike the debug option for forcing
* DCFCLK, this override affects watermark calculations
@@ -751,6 +754,7 @@ struct dc_debug_options {
uint32_t mst_start_top_delay;
uint8_t psr_power_use_phy_fsm;
enum dml_hostvm_override_opts dml_hostvm_override;
+ bool dml_disallow_alternate_prefetch_modes;
bool use_legacy_soc_bb_mechanism;
bool exit_idle_opt_for_cursor_updates;
bool enable_single_display_2to1_odm_policy;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index 2d61c2a91cee..09b304507bad 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -29,6 +29,7 @@
#include "dm_helpers.h"
#include "dc_hw_types.h"
#include "core_types.h"
+#include "../basics/conversion.h"
#define CTX dc_dmub_srv->ctx
#define DC_LOGGER CTX->logger
@@ -275,8 +276,7 @@ void dc_dmub_srv_set_drr_manual_trigger_cmd(struct dc *dc, uint32_t tg_inst)
union dmub_rb_cmd cmd = { 0 };
cmd.drr_update.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH;
- // TODO: Uncomment once FW headers are promoted
- //cmd.drr_update.header.sub_type = DMUB_CMD__FAMS_SET_MANUAL_TRIGGER;
+ cmd.drr_update.header.sub_type = DMUB_CMD__FAMS_SET_MANUAL_TRIGGER;
cmd.drr_update.dmub_optc_state_req.tg_inst = tg_inst;
cmd.drr_update.header.payload_bytes = sizeof(cmd.drr_update) - sizeof(cmd.drr_update.header);
@@ -601,6 +601,7 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc,
&cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[cmd_pipe_index];
struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing;
struct dc_crtc_timing *phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing;
+ uint32_t out_num, out_den;
pipe_data->mode = SUBVP;
pipe_data->pipe_config.subvp_data.pix_clk_100hz = subvp_pipe->stream->timing.pix_clk_100hz;
@@ -612,6 +613,16 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc,
main_timing->v_total - main_timing->v_front_porch - main_timing->v_addressable;
pipe_data->pipe_config.subvp_data.mall_region_lines = phantom_timing->v_addressable;
pipe_data->pipe_config.subvp_data.main_pipe_index = subvp_pipe->pipe_idx;
+ pipe_data->pipe_config.subvp_data.is_drr = subvp_pipe->stream->ignore_msa_timing_param;
+
+ /* Calculate the scaling factor from the src and dst height.
+ * e.g. If 3840x2160 being downscaled to 1920x1080, the scaling factor is 1/2.
+ * Reduce the fraction 1080/2160 = 1/2 for the "scaling factor"
+ */
+ reduce_fraction(subvp_pipe->stream->src.height, subvp_pipe->stream->dst.height, &out_num, &out_den);
+ // TODO: Uncomment below lines once DMCUB include headers are promoted
+ //pipe_data->pipe_config.subvp_data.scale_factor_numerator = out_num;
+ //pipe_data->pipe_config.subvp_data.scale_factor_denominator = out_den;
// Prefetch lines is equal to VACTIVE + BP + VSYNC
pipe_data->pipe_config.subvp_data.prefetch_lines =
diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h
index a0af0f6afeef..9544abf75e84 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_link.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_link.h
@@ -344,6 +344,7 @@ enum dc_detect_reason {
DETECT_REASON_HPDRX,
DETECT_REASON_FALLBACK,
DETECT_REASON_RETRAIN,
+ DETECT_REASON_TDR,
};
bool dc_link_detect(struct dc_link *dc_link, enum dc_detect_reason reason);
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
index 213de8cabfad..165392380842 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
@@ -543,9 +543,11 @@ static void dce112_get_pix_clk_dividers_helper (
switch (pix_clk_params->color_depth) {
case COLOR_DEPTH_101010:
actual_pixel_clock_100hz = (actual_pixel_clock_100hz * 5) >> 2;
+ actual_pixel_clock_100hz -= actual_pixel_clock_100hz % 10;
break;
case COLOR_DEPTH_121212:
actual_pixel_clock_100hz = (actual_pixel_clock_100hz * 6) >> 2;
+ actual_pixel_clock_100hz -= actual_pixel_clock_100hz % 10;
break;
case COLOR_DEPTH_161616:
actual_pixel_clock_100hz = actual_pixel_clock_100hz * 2;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
index d4a6504dfe00..db7ca4b0cdb9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
@@ -361,8 +361,6 @@ void dpp1_cnv_setup (
select = INPUT_CSC_SELECT_ICSC;
break;
case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
- pixel_format = 22;
- break;
case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
pixel_format = 26; /* ARGB16161616_UNORM */
break;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
index b54c12400323..564e061ccb58 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
@@ -278,9 +278,6 @@ void hubp1_program_pixel_format(
SURFACE_PIXEL_FORMAT, 10);
break;
case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
- REG_UPDATE(DCSURF_SURFACE_CONFIG,
- SURFACE_PIXEL_FORMAT, 22);
- break;
case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: /*we use crossbar already*/
REG_UPDATE(DCSURF_SURFACE_CONFIG,
SURFACE_PIXEL_FORMAT, 26); /* ARGB16161616_UNORM */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index bed783747f16..5b5d952b2b8c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -110,6 +110,7 @@ void dcn10_lock_all_pipes(struct dc *dc,
*/
if (pipe_ctx->top_pipe ||
!pipe_ctx->stream ||
+ !pipe_ctx->plane_state ||
!tg->funcs->is_tg_enabled(tg))
continue;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
index 769974375b4b..8e9384094f6d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
@@ -131,6 +131,12 @@ struct mpcc *mpc1_get_mpcc_for_dpp(struct mpc_tree *tree, int dpp_id)
while (tmp_mpcc != NULL) {
if (tmp_mpcc->dpp_id == dpp_id)
return tmp_mpcc;
+
+ /* avoid circular linked list */
+ ASSERT(tmp_mpcc != tmp_mpcc->mpcc_bot);
+ if (tmp_mpcc == tmp_mpcc->mpcc_bot)
+ break;
+
tmp_mpcc = tmp_mpcc->mpcc_bot;
}
return NULL;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
index e1a9a45b03b6..3fc300cd1ce9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
@@ -465,6 +465,11 @@ void optc1_enable_optc_clock(struct timing_generator *optc, bool enable)
OTG_CLOCK_ON, 1,
1, 1000);
} else {
+
+ //last chance to clear underflow, otherwise, it will always there due to clock is off.
+ if (optc->funcs->is_optc_underflow_occurred(optc) == true)
+ optc->funcs->clear_optc_underflow(optc);
+
REG_UPDATE_2(OTG_CLOCK_CONTROL,
OTG_CLOCK_GATE_DIS, 0,
OTG_CLOCK_EN, 0);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c
index ea1f14af0db7..eaa7032f0f1a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c
@@ -166,8 +166,6 @@ static void dpp2_cnv_setup (
select = DCN2_ICSC_SELECT_ICSC_A;
break;
case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
- pixel_format = 22;
- break;
case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
pixel_format = 26; /* ARGB16161616_UNORM */
break;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
index 936af65381ef..9570c2118ccc 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
@@ -463,9 +463,6 @@ void hubp2_program_pixel_format(
SURFACE_PIXEL_FORMAT, 10);
break;
case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
- REG_UPDATE(DCSURF_SURFACE_CONFIG,
- SURFACE_PIXEL_FORMAT, 22);
- break;
case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: /*we use crossbar already*/
REG_UPDATE(DCSURF_SURFACE_CONFIG,
SURFACE_PIXEL_FORMAT, 26); /* ARGB16161616_UNORM */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
index 3d307dd58e9a..116f67a0b989 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
@@ -531,6 +531,12 @@ static struct mpcc *mpc2_get_mpcc_for_dpp(struct mpc_tree *tree, int dpp_id)
while (tmp_mpcc != NULL) {
if (tmp_mpcc->dpp_id == 0xf || tmp_mpcc->dpp_id == dpp_id)
return tmp_mpcc;
+
+ /* avoid circular linked list */
+ ASSERT(tmp_mpcc != tmp_mpcc->mpcc_bot);
+ if (tmp_mpcc == tmp_mpcc->mpcc_bot)
+ break;
+
tmp_mpcc = tmp_mpcc->mpcc_bot;
}
return NULL;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c
index c5e200d09038..5752271f22df 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c
@@ -67,9 +67,15 @@ static uint32_t convert_and_clamp(
void dcn21_dchvm_init(struct hubbub *hubbub)
{
struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub);
- uint32_t riommu_active;
+ uint32_t riommu_active, prefetch_done;
int i;
+ REG_GET(DCHVM_RIOMMU_STAT0, HOSTVM_PREFETCH_DONE, &prefetch_done);
+
+ if (prefetch_done) {
+ hubbub->riommu_active = true;
+ return;
+ }
//Init DCHVM block
REG_UPDATE(DCHVM_CTRL0, HOSTVM_INIT_REQ, 1);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c
index 77b00f86c216..4a668d6563df 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c
@@ -244,8 +244,6 @@ void dpp3_cnv_setup (
select = INPUT_CSC_SELECT_ICSC;
break;
case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
- pixel_format = 22;
- break;
case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
pixel_format = 26; /* ARGB16161616_UNORM */
break;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c
index 6a4dcafb9bba..dc3e8df706b3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c
@@ -86,7 +86,7 @@ bool hubp3_program_surface_flip_and_addr(
VMID, address->vmid);
if (address->type == PLN_ADDR_TYPE_GRPH_STEREO) {
- REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x1);
+ REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0);
REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x1);
} else {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
index 0a67f8a5656d..d97076648acb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
@@ -372,7 +372,7 @@ static struct stream_encoder *dcn303_stream_encoder_create(enum engine_id eng_id
int afmt_inst;
/* Mapping of VPG, AFMT, DME register blocks to DIO block instance */
- if (eng_id <= ENGINE_ID_DIGE) {
+ if (eng_id <= ENGINE_ID_DIGB) {
vpg_inst = eng_id;
afmt_inst = eng_id;
} else
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h
index 7c77c71591a0..82c3b3ac1f0d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h
@@ -162,7 +162,8 @@
SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_SDP_AUDIO_CONTROL0, AIP_ENABLE, mask_sh),\
SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_SDP_AUDIO_CONTROL0, ACM_ENABLE, mask_sh),\
SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_VID_CRC_CONTROL, CRC_ENABLE, mask_sh),\
- SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_VID_CRC_CONTROL, CRC_CONT_MODE_ENABLE, mask_sh)
+ SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_VID_CRC_CONTROL, CRC_CONT_MODE_ENABLE, mask_sh),\
+ SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_HBLANK_CONTROL, HBLANK_MINIMUM_SYMBOL_WIDTH, mask_sh)
#define DCN3_1_HPO_DP_STREAM_ENC_REG_FIELD_LIST(type) \
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
index 468a893ff785..aedff18aff56 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
@@ -2153,7 +2153,7 @@ static bool dcn31_resource_construct(
pool->base.usb4_dpia_count = 4;
}
- if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_2)
+ if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_1)
pool->base.usb4_dpia_count = 4;
/* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h
index 41f8ec99da6b..901436591ed4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h
@@ -32,7 +32,6 @@
container_of(pool, struct dcn31_resource_pool, base)
extern struct _vcs_dpi_ip_params_st dcn3_1_ip;
-extern struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc;
struct dcn31_resource_pool {
struct resource_pool base;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile
index e3b5a95e03b1..702c28c2560e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile
@@ -13,31 +13,6 @@
DCN314 = dcn314_resource.o dcn314_hwseq.o dcn314_init.o \
dcn314_dio_stream_encoder.o dcn314_dccg.o dcn314_optc.o
-ifdef CONFIG_X86
-CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o := -mhard-float -msse
-endif
-
-ifdef CONFIG_PPC64
-CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
-IS_OLD_GCC = 1
-endif
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o += -mpreferred-stack-boundary=4
-else
-CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o += -msse2
-endif
-endif
-
AMD_DAL_DCN314 = $(addprefix $(AMDDALPATH)/dc/dcn314/,$(DCN314))
AMD_DISPLAY_FILES += $(AMD_DAL_DCN314)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c
index 755c715ad8dc..39931d48f385 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c
@@ -343,7 +343,10 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig
{
struct dc_stream_state *stream = pipe_ctx->stream;
unsigned int odm_combine_factor = 0;
+ struct dc *dc = pipe_ctx->stream->ctx->dc;
+ bool two_pix_per_container = false;
+ two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing);
odm_combine_factor = get_odm_config(pipe_ctx, NULL);
if (is_dp_128b_132b_signal(pipe_ctx)) {
@@ -355,16 +358,13 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig
else
*k2_div = PIXEL_RATE_DIV_BY_4;
} else if (dc_is_dp_signal(pipe_ctx->stream->signal)) {
- if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) {
+ if (two_pix_per_container) {
*k1_div = PIXEL_RATE_DIV_BY_1;
*k2_div = PIXEL_RATE_DIV_BY_2;
- } else if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) {
- *k1_div = PIXEL_RATE_DIV_BY_2;
- *k2_div = PIXEL_RATE_DIV_BY_2;
} else {
- if (odm_combine_factor == 1)
- *k2_div = PIXEL_RATE_DIV_BY_4;
- else if (odm_combine_factor == 2)
+ *k1_div = PIXEL_RATE_DIV_BY_1;
+ *k2_div = PIXEL_RATE_DIV_BY_4;
+ if ((odm_combine_factor == 2) || dc->debug.enable_dp_dig_pixel_rate_div_policy)
*k2_div = PIXEL_RATE_DIV_BY_2;
}
}
@@ -374,3 +374,31 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig
return odm_combine_factor;
}
+
+void dcn314_set_pixels_per_cycle(struct pipe_ctx *pipe_ctx)
+{
+ uint32_t pix_per_cycle = 1;
+ uint32_t odm_combine_factor = 1;
+
+ if (!pipe_ctx || !pipe_ctx->stream || !pipe_ctx->stream_res.stream_enc)
+ return;
+
+ odm_combine_factor = get_odm_config(pipe_ctx, NULL);
+ if (optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing) || odm_combine_factor > 1
+ || dcn314_is_dp_dig_pixel_rate_div_policy(pipe_ctx))
+ pix_per_cycle = 2;
+
+ if (pipe_ctx->stream_res.stream_enc->funcs->set_input_mode)
+ pipe_ctx->stream_res.stream_enc->funcs->set_input_mode(pipe_ctx->stream_res.stream_enc,
+ pix_per_cycle);
+}
+
+bool dcn314_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx)
+{
+ struct dc *dc = pipe_ctx->stream->ctx->dc;
+
+ if (dc_is_dp_signal(pipe_ctx->stream->signal) && !is_dp_128b_132b_signal(pipe_ctx) &&
+ dc->debug.enable_dp_dig_pixel_rate_div_policy)
+ return true;
+ return false;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h
index be0f5e4d48e1..d014580592ac 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h
@@ -39,4 +39,8 @@ void dcn314_enable_power_gating_plane(struct dce_hwseq *hws, bool enable);
unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div);
+void dcn314_set_pixels_per_cycle(struct pipe_ctx *pipe_ctx);
+
+bool dcn314_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx);
+
#endif /* __DC_HWSS_DCN314_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c
index b9debeb081fd..fcf67eb3478f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c
@@ -145,6 +145,8 @@ static const struct hwseq_private_funcs dcn314_private_funcs = {
.set_shaper_3dlut = dcn20_set_shaper_3dlut,
.setup_hpo_hw_control = dcn31_setup_hpo_hw_control,
.calculate_dccg_k1_k2_values = dcn314_calculate_dccg_k1_k2_values,
+ .set_pixels_per_cycle = dcn314_set_pixels_per_cycle,
+ .is_dp_dig_pixel_rate_div_policy = dcn314_is_dp_dig_pixel_rate_div_policy,
};
void dcn314_hw_sequencer_construct(struct dc *dc)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
index 63861cdfb09f..85f32206a766 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
@@ -70,6 +70,7 @@
#include "dce110/dce110_resource.h"
#include "dml/display_mode_vba.h"
#include "dml/dcn31/dcn31_fpu.h"
+#include "dml/dcn314/dcn314_fpu.h"
#include "dcn314/dcn314_dccg.h"
#include "dcn10/dcn10_resource.h"
#include "dcn31/dcn31_panel_cntl.h"
@@ -132,155 +133,6 @@ static const struct IP_BASE DCN_BASE = { { { { 0x00000012, 0x000000C0, 0x000034C
#define DC_LOGGER_INIT(logger)
-#define DCN3_14_DEFAULT_DET_SIZE 384
-#define DCN3_14_MAX_DET_SIZE 384
-#define DCN3_14_MIN_COMPBUF_SIZE_KB 128
-#define DCN3_14_CRB_SEGMENT_SIZE_KB 64
-struct _vcs_dpi_ip_params_st dcn3_14_ip = {
- .VBlankNomDefaultUS = 668,
- .gpuvm_enable = 1,
- .gpuvm_max_page_table_levels = 1,
- .hostvm_enable = 1,
- .hostvm_max_page_table_levels = 2,
- .rob_buffer_size_kbytes = 64,
- .det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE,
- .config_return_buffer_size_in_kbytes = 1792,
- .compressed_buffer_segment_size_in_kbytes = 64,
- .meta_fifo_size_in_kentries = 32,
- .zero_size_buffer_entries = 512,
- .compbuf_reserved_space_64b = 256,
- .compbuf_reserved_space_zs = 64,
- .dpp_output_buffer_pixels = 2560,
- .opp_output_buffer_lines = 1,
- .pixel_chunk_size_kbytes = 8,
- .meta_chunk_size_kbytes = 2,
- .min_meta_chunk_size_bytes = 256,
- .writeback_chunk_size_kbytes = 8,
- .ptoi_supported = false,
- .num_dsc = 4,
- .maximum_dsc_bits_per_component = 10,
- .dsc422_native_support = false,
- .is_line_buffer_bpp_fixed = true,
- .line_buffer_fixed_bpp = 48,
- .line_buffer_size_bits = 789504,
- .max_line_buffer_lines = 12,
- .writeback_interface_buffer_size_kbytes = 90,
- .max_num_dpp = 4,
- .max_num_otg = 4,
- .max_num_hdmi_frl_outputs = 1,
- .max_num_wb = 1,
- .max_dchub_pscl_bw_pix_per_clk = 4,
- .max_pscl_lb_bw_pix_per_clk = 2,
- .max_lb_vscl_bw_pix_per_clk = 4,
- .max_vscl_hscl_bw_pix_per_clk = 4,
- .max_hscl_ratio = 6,
- .max_vscl_ratio = 6,
- .max_hscl_taps = 8,
- .max_vscl_taps = 8,
- .dpte_buffer_size_in_pte_reqs_luma = 64,
- .dpte_buffer_size_in_pte_reqs_chroma = 34,
- .dispclk_ramp_margin_percent = 1,
- .max_inter_dcn_tile_repeaters = 8,
- .cursor_buffer_size = 16,
- .cursor_chunk_size = 2,
- .writeback_line_buffer_buffer_size = 0,
- .writeback_min_hscl_ratio = 1,
- .writeback_min_vscl_ratio = 1,
- .writeback_max_hscl_ratio = 1,
- .writeback_max_vscl_ratio = 1,
- .writeback_max_hscl_taps = 1,
- .writeback_max_vscl_taps = 1,
- .dppclk_delay_subtotal = 46,
- .dppclk_delay_scl = 50,
- .dppclk_delay_scl_lb_only = 16,
- .dppclk_delay_cnvc_formatter = 27,
- .dppclk_delay_cnvc_cursor = 6,
- .dispclk_delay_subtotal = 119,
- .dynamic_metadata_vm_enabled = false,
- .odm_combine_4to1_supported = false,
- .dcc_supported = true,
-};
-
-struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = {
- /*TODO: correct dispclk/dppclk voltage level determination*/
- .clock_limits = {
- {
- .state = 0,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
- .phyclk_mhz = 600.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 186.0,
- .dtbclk_mhz = 625.0,
- },
- {
- .state = 1,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
- .phyclk_mhz = 810.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 209.0,
- .dtbclk_mhz = 625.0,
- },
- {
- .state = 2,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
- .phyclk_mhz = 810.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 209.0,
- .dtbclk_mhz = 625.0,
- },
- {
- .state = 3,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
- .phyclk_mhz = 810.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 371.0,
- .dtbclk_mhz = 625.0,
- },
- {
- .state = 4,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
- .phyclk_mhz = 810.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 417.0,
- .dtbclk_mhz = 625.0,
- },
- },
- .num_states = 5,
- .sr_exit_time_us = 9.0,
- .sr_enter_plus_exit_time_us = 11.0,
- .sr_exit_z8_time_us = 442.0,
- .sr_enter_plus_exit_z8_time_us = 560.0,
- .writeback_latency_us = 12.0,
- .dram_channel_width_bytes = 4,
- .round_trip_ping_latency_dcfclk_cycles = 106,
- .urgent_latency_pixel_data_only_us = 4.0,
- .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
- .urgent_latency_vm_data_only_us = 4.0,
- .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
- .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
- .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
- .pct_ideal_sdp_bw_after_urgent = 80.0,
- .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
- .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
- .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
- .max_avg_sdp_bw_use_normal_percent = 60.0,
- .max_avg_dram_bw_use_normal_percent = 60.0,
- .fabric_datapath_to_dcn_data_return_bytes = 32,
- .return_bus_width_bytes = 64,
- .downspread_percent = 0.38,
- .dcn_downspread_percent = 0.5,
- .gpuvm_min_page_size_bytes = 4096,
- .hostvm_min_page_size_bytes = 4096,
- .do_urgent_latency_adjustment = false,
- .urgent_latency_adjustment_fabric_clock_component_us = 0,
- .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
-};
-
enum dcn31_clk_src_array_id {
DCN31_CLK_SRC_PLL0,
DCN31_CLK_SRC_PLL1,
@@ -1402,7 +1254,7 @@ static struct stream_encoder *dcn314_stream_encoder_create(
int afmt_inst;
/* Mapping of VPG, AFMT, DME register blocks to DIO block instance */
- if (eng_id <= ENGINE_ID_DIGF) {
+ if (eng_id < ENGINE_ID_DIGF) {
vpg_inst = eng_id;
afmt_inst = eng_id;
} else
@@ -1447,7 +1299,8 @@ static struct hpo_dp_stream_encoder *dcn31_hpo_dp_stream_encoder_create(
* VPG[8] -> HPO_DP[2]
* VPG[9] -> HPO_DP[3]
*/
- vpg_inst = hpo_dp_inst + 6;
+ //Uses offset index 5-8, but actually maps to vpg_inst 6-9
+ vpg_inst = hpo_dp_inst + 5;
/* Mapping of APG register blocks to HPO DP block instance:
* APG[0] -> HPO_DP[0]
@@ -1793,109 +1646,16 @@ static struct clock_source *dcn31_clock_source_create(
return NULL;
}
-static bool is_dual_plane(enum surface_pixel_format format)
-{
- return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA;
-}
-
static int dcn314_populate_dml_pipes_from_context(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
bool fast_validate)
{
- int i, pipe_cnt;
- struct resource_context *res_ctx = &context->res_ctx;
- struct pipe_ctx *pipe;
- bool upscaled = false;
-
- dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
-
- for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
- struct dc_crtc_timing *timing;
-
- if (!res_ctx->pipe_ctx[i].stream)
- continue;
- pipe = &res_ctx->pipe_ctx[i];
- timing = &pipe->stream->timing;
-
- if (dc_extended_blank_supported(dc) && pipe->stream->adjust.v_total_max == pipe->stream->adjust.v_total_min
- && pipe->stream->adjust.v_total_min > timing->v_total)
- pipes[pipe_cnt].pipe.dest.vtotal = pipe->stream->adjust.v_total_min;
-
- if (pipe->plane_state &&
- (pipe->plane_state->src_rect.height < pipe->plane_state->dst_rect.height ||
- pipe->plane_state->src_rect.width < pipe->plane_state->dst_rect.width))
- upscaled = true;
-
- /*
- * Immediate flip can be set dynamically after enabling the plane.
- * We need to require support for immediate flip or underflow can be
- * intermittently experienced depending on peak b/w requirements.
- */
- pipes[pipe_cnt].pipe.src.immediate_flip = true;
-
- pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
- pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active;
- pipes[pipe_cnt].pipe.src.gpuvm = true;
- pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
- pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
- pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
- pipes[pipe_cnt].pipe.src.dcc_rate = 3;
- pipes[pipe_cnt].dout.dsc_input_bpc = 0;
-
- if (pipes[pipe_cnt].dout.dsc_enable) {
- switch (timing->display_color_depth) {
- case COLOR_DEPTH_888:
- pipes[pipe_cnt].dout.dsc_input_bpc = 8;
- break;
- case COLOR_DEPTH_101010:
- pipes[pipe_cnt].dout.dsc_input_bpc = 10;
- break;
- case COLOR_DEPTH_121212:
- pipes[pipe_cnt].dout.dsc_input_bpc = 12;
- break;
- default:
- ASSERT(0);
- break;
- }
- }
-
- pipe_cnt++;
- }
- context->bw_ctx.dml.ip.det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE;
-
- dc->config.enable_4to1MPC = false;
- if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) {
- if (is_dual_plane(pipe->plane_state->format)
- && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) {
- dc->config.enable_4to1MPC = true;
- } else if (!is_dual_plane(pipe->plane_state->format) && pipe->plane_state->src_rect.width <= 5120) {
- /* Limit to 5k max to avoid forced pipe split when there is not enough detile for swath */
- context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
- pipes[0].pipe.src.unbounded_req_mode = true;
- }
- } else if (context->stream_count >= dc->debug.crb_alloc_policy_min_disp_count
- && dc->debug.crb_alloc_policy > DET_SIZE_DEFAULT) {
- context->bw_ctx.dml.ip.det_buffer_size_kbytes = dc->debug.crb_alloc_policy * 64;
- } else if (context->stream_count >= 3 && upscaled) {
- context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
- }
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
- if (!pipe->stream)
- continue;
+ int pipe_cnt;
- if (pipe->stream->signal == SIGNAL_TYPE_EDP && dc->debug.seamless_boot_odm_combine &&
- pipe->stream->apply_seamless_boot_optimization) {
-
- if (pipe->stream->apply_boot_odm_mode == dm_odm_combine_policy_2to1) {
- context->bw_ctx.dml.vba.ODMCombinePolicy = dm_odm_combine_policy_2to1;
- break;
- }
- }
- }
+ DC_FP_START();
+ pipe_cnt = dcn314_populate_dml_pipes_from_context_fpu(dc, context, pipes, fast_validate);
+ DC_FP_END();
return pipe_cnt;
}
@@ -1906,88 +1666,9 @@ static struct dc_cap_funcs cap_funcs = {
static void dcn314_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
{
- struct clk_limit_table *clk_table = &bw_params->clk_table;
- struct _vcs_dpi_voltage_scaling_st *clock_tmp = dcn3_14_soc._clock_tmp;
- unsigned int i, closest_clk_lvl;
- int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
- int j;
-
- // Default clock levels are used for diags, which may lead to overclocking.
- if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
-
- dcn3_14_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator;
- dcn3_14_ip.max_num_dpp = dc->res_pool->pipe_count;
-
- if (bw_params->num_channels > 0)
- dcn3_14_soc.num_chans = bw_params->num_channels;
-
- ASSERT(dcn3_14_soc.num_chans);
- ASSERT(clk_table->num_entries);
-
- /* Prepass to find max clocks independent of voltage level. */
- for (i = 0; i < clk_table->num_entries; ++i) {
- if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
- max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
- if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
- max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
- }
-
- for (i = 0; i < clk_table->num_entries; i++) {
- /* loop backwards*/
- for (closest_clk_lvl = 0, j = dcn3_14_soc.num_states - 1; j >= 0; j--) {
- if ((unsigned int) dcn3_14_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) {
- closest_clk_lvl = j;
- break;
- }
- }
- if (clk_table->num_entries == 1) {
- /*smu gives one DPM level, let's take the highest one*/
- closest_clk_lvl = dcn3_14_soc.num_states - 1;
- }
-
- clock_tmp[i].state = i;
-
- /* Clocks dependent on voltage level. */
- clock_tmp[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
- if (clk_table->num_entries == 1 &&
- clock_tmp[i].dcfclk_mhz < dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) {
- /*SMU fix not released yet*/
- clock_tmp[i].dcfclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz;
- }
- clock_tmp[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
- clock_tmp[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
-
- if (clk_table->entries[i].memclk_mhz && clk_table->entries[i].wck_ratio)
- clock_tmp[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio;
-
- /* Clocks independent of voltage level. */
- clock_tmp[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
- dcn3_14_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
-
- clock_tmp[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
- dcn3_14_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
-
- clock_tmp[i].dram_bw_per_chan_gbps = dcn3_14_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
- clock_tmp[i].dscclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
- clock_tmp[i].dtbclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
- clock_tmp[i].phyclk_d18_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
- clock_tmp[i].phyclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
- }
- for (i = 0; i < clk_table->num_entries; i++)
- dcn3_14_soc.clock_limits[i] = clock_tmp[i];
- if (clk_table->num_entries)
- dcn3_14_soc.num_states = clk_table->num_entries;
- }
-
- if (max_dispclk_mhz) {
- dcn3_14_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
- dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
- }
-
- if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment))
- dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31);
- else
- dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31_FPGA);
+ DC_FP_START();
+ dcn314_update_bw_bounding_box_fpu(dc, bw_params);
+ DC_FP_END();
}
static struct resource_funcs dcn314_res_pool_funcs = {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h
index c41108847ce0..0dd3153aa5c1 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h
@@ -29,6 +29,9 @@
#include "core_types.h"
+extern struct _vcs_dpi_ip_params_st dcn3_14_ip;
+extern struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc;
+
#define TO_DCN314_RES_POOL(pool)\
container_of(pool, struct dcn314_resource_pool, base)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h
index 39929fa67a51..22849eaa6f24 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h
@@ -32,7 +32,6 @@
container_of(pool, struct dcn315_resource_pool, base)
extern struct _vcs_dpi_ip_params_st dcn3_15_ip;
-extern struct _vcs_dpi_ip_params_st dcn3_15_soc;
struct dcn315_resource_pool {
struct resource_pool base;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h
index 0dc5a6c13ae7..aba6d634131b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h
@@ -32,7 +32,6 @@
container_of(pool, struct dcn316_resource_pool, base)
extern struct _vcs_dpi_ip_params_st dcn3_16_ip;
-extern struct _vcs_dpi_ip_params_st dcn3_16_soc;
struct dcn316_resource_pool {
struct resource_pool base;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
index d38341f68b17..ebd3945c71f1 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
@@ -250,6 +250,7 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
uint32_t total_lines = 0;
uint32_t lines_per_way = 0;
uint32_t num_ways = 0;
+ uint32_t prev_addr_low = 0;
for (i = 0; i < ctx->stream_count; i++) {
stream = ctx->streams[i];
@@ -267,10 +268,20 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
plane = ctx->stream_status[i].plane_states[j];
// Calculate total surface size
- surface_size = plane->plane_size.surface_pitch *
+ if (prev_addr_low != plane->address.grph.addr.u.low_part) {
+ /* if plane address are different from prev FB, then userspace allocated separate FBs*/
+ surface_size += plane->plane_size.surface_pitch *
plane->plane_size.surface_size.height *
(plane->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4);
+ prev_addr_low = plane->address.grph.addr.u.low_part;
+ } else {
+ /* We have the same fb for all the planes.
+ * Xorg always creates one giant fb that holds all surfaces,
+ * so allocating it once is sufficient.
+ * */
+ continue;
+ }
// Convert surface size + starting address to number of cache lines required
// (alignment accounted for)
cache_lines_used += dcn32_cache_lines_for_surface(dc, surface_size,
@@ -320,7 +331,10 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable)
{
union dmub_rb_cmd cmd;
- uint8_t ways;
+ uint8_t ways, i;
+ int j;
+ bool stereo_in_use = false;
+ struct dc_plane_state *plane = NULL;
if (!dc->ctx->dmub_srv)
return false;
@@ -349,7 +363,23 @@ bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable)
* and configure HUBP's to fetch from MALL
*/
ways = dcn32_calculate_cab_allocation(dc, dc->current_state);
- if (ways <= dc->caps.cache_num_ways) {
+
+ /* MALL not supported with Stereo3D. If any plane is using stereo,
+ * don't try to enter MALL.
+ */
+ for (i = 0; i < dc->current_state->stream_count; i++) {
+ for (j = 0; j < dc->current_state->stream_status[i].plane_count; j++) {
+ plane = dc->current_state->stream_status[i].plane_states[j];
+
+ if (plane->address.type == PLN_ADDR_TYPE_GRPH_STEREO) {
+ stereo_in_use = true;
+ break;
+ }
+ }
+ if (stereo_in_use)
+ break;
+ }
+ if (ways <= dc->caps.cache_num_ways && !stereo_in_use) {
memset(&cmd, 0, sizeof(cmd));
cmd.cab.header.type = DMUB_CMD__CAB_FOR_SS;
cmd.cab.header.sub_type = DMUB_CMD__CAB_DCN_SS_FIT_IN_CAB;
@@ -683,9 +713,11 @@ void dcn32_update_mall_sel(struct dc *dc, struct dc_state *context)
if (pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
hubp->funcs->hubp_update_mall_sel(hubp, 1, false);
} else {
+ // MALL not supported with Stereo3D
hubp->funcs->hubp_update_mall_sel(hubp,
num_ways <= dc->caps.cache_num_ways &&
- pipe->stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED ? 2 : 0,
+ pipe->stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED &&
+ pipe->plane_state->address.type != PLN_ADDR_TYPE_GRPH_STEREO ? 2 : 0,
cache_cursor);
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c
index eff1f4e17689..1fad7b48bd5b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c
@@ -281,7 +281,7 @@ static struct timing_generator_funcs dcn32_tg_funcs = {
.lock_doublebuffer_enable = optc3_lock_doublebuffer_enable,
.lock_doublebuffer_disable = optc3_lock_doublebuffer_disable,
.enable_optc_clock = optc1_enable_optc_clock,
- .set_drr = optc31_set_drr, // TODO: Update to optc32_set_drr once FW headers are promoted
+ .set_drr = optc32_set_drr,
.get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal,
.set_vtotal_min_max = optc3_set_vtotal_min_max,
.set_static_screen_control = optc1_set_static_screen_control,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index 9a26d24b579f..8b887b552f2c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -867,7 +867,7 @@ static const struct dc_debug_options debug_defaults_drv = {
}
},
.use_max_lb = true,
- .force_disable_subvp = true,
+ .force_disable_subvp = false,
.exit_idle_opt_for_cursor_updates = true,
.enable_single_display_2to1_odm_policy = true,
.enable_dp_dig_pixel_rate_div_policy = 1,
@@ -2051,6 +2051,7 @@ static bool dcn32_resource_construct(
dc->caps.max_cab_allocation_bytes = 67108864; // 64MB = 1024 * 1024 * 64
dc->caps.subvp_fw_processing_delay_us = 15;
dc->caps.subvp_prefetch_end_to_mall_start_us = 15;
+ dc->caps.subvp_swath_height_margin_lines = 16;
dc->caps.subvp_pstate_allow_width_us = 20;
dc->caps.subvp_vertical_int_margin_us = 30;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
index b3f8503cea9c..955f52e6064d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
@@ -63,7 +63,7 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat
if (pipe->stream && pipe->plane_state && !pipe->top_pipe &&
pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4;
- mall_region_pixels = pipe->stream->timing.h_addressable * pipe->stream->timing.v_addressable;
+ mall_region_pixels = pipe->plane_state->plane_size.surface_pitch * pipe->stream->timing.v_addressable;
// For bytes required in MALL, calculate based on number of MBlks required
num_mblks = (mall_region_pixels * bytes_per_pixel +
diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
index 8157e40d2c7e..c8b7d6ff38f4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
@@ -868,7 +868,7 @@ static const struct dc_debug_options debug_defaults_drv = {
}
},
.use_max_lb = true,
- .force_disable_subvp = true,
+ .force_disable_subvp = false,
.exit_idle_opt_for_cursor_updates = true,
.enable_single_display_2to1_odm_policy = true,
.enable_dp_dig_pixel_rate_div_policy = 1,
@@ -1662,8 +1662,9 @@ static bool dcn321_resource_construct(
dc->caps.max_cab_allocation_bytes = 33554432; // 32MB = 1024 * 1024 * 32
dc->caps.subvp_fw_processing_delay_us = 15;
dc->caps.subvp_prefetch_end_to_mall_start_us = 15;
+ dc->caps.subvp_swath_height_margin_lines = 16;
dc->caps.subvp_pstate_allow_width_us = 20;
-
+ dc->caps.subvp_vertical_int_margin_us = 30;
dc->caps.max_slave_planes = 1;
dc->caps.max_slave_yuv_planes = 1;
dc->caps.max_slave_rgb_planes = 1;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index 359f6e9a1da0..86a3b5bfd699 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -61,7 +61,6 @@ CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags)
@@ -71,6 +70,7 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(fram
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn314/dcn314_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_ccflags) $(frame_warn_flag)
@@ -82,7 +82,6 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn303/dcn303_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_ccflags) -Wno-tautological-compare
@@ -131,6 +130,7 @@ DML += dcn321/dcn321_fpu.o
DML += dcn301/dcn301_fpu.o
DML += dcn302/dcn302_fpu.o
DML += dcn303/dcn303_fpu.o
+DML += dcn314/dcn314_fpu.o
DML += dsc/rc_calc_fpu.o
DML += calcs/dcn_calcs.o calcs/dcn_calc_math.o calcs/dcn_calc_auto.o
endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
index ca44df4fca74..d34e0f1314d9 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
@@ -30,6 +30,7 @@
#include "dchubbub.h"
#include "dcn20/dcn20_resource.h"
#include "dcn21/dcn21_resource.h"
+#include "clk_mgr/dcn21/rn_clk_mgr.h"
#include "dcn20_fpu.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
index 7ef66e511ec8..d211cf6d234c 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
@@ -26,6 +26,7 @@
#include "clk_mgr.h"
#include "dcn20/dcn20_resource.h"
#include "dcn301/dcn301_resource.h"
+#include "clk_mgr/dcn301/vg_clk_mgr.h"
#include "dml/dcn20/dcn20_fpu.h"
#include "dcn301_fpu.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
index e36cfa5985ea..149a1b17cdf3 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
@@ -25,6 +25,9 @@
#include "resource.h"
#include "clk_mgr.h"
+#include "dcn31/dcn31_resource.h"
+#include "dcn315/dcn315_resource.h"
+#include "dcn316/dcn316_resource.h"
#include "dml/dcn20/dcn20_fpu.h"
#include "dcn31_fpu.h"
@@ -114,7 +117,7 @@ struct _vcs_dpi_ip_params_st dcn3_1_ip = {
.dcc_supported = true,
};
-struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = {
+static struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = {
/*TODO: correct dispclk/dppclk voltage level determination*/
.clock_limits = {
{
@@ -259,7 +262,7 @@ struct _vcs_dpi_ip_params_st dcn3_15_ip = {
.dcc_supported = true,
};
-struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = {
+static struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = {
.sr_exit_time_us = 9.0,
.sr_enter_plus_exit_time_us = 11.0,
.sr_exit_z8_time_us = 50.0,
@@ -355,7 +358,7 @@ struct _vcs_dpi_ip_params_st dcn3_16_ip = {
.dcc_supported = true,
};
-struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = {
+static struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = {
/*TODO: correct dispclk/dppclk voltage level determination*/
.clock_limits = {
{
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
index 3fab19134480..d63b4209b14c 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
@@ -26,7 +26,7 @@
#include "dc.h"
#include "dc_link.h"
#include "../display_mode_lib.h"
-#include "dml/dcn30/display_mode_vba_30.h"
+#include "../dcn30/display_mode_vba_30.h"
#include "display_mode_vba_31.h"
#include "../dml_inline_defs.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
index 66b82e4f05c6..35d10b4d018b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
@@ -27,7 +27,7 @@
#include "../display_mode_vba.h"
#include "../dml_inline_defs.h"
#include "display_rq_dlg_calc_31.h"
-#include "dml/dcn30/display_mode_vba_30.h"
+#include "../dcn30/display_mode_vba_30.h"
static bool is_dual_plane(enum source_format_class source_format)
{
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c
new file mode 100644
index 000000000000..34a5d0f87b5f
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c
@@ -0,0 +1,376 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "clk_mgr.h"
+#include "resource.h"
+#include "dcn31/dcn31_hubbub.h"
+#include "dcn314_fpu.h"
+#include "dml/dcn20/dcn20_fpu.h"
+#include "dml/display_mode_vba.h"
+
+struct _vcs_dpi_ip_params_st dcn3_14_ip = {
+ .VBlankNomDefaultUS = 668,
+ .gpuvm_enable = 1,
+ .gpuvm_max_page_table_levels = 1,
+ .hostvm_enable = 1,
+ .hostvm_max_page_table_levels = 2,
+ .rob_buffer_size_kbytes = 64,
+ .det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE,
+ .config_return_buffer_size_in_kbytes = 1792,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .meta_fifo_size_in_kentries = 32,
+ .zero_size_buffer_entries = 512,
+ .compbuf_reserved_space_64b = 256,
+ .compbuf_reserved_space_zs = 64,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .meta_chunk_size_kbytes = 2,
+ .min_meta_chunk_size_bytes = 256,
+ .writeback_chunk_size_kbytes = 8,
+ .ptoi_supported = false,
+ .num_dsc = 4,
+ .maximum_dsc_bits_per_component = 10,
+ .dsc422_native_support = false,
+ .is_line_buffer_bpp_fixed = true,
+ .line_buffer_fixed_bpp = 48,
+ .line_buffer_size_bits = 789504,
+ .max_line_buffer_lines = 12,
+ .writeback_interface_buffer_size_kbytes = 90,
+ .max_num_dpp = 4,
+ .max_num_otg = 4,
+ .max_num_hdmi_frl_outputs = 1,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dpte_buffer_size_in_pte_reqs_luma = 64,
+ .dpte_buffer_size_in_pte_reqs_chroma = 34,
+ .dispclk_ramp_margin_percent = 1,
+ .max_inter_dcn_tile_repeaters = 8,
+ .cursor_buffer_size = 16,
+ .cursor_chunk_size = 2,
+ .writeback_line_buffer_buffer_size = 0,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .dppclk_delay_subtotal = 46,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_cnvc_formatter = 27,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 119,
+ .dynamic_metadata_vm_enabled = false,
+ .odm_combine_4to1_supported = false,
+ .dcc_supported = true,
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = {
+ /*TODO: correct dispclk/dppclk voltage level determination*/
+ .clock_limits = {
+ {
+ .state = 0,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 600.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 186.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 1,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 209.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 2,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 209.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 3,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 371.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 4,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 417.0,
+ .dtbclk_mhz = 600.0,
+ },
+ },
+ .num_states = 5,
+ .sr_exit_time_us = 9.0,
+ .sr_enter_plus_exit_time_us = 11.0,
+ .sr_exit_z8_time_us = 442.0,
+ .sr_enter_plus_exit_z8_time_us = 560.0,
+ .writeback_latency_us = 12.0,
+ .dram_channel_width_bytes = 4,
+ .round_trip_ping_latency_dcfclk_cycles = 106,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_sdp_bw_after_urgent = 80.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
+ .max_avg_sdp_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_percent = 60.0,
+ .fabric_datapath_to_dcn_data_return_bytes = 32,
+ .return_bus_width_bytes = 64,
+ .downspread_percent = 0.38,
+ .dcn_downspread_percent = 0.5,
+ .gpuvm_min_page_size_bytes = 4096,
+ .hostvm_min_page_size_bytes = 4096,
+ .do_urgent_latency_adjustment = false,
+ .urgent_latency_adjustment_fabric_clock_component_us = 0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
+};
+
+
+void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params)
+{
+ struct clk_limit_table *clk_table = &bw_params->clk_table;
+ struct _vcs_dpi_voltage_scaling_st *clock_limits =
+ dcn3_14_soc.clock_limits;
+ unsigned int i, closest_clk_lvl;
+ int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
+ int j;
+
+ dc_assert_fp_enabled();
+
+ // Default clock levels are used for diags, which may lead to overclocking.
+ if (!IS_DIAG_DC(dc->ctx->dce_environment) && dc->config.use_default_clock_table == false) {
+
+ dcn3_14_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator;
+ dcn3_14_ip.max_num_dpp = dc->res_pool->pipe_count;
+
+ if (bw_params->num_channels > 0)
+ dcn3_14_soc.num_chans = bw_params->num_channels;
+
+ ASSERT(dcn3_14_soc.num_chans);
+ ASSERT(clk_table->num_entries);
+
+ /* Prepass to find max clocks independent of voltage level. */
+ for (i = 0; i < clk_table->num_entries; ++i) {
+ if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
+ if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
+ }
+
+ for (i = 0; i < clk_table->num_entries; i++) {
+ /* loop backwards*/
+ for (closest_clk_lvl = 0, j = dcn3_14_soc.num_states - 1; j >= 0; j--) {
+ if ((unsigned int) dcn3_14_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) {
+ closest_clk_lvl = j;
+ break;
+ }
+ }
+ if (clk_table->num_entries == 1) {
+ /*smu gives one DPM level, let's take the highest one*/
+ closest_clk_lvl = dcn3_14_soc.num_states - 1;
+ }
+
+ clock_limits[i].state = i;
+
+ /* Clocks dependent on voltage level. */
+ clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+ if (clk_table->num_entries == 1 &&
+ clock_limits[i].dcfclk_mhz < dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) {
+ /*SMU fix not released yet*/
+ clock_limits[i].dcfclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz;
+ }
+ clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
+ clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
+
+ if (clk_table->entries[i].memclk_mhz && clk_table->entries[i].wck_ratio)
+ clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio;
+
+ /* Clocks independent of voltage level. */
+ clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
+ dcn3_14_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+
+ clock_limits[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
+ dcn3_14_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+
+ clock_limits[i].dram_bw_per_chan_gbps = dcn3_14_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+ clock_limits[i].dscclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+ clock_limits[i].dtbclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+ clock_limits[i].phyclk_d18_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+ clock_limits[i].phyclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+ }
+ for (i = 0; i < clk_table->num_entries; i++)
+ dcn3_14_soc.clock_limits[i] = clock_limits[i];
+ if (clk_table->num_entries) {
+ dcn3_14_soc.num_states = clk_table->num_entries;
+ }
+ }
+
+ if (max_dispclk_mhz) {
+ dcn3_14_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+ dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+ }
+
+ if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment))
+ dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31);
+ else
+ dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31_FPGA);
+}
+
+static bool is_dual_plane(enum surface_pixel_format format)
+{
+ return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA;
+}
+
+int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ bool fast_validate)
+{
+ int i, pipe_cnt;
+ struct resource_context *res_ctx = &context->res_ctx;
+ struct pipe_ctx *pipe;
+ bool upscaled = false;
+
+ dc_assert_fp_enabled();
+
+ dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
+
+ for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+ struct dc_crtc_timing *timing;
+
+ if (!res_ctx->pipe_ctx[i].stream)
+ continue;
+ pipe = &res_ctx->pipe_ctx[i];
+ timing = &pipe->stream->timing;
+
+ if (dc_extended_blank_supported(dc) && pipe->stream->adjust.v_total_max == pipe->stream->adjust.v_total_min
+ && pipe->stream->adjust.v_total_min > timing->v_total)
+ pipes[pipe_cnt].pipe.dest.vtotal = pipe->stream->adjust.v_total_min;
+
+ if (pipe->plane_state &&
+ (pipe->plane_state->src_rect.height < pipe->plane_state->dst_rect.height ||
+ pipe->plane_state->src_rect.width < pipe->plane_state->dst_rect.width))
+ upscaled = true;
+
+ /*
+ * Immediate flip can be set dynamically after enabling the plane.
+ * We need to require support for immediate flip or underflow can be
+ * intermittently experienced depending on peak b/w requirements.
+ */
+ pipes[pipe_cnt].pipe.src.immediate_flip = true;
+
+ pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
+ pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active;
+ pipes[pipe_cnt].pipe.src.gpuvm = true;
+ pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
+ pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+ pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
+ pipes[pipe_cnt].pipe.src.dcc_rate = 3;
+ pipes[pipe_cnt].dout.dsc_input_bpc = 0;
+
+ if (pipes[pipe_cnt].dout.dsc_enable) {
+ switch (timing->display_color_depth) {
+ case COLOR_DEPTH_888:
+ pipes[pipe_cnt].dout.dsc_input_bpc = 8;
+ break;
+ case COLOR_DEPTH_101010:
+ pipes[pipe_cnt].dout.dsc_input_bpc = 10;
+ break;
+ case COLOR_DEPTH_121212:
+ pipes[pipe_cnt].dout.dsc_input_bpc = 12;
+ break;
+ default:
+ ASSERT(0);
+ break;
+ }
+ }
+
+ pipe_cnt++;
+ }
+ context->bw_ctx.dml.ip.det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE;
+
+ dc->config.enable_4to1MPC = false;
+ if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) {
+ if (is_dual_plane(pipe->plane_state->format)
+ && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) {
+ dc->config.enable_4to1MPC = true;
+ } else if (!is_dual_plane(pipe->plane_state->format) && pipe->plane_state->src_rect.width <= 5120) {
+ /* Limit to 5k max to avoid forced pipe split when there is not enough detile for swath */
+ context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
+ pipes[0].pipe.src.unbounded_req_mode = true;
+ }
+ } else if (context->stream_count >= dc->debug.crb_alloc_policy_min_disp_count
+ && dc->debug.crb_alloc_policy > DET_SIZE_DEFAULT) {
+ context->bw_ctx.dml.ip.det_buffer_size_kbytes = dc->debug.crb_alloc_policy * 64;
+ } else if (context->stream_count >= 3 && upscaled) {
+ context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
+ }
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (!pipe->stream)
+ continue;
+
+ if (pipe->stream->signal == SIGNAL_TYPE_EDP && dc->debug.seamless_boot_odm_combine &&
+ pipe->stream->apply_seamless_boot_optimization) {
+
+ if (pipe->stream->apply_boot_odm_mode == dm_odm_combine_policy_2to1) {
+ context->bw_ctx.dml.vba.ODMCombinePolicy = dm_odm_combine_policy_2to1;
+ break;
+ }
+ }
+ }
+
+ return pipe_cnt;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h
new file mode 100644
index 000000000000..d32c5bb99f4c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN314_FPU_H__
+#define __DCN314_FPU_H__
+
+#define DCN3_14_DEFAULT_DET_SIZE 384
+#define DCN3_14_MAX_DET_SIZE 384
+#define DCN3_14_MIN_COMPBUF_SIZE_KB 128
+#define DCN3_14_CRB_SEGMENT_SIZE_KB 64
+
+void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params);
+int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ bool fast_validate);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 66453546e24f..8118cfc5b405 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -473,8 +473,11 @@ void dcn32_set_phantom_stream_timing(struct dc *dc,
// DML calculation for MALL region doesn't take into account FW delay
// and required pstate allow width for multi-display cases
+ /* Add 16 lines margin to the MALL REGION because SUB_VP_START_LINE must be aligned
+ * to 2 swaths (i.e. 16 lines)
+ */
phantom_vactive = get_subviewport_lines_needed_in_mall(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx) +
- pstate_width_fw_delay_lines;
+ pstate_width_fw_delay_lines + dc->caps.subvp_swath_height_margin_lines;
// For backporch of phantom pipe, use vstartup of the main pipe
phantom_bp = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
@@ -490,6 +493,7 @@ void dcn32_set_phantom_stream_timing(struct dc *dc,
phantom_stream->timing.v_front_porch +
phantom_stream->timing.v_sync_width +
phantom_bp;
+ phantom_stream->timing.flags.DSC = 0; // Don't need DSC for phantom timing
}
/**
@@ -983,9 +987,15 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
* DML favors voltage over p-state, but we're more interested in
* supporting p-state over voltage. We can't support p-state in
* prefetch mode > 0 so try capping the prefetch mode to start.
+ * Override present for testing.
*/
- context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+ if (dc->debug.dml_disallow_alternate_prefetch_modes)
+ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
dm_prefetch_support_uclk_fclk_and_stutter;
+ else
+ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+ dm_prefetch_support_uclk_fclk_and_stutter_if_possible;
+
*vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt);
/* This may adjust vlevel and maxMpcComb */
if (*vlevel < context->bw_ctx.dml.soc.num_states)
@@ -1014,7 +1024,9 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
* will not allow for switch in VBLANK. The DRR display must have it's VBLANK stretched
* enough to support MCLK switching.
*/
- if (*vlevel == context->bw_ctx.dml.soc.num_states) {
+ if (*vlevel == context->bw_ctx.dml.soc.num_states &&
+ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final ==
+ dm_prefetch_support_uclk_fclk_and_stutter) {
context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
dm_prefetch_support_stutter;
/* There are params (such as FabricClock) that need to be recalculated
@@ -1344,7 +1356,8 @@ bool dcn32_internal_validate_bw(struct dc *dc,
int split[MAX_PIPES] = { 0 };
bool merge[MAX_PIPES] = { false };
bool newly_split[MAX_PIPES] = { false };
- int pipe_cnt, i, pipe_idx, vlevel;
+ int pipe_cnt, i, pipe_idx;
+ int vlevel = context->bw_ctx.dml.soc.num_states;
struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
dc_assert_fp_enabled();
@@ -1373,17 +1386,22 @@ bool dcn32_internal_validate_bw(struct dc *dc,
DC_FP_END();
}
- if (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states ||
- vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) {
+ if (fast_validate ||
+ (dc->debug.dml_disallow_alternate_prefetch_modes &&
+ (vlevel == context->bw_ctx.dml.soc.num_states ||
+ vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported))) {
/*
- * If mode is unsupported or there's still no p-state support then
- * fall back to favoring voltage.
+ * If dml_disallow_alternate_prefetch_modes is false, then we have already
+ * tried alternate prefetch modes during full validation.
+ *
+ * If mode is unsupported or there is no p-state support, then
+ * fall back to favouring voltage.
*
- * If Prefetch mode 0 failed for this config, or passed with Max UCLK, try if
- * supported with Prefetch mode 1 (dm_prefetch_support_fclk_and_stutter == 2)
+ * If Prefetch mode 0 failed for this config, or passed with Max UCLK, then try
+ * to support with Prefetch mode 1 (dm_prefetch_support_fclk_and_stutter == 2)
*/
context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
- dm_prefetch_support_fclk_and_stutter;
+ dm_prefetch_support_fclk_and_stutter;
vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
@@ -2098,6 +2116,13 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
}
+ if ((int)(dcn3_2_soc.fclk_change_latency_us * 1000)
+ != dc->bb_overrides.fclk_clock_change_latency_ns
+ && dc->bb_overrides.fclk_clock_change_latency_ns) {
+ dcn3_2_soc.fclk_change_latency_us =
+ dc->bb_overrides.fclk_clock_change_latency_ns / 1000;
+ }
+
if ((int)(dcn3_2_soc.dummy_pstate_latency_us * 1000)
!= dc->bb_overrides.dummy_clock_change_latency_ns
&& dc->bb_overrides.dummy_clock_change_latency_ns) {
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
index 890612db08dc..cb2025771646 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -221,7 +221,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
// VBA_DELTA
// Calculate DET size, swath height
dml32_CalculateSwathAndDETConfiguration(
- &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration,
mode_lib->vba.DETSizeOverride,
mode_lib->vba.UsesMALLForPStateChange,
mode_lib->vba.ConfigReturnBufferSizeInKByte,
@@ -461,7 +460,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
{
dml32_CalculateVMRowAndSwath(
- &v->dummy_vars.dml32_CalculateVMRowAndSwath,
mode_lib->vba.NumberOfActiveSurfaces,
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters,
v->SurfaceSizeInMALL,
@@ -757,9 +755,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelY = v->BytePerPixelY[k];
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelC = v->BytePerPixelC[k];
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP;
- v->ErrorResult[k] = dml32_CalculatePrefetchSchedule(
- &v->dummy_vars.dml32_CalculatePrefetchSchedule,
- v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor,
+ v->ErrorResult[k] = dml32_CalculatePrefetchSchedule(v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor,
&v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe, v->DSCDelay[k],
mode_lib->vba.DPPCLKDelaySubtotal + mode_lib->vba.DPPCLKDelayCNVCFormater,
mode_lib->vba.DPPCLKDelaySCL,
@@ -1167,7 +1163,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SMNLatency = mode_lib->vba.SMNLatency;
dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
- &v->dummy_vars.dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport,
mode_lib->vba.USRRetrainingRequiredFinal,
mode_lib->vba.UsesMALLForPStateChange,
mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
@@ -1952,7 +1947,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
}
dml32_CalculateSwathAndDETConfiguration(
- &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration,
mode_lib->vba.DETSizeOverride,
mode_lib->vba.UsesMALLForPStateChange,
mode_lib->vba.ConfigReturnBufferSizeInKByte,
@@ -2549,7 +2543,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
}
dml32_CalculateSwathAndDETConfiguration(
- &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration,
mode_lib->vba.DETSizeOverride,
mode_lib->vba.UsesMALLForPStateChange,
mode_lib->vba.ConfigReturnBufferSizeInKByte,
@@ -2749,7 +2742,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
{
dml32_CalculateVMRowAndSwath(
- &v->dummy_vars.dml32_CalculateVMRowAndSwath,
mode_lib->vba.NumberOfActiveSurfaces,
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters,
mode_lib->vba.SurfaceSizeInMALL,
@@ -3266,7 +3258,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
mode_lib->vba.NoTimeForPrefetch[i][j][k] =
dml32_CalculatePrefetchSchedule(
- &v->dummy_vars.dml32_CalculatePrefetchSchedule,
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor,
&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe,
mode_lib->vba.DSCDelayPerState[i][k],
@@ -3566,7 +3557,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
{
dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
- &v->dummy_vars.dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport,
mode_lib->vba.USRRetrainingRequiredFinal,
mode_lib->vba.UsesMALLForPStateChange,
mode_lib->vba.PrefetchModePerState[i][j],
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
index 07f8f3b8626b..05fc14a47fba 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
@@ -391,7 +391,6 @@ void dml32_CalculateBytePerPixelAndBlockSizes(
} // CalculateBytePerPixelAndBlockSizes
void dml32_CalculateSwathAndDETConfiguration(
- struct dml32_CalculateSwathAndDETConfiguration *st_vars,
unsigned int DETSizeOverride[],
enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
unsigned int ConfigReturnBufferSizeInKByte,
@@ -456,10 +455,18 @@ void dml32_CalculateSwathAndDETConfiguration(
bool ViewportSizeSupportPerSurface[],
bool *ViewportSizeSupport)
{
+ unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
+ unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
+ unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
+ unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
+ unsigned int RoundedUpSwathSizeBytesY;
+ unsigned int RoundedUpSwathSizeBytesC;
+ double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
+ double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
unsigned int k;
-
- st_vars->TotalActiveDPP = 0;
- st_vars->NoChromaSurfaces = true;
+ unsigned int TotalActiveDPP = 0;
+ bool NoChromaSurfaces = true;
+ unsigned int DETBufferSizeInKByteForSwathCalculation;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
@@ -494,43 +501,43 @@ void dml32_CalculateSwathAndDETConfiguration(
DPPPerSurface,
/* Output */
- st_vars->SwathWidthdoubleDPP,
- st_vars->SwathWidthdoubleDPPChroma,
+ SwathWidthdoubleDPP,
+ SwathWidthdoubleDPPChroma,
SwathWidth,
SwathWidthChroma,
- st_vars->MaximumSwathHeightY,
- st_vars->MaximumSwathHeightC,
+ MaximumSwathHeightY,
+ MaximumSwathHeightC,
swath_width_luma_ub,
swath_width_chroma_ub);
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
- st_vars->RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * st_vars->MaximumSwathHeightY[k];
- st_vars->RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * st_vars->MaximumSwathHeightC[k];
+ RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
+ RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
- dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, st_vars->MaximumSwathHeightY[k]);
+ dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]);
dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
- st_vars->RoundedUpMaxSwathSizeBytesY[k]);
+ RoundedUpMaxSwathSizeBytesY[k]);
dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
- dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, st_vars->MaximumSwathHeightC[k]);
+ dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]);
dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
- st_vars->RoundedUpMaxSwathSizeBytesC[k]);
+ RoundedUpMaxSwathSizeBytesC[k]);
#endif
if (SourcePixelFormat[k] == dm_420_10) {
- st_vars->RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesY[k], 256);
- st_vars->RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesC[k], 256);
+ RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256);
+ RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256);
}
}
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
- st_vars->TotalActiveDPP = st_vars->TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
+ TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
- st_vars->NoChromaSurfaces = false;
+ NoChromaSurfaces = false;
}
}
@@ -540,10 +547,10 @@ void dml32_CalculateSwathAndDETConfiguration(
// if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
// - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
// - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
- *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (st_vars->RoundedUpMaxSwathSizeBytesY[0]/512);
+ *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512);
if (*CompBufReservedSpaceNeedAdjustment == 1) {
- *CompBufReservedSpaceKBytes = ROBSizeKBytes - st_vars->RoundedUpMaxSwathSizeBytesY[0]/512;
+ *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512;
}
#ifdef __DML_VBA_DEBUG__
@@ -551,7 +558,7 @@ void dml32_CalculateSwathAndDETConfiguration(
dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment);
#endif
- *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, st_vars->TotalActiveDPP, st_vars->NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
+ *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
dml32_CalculateDETBufferSize(DETSizeOverride,
UseMALLForPStateChange,
@@ -566,8 +573,8 @@ void dml32_CalculateSwathAndDETConfiguration(
SourcePixelFormat,
ReadBandwidthLuma,
ReadBandwidthChroma,
- st_vars->RoundedUpMaxSwathSizeBytesY,
- st_vars->RoundedUpMaxSwathSizeBytesC,
+ RoundedUpMaxSwathSizeBytesY,
+ RoundedUpMaxSwathSizeBytesC,
DPPPerSurface,
/* Output */
@@ -575,7 +582,7 @@ void dml32_CalculateSwathAndDETConfiguration(
CompressedBufferSizeInkByte);
#ifdef __DML_VBA_DEBUG__
- dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, st_vars->TotalActiveDPP);
+ dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
@@ -586,42 +593,42 @@ void dml32_CalculateSwathAndDETConfiguration(
*ViewportSizeSupport = true;
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
- st_vars->DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
+ DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
- st_vars->DETBufferSizeInKByteForSwathCalculation);
+ DETBufferSizeInKByteForSwathCalculation);
#endif
- if (st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] <=
- st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
- SwathHeightY[k] = st_vars->MaximumSwathHeightY[k];
- SwathHeightC[k] = st_vars->MaximumSwathHeightC[k];
- st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k];
- st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k];
- } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] &&
- st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] <=
- st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
- SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2;
- SwathHeightC[k] = st_vars->MaximumSwathHeightC[k];
- st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2;
- st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k];
- } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] < 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] &&
- st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 <=
- st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
- SwathHeightY[k] = st_vars->MaximumSwathHeightY[k];
- SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2;
- st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k];
- st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2;
+ if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
+ DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ SwathHeightY[k] = MaximumSwathHeightY[k];
+ SwathHeightC[k] = MaximumSwathHeightC[k];
+ RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
+ RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
+ } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
+ RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
+ DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
+ SwathHeightC[k] = MaximumSwathHeightC[k];
+ RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
+ RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
+ } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
+ RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
+ DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ SwathHeightY[k] = MaximumSwathHeightY[k];
+ SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
+ RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
+ RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
} else {
- SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2;
- SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2;
- st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2;
- st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2;
+ SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
+ SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
+ RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
+ RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
}
- if ((st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 >
- st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
+ if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
+ DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
|| SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
*ViewportSizeSupport = false;
@@ -636,7 +643,7 @@ void dml32_CalculateSwathAndDETConfiguration(
#endif
DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
DETBufferSizeC[k] = 0;
- } else if (st_vars->RoundedUpSwathSizeBytesY <= 1.5 * st_vars->RoundedUpSwathSizeBytesC) {
+ } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
#endif
@@ -654,11 +661,11 @@ void dml32_CalculateSwathAndDETConfiguration(
dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
- k, st_vars->RoundedUpMaxSwathSizeBytesY[k]);
+ k, RoundedUpMaxSwathSizeBytesY[k]);
dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
- k, st_vars->RoundedUpMaxSwathSizeBytesC[k]);
- dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesY);
- dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesC);
+ k, RoundedUpMaxSwathSizeBytesC[k]);
+ dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY);
+ dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC);
dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
@@ -1867,7 +1874,6 @@ void dml32_CalculateSurfaceSizeInMall(
} // CalculateSurfaceSizeInMall
void dml32_CalculateVMRowAndSwath(
- struct dml32_CalculateVMRowAndSwath *st_vars,
unsigned int NumberOfActiveSurfaces,
DmlPipe myPipe[],
unsigned int SurfaceSizeInMALL[],
@@ -1933,6 +1939,21 @@ void dml32_CalculateVMRowAndSwath(
unsigned int BIGK_FRAGMENT_SIZE[])
{
unsigned int k;
+ unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
+ unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
+ unsigned int PDEAndMetaPTEBytesFrameY;
+ unsigned int PDEAndMetaPTEBytesFrameC;
+ unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
+ unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
+ unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
+ unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
+ unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
+ bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
if (HostVMEnable == true) {
@@ -1954,15 +1975,15 @@ void dml32_CalculateVMRowAndSwath(
myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
!IsVertical(myPipe[k].SourceRotation)) {
- st_vars->PTEBufferSizeInRequestsForLuma[k] =
+ PTEBufferSizeInRequestsForLuma[k] =
(PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
- st_vars->PTEBufferSizeInRequestsForChroma[k] = st_vars->PTEBufferSizeInRequestsForLuma[k];
+ PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
} else {
- st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
- st_vars->PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
+ PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
+ PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
}
- st_vars->PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
+ PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
myPipe[k].ViewportStationary,
myPipe[k].DCCEnable,
myPipe[k].DPPPerSurface,
@@ -1982,21 +2003,21 @@ void dml32_CalculateVMRowAndSwath(
GPUVMMaxPageTableLevels,
GPUVMMinPageSizeKBytes[k],
HostVMMinPageSize,
- st_vars->PTEBufferSizeInRequestsForChroma[k],
+ PTEBufferSizeInRequestsForChroma[k],
myPipe[k].PitchC,
myPipe[k].DCCMetaPitchC,
myPipe[k].BlockWidthC,
myPipe[k].BlockHeightC,
/* Output */
- &st_vars->MetaRowByteC[k],
- &st_vars->PixelPTEBytesPerRowC[k],
+ &MetaRowByteC[k],
+ &PixelPTEBytesPerRowC[k],
&dpte_row_width_chroma_ub[k],
&dpte_row_height_chroma[k],
&dpte_row_height_linear_chroma[k],
- &st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k],
- &st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k],
- &st_vars->dpte_row_height_chroma_one_row_per_frame[k],
+ &PixelPTEBytesPerRowC_one_row_per_frame[k],
+ &dpte_row_width_chroma_ub_one_row_per_frame[k],
+ &dpte_row_height_chroma_one_row_per_frame[k],
&meta_req_width_chroma[k],
&meta_req_height_chroma[k],
&meta_row_width_chroma[k],
@@ -2024,19 +2045,19 @@ void dml32_CalculateVMRowAndSwath(
&VInitPreFillC[k],
&MaxNumSwathC[k]);
} else {
- st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
- st_vars->PTEBufferSizeInRequestsForChroma[k] = 0;
- st_vars->PixelPTEBytesPerRowC[k] = 0;
- st_vars->PDEAndMetaPTEBytesFrameC = 0;
- st_vars->MetaRowByteC[k] = 0;
+ PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
+ PTEBufferSizeInRequestsForChroma[k] = 0;
+ PixelPTEBytesPerRowC[k] = 0;
+ PDEAndMetaPTEBytesFrameC = 0;
+ MetaRowByteC[k] = 0;
MaxNumSwathC[k] = 0;
PrefetchSourceLinesC[k] = 0;
- st_vars->dpte_row_height_chroma_one_row_per_frame[k] = 0;
- st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
- st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
+ dpte_row_height_chroma_one_row_per_frame[k] = 0;
+ dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
+ PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
}
- st_vars->PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
+ PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
myPipe[k].ViewportStationary,
myPipe[k].DCCEnable,
myPipe[k].DPPPerSurface,
@@ -2056,21 +2077,21 @@ void dml32_CalculateVMRowAndSwath(
GPUVMMaxPageTableLevels,
GPUVMMinPageSizeKBytes[k],
HostVMMinPageSize,
- st_vars->PTEBufferSizeInRequestsForLuma[k],
+ PTEBufferSizeInRequestsForLuma[k],
myPipe[k].PitchY,
myPipe[k].DCCMetaPitchY,
myPipe[k].BlockWidthY,
myPipe[k].BlockHeightY,
/* Output */
- &st_vars->MetaRowByteY[k],
- &st_vars->PixelPTEBytesPerRowY[k],
+ &MetaRowByteY[k],
+ &PixelPTEBytesPerRowY[k],
&dpte_row_width_luma_ub[k],
&dpte_row_height_luma[k],
&dpte_row_height_linear_luma[k],
- &st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k],
- &st_vars->dpte_row_width_luma_ub_one_row_per_frame[k],
- &st_vars->dpte_row_height_luma_one_row_per_frame[k],
+ &PixelPTEBytesPerRowY_one_row_per_frame[k],
+ &dpte_row_width_luma_ub_one_row_per_frame[k],
+ &dpte_row_height_luma_one_row_per_frame[k],
&meta_req_width[k],
&meta_req_height[k],
&meta_row_width[k],
@@ -2098,19 +2119,19 @@ void dml32_CalculateVMRowAndSwath(
&VInitPreFillY[k],
&MaxNumSwathY[k]);
- PDEAndMetaPTEBytesFrame[k] = st_vars->PDEAndMetaPTEBytesFrameY + st_vars->PDEAndMetaPTEBytesFrameC;
- MetaRowByte[k] = st_vars->MetaRowByteY[k] + st_vars->MetaRowByteC[k];
+ PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
+ MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];
- if (st_vars->PixelPTEBytesPerRowY[k] <= 64 * st_vars->PTEBufferSizeInRequestsForLuma[k] &&
- st_vars->PixelPTEBytesPerRowC[k] <= 64 * st_vars->PTEBufferSizeInRequestsForChroma[k]) {
+ if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
+ PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) {
PTEBufferSizeNotExceeded[k] = true;
} else {
PTEBufferSizeNotExceeded[k] = false;
}
- st_vars->one_row_per_frame_fits_in_buffer[k] = (st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
- st_vars->PTEBufferSizeInRequestsForLuma[k] &&
- st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * st_vars->PTEBufferSizeInRequestsForChroma[k]);
+ one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
+ PTEBufferSizeInRequestsForLuma[k] &&
+ PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
}
dml32_CalculateMALLUseForStaticScreen(
@@ -2118,7 +2139,7 @@ void dml32_CalculateVMRowAndSwath(
MALLAllocatedForDCN,
UseMALLForStaticScreen, // mode
SurfaceSizeInMALL,
- st_vars->one_row_per_frame_fits_in_buffer,
+ one_row_per_frame_fits_in_buffer,
/* Output */
UsesMALLForStaticScreen); // boolen
@@ -2144,13 +2165,13 @@ void dml32_CalculateVMRowAndSwath(
!(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
if (use_one_row_for_frame[k]) {
- dpte_row_height_luma[k] = st_vars->dpte_row_height_luma_one_row_per_frame[k];
- dpte_row_width_luma_ub[k] = st_vars->dpte_row_width_luma_ub_one_row_per_frame[k];
- st_vars->PixelPTEBytesPerRowY[k] = st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k];
- dpte_row_height_chroma[k] = st_vars->dpte_row_height_chroma_one_row_per_frame[k];
- dpte_row_width_chroma_ub[k] = st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k];
- st_vars->PixelPTEBytesPerRowC[k] = st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k];
- PTEBufferSizeNotExceeded[k] = st_vars->one_row_per_frame_fits_in_buffer[k];
+ dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
+ dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
+ PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
+ dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
+ dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
+ PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
+ PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
}
if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
@@ -2158,7 +2179,7 @@ void dml32_CalculateVMRowAndSwath(
else
DCCMetaBufferSizeNotExceeded[k] = false;
- PixelPTEBytesPerRow[k] = st_vars->PixelPTEBytesPerRowY[k] + st_vars->PixelPTEBytesPerRowC[k];
+ PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
if (use_one_row_for_frame[k])
PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
@@ -2169,11 +2190,11 @@ void dml32_CalculateVMRowAndSwath(
myPipe[k].VRatioChroma,
myPipe[k].DCCEnable,
myPipe[k].HTotal / myPipe[k].PixelClock,
- st_vars->MetaRowByteY[k], st_vars->MetaRowByteC[k],
+ MetaRowByteY[k], MetaRowByteC[k],
meta_row_height[k],
meta_row_height_chroma[k],
- st_vars->PixelPTEBytesPerRowY[k],
- st_vars->PixelPTEBytesPerRowC[k],
+ PixelPTEBytesPerRowY[k],
+ PixelPTEBytesPerRowC[k],
dpte_row_height_luma[k],
dpte_row_height_chroma[k],
@@ -2189,12 +2210,12 @@ void dml32_CalculateVMRowAndSwath(
dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]);
dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n",
__func__, k, dpte_row_width_luma_ub[k]);
- dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, st_vars->PixelPTEBytesPerRowY[k]);
+ dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]);
dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n",
__func__, k, dpte_row_height_chroma[k]);
dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n",
__func__, k, dpte_row_width_chroma_ub[k]);
- dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, st_vars->PixelPTEBytesPerRowC[k]);
+ dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]);
dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]);
dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n",
__func__, k, PTEBufferSizeNotExceeded[k]);
@@ -3342,7 +3363,6 @@ double dml32_CalculateExtraLatency(
} // CalculateExtraLatency
bool dml32_CalculatePrefetchSchedule(
- struct dml32_CalculatePrefetchSchedule *st_vars,
double HostVMInefficiencyFactor,
DmlPipe *myPipe,
unsigned int DSCDelay,
@@ -3406,18 +3426,45 @@ bool dml32_CalculatePrefetchSchedule(
double *VReadyOffsetPix)
{
bool MyError = false;
-
- st_vars->TimeForFetchingMetaPTE = 0;
- st_vars->TimeForFetchingRowInVBlank = 0;
- st_vars->LinesToRequestPrefetchPixelData = 0;
- st_vars->max_vratio_pre = __DML_MAX_VRATIO_PRE__;
- st_vars->Tsw_est1 = 0;
- st_vars->Tsw_est3 = 0;
+ unsigned int DPPCycles, DISPCLKCycles;
+ double DSTTotalPixelsAfterScaler;
+ double LineTime;
+ double dst_y_prefetch_equ;
+ double prefetch_bw_oto;
+ double Tvm_oto;
+ double Tr0_oto;
+ double Tvm_oto_lines;
+ double Tr0_oto_lines;
+ double dst_y_prefetch_oto;
+ double TimeForFetchingMetaPTE = 0;
+ double TimeForFetchingRowInVBlank = 0;
+ double LinesToRequestPrefetchPixelData = 0;
+ unsigned int HostVMDynamicLevelsTrips;
+ double trip_to_mem;
+ double Tvm_trips;
+ double Tr0_trips;
+ double Tvm_trips_rounded;
+ double Tr0_trips_rounded;
+ double Lsw_oto;
+ double Tpre_rounded;
+ double prefetch_bw_equ;
+ double Tvm_equ;
+ double Tr0_equ;
+ double Tdmbf;
+ double Tdmec;
+ double Tdmsks;
+ double prefetch_sw_bytes;
+ double bytes_pp;
+ double dep_bytes;
+ unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__;
+ double min_Lsw;
+ double Tsw_est1 = 0;
+ double Tsw_est3 = 0;
if (GPUVMEnable == true && HostVMEnable == true)
- st_vars->HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
+ HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
else
- st_vars->HostVMDynamicLevelsTrips = 0;
+ HostVMDynamicLevelsTrips = 0;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
dml_print("DML::%s: GPUVMPageTableLevels = %d\n", __func__, GPUVMPageTableLevels);
@@ -3440,19 +3487,19 @@ bool dml32_CalculatePrefetchSchedule(
TSetup,
/* output */
- &st_vars->Tdmbf,
- &st_vars->Tdmec,
- &st_vars->Tdmsks,
+ &Tdmbf,
+ &Tdmec,
+ &Tdmsks,
VUpdateOffsetPix,
VUpdateWidthPix,
VReadyOffsetPix);
- st_vars->LineTime = myPipe->HTotal / myPipe->PixelClock;
- st_vars->trip_to_mem = UrgentLatency;
- st_vars->Tvm_trips = UrgentExtraLatency + st_vars->trip_to_mem * (GPUVMPageTableLevels * (st_vars->HostVMDynamicLevelsTrips + 1) - 1);
+ LineTime = myPipe->HTotal / myPipe->PixelClock;
+ trip_to_mem = UrgentLatency;
+ Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
if (DynamicMetadataVMEnabled == true)
- *Tdmdl = TWait + st_vars->Tvm_trips + st_vars->trip_to_mem;
+ *Tdmdl = TWait + Tvm_trips + trip_to_mem;
else
*Tdmdl = TWait + UrgentExtraLatency;
@@ -3462,15 +3509,15 @@ bool dml32_CalculatePrefetchSchedule(
#endif
if (DynamicMetadataEnable == true) {
- if (VStartup * st_vars->LineTime < *TSetup + *Tdmdl + st_vars->Tdmbf + st_vars->Tdmec + st_vars->Tdmsks) {
+ if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
*NotEnoughTimeForDynamicMetadata = true;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
- __func__, st_vars->Tdmbf);
- dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec);
+ __func__, Tdmbf);
+ dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
- __func__, st_vars->Tdmsks);
+ __func__, Tdmsks);
dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
__func__, *Tdmdl);
#endif
@@ -3482,21 +3529,21 @@ bool dml32_CalculatePrefetchSchedule(
}
*Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true &&
- GPUVMEnable == true ? TWait + st_vars->Tvm_trips : 0);
+ GPUVMEnable == true ? TWait + Tvm_trips : 0);
if (myPipe->ScalerEnabled)
- st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
+ DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
else
- st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
+ DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
- st_vars->DPPCycles = st_vars->DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
+ DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
- st_vars->DISPCLKCycles = DISPCLKDelaySubtotal;
+ DISPCLKCycles = DISPCLKDelaySubtotal;
if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
return true;
- *DSTXAfterScaler = st_vars->DPPCycles * myPipe->PixelClock / myPipe->Dppclk + st_vars->DISPCLKCycles *
+ *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
*DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
@@ -3506,10 +3553,10 @@ bool dml32_CalculatePrefetchSchedule(
+ ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
#ifdef __DML_VBA_DEBUG__
- dml_print("DML::%s: DPPCycles: %d\n", __func__, st_vars->DPPCycles);
+ dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
- dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, st_vars->DISPCLKCycles);
+ dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk);
dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode);
@@ -3522,9 +3569,9 @@ bool dml32_CalculatePrefetchSchedule(
else
*DSTYAfterScaler = 0;
- st_vars->DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
- *DSTYAfterScaler = dml_floor(st_vars->DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
- *DSTXAfterScaler = st_vars->DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
+ DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
+ *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
+ *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
@@ -3532,132 +3579,132 @@ bool dml32_CalculatePrefetchSchedule(
MyError = false;
- st_vars->Tr0_trips = st_vars->trip_to_mem * (st_vars->HostVMDynamicLevelsTrips + 1);
+ Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
if (GPUVMEnable == true) {
- st_vars->Tvm_trips_rounded = dml_ceil(4.0 * st_vars->Tvm_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime;
- st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime;
+ Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
+ Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
if (GPUVMPageTableLevels >= 3) {
- *Tno_bw = UrgentExtraLatency + st_vars->trip_to_mem *
- (double) ((GPUVMPageTableLevels - 2) * (st_vars->HostVMDynamicLevelsTrips + 1) - 1);
+ *Tno_bw = UrgentExtraLatency + trip_to_mem *
+ (double) ((GPUVMPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
} else if (GPUVMPageTableLevels == 1 && myPipe->DCCEnable != true) {
- st_vars->Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / st_vars->LineTime, 1.0) /
- 4.0 * st_vars->LineTime; // VBA_ERROR
+ Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
+ 4.0 * LineTime; // VBA_ERROR
*Tno_bw = UrgentExtraLatency;
} else {
*Tno_bw = 0;
}
} else if (myPipe->DCCEnable == true) {
- st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0;
- st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime;
+ Tvm_trips_rounded = LineTime / 4.0;
+ Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
*Tno_bw = 0;
} else {
- st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0;
- st_vars->Tr0_trips_rounded = st_vars->LineTime / 2.0;
+ Tvm_trips_rounded = LineTime / 4.0;
+ Tr0_trips_rounded = LineTime / 2.0;
*Tno_bw = 0;
}
- st_vars->Tvm_trips_rounded = dml_max(st_vars->Tvm_trips_rounded, st_vars->LineTime / 4.0);
- st_vars->Tr0_trips_rounded = dml_max(st_vars->Tr0_trips_rounded, st_vars->LineTime / 4.0);
+ Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
+ Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);
if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
|| myPipe->SourcePixelFormat == dm_420_12) {
- st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
+ bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
} else {
- st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
+ bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
}
- st_vars->prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
+ prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
+ PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
- st_vars->prefetch_bw_oto = dml_max(st_vars->bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
- st_vars->prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * st_vars->LineTime));
+ prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
+ prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
- st_vars->min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / st_vars->max_vratio_pre;
- st_vars->min_Lsw = dml_max(st_vars->min_Lsw, 1.0);
- st_vars->Lsw_oto = dml_ceil(4.0 * dml_max(st_vars->prefetch_sw_bytes / st_vars->prefetch_bw_oto / st_vars->LineTime, st_vars->min_Lsw), 1.0) / 4.0;
+ min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
+ min_Lsw = dml_max(min_Lsw, 1.0);
+ Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;
if (GPUVMEnable == true) {
- st_vars->Tvm_oto = dml_max3(
- st_vars->Tvm_trips,
- *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / st_vars->prefetch_bw_oto,
- st_vars->LineTime / 4.0);
+ Tvm_oto = dml_max3(
+ Tvm_trips,
+ *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
+ LineTime / 4.0);
} else
- st_vars->Tvm_oto = st_vars->LineTime / 4.0;
+ Tvm_oto = LineTime / 4.0;
if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
- st_vars->Tr0_oto = dml_max4(
- st_vars->Tr0_trips,
- (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto,
- (st_vars->LineTime - st_vars->Tvm_oto)/2.0,
- st_vars->LineTime / 4.0);
+ Tr0_oto = dml_max4(
+ Tr0_trips,
+ (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
+ (LineTime - Tvm_oto)/2.0,
+ LineTime / 4.0);
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
- (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto);
- dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, st_vars->Tr0_trips);
- dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, st_vars->LineTime - st_vars->Tvm_oto);
- dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, st_vars->LineTime / 4);
+ (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
+ dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
+ dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto);
+ dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
#endif
} else
- st_vars->Tr0_oto = (st_vars->LineTime - st_vars->Tvm_oto) / 2.0;
+ Tr0_oto = (LineTime - Tvm_oto) / 2.0;
- st_vars->Tvm_oto_lines = dml_ceil(4.0 * st_vars->Tvm_oto / st_vars->LineTime, 1) / 4.0;
- st_vars->Tr0_oto_lines = dml_ceil(4.0 * st_vars->Tr0_oto / st_vars->LineTime, 1) / 4.0;
- st_vars->dst_y_prefetch_oto = st_vars->Tvm_oto_lines + 2 * st_vars->Tr0_oto_lines + st_vars->Lsw_oto;
+ Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
+ Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
+ dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
- st_vars->dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / st_vars->LineTime -
+ dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
(*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
- dml_print("DML::%s: min_Lsw = %f\n", __func__, st_vars->min_Lsw);
+ dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
- dml_print("DML::%s: trip_to_mem = %f\n", __func__, st_vars->trip_to_mem);
+ dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
- dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, st_vars->prefetch_sw_bytes);
- dml_print("DML::%s: bytes_pp = %f\n", __func__, st_vars->bytes_pp);
+ dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
+ dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
- dml_print("DML::%s: Tvm_trips = %f\n", __func__, st_vars->Tvm_trips);
- dml_print("DML::%s: Tr0_trips = %f\n", __func__, st_vars->Tr0_trips);
- dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, st_vars->prefetch_bw_oto);
- dml_print("DML::%s: Tr0_oto = %f\n", __func__, st_vars->Tr0_oto);
- dml_print("DML::%s: Tvm_oto = %f\n", __func__, st_vars->Tvm_oto);
- dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, st_vars->Tvm_oto_lines);
- dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, st_vars->Tr0_oto_lines);
- dml_print("DML::%s: Lsw_oto = %f\n", __func__, st_vars->Lsw_oto);
- dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, st_vars->dst_y_prefetch_oto);
- dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, st_vars->dst_y_prefetch_equ);
+ dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
+ dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
+ dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
+ dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
+ dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
+ dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
+ dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
+ dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
+ dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
+ dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
#endif
- st_vars->dst_y_prefetch_equ = dml_floor(4.0 * (st_vars->dst_y_prefetch_equ + 0.125), 1) / 4.0;
- st_vars->Tpre_rounded = st_vars->dst_y_prefetch_equ * st_vars->LineTime;
+ dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
+ Tpre_rounded = dst_y_prefetch_equ * LineTime;
#ifdef __DML_VBA_DEBUG__
- dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, st_vars->dst_y_prefetch_equ);
- dml_print("DML::%s: LineTime: %f\n", __func__, st_vars->LineTime);
+ dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
+ dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
- __func__, VStartup * st_vars->LineTime);
+ __func__, VStartup * LineTime);
dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
- dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, st_vars->Tdmbf);
- dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec);
+ dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
+ dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
__func__, *DSTYAfterScaler);
#endif
- st_vars->dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
+ dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
- if (st_vars->prefetch_sw_bytes < st_vars->dep_bytes)
- st_vars->prefetch_sw_bytes = 2 * st_vars->dep_bytes;
+ if (prefetch_sw_bytes < dep_bytes)
+ prefetch_sw_bytes = 2 * dep_bytes;
*PrefetchBandwidth = 0;
*DestinationLinesToRequestVMInVBlank = 0;
@@ -3665,61 +3712,61 @@ bool dml32_CalculatePrefetchSchedule(
*VRatioPrefetchY = 0;
*VRatioPrefetchC = 0;
*RequiredPrefetchPixDataBWLuma = 0;
- if (st_vars->dst_y_prefetch_equ > 1) {
+ if (dst_y_prefetch_equ > 1) {
double PrefetchBandwidth1;
double PrefetchBandwidth2;
double PrefetchBandwidth3;
double PrefetchBandwidth4;
- if (st_vars->Tpre_rounded - *Tno_bw > 0) {
+ if (Tpre_rounded - *Tno_bw > 0) {
PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
- + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - *Tno_bw);
- st_vars->Tsw_est1 = st_vars->prefetch_sw_bytes / PrefetchBandwidth1;
+ + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
+ Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
} else
PrefetchBandwidth1 = 0;
- if (VStartup == MaxVStartup && (st_vars->Tsw_est1 / st_vars->LineTime < st_vars->min_Lsw)
- && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw > 0) {
+ if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
+ && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
- / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw);
+ / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
}
- if (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded > 0)
- PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + st_vars->prefetch_sw_bytes) /
- (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded);
+ if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
+ PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
+ (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
else
PrefetchBandwidth2 = 0;
- if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded > 0) {
+ if (Tpre_rounded - Tvm_trips_rounded > 0) {
PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
- + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded);
- st_vars->Tsw_est3 = st_vars->prefetch_sw_bytes / PrefetchBandwidth3;
+ + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
+ Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
} else
PrefetchBandwidth3 = 0;
if (VStartup == MaxVStartup &&
- (st_vars->Tsw_est3 / st_vars->LineTime < st_vars->min_Lsw) && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 *
- st_vars->LineTime - st_vars->Tvm_trips_rounded > 0) {
+ (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
+ LineTime - Tvm_trips_rounded > 0) {
PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
- / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - st_vars->Tvm_trips_rounded);
+ / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
}
- if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded > 0) {
- PrefetchBandwidth4 = st_vars->prefetch_sw_bytes /
- (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded);
+ if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
+ PrefetchBandwidth4 = prefetch_sw_bytes /
+ (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
} else {
PrefetchBandwidth4 = 0;
}
#ifdef __DML_VBA_DEBUG__
- dml_print("DML::%s: Tpre_rounded: %f\n", __func__, st_vars->Tpre_rounded);
+ dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
- dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, st_vars->Tvm_trips_rounded);
- dml_print("DML::%s: Tsw_est1: %f\n", __func__, st_vars->Tsw_est1);
- dml_print("DML::%s: Tsw_est3: %f\n", __func__, st_vars->Tsw_est3);
+ dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
+ dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
+ dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
@@ -3732,9 +3779,9 @@ bool dml32_CalculatePrefetchSchedule(
if (PrefetchBandwidth1 > 0) {
if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
- >= st_vars->Tvm_trips_rounded
+ >= Tvm_trips_rounded
&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
- / PrefetchBandwidth1 >= st_vars->Tr0_trips_rounded) {
+ / PrefetchBandwidth1 >= Tr0_trips_rounded) {
Case1OK = true;
} else {
Case1OK = false;
@@ -3745,9 +3792,9 @@ bool dml32_CalculatePrefetchSchedule(
if (PrefetchBandwidth2 > 0) {
if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
- >= st_vars->Tvm_trips_rounded
+ >= Tvm_trips_rounded
&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
- / PrefetchBandwidth2 < st_vars->Tr0_trips_rounded) {
+ / PrefetchBandwidth2 < Tr0_trips_rounded) {
Case2OK = true;
} else {
Case2OK = false;
@@ -3758,9 +3805,9 @@ bool dml32_CalculatePrefetchSchedule(
if (PrefetchBandwidth3 > 0) {
if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
- st_vars->Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
+ Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
- st_vars->Tr0_trips_rounded) {
+ Tr0_trips_rounded) {
Case3OK = true;
} else {
Case3OK = false;
@@ -3770,80 +3817,80 @@ bool dml32_CalculatePrefetchSchedule(
}
if (Case1OK)
- st_vars->prefetch_bw_equ = PrefetchBandwidth1;
+ prefetch_bw_equ = PrefetchBandwidth1;
else if (Case2OK)
- st_vars->prefetch_bw_equ = PrefetchBandwidth2;
+ prefetch_bw_equ = PrefetchBandwidth2;
else if (Case3OK)
- st_vars->prefetch_bw_equ = PrefetchBandwidth3;
+ prefetch_bw_equ = PrefetchBandwidth3;
else
- st_vars->prefetch_bw_equ = PrefetchBandwidth4;
+ prefetch_bw_equ = PrefetchBandwidth4;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
- dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, st_vars->prefetch_bw_equ);
+ dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
#endif
- if (st_vars->prefetch_bw_equ > 0) {
+ if (prefetch_bw_equ > 0) {
if (GPUVMEnable == true) {
- st_vars->Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
- HostVMInefficiencyFactor / st_vars->prefetch_bw_equ,
- st_vars->Tvm_trips, st_vars->LineTime / 4);
+ Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
+ HostVMInefficiencyFactor / prefetch_bw_equ,
+ Tvm_trips, LineTime / 4);
} else {
- st_vars->Tvm_equ = st_vars->LineTime / 4;
+ Tvm_equ = LineTime / 4;
}
if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
- st_vars->Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
- HostVMInefficiencyFactor) / st_vars->prefetch_bw_equ, st_vars->Tr0_trips,
- (st_vars->LineTime - st_vars->Tvm_equ) / 2, st_vars->LineTime / 4);
+ Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
+ HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
+ (LineTime - Tvm_equ) / 2, LineTime / 4);
} else {
- st_vars->Tr0_equ = (st_vars->LineTime - st_vars->Tvm_equ) / 2;
+ Tr0_equ = (LineTime - Tvm_equ) / 2;
}
} else {
- st_vars->Tvm_equ = 0;
- st_vars->Tr0_equ = 0;
+ Tvm_equ = 0;
+ Tr0_equ = 0;
#ifdef __DML_VBA_DEBUG__
dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
#endif
}
}
- if (st_vars->dst_y_prefetch_oto < st_vars->dst_y_prefetch_equ) {
- *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_oto;
- st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_oto;
- st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_oto;
- *PrefetchBandwidth = st_vars->prefetch_bw_oto;
+ if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
+ *DestinationLinesForPrefetch = dst_y_prefetch_oto;
+ TimeForFetchingMetaPTE = Tvm_oto;
+ TimeForFetchingRowInVBlank = Tr0_oto;
+ *PrefetchBandwidth = prefetch_bw_oto;
} else {
- *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_equ;
- st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_equ;
- st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_equ;
- *PrefetchBandwidth = st_vars->prefetch_bw_equ;
+ *DestinationLinesForPrefetch = dst_y_prefetch_equ;
+ TimeForFetchingMetaPTE = Tvm_equ;
+ TimeForFetchingRowInVBlank = Tr0_equ;
+ *PrefetchBandwidth = prefetch_bw_equ;
}
- *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * st_vars->TimeForFetchingMetaPTE / st_vars->LineTime, 1.0) / 4.0;
+ *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
*DestinationLinesToRequestRowInVBlank =
- dml_ceil(4.0 * st_vars->TimeForFetchingRowInVBlank / st_vars->LineTime, 1.0) / 4.0;
+ dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
- st_vars->LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch -
+ LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch -
*DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
__func__, *DestinationLinesToRequestVMInVBlank);
- dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, st_vars->TimeForFetchingRowInVBlank);
- dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime);
+ dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
+ dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
__func__, *DestinationLinesToRequestRowInVBlank);
dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
- dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, st_vars->LinesToRequestPrefetchPixelData);
+ dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
#endif
- if (st_vars->LinesToRequestPrefetchPixelData >= 1 && st_vars->prefetch_bw_equ > 0) {
- *VRatioPrefetchY = (double) PrefetchSourceLinesY / st_vars->LinesToRequestPrefetchPixelData;
+ if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
+ *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
@@ -3851,12 +3898,12 @@ bool dml32_CalculatePrefetchSchedule(
dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
#endif
if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
- if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
+ if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
*VRatioPrefetchY =
dml_max((double) PrefetchSourceLinesY /
- st_vars->LinesToRequestPrefetchPixelData,
+ LinesToRequestPrefetchPixelData,
(double) MaxNumSwathY * SwathHeightY /
- (st_vars->LinesToRequestPrefetchPixelData -
+ (LinesToRequestPrefetchPixelData -
(VInitPreFillY - 3.0) / 2.0));
*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
} else {
@@ -3870,7 +3917,7 @@ bool dml32_CalculatePrefetchSchedule(
#endif
}
- *VRatioPrefetchC = (double) PrefetchSourceLinesC / st_vars->LinesToRequestPrefetchPixelData;
+ *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
#ifdef __DML_VBA_DEBUG__
@@ -3879,11 +3926,11 @@ bool dml32_CalculatePrefetchSchedule(
dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
#endif
if ((SwathHeightC > 4)) {
- if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
+ if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
*VRatioPrefetchC =
dml_max(*VRatioPrefetchC,
(double) MaxNumSwathC * SwathHeightC /
- (st_vars->LinesToRequestPrefetchPixelData -
+ (LinesToRequestPrefetchPixelData -
(VInitPreFillC - 3.0) / 2.0));
*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
} else {
@@ -3898,25 +3945,25 @@ bool dml32_CalculatePrefetchSchedule(
}
*RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
- / st_vars->LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
- / st_vars->LineTime;
+ / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
+ / LineTime;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
- dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime);
+ dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
__func__, *RequiredPrefetchPixDataBWLuma);
#endif
*RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
- st_vars->LinesToRequestPrefetchPixelData
+ LinesToRequestPrefetchPixelData
* myPipe->BytePerPixelC
- * swath_width_chroma_ub / st_vars->LineTime;
+ * swath_width_chroma_ub / LineTime;
} else {
MyError = true;
#ifdef __DML_VBA_DEBUG__
dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
- __func__, st_vars->LinesToRequestPrefetchPixelData);
+ __func__, LinesToRequestPrefetchPixelData);
#endif
*VRatioPrefetchY = 0;
*VRatioPrefetchC = 0;
@@ -3925,15 +3972,15 @@ bool dml32_CalculatePrefetchSchedule(
}
#ifdef __DML_VBA_DEBUG__
dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
- (double)st_vars->LinesToRequestPrefetchPixelData * st_vars->LineTime +
- 2.0*st_vars->TimeForFetchingRowInVBlank + st_vars->TimeForFetchingMetaPTE);
- dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", st_vars->TimeForFetchingMetaPTE);
+ (double)LinesToRequestPrefetchPixelData * LineTime +
+ 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
+ dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
- (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime);
+ (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
- dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * st_vars->LineTime -
- st_vars->TimeForFetchingMetaPTE - 2*st_vars->TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
- ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime - TWait - TCalc - *TSetup);
+ dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
+ TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
+ ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
PixelPTEBytesPerRow);
#endif
@@ -3941,7 +3988,7 @@ bool dml32_CalculatePrefetchSchedule(
MyError = true;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
- __func__, st_vars->dst_y_prefetch_equ);
+ __func__, dst_y_prefetch_equ);
#endif
}
@@ -3957,10 +4004,10 @@ bool dml32_CalculatePrefetchSchedule(
dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
__func__, *DestinationLinesToRequestVMInVBlank);
- dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime);
+ dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
#endif
prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
- (*DestinationLinesToRequestVMInVBlank * st_vars->LineTime);
+ (*DestinationLinesToRequestVMInVBlank * LineTime);
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
#endif
@@ -3977,7 +4024,7 @@ bool dml32_CalculatePrefetchSchedule(
prefetch_row_bw = 0;
} else if (*DestinationLinesToRequestRowInVBlank > 0) {
prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
- (*DestinationLinesToRequestRowInVBlank * st_vars->LineTime);
+ (*DestinationLinesToRequestRowInVBlank * LineTime);
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
@@ -4000,12 +4047,12 @@ bool dml32_CalculatePrefetchSchedule(
if (MyError) {
*PrefetchBandwidth = 0;
- st_vars->TimeForFetchingMetaPTE = 0;
- st_vars->TimeForFetchingRowInVBlank = 0;
+ TimeForFetchingMetaPTE = 0;
+ TimeForFetchingRowInVBlank = 0;
*DestinationLinesToRequestVMInVBlank = 0;
*DestinationLinesToRequestRowInVBlank = 0;
*DestinationLinesForPrefetch = 0;
- st_vars->LinesToRequestPrefetchPixelData = 0;
+ LinesToRequestPrefetchPixelData = 0;
*VRatioPrefetchY = 0;
*VRatioPrefetchC = 0;
*RequiredPrefetchPixDataBWLuma = 0;
@@ -4159,7 +4206,6 @@ void dml32_CalculateFlipSchedule(
} // CalculateFlipSchedule
void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
- struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport *st_vars,
bool USRRetrainingRequiredFinal,
enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
unsigned int PrefetchMode,
@@ -4221,15 +4267,37 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
double ActiveDRAMClockChangeLatencyMargin[])
{
unsigned int i, j, k;
-
- st_vars->SurfaceWithMinActiveFCLKChangeMargin = 0;
- st_vars->DRAMClockChangeSupportNumber = 0;
- st_vars->DRAMClockChangeMethod = 0;
- st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
- st_vars->MinActiveFCLKChangeMargin = 0.;
- st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
- st_vars->TotalPixelBW = 0.0;
- st_vars->TotalActiveWriteback = 0;
+ unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
+ unsigned int DRAMClockChangeSupportNumber = 0;
+ unsigned int LastSurfaceWithoutMargin;
+ unsigned int DRAMClockChangeMethod = 0;
+ bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
+ double MinActiveFCLKChangeMargin = 0.;
+ double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
+ double ActiveClockChangeLatencyHidingY;
+ double ActiveClockChangeLatencyHidingC;
+ double ActiveClockChangeLatencyHiding;
+ double EffectiveDETBufferSizeY;
+ double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
+ double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
+ double TotalPixelBW = 0.0;
+ bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
+ double EffectiveLBLatencyHidingY;
+ double EffectiveLBLatencyHidingC;
+ double LinesInDETY[DC__NUM_DPP__MAX];
+ double LinesInDETC[DC__NUM_DPP__MAX];
+ unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
+ unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
+ double FullDETBufferingTimeY;
+ double FullDETBufferingTimeC;
+ double WritebackDRAMClockChangeLatencyMargin;
+ double WritebackFCLKChangeLatencyMargin;
+ double WritebackLatencyHiding;
+ bool SameTimingForFCLKChange;
+
+ unsigned int TotalActiveWriteback = 0;
+ unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
+ unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
Watermark->UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
Watermark->USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
@@ -4261,13 +4329,13 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
#endif
- st_vars->TotalActiveWriteback = 0;
+ TotalActiveWriteback = 0;
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
if (WritebackEnable[k] == true)
- st_vars->TotalActiveWriteback = st_vars->TotalActiveWriteback + 1;
+ TotalActiveWriteback = TotalActiveWriteback + 1;
}
- if (st_vars->TotalActiveWriteback <= 1) {
+ if (TotalActiveWriteback <= 1) {
Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
} else {
Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
@@ -4277,7 +4345,7 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
Watermark->WritebackUrgentWatermark = Watermark->WritebackUrgentWatermark
+ mmSOCParameters.USRRetrainingLatency;
- if (st_vars->TotalActiveWriteback <= 1) {
+ if (TotalActiveWriteback <= 1) {
Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
+ mmSOCParameters.WritebackLatency;
Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
@@ -4307,14 +4375,14 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
#endif
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
- st_vars->TotalPixelBW = st_vars->TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] +
+ TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] +
SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) / (HTotal[k] / PixelClock[k]);
}
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
- st_vars->LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1);
- st_vars->LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1);
+ LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1);
+ LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1);
#ifdef __DML_VBA_DEBUG__
@@ -4325,72 +4393,72 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
dml_print("DML::%s: k=%d, VTaps = %d\n", __func__, k, VTaps[k]);
#endif
- st_vars->EffectiveLBLatencyHidingY = st_vars->LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]);
- st_vars->EffectiveLBLatencyHidingC = st_vars->LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
- st_vars->EffectiveDETBufferSizeY = DETBufferSizeY[k];
+ EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]);
+ EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
+ EffectiveDETBufferSizeY = DETBufferSizeY[k];
if (UnboundedRequestEnabled) {
- st_vars->EffectiveDETBufferSizeY = st_vars->EffectiveDETBufferSizeY
+ EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
+ CompressedBufferSizeInkByte * 1024
* (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k])
- / (HTotal[k] / PixelClock[k]) / st_vars->TotalPixelBW;
+ / (HTotal[k] / PixelClock[k]) / TotalPixelBW;
}
- st_vars->LinesInDETY[k] = (double) st_vars->EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
- st_vars->LinesInDETYRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETY[k], SwathHeightY[k]);
- st_vars->FullDETBufferingTimeY = st_vars->LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
+ LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
+ LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
+ FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
- st_vars->ActiveClockChangeLatencyHidingY = st_vars->EffectiveLBLatencyHidingY + st_vars->FullDETBufferingTimeY
+ ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
- (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k];
if (NumberOfActiveSurfaces > 1) {
- st_vars->ActiveClockChangeLatencyHidingY = st_vars->ActiveClockChangeLatencyHidingY
+ ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
- (1 - 1 / NumberOfActiveSurfaces) * SwathHeightY[k] * HTotal[k]
/ PixelClock[k] / VRatio[k];
}
if (BytePerPixelDETC[k] > 0) {
- st_vars->LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
- st_vars->LinesInDETCRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETC[k], SwathHeightC[k]);
- st_vars->FullDETBufferingTimeC = st_vars->LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k])
+ LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
+ LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]);
+ FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k])
/ VRatioChroma[k];
- st_vars->ActiveClockChangeLatencyHidingC = st_vars->EffectiveLBLatencyHidingC + st_vars->FullDETBufferingTimeC
+ ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
- (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k]
/ PixelClock[k];
if (NumberOfActiveSurfaces > 1) {
- st_vars->ActiveClockChangeLatencyHidingC = st_vars->ActiveClockChangeLatencyHidingC
+ ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
- (1 - 1 / NumberOfActiveSurfaces) * SwathHeightC[k] * HTotal[k]
/ PixelClock[k] / VRatioChroma[k];
}
- st_vars->ActiveClockChangeLatencyHiding = dml_min(st_vars->ActiveClockChangeLatencyHidingY,
- st_vars->ActiveClockChangeLatencyHidingC);
+ ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY,
+ ActiveClockChangeLatencyHidingC);
} else {
- st_vars->ActiveClockChangeLatencyHiding = st_vars->ActiveClockChangeLatencyHidingY;
+ ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
}
- ActiveDRAMClockChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
+ ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
- Watermark->DRAMClockChangeWatermark;
- st_vars->ActiveFCLKChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
+ ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
- Watermark->FCLKChangeWatermark;
- st_vars->USRRetrainingLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark;
+ USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark;
if (WritebackEnable[k]) {
- st_vars->WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024
+ WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024
/ (WritebackDestinationWidth[k] * WritebackDestinationHeight[k]
/ (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
if (WritebackPixelFormat[k] == dm_444_64)
- st_vars->WritebackLatencyHiding = st_vars->WritebackLatencyHiding / 2;
+ WritebackLatencyHiding = WritebackLatencyHiding / 2;
- st_vars->WritebackDRAMClockChangeLatencyMargin = st_vars->WritebackLatencyHiding
+ WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
- Watermark->WritebackDRAMClockChangeWatermark;
- st_vars->WritebackFCLKChangeLatencyMargin = st_vars->WritebackLatencyHiding
+ WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
- Watermark->WritebackFCLKChangeWatermark;
ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
- st_vars->WritebackFCLKChangeLatencyMargin);
- st_vars->ActiveFCLKChangeLatencyMargin[k] = dml_min(st_vars->ActiveFCLKChangeLatencyMargin[k],
- st_vars->WritebackDRAMClockChangeLatencyMargin);
+ WritebackFCLKChangeLatencyMargin);
+ ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k],
+ WritebackDRAMClockChangeLatencyMargin);
}
MaxActiveDRAMClockChangeLatencySupported[k] =
(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
@@ -4409,41 +4477,41 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
HTotal[i] == HTotal[j] && VTotal[i] == VTotal[j] &&
VActive[i] == VActive[j]) || (SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
(DRRDisplay[i] || DRRDisplay[j]))) {
- st_vars->SynchronizedSurfaces[i][j] = true;
+ SynchronizedSurfaces[i][j] = true;
} else {
- st_vars->SynchronizedSurfaces[i][j] = false;
+ SynchronizedSurfaces[i][j] = false;
}
}
}
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
- (!st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
- st_vars->ActiveFCLKChangeLatencyMargin[k] < st_vars->MinActiveFCLKChangeMargin)) {
- st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
- st_vars->MinActiveFCLKChangeMargin = st_vars->ActiveFCLKChangeLatencyMargin[k];
- st_vars->SurfaceWithMinActiveFCLKChangeMargin = k;
+ (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
+ ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
+ FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
+ MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
+ SurfaceWithMinActiveFCLKChangeMargin = k;
}
}
- *MinActiveFCLKChangeLatencySupported = st_vars->MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
+ *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
- st_vars->SameTimingForFCLKChange = true;
+ SameTimingForFCLKChange = true;
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
- if (!st_vars->SynchronizedSurfaces[k][st_vars->SurfaceWithMinActiveFCLKChangeMargin]) {
+ if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
- (st_vars->SameTimingForFCLKChange ||
- st_vars->ActiveFCLKChangeLatencyMargin[k] <
- st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
- st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = st_vars->ActiveFCLKChangeLatencyMargin[k];
+ (SameTimingForFCLKChange ||
+ ActiveFCLKChangeLatencyMargin[k] <
+ SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
+ SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
}
- st_vars->SameTimingForFCLKChange = false;
+ SameTimingForFCLKChange = false;
}
}
- if (st_vars->MinActiveFCLKChangeMargin > 0) {
+ if (MinActiveFCLKChangeMargin > 0) {
*FCLKChangeSupport = dm_fclock_change_vactive;
- } else if ((st_vars->SameTimingForFCLKChange || st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
+ } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
(PrefetchMode <= 1)) {
*FCLKChangeSupport = dm_fclock_change_vblank;
} else {
@@ -4453,7 +4521,7 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
*USRRetrainingSupport = true;
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
- (st_vars->USRRetrainingLatencyMargin[k] < 0)) {
+ (USRRetrainingLatencyMargin[k] < 0)) {
*USRRetrainingSupport = false;
}
}
@@ -4464,42 +4532,42 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
ActiveDRAMClockChangeLatencyMargin[k] < 0) {
if (PrefetchMode > 0) {
- st_vars->DRAMClockChangeSupportNumber = 2;
- } else if (st_vars->DRAMClockChangeSupportNumber == 0) {
- st_vars->DRAMClockChangeSupportNumber = 1;
- st_vars->LastSurfaceWithoutMargin = k;
- } else if (st_vars->DRAMClockChangeSupportNumber == 1 &&
- !st_vars->SynchronizedSurfaces[st_vars->LastSurfaceWithoutMargin][k]) {
- st_vars->DRAMClockChangeSupportNumber = 2;
+ DRAMClockChangeSupportNumber = 2;
+ } else if (DRAMClockChangeSupportNumber == 0) {
+ DRAMClockChangeSupportNumber = 1;
+ LastSurfaceWithoutMargin = k;
+ } else if (DRAMClockChangeSupportNumber == 1 &&
+ !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
+ DRAMClockChangeSupportNumber = 2;
}
}
}
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
- st_vars->DRAMClockChangeMethod = 1;
+ DRAMClockChangeMethod = 1;
else if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
- st_vars->DRAMClockChangeMethod = 2;
+ DRAMClockChangeMethod = 2;
}
- if (st_vars->DRAMClockChangeMethod == 0) {
- if (st_vars->DRAMClockChangeSupportNumber == 0)
+ if (DRAMClockChangeMethod == 0) {
+ if (DRAMClockChangeSupportNumber == 0)
*DRAMClockChangeSupport = dm_dram_clock_change_vactive;
- else if (st_vars->DRAMClockChangeSupportNumber == 1)
+ else if (DRAMClockChangeSupportNumber == 1)
*DRAMClockChangeSupport = dm_dram_clock_change_vblank;
else
*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
- } else if (st_vars->DRAMClockChangeMethod == 1) {
- if (st_vars->DRAMClockChangeSupportNumber == 0)
+ } else if (DRAMClockChangeMethod == 1) {
+ if (DRAMClockChangeSupportNumber == 0)
*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
- else if (st_vars->DRAMClockChangeSupportNumber == 1)
+ else if (DRAMClockChangeSupportNumber == 1)
*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
else
*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
} else {
- if (st_vars->DRAMClockChangeSupportNumber == 0)
+ if (DRAMClockChangeSupportNumber == 0)
*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
- else if (st_vars->DRAMClockChangeSupportNumber == 1)
+ else if (DRAMClockChangeSupportNumber == 1)
*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
else
*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
@@ -4513,7 +4581,7 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (HTotal[k] / PixelClock[k]), 1);
src_y_pstate_l = dml_ceil(dst_y_pstate * VRatio[k], SwathHeightY[k]);
- src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + st_vars->LBLatencyHidingSourceLinesY[k];
+ src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k];
sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + meta_row_height[k];
#ifdef __DML_VBA_DEBUG__
@@ -4521,7 +4589,7 @@ dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DET
dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
-dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, st_vars->LBLatencyHidingSourceLinesY[k]);
+dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]);
dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate);
dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l);
dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l);
@@ -4532,7 +4600,7 @@ dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l
if (BytePerPixelDETC[k] > 0) {
src_y_pstate_c = dml_ceil(dst_y_pstate * VRatioChroma[k], SwathHeightC[k]);
- src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + st_vars->LBLatencyHidingSourceLinesC[k];
+ src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k];
sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + meta_row_height_chroma[k];
SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
index 37a314ce284b..d293856ba906 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
@@ -30,7 +30,6 @@
#include "os_types.h"
#include "../dc_features.h"
#include "../display_mode_structs.h"
-#include "dml/display_mode_vba.h"
unsigned int dml32_dscceComputeDelay(
unsigned int bpc,
@@ -82,7 +81,6 @@ void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
double *DPPCLKUsingSingleDPP);
void dml32_CalculateSwathAndDETConfiguration(
- struct dml32_CalculateSwathAndDETConfiguration *st_vars,
unsigned int DETSizeOverride[],
enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
unsigned int ConfigReturnBufferSizeInKByte,
@@ -362,7 +360,6 @@ void dml32_CalculateSurfaceSizeInMall(
bool *ExceededMALLSize);
void dml32_CalculateVMRowAndSwath(
- struct dml32_CalculateVMRowAndSwath *st_vars,
unsigned int NumberOfActiveSurfaces,
DmlPipe myPipe[],
unsigned int SurfaceSizeInMALL[],
@@ -715,7 +712,6 @@ double dml32_CalculateExtraLatency(
unsigned int HostVMMaxNonCachedPageTableLevels);
bool dml32_CalculatePrefetchSchedule(
- struct dml32_CalculatePrefetchSchedule *st_vars,
double HostVMInefficiencyFactor,
DmlPipe *myPipe,
unsigned int DSCDelay,
@@ -811,7 +807,6 @@ void dml32_CalculateFlipSchedule(
bool *ImmediateFlipSupportedForPipe);
void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
- struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport *st_vars,
bool USRRetrainingRequiredFinal,
enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
unsigned int PrefetchMode,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
index 84b4b00f29cb..c87091683b5d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
@@ -498,6 +498,13 @@ void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p
dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
}
+ if ((int)(dcn3_21_soc.fclk_change_latency_us * 1000)
+ != dc->bb_overrides.fclk_clock_change_latency_ns
+ && dc->bb_overrides.fclk_clock_change_latency_ns) {
+ dcn3_21_soc.fclk_change_latency_us =
+ dc->bb_overrides.fclk_clock_change_latency_ns / 1000;
+ }
+
if ((int)(dcn3_21_soc.dummy_pstate_latency_us * 1000)
!= dc->bb_overrides.dummy_clock_change_latency_ns
&& dc->bb_overrides.dummy_clock_change_latency_ns) {
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
index 8460aefe7b6d..492aec634b68 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
@@ -182,108 +182,6 @@ void Calculate256BBlockSizes(
unsigned int *BlockWidth256BytesY,
unsigned int *BlockWidth256BytesC);
-struct dml32_CalculateSwathAndDETConfiguration {
- unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
- unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
- unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
- unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
- unsigned int RoundedUpSwathSizeBytesY;
- unsigned int RoundedUpSwathSizeBytesC;
- double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
- double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
- unsigned int TotalActiveDPP;
- bool NoChromaSurfaces;
- unsigned int DETBufferSizeInKByteForSwathCalculation;
-};
-
-struct dml32_CalculateVMRowAndSwath {
- unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
- unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
- unsigned int PDEAndMetaPTEBytesFrameY;
- unsigned int PDEAndMetaPTEBytesFrameC;
- unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
- unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
- unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
- unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
- unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
- unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
- unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
- unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
- unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
- unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
- bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
-};
-
-struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport {
- unsigned int SurfaceWithMinActiveFCLKChangeMargin;
- unsigned int DRAMClockChangeSupportNumber;
- unsigned int LastSurfaceWithoutMargin;
- unsigned int DRAMClockChangeMethod;
- bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin;
- double MinActiveFCLKChangeMargin;
- double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank;
- double ActiveClockChangeLatencyHidingY;
- double ActiveClockChangeLatencyHidingC;
- double ActiveClockChangeLatencyHiding;
- double EffectiveDETBufferSizeY;
- double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
- double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
- double TotalPixelBW;
- bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
- double EffectiveLBLatencyHidingY;
- double EffectiveLBLatencyHidingC;
- double LinesInDETY[DC__NUM_DPP__MAX];
- double LinesInDETC[DC__NUM_DPP__MAX];
- unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
- unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
- double FullDETBufferingTimeY;
- double FullDETBufferingTimeC;
- double WritebackDRAMClockChangeLatencyMargin;
- double WritebackFCLKChangeLatencyMargin;
- double WritebackLatencyHiding;
- bool SameTimingForFCLKChange;
- unsigned int TotalActiveWriteback;
- unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
- unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
-};
-
-struct dml32_CalculatePrefetchSchedule {
- unsigned int DPPCycles, DISPCLKCycles;
- double DSTTotalPixelsAfterScaler;
- double LineTime;
- double dst_y_prefetch_equ;
- double prefetch_bw_oto;
- double Tvm_oto;
- double Tr0_oto;
- double Tvm_oto_lines;
- double Tr0_oto_lines;
- double dst_y_prefetch_oto;
- double TimeForFetchingMetaPTE;
- double TimeForFetchingRowInVBlank;
- double LinesToRequestPrefetchPixelData;
- unsigned int HostVMDynamicLevelsTrips;
- double trip_to_mem;
- double Tvm_trips;
- double Tr0_trips;
- double Tvm_trips_rounded;
- double Tr0_trips_rounded;
- double Lsw_oto;
- double Tpre_rounded;
- double prefetch_bw_equ;
- double Tvm_equ;
- double Tr0_equ;
- double Tdmbf;
- double Tdmec;
- double Tdmsks;
- double prefetch_sw_bytes;
- double bytes_pp;
- double dep_bytes;
- unsigned int max_vratio_pre;
- double min_Lsw;
- double Tsw_est1;
- double Tsw_est3;
-};
-
struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation {
unsigned int dummy_integer_array[2][DC__NUM_DPP__MAX];
double dummy_single_array[2][DC__NUM_DPP__MAX];
@@ -355,10 +253,6 @@ struct dummy_vars {
struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation;
struct dml32_ModeSupportAndSystemConfigurationFull dml32_ModeSupportAndSystemConfigurationFull;
- struct dml32_CalculateSwathAndDETConfiguration dml32_CalculateSwathAndDETConfiguration;
- struct dml32_CalculateVMRowAndSwath dml32_CalculateVMRowAndSwath;
- struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport;
- struct dml32_CalculatePrefetchSchedule dml32_CalculatePrefetchSchedule;
};
struct vba_vars_st {
diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h
index ab06c7fc7452..9f3558c0ef11 100644
--- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h
+++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h
@@ -244,13 +244,15 @@ enum {
#define ASICREV_IS_GC_10_3_7(eChipRev) ((eChipRev >= GC_10_3_7_A0) && (eChipRev < GC_10_3_7_UNKNOWN))
#define AMDGPU_FAMILY_GC_11_0_0 145
-#define AMDGPU_FAMILY_GC_11_0_2 148
+#define AMDGPU_FAMILY_GC_11_0_1 148
#define GC_11_0_0_A0 0x1
#define GC_11_0_2_A0 0x10
+#define GC_11_0_3_A0 0x20
#define GC_11_UNKNOWN 0xFF
#define ASICREV_IS_GC_11_0_0(eChipRev) (eChipRev < GC_11_0_2_A0)
-#define ASICREV_IS_GC_11_0_2(eChipRev) (eChipRev >= GC_11_0_2_A0 && eChipRev < GC_11_UNKNOWN)
+#define ASICREV_IS_GC_11_0_2(eChipRev) (eChipRev >= GC_11_0_2_A0 && eChipRev < GC_11_0_3_A0)
+#define ASICREV_IS_GC_11_0_3(eChipRev) (eChipRev >= GC_11_0_3_A0 && eChipRev < GC_11_UNKNOWN)
/*
* ASIC chip ID
diff --git a/drivers/gpu/drm/amd/display/include/logger_types.h b/drivers/gpu/drm/amd/display/include/logger_types.h
index f093b49c5e6e..3bf08a60c45c 100644
--- a/drivers/gpu/drm/amd/display/include/logger_types.h
+++ b/drivers/gpu/drm/amd/display/include/logger_types.h
@@ -119,13 +119,15 @@ enum dc_log_type {
LOG_HDMI_RETIMER_REDRIVER,
LOG_DSC,
LOG_SMU_MSG,
+ LOG_DC2RESERVED4,
+ LOG_DC2RESERVED5,
LOG_DWB,
LOG_GAMMA_DEBUG,
LOG_MAX_HW_POINTS,
LOG_ALL_TF_CHANNELS,
LOG_SAMPLE_1DLUT,
LOG_DP2,
- LOG_SECTION_TOTAL_COUNT
+ LOG_DC2RESERVED12,
};
#define DC_MIN_LOG_MASK ((1 << LOG_ERROR) | \
diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
index da09ba7589f7..0f39ab9dc5b4 100644
--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
@@ -613,10 +613,6 @@ static void build_vrr_infopacket_data_v1(const struct mod_vrr_params *vrr,
* Note: We should never go above the field rate of the mode timing set.
*/
infopacket->sb[8] = (unsigned char)((vrr->max_refresh_in_uhz + 500000) / 1000000);
-
- /* FreeSync HDR */
- infopacket->sb[9] = 0;
- infopacket->sb[10] = 0;
}
static void build_vrr_infopacket_data_v3(const struct mod_vrr_params *vrr,
@@ -684,10 +680,6 @@ static void build_vrr_infopacket_data_v3(const struct mod_vrr_params *vrr,
/* PB16 : Reserved bits 7:1, FixedRate bit 0 */
infopacket->sb[16] = (vrr->state == VRR_STATE_ACTIVE_FIXED) ? 1 : 0;
-
- //FreeSync HDR
- infopacket->sb[9] = 0;
- infopacket->sb[10] = 0;
}
static void build_vrr_infopacket_fs2_data(enum color_transfer_func app_tf,
@@ -772,8 +764,7 @@ static void build_vrr_infopacket_header_v2(enum signal_type signal,
/* HB2 = [Bits 7:5 = 0] [Bits 4:0 = Length = 0x09] */
infopacket->hb2 = 0x09;
- *payload_size = 0x0A;
-
+ *payload_size = 0x09;
} else if (dc_is_dp_signal(signal)) {
/* HEADER */
@@ -822,9 +813,9 @@ static void build_vrr_infopacket_header_v3(enum signal_type signal,
infopacket->hb1 = version;
/* HB2 = [Bits 7:5 = 0] [Bits 4:0 = Length] */
- *payload_size = 0x10;
- infopacket->hb2 = *payload_size - 1; //-1 for checksum
+ infopacket->hb2 = 0x10;
+ *payload_size = 0x10;
} else if (dc_is_dp_signal(signal)) {
/* HEADER */
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h
index 76f695a1d065..ae2d337158f3 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h
@@ -27,7 +27,7 @@
// *** IMPORTANT ***
// SMU TEAM: Always increment the interface version if
// any structure is changed in this file
-#define PMFW_DRIVER_IF_VERSION 4
+#define PMFW_DRIVER_IF_VERSION 5
typedef struct {
int32_t value;
@@ -197,6 +197,8 @@ typedef struct {
uint16_t SkinTemp;
uint16_t DeviceState;
+ uint16_t CurTemp; //[centi-Celsius]
+ uint16_t spare2;
} SmuMetrics_t;
typedef struct {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index c02e5e576728..6fe2fe92ebd7 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -28,7 +28,7 @@
#define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF
#define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04
#define SMU13_DRIVER_IF_VERSION_ALDE 0x08
-#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x04
+#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x05
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x2C
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2C
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index fa520d79ef67..6db67f082d91 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -4283,6 +4283,7 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = {
.dump_pptable = sienna_cichlid_dump_pptable,
.init_microcode = smu_v11_0_init_microcode,
.load_microcode = smu_v11_0_load_microcode,
+ .fini_microcode = smu_v11_0_fini_microcode,
.init_smc_tables = sienna_cichlid_init_smc_tables,
.fini_smc_tables = smu_v11_0_fini_smc_tables,
.init_power = smu_v11_0_init_power,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index e8fe84f806d1..18ee3b5e64c5 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -212,6 +212,9 @@ int smu_v13_0_init_pptable_microcode(struct smu_context *smu)
if (!adev->scpm_enabled)
return 0;
+ if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 7))
+ return 0;
+
/* override pptable_id from driver parameter */
if (amdgpu_smu_pptable_id >= 0) {
pptable_id = amdgpu_smu_pptable_id;
@@ -219,16 +222,10 @@ int smu_v13_0_init_pptable_microcode(struct smu_context *smu)
} else {
pptable_id = smu->smu_table.boot_values.pp_table_id;
- if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 7) &&
- pptable_id == 3667)
- pptable_id = 36671;
-
- if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 7) &&
- pptable_id == 3688)
- pptable_id = 36881;
/*
* Temporary solution for SMU V13.0.0 with SCPM enabled:
* - use 36831 signed pptable when pp_table_id is 3683
+ * - use 37151 signed pptable when pp_table_id is 3715
* - use 36641 signed pptable when pp_table_id is 3664 or 0
* TODO: drop these when the pptable carried in vbios is ready.
*/
@@ -241,6 +238,9 @@ int smu_v13_0_init_pptable_microcode(struct smu_context *smu)
case 3683:
pptable_id = 36831;
break;
+ case 3715:
+ pptable_id = 37151;
+ break;
default:
dev_err(adev->dev, "Unsupported pptable id %d\n", pptable_id);
return -EINVAL;
@@ -478,7 +478,7 @@ int smu_v13_0_setup_pptable(struct smu_context *smu)
/*
* Temporary solution for SMU V13.0.0 with SCPM disabled:
- * - use 3664 or 3683 on request
+ * - use 3664, 3683 or 3715 on request
* - use 3664 when pptable_id is 0
* TODO: drop these when the pptable carried in vbios is ready.
*/
@@ -489,6 +489,7 @@ int smu_v13_0_setup_pptable(struct smu_context *smu)
break;
case 3664:
case 3683:
+ case 3715:
break;
default:
dev_err(adev->dev, "Unsupported pptable id %d\n", pptable_id);
@@ -2344,8 +2345,8 @@ int smu_v13_0_set_gfx_power_up_by_imu(struct smu_context *smu)
index = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_MSG,
SMU_MSG_EnableGfxImu);
-
- return smu_cmn_send_msg_without_waiting(smu, index, 0);
+ /* Param 1 to tell PMFW to enable GFXOFF feature */
+ return smu_cmn_send_msg_without_waiting(smu, index, 1);
}
int smu_v13_0_od_edit_dpm_table(struct smu_context *smu,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 1bbeceeb9e3c..df4a47acd724 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -1792,7 +1792,9 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
.dump_pptable = smu_v13_0_0_dump_pptable,
.init_microcode = smu_v13_0_init_microcode,
.load_microcode = smu_v13_0_load_microcode,
+ .fini_microcode = smu_v13_0_fini_microcode,
.init_smc_tables = smu_v13_0_0_init_smc_tables,
+ .fini_smc_tables = smu_v13_0_fini_smc_tables,
.init_power = smu_v13_0_init_power,
.fini_power = smu_v13_0_fini_power,
.check_fw_status = smu_v13_0_check_fw_status,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
index 82d3718d8324..97e1d55dcaad 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
@@ -71,7 +71,6 @@ static struct cmn2asic_msg_mapping smu_v13_0_4_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1),
MSG_MAP(GetSmuVersion, PPSMC_MSG_GetPmfwVersion, 1),
MSG_MAP(GetDriverIfVersion, PPSMC_MSG_GetDriverIfVersion, 1),
- MSG_MAP(EnableGfxOff, PPSMC_MSG_EnableGfxOff, 1),
MSG_MAP(AllowGfxOff, PPSMC_MSG_AllowGfxOff, 1),
MSG_MAP(DisallowGfxOff, PPSMC_MSG_DisallowGfxOff, 1),
MSG_MAP(PowerDownVcn, PPSMC_MSG_PowerDownVcn, 1),
@@ -199,6 +198,9 @@ static int smu_v13_0_4_fini_smc_tables(struct smu_context *smu)
kfree(smu_table->watermarks_table);
smu_table->watermarks_table = NULL;
+ kfree(smu_table->gpu_metrics_table);
+ smu_table->gpu_metrics_table = NULL;
+
return 0;
}
@@ -226,18 +228,6 @@ static int smu_v13_0_4_system_features_control(struct smu_context *smu, bool en)
return ret;
}
-static int smu_v13_0_4_post_smu_init(struct smu_context *smu)
-{
- struct amdgpu_device *adev = smu->adev;
- int ret = 0;
-
- /* allow message will be sent after enable message */
- ret = smu_cmn_send_smc_msg(smu, SMU_MSG_EnableGfxOff, NULL);
- if (ret)
- dev_err(adev->dev, "Failed to Enable GfxOff!\n");
- return ret;
-}
-
static ssize_t smu_v13_0_4_get_gpu_metrics(struct smu_context *smu,
void **table)
{
@@ -1026,7 +1016,6 @@ static const struct pptable_funcs smu_v13_0_4_ppt_funcs = {
.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
.set_driver_table_location = smu_v13_0_set_driver_table_location,
.gfx_off_control = smu_v13_0_gfx_off_control,
- .post_init = smu_v13_0_4_post_smu_init,
.mode2_reset = smu_v13_0_4_mode2_reset,
.get_dpm_ultimate_freq = smu_v13_0_4_get_dpm_ultimate_freq,
.od_edit_dpm_table = smu_v13_0_od_edit_dpm_table,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c
index 47360ef5c175..66445964efbd 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c
@@ -176,6 +176,9 @@ static int smu_v13_0_5_fini_smc_tables(struct smu_context *smu)
kfree(smu_table->watermarks_table);
smu_table->watermarks_table = NULL;
+ kfree(smu_table->gpu_metrics_table);
+ smu_table->gpu_metrics_table = NULL;
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 9dd56e73218b..1016d1c216d8 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -1567,6 +1567,16 @@ static int smu_v13_0_7_set_mp1_state(struct smu_context *smu,
return ret;
}
+static bool smu_v13_0_7_is_mode1_reset_supported(struct smu_context *smu)
+{
+ struct amdgpu_device *adev = smu->adev;
+
+ /* SRIOV does not support SMU mode1 reset */
+ if (amdgpu_sriov_vf(adev))
+ return false;
+
+ return true;
+}
static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
.get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask,
.set_default_dpm_table = smu_v13_0_7_set_default_dpm_table,
@@ -1574,7 +1584,9 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
.dump_pptable = smu_v13_0_7_dump_pptable,
.init_microcode = smu_v13_0_init_microcode,
.load_microcode = smu_v13_0_load_microcode,
+ .fini_microcode = smu_v13_0_fini_microcode,
.init_smc_tables = smu_v13_0_7_init_smc_tables,
+ .fini_smc_tables = smu_v13_0_fini_smc_tables,
.init_power = smu_v13_0_init_power,
.fini_power = smu_v13_0_fini_power,
.check_fw_status = smu_v13_0_7_check_fw_status,
@@ -1624,6 +1636,8 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
.baco_set_state = smu_v13_0_baco_set_state,
.baco_enter = smu_v13_0_baco_enter,
.baco_exit = smu_v13_0_baco_exit,
+ .mode1_reset_is_support = smu_v13_0_7_is_mode1_reset_supported,
+ .mode1_reset = smu_v13_0_mode1_reset,
.set_mp1_state = smu_v13_0_7_set_mp1_state,
};
diff --git a/drivers/gpu/drm/bridge/lvds-codec.c b/drivers/gpu/drm/bridge/lvds-codec.c
index 702ea803a743..39e7004de720 100644
--- a/drivers/gpu/drm/bridge/lvds-codec.c
+++ b/drivers/gpu/drm/bridge/lvds-codec.c
@@ -180,7 +180,7 @@ static int lvds_codec_probe(struct platform_device *pdev)
of_node_put(bus_node);
if (ret == -ENODEV) {
dev_warn(dev, "missing 'data-mapping' DT property\n");
- } else if (ret) {
+ } else if (ret < 0) {
dev_err(dev, "invalid 'data-mapping' DT property\n");
return ret;
} else {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index ccec4055fde3..389e9f157ca5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -268,7 +268,7 @@ static void __i915_gem_object_free_mmaps(struct drm_i915_gem_object *obj)
*/
void __i915_gem_object_pages_fini(struct drm_i915_gem_object *obj)
{
- assert_object_held(obj);
+ assert_object_held_shared(obj);
if (!list_empty(&obj->vma.list)) {
struct i915_vma *vma;
@@ -331,15 +331,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
continue;
}
- if (!i915_gem_object_trylock(obj, NULL)) {
- /* busy, toss it back to the pile */
- if (llist_add(&obj->freed, &i915->mm.free_list))
- queue_delayed_work(i915->wq, &i915->mm.free_work, msecs_to_jiffies(10));
- continue;
- }
-
__i915_gem_object_pages_fini(obj);
- i915_gem_object_unlock(obj);
__i915_gem_free_object(obj);
/* But keep the pointer alive for RCU-protected lookups */
@@ -359,7 +351,7 @@ void i915_gem_flush_free_objects(struct drm_i915_private *i915)
static void __i915_gem_free_work(struct work_struct *work)
{
struct drm_i915_private *i915 =
- container_of(work, struct drm_i915_private, mm.free_work.work);
+ container_of(work, struct drm_i915_private, mm.free_work);
i915_gem_flush_free_objects(i915);
}
@@ -391,7 +383,7 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj)
*/
if (llist_add(&obj->freed, &i915->mm.free_list))
- queue_delayed_work(i915->wq, &i915->mm.free_work, 0);
+ queue_work(i915->wq, &i915->mm.free_work);
}
void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
@@ -745,7 +737,7 @@ bool i915_gem_object_needs_ccs_pages(struct drm_i915_gem_object *obj)
void i915_gem_init__objects(struct drm_i915_private *i915)
{
- INIT_DELAYED_WORK(&i915->mm.free_work, __i915_gem_free_work);
+ INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
}
void i915_objects_module_exit(void)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 5cf36a130061..9f6b14ec189a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -335,7 +335,6 @@ struct drm_i915_gem_object {
#define I915_BO_READONLY BIT(7)
#define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! */
#define I915_BO_PROTECTED BIT(9)
-#define I915_BO_WAS_BOUND_BIT 10
/**
* @mem_flags - Mutable placement-related flags
*
@@ -616,6 +615,8 @@ struct drm_i915_gem_object {
* pages were last acquired.
*/
bool dirty:1;
+
+ u32 tlb;
} mm;
struct {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 97c820eee115..8357dbdcab5c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -6,14 +6,15 @@
#include <drm/drm_cache.h>
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_pm.h"
+
#include "i915_drv.h"
#include "i915_gem_object.h"
#include "i915_scatterlist.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
-#include "gt/intel_gt.h"
-
void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
struct sg_table *pages,
unsigned int sg_page_sizes)
@@ -190,6 +191,18 @@ static void unmap_object(struct drm_i915_gem_object *obj, void *ptr)
vunmap(ptr);
}
+static void flush_tlb_invalidate(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct intel_gt *gt = to_gt(i915);
+
+ if (!obj->mm.tlb)
+ return;
+
+ intel_gt_invalidate_tlb(gt, obj->mm.tlb);
+ obj->mm.tlb = 0;
+}
+
struct sg_table *
__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
{
@@ -215,13 +228,7 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
__i915_gem_object_reset_page_iter(obj);
obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
- if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
- intel_wakeref_t wakeref;
-
- with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref)
- intel_gt_invalidate_tlbs(to_gt(i915));
- }
+ flush_tlb_invalidate(obj);
return pages;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 68c2b0d8f187..f435e06125aa 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -11,7 +11,9 @@
#include "pxp/intel_pxp.h"
#include "i915_drv.h"
+#include "i915_perf_oa_regs.h"
#include "intel_context.h"
+#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_ggtt_gmch.h"
#include "intel_gt.h"
@@ -36,8 +38,6 @@ static void __intel_gt_init_early(struct intel_gt *gt)
{
spin_lock_init(&gt->irq_lock);
- mutex_init(&gt->tlb_invalidate_lock);
-
INIT_LIST_HEAD(&gt->closed_vma);
spin_lock_init(&gt->closed_lock);
@@ -48,6 +48,8 @@ static void __intel_gt_init_early(struct intel_gt *gt)
intel_gt_init_reset(gt);
intel_gt_init_requests(gt);
intel_gt_init_timelines(gt);
+ mutex_init(&gt->tlb.invalidate_lock);
+ seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
intel_gt_pm_init_early(gt);
intel_uc_init_early(&gt->uc);
@@ -768,6 +770,7 @@ void intel_gt_driver_late_release_all(struct drm_i915_private *i915)
intel_gt_fini_requests(gt);
intel_gt_fini_reset(gt);
intel_gt_fini_timelines(gt);
+ mutex_destroy(&gt->tlb.invalidate_lock);
intel_engines_free(gt);
}
}
@@ -906,7 +909,7 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
return rb;
}
-void intel_gt_invalidate_tlbs(struct intel_gt *gt)
+static void mmio_invalidate_full(struct intel_gt *gt)
{
static const i915_reg_t gen8_regs[] = {
[RENDER_CLASS] = GEN8_RTCR,
@@ -924,13 +927,11 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
struct drm_i915_private *i915 = gt->i915;
struct intel_uncore *uncore = gt->uncore;
struct intel_engine_cs *engine;
+ intel_engine_mask_t awake, tmp;
enum intel_engine_id id;
const i915_reg_t *regs;
unsigned int num = 0;
- if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
- return;
-
if (GRAPHICS_VER(i915) == 12) {
regs = gen12_regs;
num = ARRAY_SIZE(gen12_regs);
@@ -945,28 +946,41 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
"Platform does not implement TLB invalidation!"))
return;
- GEM_TRACE("\n");
-
- assert_rpm_wakelock_held(&i915->runtime_pm);
-
- mutex_lock(&gt->tlb_invalidate_lock);
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
+ awake = 0;
for_each_engine(engine, gt, id) {
struct reg_and_bit rb;
+ if (!intel_engine_pm_is_awake(engine))
+ continue;
+
rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
if (!i915_mmio_reg_offset(rb.reg))
continue;
intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+ awake |= engine->mask;
}
+ GT_TRACE(gt, "invalidated engines %08x\n", awake);
+
+ /* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
+ if (awake &&
+ (IS_TIGERLAKE(i915) ||
+ IS_DG1(i915) ||
+ IS_ROCKETLAKE(i915) ||
+ IS_ALDERLAKE_S(i915) ||
+ IS_ALDERLAKE_P(i915)))
+ intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);
+
spin_unlock_irq(&uncore->lock);
- for_each_engine(engine, gt, id) {
+ for_each_engine_masked(engine, gt, awake, tmp) {
+ struct reg_and_bit rb;
+
/*
* HW architecture suggest typical invalidation time at 40us,
* with pessimistic cases up to 100us and a recommendation to
@@ -974,12 +988,8 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
*/
const unsigned int timeout_us = 100;
const unsigned int timeout_ms = 4;
- struct reg_and_bit rb;
rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
- if (!i915_mmio_reg_offset(rb.reg))
- continue;
-
if (__intel_wait_for_register_fw(uncore,
rb.reg, rb.bit, 0,
timeout_us, timeout_ms,
@@ -996,5 +1006,38 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
* transitions.
*/
intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
- mutex_unlock(&gt->tlb_invalidate_lock);
+}
+
+static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
+{
+ u32 cur = intel_gt_tlb_seqno(gt);
+
+ /* Only skip if a *full* TLB invalidate barrier has passed */
+ return (s32)(cur - ALIGN(seqno, 2)) > 0;
+}
+
+void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno)
+{
+ intel_wakeref_t wakeref;
+
+ if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
+ return;
+
+ if (intel_gt_is_wedged(gt))
+ return;
+
+ if (tlb_seqno_passed(gt, seqno))
+ return;
+
+ with_intel_gt_pm_if_awake(gt, wakeref) {
+ mutex_lock(&gt->tlb.invalidate_lock);
+ if (tlb_seqno_passed(gt, seqno))
+ goto unlock;
+
+ mmio_invalidate_full(gt);
+
+ write_seqcount_invalidate(&gt->tlb.seqno);
+unlock:
+ mutex_unlock(&gt->tlb.invalidate_lock);
+ }
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 82d6f248d876..40b06adf509a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -101,6 +101,16 @@ void intel_gt_info_print(const struct intel_gt_info *info,
void intel_gt_watchdog_work(struct work_struct *work);
-void intel_gt_invalidate_tlbs(struct intel_gt *gt);
+static inline u32 intel_gt_tlb_seqno(const struct intel_gt *gt)
+{
+ return seqprop_sequence(&gt->tlb.seqno);
+}
+
+static inline u32 intel_gt_next_invalidate_tlb_full(const struct intel_gt *gt)
+{
+ return intel_gt_tlb_seqno(gt) | 1;
+}
+
+void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno);
#endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index bc898df7a48c..a334787a4939 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -55,6 +55,9 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt)
for (tmp = 1, intel_gt_pm_get(gt); tmp; \
intel_gt_pm_put(gt), tmp = 0)
+#define with_intel_gt_pm_if_awake(gt, wf) \
+ for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt), wf = 0)
+
static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
{
return intel_wakeref_wait_for_idle(&gt->wakeref);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index df708802889d..3804a583382b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -11,6 +11,7 @@
#include <linux/llist.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
+#include <linux/seqlock.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/workqueue.h>
@@ -83,7 +84,22 @@ struct intel_gt {
struct intel_uc uc;
struct intel_gsc gsc;
- struct mutex tlb_invalidate_lock;
+ struct {
+ /* Serialize global tlb invalidations */
+ struct mutex invalidate_lock;
+
+ /*
+ * Batch TLB invalidations
+ *
+ * After unbinding the PTE, we need to ensure the TLB
+ * are invalidated prior to releasing the physical pages.
+ * But we only need one such invalidation for all unbinds,
+ * so we track how many TLB invalidations have been
+ * performed since unbind the PTE and only emit an extra
+ * invalidate if no full barrier has been passed.
+ */
+ seqcount_mutex_t seqno;
+ } tlb;
struct i915_wa_list wa_list;
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
index 2c35324b5f68..2b10b96b17b5 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -708,7 +708,7 @@ intel_context_migrate_copy(struct intel_context *ce,
u8 src_access, dst_access;
struct i915_request *rq;
int src_sz, dst_sz;
- bool ccs_is_src;
+ bool ccs_is_src, overwrite_ccs;
int err;
GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
@@ -749,6 +749,8 @@ intel_context_migrate_copy(struct intel_context *ce,
get_ccs_sg_sgt(&it_ccs, bytes_to_cpy);
}
+ overwrite_ccs = HAS_FLAT_CCS(i915) && !ccs_bytes_to_cpy && dst_is_lmem;
+
src_offset = 0;
dst_offset = CHUNK_SZ;
if (HAS_64K_PAGES(ce->engine->i915)) {
@@ -852,6 +854,25 @@ intel_context_migrate_copy(struct intel_context *ce,
if (err)
goto out_rq;
ccs_bytes_to_cpy -= ccs_sz;
+ } else if (overwrite_ccs) {
+ err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+ if (err)
+ goto out_rq;
+
+ /*
+ * While we can't always restore/manage the CCS state,
+ * we still need to ensure we don't leak the CCS state
+ * from the previous user, so make sure we overwrite it
+ * with something.
+ */
+ err = emit_copy_ccs(rq, dst_offset, INDIRECT_ACCESS,
+ dst_offset, DIRECT_ACCESS, len);
+ if (err)
+ goto out_rq;
+
+ err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+ if (err)
+ goto out_rq;
}
/* Arbitration is re-enabled between requests. */
diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
index d8b94d638559..6ee8d1127016 100644
--- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
@@ -206,8 +206,12 @@ void ppgtt_bind_vma(struct i915_address_space *vm,
void ppgtt_unbind_vma(struct i915_address_space *vm,
struct i915_vma_resource *vma_res)
{
- if (vma_res->allocated)
- vm->clear_range(vm, vma_res->start, vma_res->vma_size);
+ if (!vma_res->allocated)
+ return;
+
+ vm->clear_range(vm, vma_res->start, vma_res->vma_size);
+ if (vma_res->tlb)
+ vma_invalidate_tlb(vm, vma_res->tlb);
}
static unsigned long pd_count(u64 size, int shift)
diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
index 6e90032e12e9..aa6aed837194 100644
--- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c
+++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
@@ -15,6 +15,7 @@
#include "gt/intel_gt_mcr.h"
#include "gt/intel_gt_regs.h"
+#ifdef CONFIG_64BIT
static void _release_bars(struct pci_dev *pdev)
{
int resno;
@@ -111,6 +112,9 @@ static void i915_resize_lmem_bar(struct drm_i915_private *i915, resource_size_t
pci_assign_unassigned_bus_resources(pdev->bus);
pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd);
}
+#else
+static void i915_resize_lmem_bar(struct drm_i915_private *i915, resource_size_t lmem_size) {}
+#endif
static int
region_lmem_release(struct intel_memory_region *mem)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d25647be25d1..086bbe8945d6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -247,7 +247,7 @@ struct i915_gem_mm {
* List of objects which are pending destruction.
*/
struct llist_head free_list;
- struct delayed_work free_work;
+ struct work_struct free_work;
/**
* Count of objects pending destructions. Used to skip needlessly
* waiting on an RCU barrier if no objects are waiting to be freed.
@@ -1378,7 +1378,7 @@ static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915)
* armed the work again.
*/
while (atomic_read(&i915->mm.free_count)) {
- flush_delayed_work(&i915->mm.free_work);
+ flush_work(&i915->mm.free_work);
flush_delayed_work(&i915->bdev.wq);
rcu_barrier();
}
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index ef3b04c7e153..260371716490 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -538,8 +538,6 @@ int i915_vma_bind(struct i915_vma *vma,
bind_flags);
}
- set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
-
atomic_or(bind_flags, &vma->flags);
return 0;
}
@@ -1310,6 +1308,19 @@ err_unpin:
return err;
}
+void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb)
+{
+ /*
+ * Before we release the pages that were bound by this vma, we
+ * must invalidate all the TLBs that may still have a reference
+ * back to our physical address. It only needs to be done once,
+ * so after updating the PTE to point away from the pages, record
+ * the most recent TLB invalidation seqno, and if we have not yet
+ * flushed the TLBs upon release, perform a full invalidation.
+ */
+ WRITE_ONCE(*tlb, intel_gt_next_invalidate_tlb_full(vm->gt));
+}
+
static void __vma_put_pages(struct i915_vma *vma, unsigned int count)
{
/* We allocate under vma_get_pages, so beware the shrinker */
@@ -1941,7 +1952,12 @@ struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async)
vma->vm->skip_pte_rewrite;
trace_i915_vma_unbind(vma);
- unbind_fence = i915_vma_resource_unbind(vma_res);
+ if (async)
+ unbind_fence = i915_vma_resource_unbind(vma_res,
+ &vma->obj->mm.tlb);
+ else
+ unbind_fence = i915_vma_resource_unbind(vma_res, NULL);
+
vma->resource = NULL;
atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE),
@@ -1949,10 +1965,13 @@ struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async)
i915_vma_detach(vma);
- if (!async && unbind_fence) {
- dma_fence_wait(unbind_fence, false);
- dma_fence_put(unbind_fence);
- unbind_fence = NULL;
+ if (!async) {
+ if (unbind_fence) {
+ dma_fence_wait(unbind_fence, false);
+ dma_fence_put(unbind_fence);
+ unbind_fence = NULL;
+ }
+ vma_invalidate_tlb(vma->vm, &vma->obj->mm.tlb);
}
/*
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 88ca0bd9c900..33a58f605d75 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -213,6 +213,7 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
u64 size, u64 alignment, u64 flags);
void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
void i915_vma_revoke_mmap(struct i915_vma *vma);
+void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb);
struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async);
int __i915_vma_unbind(struct i915_vma *vma);
int __must_check i915_vma_unbind(struct i915_vma *vma);
diff --git a/drivers/gpu/drm/i915/i915_vma_resource.c b/drivers/gpu/drm/i915/i915_vma_resource.c
index 27c55027387a..5a67995ea5fe 100644
--- a/drivers/gpu/drm/i915/i915_vma_resource.c
+++ b/drivers/gpu/drm/i915/i915_vma_resource.c
@@ -223,10 +223,13 @@ i915_vma_resource_fence_notify(struct i915_sw_fence *fence,
* Return: A refcounted pointer to a dma-fence that signals when unbinding is
* complete.
*/
-struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res)
+struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res,
+ u32 *tlb)
{
struct i915_address_space *vm = vma_res->vm;
+ vma_res->tlb = tlb;
+
/* Reference for the sw fence */
i915_vma_resource_get(vma_res);
diff --git a/drivers/gpu/drm/i915/i915_vma_resource.h b/drivers/gpu/drm/i915/i915_vma_resource.h
index 5d8427caa2ba..06923d1816e7 100644
--- a/drivers/gpu/drm/i915/i915_vma_resource.h
+++ b/drivers/gpu/drm/i915/i915_vma_resource.h
@@ -67,6 +67,7 @@ struct i915_page_sizes {
* taken when the unbind is scheduled.
* @skip_pte_rewrite: During ggtt suspend and vm takedown pte rewriting
* needs to be skipped for unbind.
+ * @tlb: pointer for obj->mm.tlb, if async unbind. Otherwise, NULL
*
* The lifetime of a struct i915_vma_resource is from a binding request to
* the actual possible asynchronous unbind has completed.
@@ -119,6 +120,8 @@ struct i915_vma_resource {
bool immediate_unbind:1;
bool needs_wakeref:1;
bool skip_pte_rewrite:1;
+
+ u32 *tlb;
};
bool i915_vma_resource_hold(struct i915_vma_resource *vma_res,
@@ -131,7 +134,8 @@ struct i915_vma_resource *i915_vma_resource_alloc(void);
void i915_vma_resource_free(struct i915_vma_resource *vma_res);
-struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res);
+struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res,
+ u32 *tlb);
void __i915_vma_resource_init(struct i915_vma_resource *vma_res);
diff --git a/drivers/gpu/drm/imx/dcss/dcss-kms.c b/drivers/gpu/drm/imx/dcss/dcss-kms.c
index 9b84df34a6a1..8cf3352d8858 100644
--- a/drivers/gpu/drm/imx/dcss/dcss-kms.c
+++ b/drivers/gpu/drm/imx/dcss/dcss-kms.c
@@ -142,8 +142,6 @@ struct dcss_kms_dev *dcss_kms_attach(struct dcss_dev *dcss)
drm_kms_helper_poll_init(drm);
- drm_bridge_connector_enable_hpd(kms->connector);
-
ret = drm_dev_register(drm, 0);
if (ret)
goto cleanup_crtc;
diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c
index 1b70938cfd2c..bd4ca11d3ff5 100644
--- a/drivers/gpu/drm/meson/meson_drv.c
+++ b/drivers/gpu/drm/meson/meson_drv.c
@@ -115,8 +115,11 @@ static bool meson_vpu_has_available_connectors(struct device *dev)
for_each_endpoint_of_node(dev->of_node, ep) {
/* If the endpoint node exists, consider it enabled */
remote = of_graph_get_remote_port(ep);
- if (remote)
+ if (remote) {
+ of_node_put(remote);
+ of_node_put(ep);
return true;
+ }
}
return false;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index 568182e68dd7..d8cf71fb0512 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -2605,6 +2605,27 @@ nv172_chipset = {
};
static const struct nvkm_device_chip
+nv173_chipset = {
+ .name = "GA103",
+ .bar = { 0x00000001, tu102_bar_new },
+ .bios = { 0x00000001, nvkm_bios_new },
+ .devinit = { 0x00000001, ga100_devinit_new },
+ .fb = { 0x00000001, ga102_fb_new },
+ .gpio = { 0x00000001, ga102_gpio_new },
+ .i2c = { 0x00000001, gm200_i2c_new },
+ .imem = { 0x00000001, nv50_instmem_new },
+ .mc = { 0x00000001, ga100_mc_new },
+ .mmu = { 0x00000001, tu102_mmu_new },
+ .pci = { 0x00000001, gp100_pci_new },
+ .privring = { 0x00000001, gm200_privring_new },
+ .timer = { 0x00000001, gk20a_timer_new },
+ .top = { 0x00000001, ga100_top_new },
+ .disp = { 0x00000001, ga102_disp_new },
+ .dma = { 0x00000001, gv100_dma_new },
+ .fifo = { 0x00000001, ga102_fifo_new },
+};
+
+static const struct nvkm_device_chip
nv174_chipset = {
.name = "GA104",
.bar = { 0x00000001, tu102_bar_new },
@@ -3067,6 +3088,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
case 0x167: device->chip = &nv167_chipset; break;
case 0x168: device->chip = &nv168_chipset; break;
case 0x172: device->chip = &nv172_chipset; break;
+ case 0x173: device->chip = &nv173_chipset; break;
case 0x174: device->chip = &nv174_chipset; break;
case 0x176: device->chip = &nv176_chipset; break;
case 0x177: device->chip = &nv177_chipset; break;
diff --git a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c
index b4dfa166eccd..34234a144e87 100644
--- a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c
+++ b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c
@@ -531,7 +531,7 @@ static void sun6i_dsi_setup_timings(struct sun6i_dsi *dsi,
struct drm_display_mode *mode)
{
struct mipi_dsi_device *device = dsi->device;
- unsigned int Bpp = mipi_dsi_pixel_format_to_bpp(device->format) / 8;
+ int Bpp = mipi_dsi_pixel_format_to_bpp(device->format) / 8;
u16 hbp = 0, hfp = 0, hsa = 0, hblk = 0, vblk = 0;
u32 basic_ctl = 0;
size_t bytes;
@@ -555,7 +555,7 @@ static void sun6i_dsi_setup_timings(struct sun6i_dsi *dsi,
* (4 bytes). Its minimal size is therefore 10 bytes
*/
#define HSA_PACKET_OVERHEAD 10
- hsa = max((unsigned int)HSA_PACKET_OVERHEAD,
+ hsa = max(HSA_PACKET_OVERHEAD,
(mode->hsync_end - mode->hsync_start) * Bpp - HSA_PACKET_OVERHEAD);
/*
@@ -564,7 +564,7 @@ static void sun6i_dsi_setup_timings(struct sun6i_dsi *dsi,
* therefore 6 bytes
*/
#define HBP_PACKET_OVERHEAD 6
- hbp = max((unsigned int)HBP_PACKET_OVERHEAD,
+ hbp = max(HBP_PACKET_OVERHEAD,
(mode->htotal - mode->hsync_end) * Bpp - HBP_PACKET_OVERHEAD);
/*
@@ -574,7 +574,7 @@ static void sun6i_dsi_setup_timings(struct sun6i_dsi *dsi,
* 16 bytes
*/
#define HFP_PACKET_OVERHEAD 16
- hfp = max((unsigned int)HFP_PACKET_OVERHEAD,
+ hfp = max(HFP_PACKET_OVERHEAD,
(mode->hsync_start - mode->hdisplay) * Bpp - HFP_PACKET_OVERHEAD);
/*
@@ -583,7 +583,7 @@ static void sun6i_dsi_setup_timings(struct sun6i_dsi *dsi,
* bytes). Its minimal size is therefore 10 bytes.
*/
#define HBLK_PACKET_OVERHEAD 10
- hblk = max((unsigned int)HBLK_PACKET_OVERHEAD,
+ hblk = max(HBLK_PACKET_OVERHEAD,
(mode->htotal - (mode->hsync_end - mode->hsync_start)) * Bpp -
HBLK_PACKET_OVERHEAD);
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 0e210df65c30..97184c333526 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -912,7 +912,7 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
/*
* We might need to add a TTM.
*/
- if (bo->resource->mem_type == TTM_PL_SYSTEM) {
+ if (!bo->resource || bo->resource->mem_type == TTM_PL_SYSTEM) {
ret = ttm_tt_create(bo, true);
if (ret)
return ret;