diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
70 files changed, 2218 insertions, 1989 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 8e22882b66aa..006d49d6b4af 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -47,7 +47,7 @@ subdir-ccflags-$(CONFIG_DRM_AMDGPU_WERROR) += -Werror amdgpu-y := amdgpu_drv.o # add KMS driver -amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \ +amdgpu-y += amdgpu_device.o amdgpu_reg_access.o amdgpu_doorbell_mgr.o amdgpu_kms.o \ amdgpu_atombios.o atombios_crtc.o amdgpu_connectors.o \ atom.o amdgpu_fence.o amdgpu_ttm.o amdgpu_object.o amdgpu_gart.o \ amdgpu_encoders.o amdgpu_display.o amdgpu_i2c.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 447e734c362b..59731014a55a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -81,6 +81,7 @@ #include "amdgpu_sdma.h" #include "amdgpu_lsdma.h" #include "amdgpu_nbio.h" +#include "amdgpu_reg_access.h" #include "amdgpu_hdp.h" #include "amdgpu_dm.h" #include "amdgpu_virt.h" @@ -217,9 +218,7 @@ extern struct amdgpu_watchdog_timer amdgpu_watchdog_timer; extern int amdgpu_async_gfx_ring; extern int amdgpu_mcbp; extern int amdgpu_discovery; -extern int amdgpu_mes; extern int amdgpu_mes_log_enable; -extern int amdgpu_mes_kiq; extern int amdgpu_uni_mes; extern int amdgpu_noretry; extern int amdgpu_force_asic_type; @@ -680,21 +679,6 @@ void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device); /* * Core structure, functions and helpers. 
*/ -typedef uint32_t (*amdgpu_rreg_t)(struct amdgpu_device*, uint32_t); -typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t); - -typedef uint32_t (*amdgpu_rreg_ext_t)(struct amdgpu_device*, uint64_t); -typedef void (*amdgpu_wreg_ext_t)(struct amdgpu_device*, uint64_t, uint32_t); - -typedef uint64_t (*amdgpu_rreg64_t)(struct amdgpu_device*, uint32_t); -typedef void (*amdgpu_wreg64_t)(struct amdgpu_device*, uint32_t, uint64_t); - -typedef uint64_t (*amdgpu_rreg64_ext_t)(struct amdgpu_device*, uint64_t); -typedef void (*amdgpu_wreg64_ext_t)(struct amdgpu_device*, uint64_t, uint64_t); - -typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t); -typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t); - struct amdgpu_mmio_remap { u32 reg_offset; resource_size_t bus_addr; @@ -791,6 +775,12 @@ struct amd_powerplay { (rid == 0x01) || \ (rid == 0x10)))) +enum amdgpu_mqd_update_flag { + AMDGPU_UPDATE_FLAG_DBG_WA_ENABLE = 1, + AMDGPU_UPDATE_FLAG_DBG_WA_DISABLE = 2, + AMDGPU_UPDATE_FLAG_IS_GWS = 4, /* quirk for gfx9 IP */ +}; + struct amdgpu_mqd_prop { uint64_t mqd_gpu_addr; uint64_t hqd_base_gpu_addr; @@ -811,6 +801,10 @@ struct amdgpu_mqd_prop { uint64_t fence_address; bool tmz_queue; bool kernel_queue; + uint32_t *cu_mask; + uint32_t cu_mask_count; + uint32_t cu_flags; + bool is_user_cu_masked; }; struct amdgpu_mqd { @@ -906,42 +900,8 @@ struct amdgpu_device { /* protects concurrent MM_INDEX/DATA based register access */ spinlock_t mmio_idx_lock; struct amdgpu_mmio_remap rmmio_remap; - /* protects concurrent SMC based register access */ - spinlock_t smc_idx_lock; - amdgpu_rreg_t smc_rreg; - amdgpu_wreg_t smc_wreg; - /* protects concurrent PCIE register access */ - spinlock_t pcie_idx_lock; - amdgpu_rreg_t pcie_rreg; - amdgpu_wreg_t pcie_wreg; - amdgpu_rreg_t pciep_rreg; - amdgpu_wreg_t pciep_wreg; - amdgpu_rreg_ext_t pcie_rreg_ext; - amdgpu_wreg_ext_t pcie_wreg_ext; - amdgpu_rreg64_t 
pcie_rreg64; - amdgpu_wreg64_t pcie_wreg64; - amdgpu_rreg64_ext_t pcie_rreg64_ext; - amdgpu_wreg64_ext_t pcie_wreg64_ext; - /* protects concurrent UVD register access */ - spinlock_t uvd_ctx_idx_lock; - amdgpu_rreg_t uvd_ctx_rreg; - amdgpu_wreg_t uvd_ctx_wreg; - /* protects concurrent DIDT register access */ - spinlock_t didt_idx_lock; - amdgpu_rreg_t didt_rreg; - amdgpu_wreg_t didt_wreg; - /* protects concurrent gc_cac register access */ - spinlock_t gc_cac_idx_lock; - amdgpu_rreg_t gc_cac_rreg; - amdgpu_wreg_t gc_cac_wreg; - /* protects concurrent se_cac register access */ - spinlock_t se_cac_idx_lock; - amdgpu_rreg_t se_cac_rreg; - amdgpu_wreg_t se_cac_wreg; - /* protects concurrent ENDPOINT (audio) register access */ - spinlock_t audio_endpt_idx_lock; - amdgpu_block_rreg_t audio_endpt_rreg; - amdgpu_block_wreg_t audio_endpt_wreg; + /* Indirect register access blocks */ + struct amdgpu_reg_access reg; struct amdgpu_doorbell doorbell; /* clock/pll info */ @@ -1297,42 +1257,6 @@ size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos, void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, void *buf, size_t size, bool write); -uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev, - uint32_t inst, uint32_t reg_addr, char reg_name[], - uint32_t expected_value, uint32_t mask); -uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, - uint32_t reg, uint32_t acc_flags); -u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev, - u64 reg_addr); -uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev, - uint32_t reg, uint32_t acc_flags, - uint32_t xcc_id); -void amdgpu_device_wreg(struct amdgpu_device *adev, - uint32_t reg, uint32_t v, - uint32_t acc_flags); -void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev, - u64 reg_addr, u32 reg_data); -void amdgpu_device_xcc_wreg(struct amdgpu_device *adev, - uint32_t reg, uint32_t v, - uint32_t acc_flags, - uint32_t xcc_id); -void amdgpu_mm_wreg_mmio_rlc(struct 
amdgpu_device *adev, - uint32_t reg, uint32_t v, uint32_t xcc_id); -void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value); -uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset); - -u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev, - u32 reg_addr); -u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev, - u32 reg_addr); -u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev, - u64 reg_addr); -void amdgpu_device_indirect_wreg(struct amdgpu_device *adev, - u32 reg_addr, u32 reg_data); -void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev, - u32 reg_addr, u64 reg_data); -void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev, - u64 reg_addr, u64 reg_data); u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev); bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev, enum amd_asic_type asic_type); @@ -1372,28 +1296,30 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define RREG32_XCC(reg, inst) amdgpu_device_xcc_rreg(adev, (reg), 0, inst) #define WREG32_XCC(reg, v, inst) amdgpu_device_xcc_wreg(adev, (reg), (v), 0, inst) -#define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg)) -#define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v)) -#define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg)) -#define WREG32_PCIE_PORT(reg, v) adev->pciep_wreg(adev, (reg), (v)) -#define RREG32_PCIE_EXT(reg) adev->pcie_rreg_ext(adev, (reg)) -#define WREG32_PCIE_EXT(reg, v) adev->pcie_wreg_ext(adev, (reg), (v)) -#define RREG64_PCIE(reg) adev->pcie_rreg64(adev, (reg)) -#define WREG64_PCIE(reg, v) adev->pcie_wreg64(adev, (reg), (v)) -#define RREG64_PCIE_EXT(reg) adev->pcie_rreg64_ext(adev, (reg)) -#define WREG64_PCIE_EXT(reg, v) adev->pcie_wreg64_ext(adev, (reg), (v)) -#define RREG32_SMC(reg) adev->smc_rreg(adev, (reg)) -#define WREG32_SMC(reg, v) adev->smc_wreg(adev, (reg), (v)) -#define RREG32_UVD_CTX(reg) 
adev->uvd_ctx_rreg(adev, (reg)) -#define WREG32_UVD_CTX(reg, v) adev->uvd_ctx_wreg(adev, (reg), (v)) -#define RREG32_DIDT(reg) adev->didt_rreg(adev, (reg)) -#define WREG32_DIDT(reg, v) adev->didt_wreg(adev, (reg), (v)) -#define RREG32_GC_CAC(reg) adev->gc_cac_rreg(adev, (reg)) -#define WREG32_GC_CAC(reg, v) adev->gc_cac_wreg(adev, (reg), (v)) -#define RREG32_SE_CAC(reg) adev->se_cac_rreg(adev, (reg)) -#define WREG32_SE_CAC(reg, v) adev->se_cac_wreg(adev, (reg), (v)) -#define RREG32_AUDIO_ENDPT(block, reg) adev->audio_endpt_rreg(adev, (block), (reg)) -#define WREG32_AUDIO_ENDPT(block, reg, v) adev->audio_endpt_wreg(adev, (block), (reg), (v)) +#define RREG32_PCIE(reg) amdgpu_reg_pcie_rd32(adev, (reg)) +#define WREG32_PCIE(reg, v) amdgpu_reg_pcie_wr32(adev, (reg), (v)) +#define RREG32_PCIE_PORT(reg) amdgpu_reg_pciep_rd32(adev, (reg)) +#define WREG32_PCIE_PORT(reg, v) amdgpu_reg_pciep_wr32(adev, (reg), (v)) +#define RREG32_PCIE_EXT(reg) amdgpu_reg_pcie_ext_rd32(adev, (reg)) +#define WREG32_PCIE_EXT(reg, v) amdgpu_reg_pcie_ext_wr32(adev, (reg), (v)) +#define RREG64_PCIE(reg) amdgpu_reg_pcie_rd64(adev, (reg)) +#define WREG64_PCIE(reg, v) amdgpu_reg_pcie_wr64(adev, (reg), (v)) +#define RREG64_PCIE_EXT(reg) amdgpu_reg_pcie_ext_rd64(adev, (reg)) +#define WREG64_PCIE_EXT(reg, v) amdgpu_reg_pcie_ext_wr64(adev, (reg), (v)) +#define RREG32_SMC(reg) amdgpu_reg_smc_rd32(adev, (reg)) +#define WREG32_SMC(reg, v) amdgpu_reg_smc_wr32(adev, (reg), (v)) +#define RREG32_UVD_CTX(reg) amdgpu_reg_uvd_ctx_rd32(adev, (reg)) +#define WREG32_UVD_CTX(reg, v) amdgpu_reg_uvd_ctx_wr32(adev, (reg), (v)) +#define RREG32_DIDT(reg) amdgpu_reg_didt_rd32(adev, (reg)) +#define WREG32_DIDT(reg, v) amdgpu_reg_didt_wr32(adev, (reg), (v)) +#define RREG32_GC_CAC(reg) amdgpu_reg_gc_cac_rd32(adev, (reg)) +#define WREG32_GC_CAC(reg, v) amdgpu_reg_gc_cac_wr32(adev, (reg), (v)) +#define RREG32_SE_CAC(reg) amdgpu_reg_se_cac_rd32(adev, (reg)) +#define WREG32_SE_CAC(reg, v) amdgpu_reg_se_cac_wr32(adev, (reg), (v)) 
+#define RREG32_AUDIO_ENDPT(block, reg) \ + amdgpu_reg_audio_endpt_rd32(adev, (block), (reg)) +#define WREG32_AUDIO_ENDPT(block, reg, v) \ + amdgpu_reg_audio_endpt_wr32(adev, (block), (reg), (v)) #define WREG32_P(reg, val, mask) \ do { \ uint32_t tmp_ = RREG32(reg); \ @@ -1523,10 +1449,6 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring); void amdgpu_device_halt(struct amdgpu_device *adev); -u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev, - u32 reg); -void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev, - u32 reg, u32 v); struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev); struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev, struct dma_fence *gang); @@ -1536,6 +1458,8 @@ struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev, bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev); ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring); ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset); +void amdgpu_sdma_set_vm_pte_scheds(struct amdgpu_device *adev, + const struct amdgpu_vm_pte_funcs *vm_pte_funcs); /* atpx handler */ #if defined(CONFIG_VGA_SWITCHEROO) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 06c1913d5a3f..29b400cdd6d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1439,7 +1439,10 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, *process_info = info; } - vm->process_info = *process_info; + if (cmpxchg(&vm->process_info, NULL, *process_info) != NULL) { + ret = -EINVAL; + goto already_acquired; + } /* Validate page directory and attach eviction fence */ ret = amdgpu_bo_reserve(vm->root.bo, true); @@ -1479,6 +1482,7 @@ validate_pd_fail: amdgpu_bo_unreserve(vm->root.bo); reserve_pd_fail: vm->process_info = NULL; +already_acquired: if (info) 
{ dma_fence_put(&info->eviction_fence->base); *process_info = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index 1cbba9803d31..6f3c68cde75e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -35,6 +35,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, struct dma_fence *fence; int i, r; + mutex_lock(&adev->mman.default_entity.lock); stime = ktime_get(); for (i = 0; i < n; i++) { r = amdgpu_copy_buffer(adev, &adev->mman.default_entity, @@ -49,6 +50,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, } exit_do_move: + mutex_unlock(&adev->mman.default_entity.lock); etime = ktime_get(); *time_ms = ktime_ms_delta(etime, stime); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 24e4b4fc9156..70ea9b0831a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -84,13 +84,6 @@ static int amdgpu_cs_job_idx(struct amdgpu_cs_parser *p, if (r) return r; - /* - * Abort if there is no run queue associated with this entity. - * Possibly because of disabled HW IP. - */ - if (entity->rq == NULL) - return -EINVAL; - /* Check if we can add this IB to some existing job */ for (i = 0; i < p->gang_size; ++i) if (p->entities[i] == entity) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index f2c038c91c70..7af86a32c0c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -231,13 +231,19 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip, } else { struct amdgpu_fpriv *fpriv; - fpriv = container_of(ctx->ctx_mgr, struct amdgpu_fpriv, ctx_mgr); + /* TODO: Stop using fpriv here, we only need the xcp_id. 
*/ + fpriv = container_of(ctx->mgr, struct amdgpu_fpriv, ctx_mgr); r = amdgpu_xcp_select_scheds(adev, hw_ip, hw_prio, fpriv, &num_scheds, &scheds); if (r) goto error_free_entity; } + if (num_scheds == 0) { + r = -EINVAL; + goto error_free_entity; + } + /* disable load balance if the hw engine retains context among dependent jobs */ if (hw_ip == AMDGPU_HW_IP_VCN_ENC || hw_ip == AMDGPU_HW_IP_VCN_DEC || @@ -348,7 +354,6 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, else ctx->stable_pstate = current_stable_pstate; - ctx->ctx_mgr = &(fpriv->ctx_mgr); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index 090dfe86f75b..cf8d700a22fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -44,20 +44,19 @@ struct amdgpu_ctx_entity { struct amdgpu_ctx { struct kref refcount; - struct amdgpu_ctx_mgr *mgr; + spinlock_t ring_lock; unsigned reset_counter; unsigned reset_counter_query; - uint64_t generation; - spinlock_t ring_lock; - struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM]; - bool preamble_presented; int32_t init_priority; int32_t override_priority; + uint32_t stable_pstate; atomic_t guilty; + bool preamble_presented; + uint64_t generation; unsigned long ras_counter_ce; unsigned long ras_counter_ue; - uint32_t stable_pstate; - struct amdgpu_ctx_mgr *ctx_mgr; + struct amdgpu_ctx_mgr *mgr; + struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM]; }; struct amdgpu_ctx_mgr { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index f7467af2e102..b42f866935ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -638,7 +638,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; - if (!adev->didt_rreg) + if 
(!adev->reg.didt.rreg) return -EOPNOTSUPP; r = pm_runtime_get_sync(adev_to_drm(adev)->dev); @@ -696,7 +696,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user if (size & 0x3 || *pos & 0x3) return -EINVAL; - if (!adev->didt_wreg) + if (!adev->reg.didt.wreg) return -EOPNOTSUPP; r = pm_runtime_get_sync(adev_to_drm(adev)->dev); @@ -752,7 +752,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, ssize_t result = 0; int r; - if (!adev->smc_rreg) + if (!adev->reg.smc.rreg) return -EOPNOTSUPP; if (size & 0x3 || *pos & 0x3) @@ -810,7 +810,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * ssize_t result = 0; int r; - if (!adev->smc_wreg) + if (!adev->reg.smc.wreg) return -EOPNOTSUPP; if (size & 0x3 || *pos & 0x3) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 3e19b51a2763..6f6973e8cd53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -847,558 +847,6 @@ bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev) } /** - * amdgpu_device_rreg - read a memory mapped IO or indirect register - * - * @adev: amdgpu_device pointer - * @reg: dword aligned register offset - * @acc_flags: access flags which require special behavior - * - * Returns the 32 bit value from the offset specified. 
- */ -uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, - uint32_t reg, uint32_t acc_flags) -{ - uint32_t ret; - - if (amdgpu_device_skip_hw_access(adev)) - return 0; - - if ((reg * 4) < adev->rmmio_size) { - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && - amdgpu_sriov_runtime(adev) && - down_read_trylock(&adev->reset_domain->sem)) { - ret = amdgpu_kiq_rreg(adev, reg, 0); - up_read(&adev->reset_domain->sem); - } else { - ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); - } - } else { - ret = adev->pcie_rreg(adev, reg * 4); - } - - trace_amdgpu_device_rreg(adev->pdev->device, reg, ret); - - return ret; -} - -/* - * MMIO register read with bytes helper functions - * @offset:bytes offset from MMIO start - */ - -/** - * amdgpu_mm_rreg8 - read a memory mapped IO register - * - * @adev: amdgpu_device pointer - * @offset: byte aligned register offset - * - * Returns the 8 bit value from the offset specified. - */ -uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) -{ - if (amdgpu_device_skip_hw_access(adev)) - return 0; - - if (offset < adev->rmmio_size) - return (readb(adev->rmmio + offset)); - BUG(); -} - - -/** - * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC - * - * @adev: amdgpu_device pointer - * @reg: dword aligned register offset - * @acc_flags: access flags which require special behavior - * @xcc_id: xcc accelerated compute core id - * - * Returns the 32 bit value from the offset specified. 
- */ -uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev, - uint32_t reg, uint32_t acc_flags, - uint32_t xcc_id) -{ - uint32_t ret, rlcg_flag; - - if (amdgpu_device_skip_hw_access(adev)) - return 0; - - if ((reg * 4) < adev->rmmio_size) { - if (amdgpu_sriov_vf(adev) && - !amdgpu_sriov_runtime(adev) && - adev->gfx.rlc.rlcg_reg_access_supported && - amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, - GC_HWIP, false, - &rlcg_flag)) { - ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, GET_INST(GC, xcc_id)); - } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && - amdgpu_sriov_runtime(adev) && - down_read_trylock(&adev->reset_domain->sem)) { - ret = amdgpu_kiq_rreg(adev, reg, xcc_id); - up_read(&adev->reset_domain->sem); - } else { - ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); - } - } else { - ret = adev->pcie_rreg(adev, reg * 4); - } - - return ret; -} - -/* - * MMIO register write with bytes helper functions - * @offset:bytes offset from MMIO start - * @value: the value want to be written to the register - */ - -/** - * amdgpu_mm_wreg8 - read a memory mapped IO register - * - * @adev: amdgpu_device pointer - * @offset: byte aligned register offset - * @value: 8 bit value to write - * - * Writes the value specified to the offset specified. - */ -void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) -{ - if (amdgpu_device_skip_hw_access(adev)) - return; - - if (offset < adev->rmmio_size) - writeb(value, adev->rmmio + offset); - else - BUG(); -} - -/** - * amdgpu_device_wreg - write to a memory mapped IO or indirect register - * - * @adev: amdgpu_device pointer - * @reg: dword aligned register offset - * @v: 32 bit value to write to the register - * @acc_flags: access flags which require special behavior - * - * Writes the value specified to the offset specified. 
- */ -void amdgpu_device_wreg(struct amdgpu_device *adev, - uint32_t reg, uint32_t v, - uint32_t acc_flags) -{ - if (amdgpu_device_skip_hw_access(adev)) - return; - - if ((reg * 4) < adev->rmmio_size) { - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && - amdgpu_sriov_runtime(adev) && - down_read_trylock(&adev->reset_domain->sem)) { - amdgpu_kiq_wreg(adev, reg, v, 0); - up_read(&adev->reset_domain->sem); - } else { - writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); - } - } else { - adev->pcie_wreg(adev, reg * 4, v); - } - - trace_amdgpu_device_wreg(adev->pdev->device, reg, v); -} - -/** - * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range - * - * @adev: amdgpu_device pointer - * @reg: mmio/rlc register - * @v: value to write - * @xcc_id: xcc accelerated compute core id - * - * this function is invoked only for the debugfs register access - */ -void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, - uint32_t reg, uint32_t v, - uint32_t xcc_id) -{ - if (amdgpu_device_skip_hw_access(adev)) - return; - - if (amdgpu_sriov_fullaccess(adev) && - adev->gfx.rlc.funcs && - adev->gfx.rlc.funcs->is_rlcg_access_range) { - if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg)) - return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id); - } else if ((reg * 4) >= adev->rmmio_size) { - adev->pcie_wreg(adev, reg * 4, v); - } else { - writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); - } -} - -/** - * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC - * - * @adev: amdgpu_device pointer - * @reg: dword aligned register offset - * @v: 32 bit value to write to the register - * @acc_flags: access flags which require special behavior - * @xcc_id: xcc accelerated compute core id - * - * Writes the value specified to the offset specified. 
- */ -void amdgpu_device_xcc_wreg(struct amdgpu_device *adev, - uint32_t reg, uint32_t v, - uint32_t acc_flags, uint32_t xcc_id) -{ - uint32_t rlcg_flag; - - if (amdgpu_device_skip_hw_access(adev)) - return; - - if ((reg * 4) < adev->rmmio_size) { - if (amdgpu_sriov_vf(adev) && - !amdgpu_sriov_runtime(adev) && - adev->gfx.rlc.rlcg_reg_access_supported && - amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, - GC_HWIP, true, - &rlcg_flag)) { - amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, GET_INST(GC, xcc_id)); - } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && - amdgpu_sriov_runtime(adev) && - down_read_trylock(&adev->reset_domain->sem)) { - amdgpu_kiq_wreg(adev, reg, v, xcc_id); - up_read(&adev->reset_domain->sem); - } else { - writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); - } - } else { - adev->pcie_wreg(adev, reg * 4, v); - } -} - -/** - * amdgpu_device_indirect_rreg - read an indirect register - * - * @adev: amdgpu_device pointer - * @reg_addr: indirect register address to read from - * - * Returns the value of indirect register @reg_addr - */ -u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev, - u32 reg_addr) -{ - unsigned long flags, pcie_index, pcie_data; - void __iomem *pcie_index_offset; - void __iomem *pcie_data_offset; - u32 r; - - pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); - pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); - - spin_lock_irqsave(&adev->pcie_idx_lock, flags); - pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; - pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; - - writel(reg_addr, pcie_index_offset); - readl(pcie_index_offset); - r = readl(pcie_data_offset); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); - - return r; -} - -u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev, - u64 reg_addr) -{ - unsigned long flags, pcie_index, pcie_index_hi, pcie_data; - u32 r; - void __iomem *pcie_index_offset; - void __iomem *pcie_index_hi_offset; - void 
__iomem *pcie_data_offset; - - if (unlikely(!adev->nbio.funcs)) { - pcie_index = AMDGPU_PCIE_INDEX_FALLBACK; - pcie_data = AMDGPU_PCIE_DATA_FALLBACK; - } else { - pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); - pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); - } - - if (reg_addr >> 32) { - if (unlikely(!adev->nbio.funcs)) - pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK; - else - pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev); - } else { - pcie_index_hi = 0; - } - - spin_lock_irqsave(&adev->pcie_idx_lock, flags); - pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; - pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; - if (pcie_index_hi != 0) - pcie_index_hi_offset = (void __iomem *)adev->rmmio + - pcie_index_hi * 4; - - writel(reg_addr, pcie_index_offset); - readl(pcie_index_offset); - if (pcie_index_hi != 0) { - writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset); - readl(pcie_index_hi_offset); - } - r = readl(pcie_data_offset); - - /* clear the high bits */ - if (pcie_index_hi != 0) { - writel(0, pcie_index_hi_offset); - readl(pcie_index_hi_offset); - } - - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); - - return r; -} - -/** - * amdgpu_device_indirect_rreg64 - read a 64bits indirect register - * - * @adev: amdgpu_device pointer - * @reg_addr: indirect register address to read from - * - * Returns the value of indirect register @reg_addr - */ -u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev, - u32 reg_addr) -{ - unsigned long flags, pcie_index, pcie_data; - void __iomem *pcie_index_offset; - void __iomem *pcie_data_offset; - u64 r; - - pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); - pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); - - spin_lock_irqsave(&adev->pcie_idx_lock, flags); - pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; - pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; - - /* read low 32 bits */ - 
writel(reg_addr, pcie_index_offset); - readl(pcie_index_offset); - r = readl(pcie_data_offset); - /* read high 32 bits */ - writel(reg_addr + 4, pcie_index_offset); - readl(pcie_index_offset); - r |= ((u64)readl(pcie_data_offset) << 32); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); - - return r; -} - -u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev, - u64 reg_addr) -{ - unsigned long flags, pcie_index, pcie_data; - unsigned long pcie_index_hi = 0; - void __iomem *pcie_index_offset; - void __iomem *pcie_index_hi_offset; - void __iomem *pcie_data_offset; - u64 r; - - pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); - pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); - if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset)) - pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev); - - spin_lock_irqsave(&adev->pcie_idx_lock, flags); - pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; - pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; - if (pcie_index_hi != 0) - pcie_index_hi_offset = (void __iomem *)adev->rmmio + - pcie_index_hi * 4; - - /* read low 32 bits */ - writel(reg_addr, pcie_index_offset); - readl(pcie_index_offset); - if (pcie_index_hi != 0) { - writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset); - readl(pcie_index_hi_offset); - } - r = readl(pcie_data_offset); - /* read high 32 bits */ - writel(reg_addr + 4, pcie_index_offset); - readl(pcie_index_offset); - if (pcie_index_hi != 0) { - writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset); - readl(pcie_index_hi_offset); - } - r |= ((u64)readl(pcie_data_offset) << 32); - - /* clear the high bits */ - if (pcie_index_hi != 0) { - writel(0, pcie_index_hi_offset); - readl(pcie_index_hi_offset); - } - - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); - - return r; -} - -/** - * amdgpu_device_indirect_wreg - write an indirect register address - * - * @adev: amdgpu_device pointer - * @reg_addr: indirect 
register offset - * @reg_data: indirect register data - * - */ -void amdgpu_device_indirect_wreg(struct amdgpu_device *adev, - u32 reg_addr, u32 reg_data) -{ - unsigned long flags, pcie_index, pcie_data; - void __iomem *pcie_index_offset; - void __iomem *pcie_data_offset; - - pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); - pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); - - spin_lock_irqsave(&adev->pcie_idx_lock, flags); - pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; - pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; - - writel(reg_addr, pcie_index_offset); - readl(pcie_index_offset); - writel(reg_data, pcie_data_offset); - readl(pcie_data_offset); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); -} - -void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev, - u64 reg_addr, u32 reg_data) -{ - unsigned long flags, pcie_index, pcie_index_hi, pcie_data; - void __iomem *pcie_index_offset; - void __iomem *pcie_index_hi_offset; - void __iomem *pcie_data_offset; - - pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); - pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); - if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset)) - pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev); - else - pcie_index_hi = 0; - - spin_lock_irqsave(&adev->pcie_idx_lock, flags); - pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; - pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; - if (pcie_index_hi != 0) - pcie_index_hi_offset = (void __iomem *)adev->rmmio + - pcie_index_hi * 4; - - writel(reg_addr, pcie_index_offset); - readl(pcie_index_offset); - if (pcie_index_hi != 0) { - writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset); - readl(pcie_index_hi_offset); - } - writel(reg_data, pcie_data_offset); - readl(pcie_data_offset); - - /* clear the high bits */ - if (pcie_index_hi != 0) { - writel(0, pcie_index_hi_offset); - 
readl(pcie_index_hi_offset); - } - - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); -} - -/** - * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address - * - * @adev: amdgpu_device pointer - * @reg_addr: indirect register offset - * @reg_data: indirect register data - * - */ -void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev, - u32 reg_addr, u64 reg_data) -{ - unsigned long flags, pcie_index, pcie_data; - void __iomem *pcie_index_offset; - void __iomem *pcie_data_offset; - - pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); - pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); - - spin_lock_irqsave(&adev->pcie_idx_lock, flags); - pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; - pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; - - /* write low 32 bits */ - writel(reg_addr, pcie_index_offset); - readl(pcie_index_offset); - writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset); - readl(pcie_data_offset); - /* write high 32 bits */ - writel(reg_addr + 4, pcie_index_offset); - readl(pcie_index_offset); - writel((u32)(reg_data >> 32), pcie_data_offset); - readl(pcie_data_offset); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); -} - -void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev, - u64 reg_addr, u64 reg_data) -{ - unsigned long flags, pcie_index, pcie_data; - unsigned long pcie_index_hi = 0; - void __iomem *pcie_index_offset; - void __iomem *pcie_index_hi_offset; - void __iomem *pcie_data_offset; - - pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); - pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); - if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset)) - pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev); - - spin_lock_irqsave(&adev->pcie_idx_lock, flags); - pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; - pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; - if 
(pcie_index_hi != 0) - pcie_index_hi_offset = (void __iomem *)adev->rmmio + - pcie_index_hi * 4; - - /* write low 32 bits */ - writel(reg_addr, pcie_index_offset); - readl(pcie_index_offset); - if (pcie_index_hi != 0) { - writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset); - readl(pcie_index_hi_offset); - } - writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset); - readl(pcie_data_offset); - /* write high 32 bits */ - writel(reg_addr + 4, pcie_index_offset); - readl(pcie_index_offset); - if (pcie_index_hi != 0) { - writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset); - readl(pcie_index_hi_offset); - } - writel((u32)(reg_data >> 32), pcie_data_offset); - readl(pcie_data_offset); - - /* clear the high bits */ - if (pcie_index_hi != 0) { - writel(0, pcie_index_hi_offset); - readl(pcie_index_hi_offset); - } - - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); -} - -/** * amdgpu_device_get_rev_id - query device rev_id * * @adev: amdgpu_device pointer @@ -1410,149 +858,6 @@ u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev) return adev->nbio.funcs->get_rev_id(adev); } -/** - * amdgpu_invalid_rreg - dummy reg read function - * - * @adev: amdgpu_device pointer - * @reg: offset of register - * - * Dummy register read function. Used for register blocks - * that certain asics don't have (all asics). - * Returns the value in the register. - */ -static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg) -{ - dev_err(adev->dev, "Invalid callback to read register 0x%04X\n", reg); - BUG(); - return 0; -} - -static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg) -{ - dev_err(adev->dev, "Invalid callback to read register 0x%llX\n", reg); - BUG(); - return 0; -} - -/** - * amdgpu_invalid_wreg - dummy reg write function - * - * @adev: amdgpu_device pointer - * @reg: offset of register - * @v: value to write to the register - * - * Dummy register read function. 
Used for register blocks - * that certain asics don't have (all asics). - */ -static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) -{ - dev_err(adev->dev, - "Invalid callback to write register 0x%04X with 0x%08X\n", reg, - v); - BUG(); -} - -static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v) -{ - dev_err(adev->dev, - "Invalid callback to write register 0x%llX with 0x%08X\n", reg, - v); - BUG(); -} - -/** - * amdgpu_invalid_rreg64 - dummy 64 bit reg read function - * - * @adev: amdgpu_device pointer - * @reg: offset of register - * - * Dummy register read function. Used for register blocks - * that certain asics don't have (all asics). - * Returns the value in the register. - */ -static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg) -{ - dev_err(adev->dev, "Invalid callback to read 64 bit register 0x%04X\n", - reg); - BUG(); - return 0; -} - -static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg) -{ - dev_err(adev->dev, "Invalid callback to read register 0x%llX\n", reg); - BUG(); - return 0; -} - -/** - * amdgpu_invalid_wreg64 - dummy reg write function - * - * @adev: amdgpu_device pointer - * @reg: offset of register - * @v: value to write to the register - * - * Dummy register read function. Used for register blocks - * that certain asics don't have (all asics). 
- */ -static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v) -{ - dev_err(adev->dev, - "Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n", - reg, v); - BUG(); -} - -static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v) -{ - dev_err(adev->dev, - "Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n", - reg, v); - BUG(); -} - -/** - * amdgpu_block_invalid_rreg - dummy reg read function - * - * @adev: amdgpu_device pointer - * @block: offset of instance - * @reg: offset of register - * - * Dummy register read function. Used for register blocks - * that certain asics don't have (all asics). - * Returns the value in the register. - */ -static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev, - uint32_t block, uint32_t reg) -{ - dev_err(adev->dev, - "Invalid callback to read register 0x%04X in block 0x%04X\n", - reg, block); - BUG(); - return 0; -} - -/** - * amdgpu_block_invalid_wreg - dummy reg write function - * - * @adev: amdgpu_device pointer - * @block: offset of instance - * @reg: offset of register - * @v: value to write to the register - * - * Dummy register read function. Used for register blocks - * that certain asics don't have (all asics). 
- */ -static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev, - uint32_t block, - uint32_t reg, uint32_t v) -{ - dev_err(adev->dev, - "Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n", - reg, block, v); - BUG(); -} - static uint32_t amdgpu_device_get_vbios_flags(struct amdgpu_device *adev) { if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU)) @@ -3156,9 +2461,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (r) goto init_failed; - if (adev->mman.buffer_funcs_ring && - adev->mman.buffer_funcs_ring->sched.ready) - amdgpu_ttm_set_buffer_funcs_status(adev, true); + amdgpu_ttm_set_buffer_funcs_status(adev, true); /* Don't init kfd if whole hive need to be reset during init */ if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) { @@ -4047,8 +3350,7 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev) r = amdgpu_device_ip_resume_phase2(adev); - if (adev->mman.buffer_funcs_ring->sched.ready) - amdgpu_ttm_set_buffer_funcs_status(adev, true); + amdgpu_ttm_set_buffer_funcs_status(adev, true); if (r) return r; @@ -4108,17 +3410,6 @@ bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev, case CHIP_VERDE: case CHIP_OLAND: return amdgpu_dc != 0 && IS_ENABLED(CONFIG_DRM_AMD_DC_SI); - case CHIP_KAVERI: - case CHIP_KABINI: - case CHIP_MULLINS: - /* - * We have systems in the wild with these ASICs that require - * TRAVIS and NUTMEG support which is not supported with DC. - * - * Fallback to the non-DC driver here by default so as not to - * cause regressions. 
- */ - return amdgpu_dc > 0; default: return amdgpu_dc != 0; #else @@ -4396,26 +3687,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); - adev->smc_rreg = &amdgpu_invalid_rreg; - adev->smc_wreg = &amdgpu_invalid_wreg; - adev->pcie_rreg = &amdgpu_invalid_rreg; - adev->pcie_wreg = &amdgpu_invalid_wreg; - adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext; - adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext; - adev->pciep_rreg = &amdgpu_invalid_rreg; - adev->pciep_wreg = &amdgpu_invalid_wreg; - adev->pcie_rreg64 = &amdgpu_invalid_rreg64; - adev->pcie_wreg64 = &amdgpu_invalid_wreg64; - adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext; - adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext; - adev->uvd_ctx_rreg = &amdgpu_invalid_rreg; - adev->uvd_ctx_wreg = &amdgpu_invalid_wreg; - adev->didt_rreg = &amdgpu_invalid_rreg; - adev->didt_wreg = &amdgpu_invalid_wreg; - adev->gc_cac_rreg = &amdgpu_invalid_rreg; - adev->gc_cac_wreg = &amdgpu_invalid_wreg; - adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg; - adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg; + amdgpu_reg_access_init(adev); dev_info( adev->dev, @@ -4460,13 +3732,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, return r; spin_lock_init(&adev->mmio_idx_lock); - spin_lock_init(&adev->smc_idx_lock); - spin_lock_init(&adev->pcie_idx_lock); - spin_lock_init(&adev->uvd_ctx_idx_lock); - spin_lock_init(&adev->didt_idx_lock); - spin_lock_init(&adev->gc_cac_idx_lock); - spin_lock_init(&adev->se_cac_idx_lock); - spin_lock_init(&adev->audio_endpt_idx_lock); spin_lock_init(&adev->mm_stats.lock); spin_lock_init(&adev->virt.rlcg_reg_lock); spin_lock_init(&adev->wb.lock); @@ -5195,8 +4460,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) return 0; unwind_evict: - if (adev->mman.buffer_funcs_ring->sched.ready) - amdgpu_ttm_set_buffer_funcs_status(adev, true); + 
amdgpu_ttm_set_buffer_funcs_status(adev, true); amdgpu_fence_driver_hw_init(adev); unwind_userq: @@ -5930,8 +5194,7 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context) if (r) goto out; - if (tmp_adev->mman.buffer_funcs_ring->sched.ready) - amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true); + amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true); r = amdgpu_device_ip_resume_phase3(tmp_adev); if (r) @@ -6309,7 +5572,7 @@ static void amdgpu_device_halt_activities(struct amdgpu_device *adev, if (!amdgpu_ring_sched_ready(ring)) continue; - drm_sched_stop(&ring->sched, job ? &job->base : NULL); + drm_sched_wqueue_stop(&ring->sched); if (need_emergency_restart) amdgpu_job_stop_all_jobs_on_sched(&ring->sched); @@ -6393,7 +5656,7 @@ static int amdgpu_device_sched_resume(struct list_head *device_list, if (!amdgpu_ring_sched_ready(ring)) continue; - drm_sched_start(&ring->sched, 0); + drm_sched_wqueue_start(&ring->sched); } if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) @@ -7376,39 +6639,6 @@ void amdgpu_device_halt(struct amdgpu_device *adev) pci_wait_for_pending_transaction(pdev); } -u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev, - u32 reg) -{ - unsigned long flags, address, data; - u32 r; - - address = adev->nbio.funcs->get_pcie_port_index_offset(adev); - data = adev->nbio.funcs->get_pcie_port_data_offset(adev); - - spin_lock_irqsave(&adev->pcie_idx_lock, flags); - WREG32(address, reg * 4); - (void)RREG32(address); - r = RREG32(data); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); - return r; -} - -void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev, - u32 reg, u32 v) -{ - unsigned long flags, address, data; - - address = adev->nbio.funcs->get_pcie_port_index_offset(adev); - data = adev->nbio.funcs->get_pcie_port_data_offset(adev); - - spin_lock_irqsave(&adev->pcie_idx_lock, flags); - WREG32(address, reg * 4); - (void)RREG32(address); - WREG32(data, v); - (void)RREG32(data); - 
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); -} - /** * amdgpu_device_get_gang - return a reference to the current gang * @adev: amdgpu_device pointer @@ -7591,36 +6821,6 @@ bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev) } } -uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev, - uint32_t inst, uint32_t reg_addr, char reg_name[], - uint32_t expected_value, uint32_t mask) -{ - uint32_t ret = 0; - uint32_t old_ = 0; - uint32_t tmp_ = RREG32(reg_addr); - uint32_t loop = adev->usec_timeout; - - while ((tmp_ & (mask)) != (expected_value)) { - if (old_ != tmp_) { - loop = adev->usec_timeout; - old_ = tmp_; - } else - udelay(1); - tmp_ = RREG32(reg_addr); - loop--; - if (!loop) { - dev_warn( - adev->dev, - "Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn", - inst, reg_name, (uint32_t)expected_value, - (uint32_t)(tmp_ & (mask))); - ret = -ETIMEDOUT; - break; - } - } - return ret; -} - ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring) { ssize_t size = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index af3d2fd61cf3..6c8b3c2687dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -2298,6 +2298,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(3, 5, 1): case IP_VERSION(3, 6, 0): case IP_VERSION(4, 1, 0): + case IP_VERSION(4, 2, 0): /* TODO: Fix IP version. 
DC code expects version 4.0.1 */ if (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(4, 1, 0)) adev->ip_versions[DCE_HWIP][0] = IP_VERSION(4, 0, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 95d26f086d54..03814a23eb54 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -223,9 +223,7 @@ uint amdgpu_dc_visual_confirm; int amdgpu_async_gfx_ring = 1; int amdgpu_mcbp = -1; int amdgpu_discovery = -1; -int amdgpu_mes; int amdgpu_mes_log_enable = 0; -int amdgpu_mes_kiq; int amdgpu_uni_mes = 1; int amdgpu_noretry = -1; int amdgpu_force_asic_type = -1; @@ -691,15 +689,6 @@ MODULE_PARM_DESC(discovery, module_param_named(discovery, amdgpu_discovery, int, 0444); /** - * DOC: mes (int) - * Enable Micro Engine Scheduler. This is a new hw scheduling engine for gfx, sdma, and compute. - * (0 = disabled (default), 1 = enabled) - */ -MODULE_PARM_DESC(mes, - "Enable Micro Engine Scheduler (0 = disabled (default), 1 = enabled)"); -module_param_named(mes, amdgpu_mes, int, 0444); - -/** * DOC: mes_log_enable (int) * Enable Micro Engine Scheduler log. This is used to enable/disable MES internal log. * (0 = disabled (default), 1 = enabled) @@ -709,15 +698,6 @@ MODULE_PARM_DESC(mes_log_enable, module_param_named(mes_log_enable, amdgpu_mes_log_enable, int, 0444); /** - * DOC: mes_kiq (int) - * Enable Micro Engine Scheduler KIQ. This is a new engine pipe for kiq. - * (0 = disabled (default), 1 = enabled) - */ -MODULE_PARM_DESC(mes_kiq, - "Enable Micro Engine Scheduler KIQ (0 = disabled (default), 1 = enabled)"); -module_param_named(mes_kiq, amdgpu_mes_kiq, int, 0444); - -/** * DOC: uni_mes (int) * Enable Unified Micro Engine Scheduler. This is a new engine pipe for unified scheduler. 
* (0 = disabled (default), 1 = enabled) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 1054d66c54fa..d209591e3710 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -89,16 +89,6 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) return seq; } -static void amdgpu_fence_save_fence_wptr_start(struct amdgpu_fence *af) -{ - af->fence_wptr_start = af->ring->wptr; -} - -static void amdgpu_fence_save_fence_wptr_end(struct amdgpu_fence *af) -{ - af->fence_wptr_end = af->ring->wptr; -} - /** * amdgpu_fence_emit - emit a fence on the requested ring * @@ -107,16 +97,14 @@ static void amdgpu_fence_save_fence_wptr_end(struct amdgpu_fence *af) * @flags: flags to pass into the subordinate .emit_fence() call * * Emits a fence command on the requested ring (all asics). - * Returns 0 on success, -ENOMEM on failure. */ -int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af, - unsigned int flags) +void amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af, + unsigned int flags) { struct amdgpu_device *adev = ring->adev; struct dma_fence *fence; struct dma_fence __rcu **ptr; uint32_t seq; - int r; fence = &af->base; af->ring = ring; @@ -126,11 +114,9 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af, &ring->fence_drv.lock, adev->fence_context + ring->idx, seq); - amdgpu_fence_save_fence_wptr_start(af); amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, flags | AMDGPU_FENCE_FLAG_INT); - amdgpu_fence_save_fence_wptr_end(af); - amdgpu_fence_save_wptr(af); + pm_runtime_get_noresume(adev_to_drm(adev)->dev); ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; if (unlikely(rcu_dereference_protected(*ptr, 1))) { @@ -141,10 +127,13 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af, rcu_read_unlock(); if (old) { - r = dma_fence_wait(old, false); + /* + * 
dma_fence_wait(old, false) is not interruptible. + * It will not return an error in this case. + * So we can safely ignore the return value. + */ + dma_fence_wait(old, false); dma_fence_put(old); - if (r) - return r; } } @@ -154,8 +143,6 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af, * emitting the fence would mess up the hardware ring buffer. */ rcu_assign_pointer(*ptr, dma_fence_get(fence)); - - return 0; } /** @@ -241,7 +228,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) do { struct dma_fence *fence, **ptr; - struct amdgpu_fence *am_fence; ++last_seq; last_seq &= drv->num_fences_mask; @@ -254,12 +240,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) if (!fence) continue; - /* Save the wptr in the fence driver so we know what the last processed - * wptr was. This is required for re-emitting the ring state for - * queues that are reset but are not guilty and thus have no guilty fence. - */ - am_fence = container_of(fence, struct amdgpu_fence, base); - drv->signalled_wptr = am_fence->wptr; dma_fence_signal(fence); dma_fence_put(fence); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); @@ -708,25 +688,29 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring) */ /** - * amdgpu_fence_driver_update_timedout_fence_state - Update fence state and set errors + * amdgpu_ring_set_fence_errors_and_reemit - Set dma_fence errors and reemit * - * @af: fence of the ring to update + * @ring: the ring to operate on + * @guilty_fence: fence of the ring to update * */ -void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af) +void amdgpu_ring_set_fence_errors_and_reemit(struct amdgpu_ring *ring, + struct amdgpu_fence *guilty_fence) { struct dma_fence *unprocessed; struct dma_fence __rcu **ptr; struct amdgpu_fence *fence; - struct amdgpu_ring *ring = af->ring; unsigned long flags; u32 seq, last_seq; - bool reemitted = false; + unsigned int i; + bool is_guilty_fence; + bool is_guilty_context; last_seq 
= amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask; seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask; - /* mark all fences from the guilty context with an error */ + ring->reemit = true; + amdgpu_ring_alloc(ring, ring->ring_backup_entries_to_copy); spin_lock_irqsave(&ring->fence_drv.lock, flags); do { last_seq++; @@ -738,39 +722,45 @@ void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af) if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) { fence = container_of(unprocessed, struct amdgpu_fence, base); + is_guilty_fence = fence == guilty_fence; + is_guilty_context = fence->context == guilty_fence->context; - if (fence->reemitted > 1) - reemitted = true; - else if (fence == af) + /* mark all fences from the guilty context with an error */ + if (is_guilty_fence) dma_fence_set_error(&fence->base, -ETIME); - else if (fence->context == af->context) + else if (is_guilty_context) dma_fence_set_error(&fence->base, -ECANCELED); + + /* reemit the packet stream and update wptrs */ + fence->ib_wptr = ring->wptr; + for (i = 0; i < fence->ib_dw_size; i++) { + /* Skip the IB(s) for the guilty context. 
*/ + if (is_guilty_context && + i >= fence->skip_ib_dw_start_offset && + i < fence->skip_ib_dw_end_offset) + amdgpu_ring_write(ring, ring->funcs->nop); + else + amdgpu_ring_write(ring, + ring->ring_backup[fence->backup_idx + i]); + } } rcu_read_unlock(); } while (last_seq != seq); spin_unlock_irqrestore(&ring->fence_drv.lock, flags); - - if (reemitted) { - /* if we've already reemitted once then just cancel everything */ - amdgpu_fence_driver_force_completion(af->ring); - af->ring->ring_backup_entries_to_copy = 0; - } -} - -void amdgpu_fence_save_wptr(struct amdgpu_fence *af) -{ - af->wptr = af->ring->wptr; + amdgpu_ring_commit(ring); + ring->reemit = false; } static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring, - u64 start_wptr, u64 end_wptr) + struct amdgpu_fence *af) { - unsigned int first_idx = start_wptr & ring->buf_mask; - unsigned int last_idx = end_wptr & ring->buf_mask; + unsigned int first_idx = af->ib_wptr & ring->buf_mask; + unsigned int dw_size = af->ib_dw_size; unsigned int i; + af->backup_idx = ring->ring_backup_entries_to_copy; /* Backup the contents of the ring buffer. 
*/ - for (i = first_idx; i != last_idx; ++i, i &= ring->buf_mask) + for (i = first_idx; dw_size > 0; ++i, i &= ring->buf_mask, --dw_size) ring->ring_backup[ring->ring_backup_entries_to_copy++] = ring->ring[i]; } @@ -780,12 +770,10 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring, struct dma_fence *unprocessed; struct dma_fence __rcu **ptr; struct amdgpu_fence *fence; - u64 wptr; u32 seq, last_seq; last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask; seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask; - wptr = ring->fence_drv.signalled_wptr; ring->ring_backup_entries_to_copy = 0; do { @@ -799,21 +787,7 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring, if (unprocessed && !dma_fence_is_signaled(unprocessed)) { fence = container_of(unprocessed, struct amdgpu_fence, base); - /* save everything if the ring is not guilty, otherwise - * just save the content from other contexts. - */ - if (!fence->reemitted && - (!guilty_fence || (fence->context != guilty_fence->context))) { - amdgpu_ring_backup_unprocessed_command(ring, wptr, - fence->wptr); - } else if (!fence->reemitted) { - /* always save the fence */ - amdgpu_ring_backup_unprocessed_command(ring, - fence->fence_wptr_start, - fence->fence_wptr_end); - } - wptr = fence->wptr; - fence->reemitted++; + amdgpu_ring_backup_unprocessed_command(ring, fence); } rcu_read_unlock(); } while (last_seq != seq); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 77578ecc6782..cab3196a87fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -503,6 +503,55 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id) &ring->mqd_ptr); } +void amdgpu_gfx_mqd_symmetrically_map_cu_mask(struct amdgpu_device *adev, const uint32_t *cu_mask, + uint32_t cu_mask_count, uint32_t *se_mask) +{ + struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; + struct 
amdgpu_gfx_config *gfx_info = &adev->gfx.config; + uint32_t cu_per_sh[8][4] = {0}; + int i, se, sh, cu, cu_bitmap_sh_mul; + int xcc_inst = ffs(adev->gfx.xcc_mask) - 1; + bool wgp_mode_req = amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0); + int cu_inc = wgp_mode_req ? 2 : 1; + uint32_t en_mask = wgp_mode_req ? 0x3 : 0x1; + int num_xcc, inc, inst = 0; + + if (xcc_inst < 0) + xcc_inst = 0; + + num_xcc = hweight16(adev->gfx.xcc_mask); + if (!num_xcc) + num_xcc = 1; + + inc = cu_inc * num_xcc; + + cu_bitmap_sh_mul = 2; + + for (se = 0; se < gfx_info->max_shader_engines; se++) + for (sh = 0; sh < gfx_info->max_sh_per_se; sh++) + cu_per_sh[se][sh] = hweight32( + cu_info->bitmap[xcc_inst][se % 4][sh + (se / 4) * + cu_bitmap_sh_mul]); + + for (i = 0; i < gfx_info->max_shader_engines; i++) + se_mask[i] = 0; + + i = inst; + for (cu = 0; cu < 16; cu += cu_inc) { + for (sh = 0; sh < gfx_info->max_sh_per_se; sh++) { + for (se = 0; se < gfx_info->max_shader_engines; se++) { + if (cu_per_sh[se][sh] > cu) { + if ((i / 32) < cu_mask_count && (cu_mask[i / 32] & (1 << (i % 32)))) + se_mask[se] |= en_mask << (cu + sh * 16); + i += inc; + if (i >= cu_mask_count * 32) + return; + } + } + } + } +} + int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) { struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 585cc8e81bb2..720ed3a2c78c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -583,6 +583,8 @@ int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, unsigned mqd_size, int xcc_id); void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id); +void amdgpu_gfx_mqd_symmetrically_map_cu_mask(struct amdgpu_device *adev, const uint32_t *cu_mask, + uint32_t cu_mask_count, uint32_t *se_mask); int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id); int 
amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id); int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 5179fa008626..a0940db1cd36 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -742,7 +742,7 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * translation. Avoid this by doing the invalidation from the SDMA * itself at least for GART. */ - mutex_lock(&adev->mman.gtt_window_lock); + mutex_lock(&adev->mman.default_entity.lock); r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.default_entity.base, AMDGPU_FENCE_OWNER_UNDEFINED, 16 * 4, AMDGPU_IB_POOL_IMMEDIATE, @@ -755,7 +755,7 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop; amdgpu_ring_pad_ib(ring, &job->ibs[0]); fence = amdgpu_job_submit(job); - mutex_unlock(&adev->mman.gtt_window_lock); + mutex_unlock(&adev->mman.default_entity.lock); dma_fence_wait(fence, false); dma_fence_put(fence); @@ -763,7 +763,7 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, return; error_alloc: - mutex_unlock(&adev->mman.gtt_window_lock); + mutex_unlock(&adev->mman.default_entity.lock); dev_err(adev->dev, "Error flushing GPU TLB using the SDMA (%d)!\n", r); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index ac276bb53c7c..620fddde4c4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -324,17 +324,13 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size) { struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr; struct ttm_resource_manager *man = &mgr->manager; - uint64_t start, size; man->use_tt = true; man->func = &amdgpu_gtt_mgr_func; ttm_resource_manager_init(man, &adev->mman.bdev, 
gtt_size); - start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS; - start += amdgpu_vce_required_gart_pages(adev); - size = (adev->gmc.gart_size >> PAGE_SHIFT) - start; - drm_mm_init(&mgr->mm, start, size); + drm_mm_init(&mgr->mm, 0, adev->gmc.gart_size >> PAGE_SHIFT); spin_lock_init(&mgr->lock); ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 3a7bab87b5d8..63f62c670df5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -129,6 +129,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, struct amdgpu_ib *ib = &ibs[0]; struct dma_fence *tmp = NULL; struct amdgpu_fence *af; + struct amdgpu_fence *vm_af; bool need_ctx_switch; struct amdgpu_vm *vm; uint64_t fence_ctx; @@ -215,25 +216,28 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, dma_fence_put(tmp); } - if ((ib->flags & AMDGPU_IB_FLAG_EMIT_MEM_SYNC) && ring->funcs->emit_mem_sync) - ring->funcs->emit_mem_sync(ring); + if (job) { + vm_af = job->hw_vm_fence; + /* VM sequence */ + vm_af->ib_wptr = ring->wptr; + amdgpu_vm_flush(ring, job, need_pipe_sync); + vm_af->ib_dw_size = + amdgpu_ring_get_dw_distance(ring, vm_af->ib_wptr, ring->wptr); + } - if (ring->funcs->emit_wave_limit && - ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH) - ring->funcs->emit_wave_limit(ring, true); + /* IB sequence */ + af->ib_wptr = ring->wptr; + amdgpu_ring_ib_begin(ring); if (ring->funcs->insert_start) ring->funcs->insert_start(ring); - if (job) { - r = amdgpu_vm_flush(ring, job, need_pipe_sync); - if (r) { - amdgpu_ring_undo(ring); - goto free_fence; - } - } + if ((ib->flags & AMDGPU_IB_FLAG_EMIT_MEM_SYNC) && ring->funcs->emit_mem_sync) + ring->funcs->emit_mem_sync(ring); - amdgpu_ring_ib_begin(ring); + if (ring->funcs->emit_wave_limit && + ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH) + 
ring->funcs->emit_wave_limit(ring, true); if (ring->funcs->emit_gfx_shadow && adev->gfx.cp_gfx_shadow) amdgpu_ring_emit_gfx_shadow(ring, shadow_va, csa_va, gds_va, @@ -243,6 +247,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, cond_exec = amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr); + /* Skip the IB for guilty contexts */ + af->skip_ib_dw_start_offset = + amdgpu_ring_get_dw_distance(ring, af->ib_wptr, ring->wptr); amdgpu_device_flush_hdp(adev, ring); if (need_ctx_switch) @@ -281,6 +288,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, amdgpu_ring_emit_frame_cntl(ring, false, secure); amdgpu_device_invalidate_hdp(adev, ring); + /* Skip the IB for guilty contexts */ + af->skip_ib_dw_end_offset = + amdgpu_ring_get_dw_distance(ring, af->ib_wptr, ring->wptr); if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE) fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY; @@ -297,14 +307,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr); } - r = amdgpu_fence_emit(ring, af, fence_flags); - if (r) { - dev_err(adev->dev, "failed to emit fence (%d)\n", r); - if (job && job->vmid) - amdgpu_vmid_reset(adev, ring->vm_hub, job->vmid); - amdgpu_ring_undo(ring); - goto free_fence; - } + amdgpu_fence_emit(ring, af, fence_flags); *f = &af->base; /* get a ref for the job */ if (job) @@ -323,15 +326,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH) ring->funcs->emit_wave_limit(ring, false); - /* Save the wptr associated with this fence. - * This must be last for resets to work properly - * as we need to save the wptr associated with this - * fence so we know what rings contents to backup - * after we reset the queue. 
- */ - amdgpu_fence_save_wptr(af); - amdgpu_ring_ib_end(ring); + + af->ib_dw_size = amdgpu_ring_get_dw_distance(ring, af->ib_wptr, ring->wptr); + amdgpu_ring_commit(ring); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index affc4a3f995b..07771721af9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -92,7 +92,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) struct drm_wedge_task_info *info = NULL; struct amdgpu_task_info *ti = NULL; struct amdgpu_device *adev = ring->adev; - enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_RESET; int idx, r; if (!drm_dev_enter(adev_to_drm(adev), &idx)) { @@ -147,8 +146,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) ring->sched.name); drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, info); - /* This is needed to add the job back to the pending list */ - status = DRM_GPU_SCHED_STAT_NO_HANG; goto exit; } dev_err(adev->dev, "Ring %s reset failed\n", ring->sched.name); @@ -184,7 +181,8 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) exit: amdgpu_vm_put_task_info(ti); drm_dev_exit(idx); - return status; + /* This is needed to add the job back to the pending list */ + return DRM_GPU_SCHED_STAT_NO_HANG; } int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 950d32ac4ddb..741d1919ef88 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -21,6 +21,7 @@ * * */ +#include "amdgpu_reg_access.h" #include <linux/debugfs.h> #include <linux/list.h> #include <linux/module.h> @@ -42,6 +43,7 @@ #include "amdgpu_reset.h" #include "amdgpu_psp.h" #include "amdgpu_ras_mgr.h" +#include "amdgpu_virt_ras_cmd.h" #ifdef CONFIG_X86_MCE_AMD #include <asm/mce.h> @@ 
-228,19 +230,30 @@ static int amdgpu_check_address_validity(struct amdgpu_device *adev, return 0; if (amdgpu_sriov_vf(adev)) { - if (amdgpu_virt_check_vf_critical_region(adev, address, &hit)) - return -EPERM; - return hit ? -EACCES : 0; + if (amdgpu_uniras_enabled(adev)) { + if (amdgpu_virt_ras_check_address_validity(adev, address, &hit)) + return -EPERM; + if (hit) + return -EACCES; + } else { + if (amdgpu_virt_check_vf_critical_region(adev, address, &hit)) + return -EPERM; + return hit ? -EACCES : 0; + } } if ((address >= adev->gmc.mc_vram_size) || (address >= RAS_UMC_INJECT_ADDR_LIMIT)) return -EFAULT; - if (amdgpu_uniras_enabled(adev)) - count = amdgpu_ras_mgr_lookup_bad_pages_in_a_row(adev, address, - page_pfns, ARRAY_SIZE(page_pfns)); - else + if (amdgpu_uniras_enabled(adev)) { + if (amdgpu_sriov_vf(adev)) + count = amdgpu_virt_ras_convert_retired_address(adev, address, + page_pfns, ARRAY_SIZE(page_pfns)); + else + count = amdgpu_ras_mgr_lookup_bad_pages_in_a_row(adev, address, + page_pfns, ARRAY_SIZE(page_pfns)); + } else count = amdgpu_umc_lookup_bad_pages_in_a_row(adev, address, page_pfns, ARRAY_SIZE(page_pfns)); @@ -3118,9 +3131,11 @@ static int __amdgpu_ras_convert_rec_array_from_rom(struct amdgpu_device *adev, enum amdgpu_memory_partition nps) { int i = 0; + uint64_t chan_idx_v2; enum amdgpu_memory_partition save_nps; save_nps = (bps[0].retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK; + chan_idx_v2 = bps[0].retired_page & UMC_CHANNEL_IDX_V2; /*old asics just have pa in eeprom*/ if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12) { @@ -3132,7 +3147,7 @@ static int __amdgpu_ras_convert_rec_array_from_rom(struct amdgpu_device *adev, for (i = 0; i < adev->umc.retire_unit; i++) bps[i].retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT); - if (save_nps) { + if (save_nps || chan_idx_v2) { if (save_nps == nps) { if (amdgpu_umc_pages_in_a_row(adev, err_data, bps[0].retired_page << AMDGPU_GPU_PAGE_SHIFT)) @@ -3176,10 +3191,12 @@ static int 
__amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev, enum amdgpu_memory_partition nps) { int i = 0; + uint64_t chan_idx_v2; enum amdgpu_memory_partition save_nps; if (!amdgpu_ras_smu_eeprom_supported(adev)) { save_nps = (bps->retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK; + chan_idx_v2 = bps->retired_page & UMC_CHANNEL_IDX_V2; bps->retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT); } else { /* if pmfw manages eeprom, save_nps is not stored on eeprom, @@ -3201,16 +3218,19 @@ static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev, err_data->err_addr[i].mcumc_id = bps->mcumc_id; } } else { - if (bps->address) { + if (save_nps || chan_idx_v2) { if (amdgpu_ras_mca2pa_by_idx(adev, bps, err_data)) return -EINVAL; } else { /* for specific old eeprom data, mca address is not stored, * calc it from pa */ - if (amdgpu_umc_pa2mca(adev, bps->retired_page << AMDGPU_GPU_PAGE_SHIFT, - &(bps->address), AMDGPU_NPS1_PARTITION_MODE)) - return -EINVAL; + if (bps->address == 0) + if (amdgpu_umc_pa2mca(adev, + bps->retired_page << AMDGPU_GPU_PAGE_SHIFT, + &(bps->address), + AMDGPU_NPS1_PARTITION_MODE)) + return -EINVAL; if (amdgpu_ras_mca2pa(adev, bps, err_data)) return -EOPNOTSUPP; @@ -5516,11 +5536,11 @@ static void amdgpu_ras_boot_time_error_reporting(struct amdgpu_device *adev, * is changed. 
In such case, replace the aqua_vanjaram implementation * with more common helper */ reg_addr = (mmMP0_SMN_C2PMSG_92 << 2) + - aqua_vanjaram_encode_ext_smn_addressing(instance); + amdgpu_reg_get_smn_base64(adev, MP0_HWIP, instance); fw_status = amdgpu_device_indirect_rreg_ext(adev, reg_addr); reg_addr = (mmMP0_SMN_C2PMSG_126 << 2) + - aqua_vanjaram_encode_ext_smn_addressing(instance); + amdgpu_reg_get_smn_base64(adev, MP0_HWIP, instance); boot_error = amdgpu_device_indirect_rreg_ext(adev, reg_addr); socket_id = AMDGPU_RAS_GPU_ERR_SOCKET_ID(boot_error); @@ -5586,7 +5606,7 @@ static bool amdgpu_ras_boot_error_detected(struct amdgpu_device *adev, int retry_loop; reg_addr = (mmMP0_SMN_C2PMSG_92 << 2) + - aqua_vanjaram_encode_ext_smn_addressing(instance); + amdgpu_reg_get_smn_base64(adev, MP0_HWIP, instance); for (retry_loop = 0; retry_loop < AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT; retry_loop++) { reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 6fba9d5b29ea..44fba4b6aa92 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -508,6 +508,9 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) control->bad_channel_bitmap = 0; amdgpu_dpm_send_hbm_bad_channel_flag(adev, control->bad_channel_bitmap); con->update_channel_flag = false; + /* there is no record on eeprom now, clear the counter */ + if (con->eh_data) + con->eh_data->count_saved = 0; amdgpu_ras_debugfs_set_ret_size(control); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reg_access.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reg_access.c new file mode 100644 index 000000000000..bf8645390bdc --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reg_access.c @@ -0,0 +1,959 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2025 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <linux/delay.h> + +#include "amdgpu.h" +#include "amdgpu_reset.h" +#include "amdgpu_trace.h" +#include "amdgpu_virt.h" +#include "amdgpu_reg_access.h" + +#define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2) +#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2) +#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2) + +void amdgpu_reg_access_init(struct amdgpu_device *adev) +{ + spin_lock_init(&adev->reg.smc.lock); + adev->reg.smc.rreg = NULL; + adev->reg.smc.wreg = NULL; + + spin_lock_init(&adev->reg.uvd_ctx.lock); + adev->reg.uvd_ctx.rreg = NULL; + adev->reg.uvd_ctx.wreg = NULL; + + spin_lock_init(&adev->reg.didt.lock); + adev->reg.didt.rreg = NULL; + adev->reg.didt.wreg = NULL; + + spin_lock_init(&adev->reg.gc_cac.lock); + adev->reg.gc_cac.rreg = NULL; + adev->reg.gc_cac.wreg = NULL; + + spin_lock_init(&adev->reg.se_cac.lock); + adev->reg.se_cac.rreg = NULL; + adev->reg.se_cac.wreg = NULL; + + spin_lock_init(&adev->reg.audio_endpt.lock); + adev->reg.audio_endpt.rreg = NULL; + adev->reg.audio_endpt.wreg = NULL; + + spin_lock_init(&adev->reg.pcie.lock); + adev->reg.pcie.rreg = NULL; + adev->reg.pcie.wreg = NULL; + adev->reg.pcie.rreg_ext = NULL; + adev->reg.pcie.wreg_ext = NULL; + adev->reg.pcie.rreg64 = NULL; + adev->reg.pcie.wreg64 = NULL; + adev->reg.pcie.rreg64_ext = NULL; + adev->reg.pcie.wreg64_ext = NULL; + adev->reg.pcie.port_rreg = NULL; + adev->reg.pcie.port_wreg = NULL; +} + +uint32_t amdgpu_reg_smc_rd32(struct amdgpu_device *adev, uint32_t reg) +{ + if (!adev->reg.smc.rreg) { + dev_err_once(adev->dev, "SMC register read not supported\n"); + return 0; + } + return adev->reg.smc.rreg(adev, reg); +} + +void amdgpu_reg_smc_wr32(struct amdgpu_device *adev, uint32_t reg, uint32_t v) +{ + if (!adev->reg.smc.wreg) { + dev_err_once(adev->dev, "SMC register write not supported\n"); + return; + } + adev->reg.smc.wreg(adev, reg, v); +} + +uint32_t amdgpu_reg_uvd_ctx_rd32(struct amdgpu_device *adev, uint32_t reg) +{ + if (!adev->reg.uvd_ctx.rreg) { + 
dev_err_once(adev->dev, + "UVD_CTX register read not supported\n"); + return 0; + } + return adev->reg.uvd_ctx.rreg(adev, reg); +} + +void amdgpu_reg_uvd_ctx_wr32(struct amdgpu_device *adev, uint32_t reg, + uint32_t v) +{ + if (!adev->reg.uvd_ctx.wreg) { + dev_err_once(adev->dev, + "UVD_CTX register write not supported\n"); + return; + } + adev->reg.uvd_ctx.wreg(adev, reg, v); +} + +uint32_t amdgpu_reg_didt_rd32(struct amdgpu_device *adev, uint32_t reg) +{ + if (!adev->reg.didt.rreg) { + dev_err_once(adev->dev, "DIDT register read not supported\n"); + return 0; + } + return adev->reg.didt.rreg(adev, reg); +} + +void amdgpu_reg_didt_wr32(struct amdgpu_device *adev, uint32_t reg, uint32_t v) +{ + if (!adev->reg.didt.wreg) { + dev_err_once(adev->dev, "DIDT register write not supported\n"); + return; + } + adev->reg.didt.wreg(adev, reg, v); +} + +uint32_t amdgpu_reg_gc_cac_rd32(struct amdgpu_device *adev, uint32_t reg) +{ + if (!adev->reg.gc_cac.rreg) { + dev_err_once(adev->dev, "GC_CAC register read not supported\n"); + return 0; + } + return adev->reg.gc_cac.rreg(adev, reg); +} + +void amdgpu_reg_gc_cac_wr32(struct amdgpu_device *adev, uint32_t reg, + uint32_t v) +{ + if (!adev->reg.gc_cac.wreg) { + dev_err_once(adev->dev, + "GC_CAC register write not supported\n"); + return; + } + adev->reg.gc_cac.wreg(adev, reg, v); +} + +uint32_t amdgpu_reg_se_cac_rd32(struct amdgpu_device *adev, uint32_t reg) +{ + if (!adev->reg.se_cac.rreg) { + dev_err_once(adev->dev, "SE_CAC register read not supported\n"); + return 0; + } + return adev->reg.se_cac.rreg(adev, reg); +} + +void amdgpu_reg_se_cac_wr32(struct amdgpu_device *adev, uint32_t reg, + uint32_t v) +{ + if (!adev->reg.se_cac.wreg) { + dev_err_once(adev->dev, + "SE_CAC register write not supported\n"); + return; + } + adev->reg.se_cac.wreg(adev, reg, v); +} + +uint32_t amdgpu_reg_audio_endpt_rd32(struct amdgpu_device *adev, uint32_t block, + uint32_t reg) +{ + if (!adev->reg.audio_endpt.rreg) { + dev_err_once(adev->dev, + 
"AUDIO_ENDPT register read not supported\n"); + return 0; + } + return adev->reg.audio_endpt.rreg(adev, block, reg); +} + +void amdgpu_reg_audio_endpt_wr32(struct amdgpu_device *adev, uint32_t block, + uint32_t reg, uint32_t v) +{ + if (!adev->reg.audio_endpt.wreg) { + dev_err_once(adev->dev, + "AUDIO_ENDPT register write not supported\n"); + return; + } + adev->reg.audio_endpt.wreg(adev, block, reg, v); +} + +uint32_t amdgpu_reg_pcie_rd32(struct amdgpu_device *adev, uint32_t reg) +{ + if (!adev->reg.pcie.rreg) { + dev_err_once(adev->dev, "PCIE register read not supported\n"); + return 0; + } + return adev->reg.pcie.rreg(adev, reg); +} + +void amdgpu_reg_pcie_wr32(struct amdgpu_device *adev, uint32_t reg, uint32_t v) +{ + if (!adev->reg.pcie.wreg) { + dev_err_once(adev->dev, "PCIE register write not supported\n"); + return; + } + adev->reg.pcie.wreg(adev, reg, v); +} + +uint32_t amdgpu_reg_pcie_ext_rd32(struct amdgpu_device *adev, uint64_t reg) +{ + if (!adev->reg.pcie.rreg_ext) { + dev_err_once(adev->dev, "PCIE EXT register read not supported\n"); + return 0; + } + return adev->reg.pcie.rreg_ext(adev, reg); +} + +void amdgpu_reg_pcie_ext_wr32(struct amdgpu_device *adev, uint64_t reg, + uint32_t v) +{ + if (!adev->reg.pcie.wreg_ext) { + dev_err_once(adev->dev, "PCIE EXT register write not supported\n"); + return; + } + adev->reg.pcie.wreg_ext(adev, reg, v); +} + +uint64_t amdgpu_reg_pcie_rd64(struct amdgpu_device *adev, uint32_t reg) +{ + if (!adev->reg.pcie.rreg64) { + dev_err_once(adev->dev, "PCIE 64-bit register read not supported\n"); + return 0; + } + return adev->reg.pcie.rreg64(adev, reg); +} + +void amdgpu_reg_pcie_wr64(struct amdgpu_device *adev, uint32_t reg, uint64_t v) +{ + if (!adev->reg.pcie.wreg64) { + dev_err_once(adev->dev, "PCIE 64-bit register write not supported\n"); + return; + } + adev->reg.pcie.wreg64(adev, reg, v); +} + +uint64_t amdgpu_reg_pcie_ext_rd64(struct amdgpu_device *adev, uint64_t reg) +{ + if (!adev->reg.pcie.rreg64_ext) { + 
dev_err_once(adev->dev, "PCIE EXT 64-bit register read not supported\n"); + return 0; + } + return adev->reg.pcie.rreg64_ext(adev, reg); +} + +void amdgpu_reg_pcie_ext_wr64(struct amdgpu_device *adev, uint64_t reg, + uint64_t v) +{ + if (!adev->reg.pcie.wreg64_ext) { + dev_err_once(adev->dev, "PCIE EXT 64-bit register write not supported\n"); + return; + } + adev->reg.pcie.wreg64_ext(adev, reg, v); +} + +uint32_t amdgpu_reg_pciep_rd32(struct amdgpu_device *adev, uint32_t reg) +{ + if (!adev->reg.pcie.port_rreg) { + dev_err_once(adev->dev, "PCIEP register read not supported\n"); + return 0; + } + return adev->reg.pcie.port_rreg(adev, reg); +} + +void amdgpu_reg_pciep_wr32(struct amdgpu_device *adev, uint32_t reg, uint32_t v) +{ + if (!adev->reg.pcie.port_wreg) { + dev_err_once(adev->dev, "PCIEP register write not supported\n"); + return; + } + adev->reg.pcie.port_wreg(adev, reg, v); +} + +static int amdgpu_reg_get_smn_base_version(struct amdgpu_device *adev) +{ + struct pci_dev *pdev = adev->pdev; + int id; + + if (amdgpu_sriov_vf(adev)) + return -EOPNOTSUPP; + + id = (pdev->device >> 4) & 0xFFFF; + if (id == 0x74A || id == 0x74B || id == 0x75A || id == 0x75B) + return 1; + + return -EOPNOTSUPP; +} + +uint64_t amdgpu_reg_get_smn_base64(struct amdgpu_device *adev, + enum amd_hw_ip_block_type block, + int die_inst) +{ + if (!adev->reg.smn.get_smn_base) { + int version = amdgpu_reg_get_smn_base_version(adev); + switch (version) { + case 1: + return amdgpu_reg_smn_v1_0_get_base(adev, block, + die_inst); + default: + dev_err_once( + adev->dev, + "SMN base address query not supported for this device\n"); + return 0; + } + return 0; + } + return adev->reg.smn.get_smn_base(adev, block, die_inst); +} + +uint64_t amdgpu_reg_smn_v1_0_get_base(struct amdgpu_device *adev, + enum amd_hw_ip_block_type block, + int die_inst) +{ + uint64_t smn_base; + + if (die_inst == 0) + return 0; + + switch (block) { + case XGMI_HWIP: + case NBIO_HWIP: + case MP0_HWIP: + case UMC_HWIP: + case 
DF_HWIP: + smn_base = ((uint64_t)(die_inst & 0x3) << 32) | (1ULL << 34); + break; + default: + dev_warn_once( + adev->dev, + "SMN base address query not supported for this block %d\n", + block); + smn_base = 0; + break; + } + + return smn_base; +} + +/* + * register access helper functions. + */ + +/** + * amdgpu_device_rreg - read a memory mapped IO or indirect register + * + * @adev: amdgpu_device pointer + * @reg: dword aligned register offset + * @acc_flags: access flags which require special behavior + * + * Returns the 32 bit value from the offset specified. + */ +uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg, + uint32_t acc_flags) +{ + uint32_t ret; + + if (amdgpu_device_skip_hw_access(adev)) + return 0; + + if ((reg * 4) < adev->rmmio_size) { + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && + amdgpu_sriov_runtime(adev) && + down_read_trylock(&adev->reset_domain->sem)) { + ret = amdgpu_kiq_rreg(adev, reg, 0); + up_read(&adev->reset_domain->sem); + } else { + ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); + } + } else { + ret = amdgpu_reg_pcie_rd32(adev, reg * 4); + } + + trace_amdgpu_device_rreg(adev->pdev->device, reg, ret); + + return ret; +} + +/* + * MMIO register read with bytes helper functions + * @offset:bytes offset from MMIO start + */ + +/** + * amdgpu_mm_rreg8 - read a memory mapped IO register + * + * @adev: amdgpu_device pointer + * @offset: byte aligned register offset + * + * Returns the 8 bit value from the offset specified. 
+ */ +uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) +{ + if (amdgpu_device_skip_hw_access(adev)) + return 0; + + if (offset < adev->rmmio_size) + return (readb(adev->rmmio + offset)); + BUG(); +} + +/** + * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC + * + * @adev: amdgpu_device pointer + * @reg: dword aligned register offset + * @acc_flags: access flags which require special behavior + * @xcc_id: xcc accelerated compute core id + * + * Returns the 32 bit value from the offset specified. + */ +uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev, uint32_t reg, + uint32_t acc_flags, uint32_t xcc_id) +{ + uint32_t ret, rlcg_flag; + + if (amdgpu_device_skip_hw_access(adev)) + return 0; + + if ((reg * 4) < adev->rmmio_size) { + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_runtime(adev) && + adev->gfx.rlc.rlcg_reg_access_supported && + amdgpu_virt_get_rlcg_reg_access_flag( + adev, acc_flags, GC_HWIP, false, &rlcg_flag)) { + ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, + GET_INST(GC, xcc_id)); + } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && + amdgpu_sriov_runtime(adev) && + down_read_trylock(&adev->reset_domain->sem)) { + ret = amdgpu_kiq_rreg(adev, reg, xcc_id); + up_read(&adev->reset_domain->sem); + } else { + ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); + } + } else { + ret = amdgpu_reg_pcie_rd32(adev, reg * 4); + } + + return ret; +} + +/* + * MMIO register write with bytes helper functions + * @offset:bytes offset from MMIO start + * @value: the value want to be written to the register + */ + +/** + * amdgpu_mm_wreg8 - read a memory mapped IO register + * + * @adev: amdgpu_device pointer + * @offset: byte aligned register offset + * @value: 8 bit value to write + * + * Writes the value specified to the offset specified. 
+ */ +void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) +{ + if (amdgpu_device_skip_hw_access(adev)) + return; + + if (offset < adev->rmmio_size) + writeb(value, adev->rmmio + offset); + else + BUG(); +} + +/** + * amdgpu_device_wreg - write to a memory mapped IO or indirect register + * + * @adev: amdgpu_device pointer + * @reg: dword aligned register offset + * @v: 32 bit value to write to the register + * @acc_flags: access flags which require special behavior + * + * Writes the value specified to the offset specified. + */ +void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, + uint32_t acc_flags) +{ + if (amdgpu_device_skip_hw_access(adev)) + return; + + if ((reg * 4) < adev->rmmio_size) { + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && + amdgpu_sriov_runtime(adev) && + down_read_trylock(&adev->reset_domain->sem)) { + amdgpu_kiq_wreg(adev, reg, v, 0); + up_read(&adev->reset_domain->sem); + } else { + writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); + } + } else { + amdgpu_reg_pcie_wr32(adev, reg * 4, v); + } + + trace_amdgpu_device_wreg(adev->pdev->device, reg, v); +} + +/** + * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range + * + * @adev: amdgpu_device pointer + * @reg: mmio/rlc register + * @v: value to write + * @xcc_id: xcc accelerated compute core id + * + * this function is invoked only for the debugfs register access + */ +void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, + uint32_t v, uint32_t xcc_id) +{ + if (amdgpu_device_skip_hw_access(adev)) + return; + + if (amdgpu_sriov_fullaccess(adev) && adev->gfx.rlc.funcs && + adev->gfx.rlc.funcs->is_rlcg_access_range) { + if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg)) + return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id); + } else if ((reg * 4) >= adev->rmmio_size) { + amdgpu_reg_pcie_wr32(adev, reg * 4, v); + } else { + writel(v, ((void __iomem *)adev->rmmio) + 
(reg * 4)); + } +} + +/** + * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC + * + * @adev: amdgpu_device pointer + * @reg: dword aligned register offset + * @v: 32 bit value to write to the register + * @acc_flags: access flags which require special behavior + * @xcc_id: xcc accelerated compute core id + * + * Writes the value specified to the offset specified. + */ +void amdgpu_device_xcc_wreg(struct amdgpu_device *adev, uint32_t reg, + uint32_t v, uint32_t acc_flags, uint32_t xcc_id) +{ + uint32_t rlcg_flag; + + if (amdgpu_device_skip_hw_access(adev)) + return; + + if ((reg * 4) < adev->rmmio_size) { + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_runtime(adev) && + adev->gfx.rlc.rlcg_reg_access_supported && + amdgpu_virt_get_rlcg_reg_access_flag( + adev, acc_flags, GC_HWIP, true, &rlcg_flag)) { + amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, + GET_INST(GC, xcc_id)); + } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && + amdgpu_sriov_runtime(adev) && + down_read_trylock(&adev->reset_domain->sem)) { + amdgpu_kiq_wreg(adev, reg, v, xcc_id); + up_read(&adev->reset_domain->sem); + } else { + writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); + } + } else { + amdgpu_reg_pcie_wr32(adev, reg * 4, v); + } +} + +/** + * amdgpu_device_indirect_rreg - read an indirect register + * + * @adev: amdgpu_device pointer + * @reg_addr: indirect register address to read from + * + * Returns the value of indirect register @reg_addr + */ +u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev, u32 reg_addr) +{ + unsigned long flags, pcie_index, pcie_data; + void __iomem *pcie_index_offset; + void __iomem *pcie_data_offset; + u32 r; + + pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); + pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->reg.pcie.lock, flags); + pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; + pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; 
+ + writel(reg_addr, pcie_index_offset); + readl(pcie_index_offset); + r = readl(pcie_data_offset); + spin_unlock_irqrestore(&adev->reg.pcie.lock, flags); + + return r; +} + +u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev, u64 reg_addr) +{ + unsigned long flags, pcie_index, pcie_index_hi, pcie_data; + u32 r; + void __iomem *pcie_index_offset; + void __iomem *pcie_index_hi_offset; + void __iomem *pcie_data_offset; + + if (unlikely(!adev->nbio.funcs)) { + pcie_index = AMDGPU_PCIE_INDEX_FALLBACK; + pcie_data = AMDGPU_PCIE_DATA_FALLBACK; + } else { + pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); + pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); + } + + if (reg_addr >> 32) { + if (unlikely(!adev->nbio.funcs)) + pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK; + else + pcie_index_hi = + adev->nbio.funcs->get_pcie_index_hi_offset( + adev); + } else { + pcie_index_hi = 0; + } + + spin_lock_irqsave(&adev->reg.pcie.lock, flags); + pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; + pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; + if (pcie_index_hi != 0) + pcie_index_hi_offset = + (void __iomem *)adev->rmmio + pcie_index_hi * 4; + + writel(reg_addr, pcie_index_offset); + readl(pcie_index_offset); + if (pcie_index_hi != 0) { + writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset); + readl(pcie_index_hi_offset); + } + r = readl(pcie_data_offset); + + /* clear the high bits */ + if (pcie_index_hi != 0) { + writel(0, pcie_index_hi_offset); + readl(pcie_index_hi_offset); + } + + spin_unlock_irqrestore(&adev->reg.pcie.lock, flags); + + return r; +} + +/** + * amdgpu_device_indirect_rreg64 - read a 64bits indirect register + * + * @adev: amdgpu_device pointer + * @reg_addr: indirect register address to read from + * + * Returns the value of indirect register @reg_addr + */ +u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev, u32 reg_addr) +{ + unsigned long flags, pcie_index, pcie_data; + void __iomem 
*pcie_index_offset; + void __iomem *pcie_data_offset; + u64 r; + + pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); + pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->reg.pcie.lock, flags); + pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; + pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; + + /* read low 32 bits */ + writel(reg_addr, pcie_index_offset); + readl(pcie_index_offset); + r = readl(pcie_data_offset); + /* read high 32 bits */ + writel(reg_addr + 4, pcie_index_offset); + readl(pcie_index_offset); + r |= ((u64)readl(pcie_data_offset) << 32); + spin_unlock_irqrestore(&adev->reg.pcie.lock, flags); + + return r; +} + +u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev, u64 reg_addr) +{ + unsigned long flags, pcie_index, pcie_data; + unsigned long pcie_index_hi = 0; + void __iomem *pcie_index_offset; + void __iomem *pcie_index_hi_offset; + void __iomem *pcie_data_offset; + u64 r; + + pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); + pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); + if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset)) + pcie_index_hi = + adev->nbio.funcs->get_pcie_index_hi_offset(adev); + + spin_lock_irqsave(&adev->reg.pcie.lock, flags); + pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; + pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; + if (pcie_index_hi != 0) + pcie_index_hi_offset = + (void __iomem *)adev->rmmio + pcie_index_hi * 4; + + /* read low 32 bits */ + writel(reg_addr, pcie_index_offset); + readl(pcie_index_offset); + if (pcie_index_hi != 0) { + writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset); + readl(pcie_index_hi_offset); + } + r = readl(pcie_data_offset); + /* read high 32 bits */ + writel(reg_addr + 4, pcie_index_offset); + readl(pcie_index_offset); + if (pcie_index_hi != 0) { + writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset); + 
readl(pcie_index_hi_offset); + } + r |= ((u64)readl(pcie_data_offset) << 32); + + /* clear the high bits */ + if (pcie_index_hi != 0) { + writel(0, pcie_index_hi_offset); + readl(pcie_index_hi_offset); + } + + spin_unlock_irqrestore(&adev->reg.pcie.lock, flags); + + return r; +} + +/** + * amdgpu_device_indirect_wreg - write an indirect register address + * + * @adev: amdgpu_device pointer + * @reg_addr: indirect register offset + * @reg_data: indirect register data + * + */ +void amdgpu_device_indirect_wreg(struct amdgpu_device *adev, u32 reg_addr, + u32 reg_data) +{ + unsigned long flags, pcie_index, pcie_data; + void __iomem *pcie_index_offset; + void __iomem *pcie_data_offset; + + pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); + pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->reg.pcie.lock, flags); + pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; + pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; + + writel(reg_addr, pcie_index_offset); + readl(pcie_index_offset); + writel(reg_data, pcie_data_offset); + readl(pcie_data_offset); + spin_unlock_irqrestore(&adev->reg.pcie.lock, flags); +} + +void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev, u64 reg_addr, + u32 reg_data) +{ + unsigned long flags, pcie_index, pcie_index_hi, pcie_data; + void __iomem *pcie_index_offset; + void __iomem *pcie_index_hi_offset; + void __iomem *pcie_data_offset; + + pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); + pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); + if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset)) + pcie_index_hi = + adev->nbio.funcs->get_pcie_index_hi_offset(adev); + else + pcie_index_hi = 0; + + spin_lock_irqsave(&adev->reg.pcie.lock, flags); + pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; + pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; + if (pcie_index_hi != 0) + pcie_index_hi_offset = + (void 
__iomem *)adev->rmmio + pcie_index_hi * 4; + + writel(reg_addr, pcie_index_offset); + readl(pcie_index_offset); + if (pcie_index_hi != 0) { + writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset); + readl(pcie_index_hi_offset); + } + writel(reg_data, pcie_data_offset); + readl(pcie_data_offset); + + /* clear the high bits */ + if (pcie_index_hi != 0) { + writel(0, pcie_index_hi_offset); + readl(pcie_index_hi_offset); + } + + spin_unlock_irqrestore(&adev->reg.pcie.lock, flags); +} + +/** + * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address + * + * @adev: amdgpu_device pointer + * @reg_addr: indirect register offset + * @reg_data: indirect register data + * + */ +void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev, u32 reg_addr, + u64 reg_data) +{ + unsigned long flags, pcie_index, pcie_data; + void __iomem *pcie_index_offset; + void __iomem *pcie_data_offset; + + pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); + pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->reg.pcie.lock, flags); + pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; + pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; + + /* write low 32 bits */ + writel(reg_addr, pcie_index_offset); + readl(pcie_index_offset); + writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset); + readl(pcie_data_offset); + /* write high 32 bits */ + writel(reg_addr + 4, pcie_index_offset); + readl(pcie_index_offset); + writel((u32)(reg_data >> 32), pcie_data_offset); + readl(pcie_data_offset); + spin_unlock_irqrestore(&adev->reg.pcie.lock, flags); +} + +void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev, u64 reg_addr, + u64 reg_data) +{ + unsigned long flags, pcie_index, pcie_data; + unsigned long pcie_index_hi = 0; + void __iomem *pcie_index_offset; + void __iomem *pcie_index_hi_offset; + void __iomem *pcie_data_offset; + + pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); + pcie_data 
= adev->nbio.funcs->get_pcie_data_offset(adev); + if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset)) + pcie_index_hi = + adev->nbio.funcs->get_pcie_index_hi_offset(adev); + + spin_lock_irqsave(&adev->reg.pcie.lock, flags); + pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; + pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; + if (pcie_index_hi != 0) + pcie_index_hi_offset = + (void __iomem *)adev->rmmio + pcie_index_hi * 4; + + /* write low 32 bits */ + writel(reg_addr, pcie_index_offset); + readl(pcie_index_offset); + if (pcie_index_hi != 0) { + writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset); + readl(pcie_index_hi_offset); + } + writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset); + readl(pcie_data_offset); + /* write high 32 bits */ + writel(reg_addr + 4, pcie_index_offset); + readl(pcie_index_offset); + if (pcie_index_hi != 0) { + writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset); + readl(pcie_index_hi_offset); + } + writel((u32)(reg_data >> 32), pcie_data_offset); + readl(pcie_data_offset); + + /* clear the high bits */ + if (pcie_index_hi != 0) { + writel(0, pcie_index_hi_offset); + readl(pcie_index_hi_offset); + } + + spin_unlock_irqrestore(&adev->reg.pcie.lock, flags); +} + +u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev, u32 reg) +{ + unsigned long flags, address, data; + u32 r; + + address = adev->nbio.funcs->get_pcie_port_index_offset(adev); + data = adev->nbio.funcs->get_pcie_port_data_offset(adev); + + spin_lock_irqsave(&adev->reg.pcie.lock, flags); + WREG32(address, reg * 4); + (void)RREG32(address); + r = RREG32(data); + spin_unlock_irqrestore(&adev->reg.pcie.lock, flags); + return r; +} + +void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev, u32 reg, u32 v) +{ + unsigned long flags, address, data; + + address = adev->nbio.funcs->get_pcie_port_index_offset(adev); + data = adev->nbio.funcs->get_pcie_port_data_offset(adev); + + 
spin_lock_irqsave(&adev->reg.pcie.lock, flags); + WREG32(address, reg * 4); + (void)RREG32(address); + WREG32(data, v); + (void)RREG32(data); + spin_unlock_irqrestore(&adev->reg.pcie.lock, flags); +} + +uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev, uint32_t inst, + uint32_t reg_addr, char reg_name[], + uint32_t expected_value, uint32_t mask) +{ + uint32_t ret = 0; + uint32_t old_ = 0; + uint32_t tmp_ = RREG32(reg_addr); + uint32_t loop = adev->usec_timeout; + + while ((tmp_ & (mask)) != (expected_value)) { + if (old_ != tmp_) { + loop = adev->usec_timeout; + old_ = tmp_; + } else + udelay(1); + tmp_ = RREG32(reg_addr); + loop--; + if (!loop) { + dev_warn( + adev->dev, + "Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn", + inst, reg_name, (uint32_t)expected_value, + (uint32_t)(tmp_ & (mask))); + ret = -ETIMEDOUT; + break; + } + } + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reg_access.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reg_access.h new file mode 100644 index 000000000000..4d88e5cd19fc --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reg_access.h @@ -0,0 +1,163 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef __AMDGPU_REG_ACCESS_H__ +#define __AMDGPU_REG_ACCESS_H__ + +#include <linux/types.h> +#include <linux/spinlock.h> + +#include "amdgpu_ip.h" + +struct amdgpu_device; + +typedef uint32_t (*amdgpu_rreg_t)(struct amdgpu_device *, uint32_t); +typedef void (*amdgpu_wreg_t)(struct amdgpu_device *, uint32_t, uint32_t); +typedef uint32_t (*amdgpu_rreg_ext_t)(struct amdgpu_device *, uint64_t); +typedef void (*amdgpu_wreg_ext_t)(struct amdgpu_device *, uint64_t, uint32_t); +typedef uint64_t (*amdgpu_rreg64_t)(struct amdgpu_device *, uint32_t); +typedef void (*amdgpu_wreg64_t)(struct amdgpu_device *, uint32_t, uint64_t); +typedef uint64_t (*amdgpu_rreg64_ext_t)(struct amdgpu_device *, uint64_t); +typedef void (*amdgpu_wreg64_ext_t)(struct amdgpu_device *, uint64_t, uint64_t); + +typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device *, uint32_t, + uint32_t); +typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device *, uint32_t, uint32_t, + uint32_t); +typedef uint64_t (*amdgpu_reg_get_smn_base64_t)(struct amdgpu_device *adev, + enum amd_hw_ip_block_type block, + int die_inst); + +struct amdgpu_reg_ind { + spinlock_t lock; + amdgpu_rreg_t rreg; + amdgpu_wreg_t wreg; +}; + +struct amdgpu_reg_ind_blk { + spinlock_t lock; + amdgpu_block_rreg_t rreg; + amdgpu_block_wreg_t wreg; +}; + +struct amdgpu_reg_pcie_ind { + spinlock_t lock; + amdgpu_rreg_t rreg; + amdgpu_wreg_t wreg; + amdgpu_rreg_ext_t rreg_ext; + amdgpu_wreg_ext_t wreg_ext; + amdgpu_rreg64_t rreg64; + 
amdgpu_wreg64_t wreg64; + amdgpu_rreg64_ext_t rreg64_ext; + amdgpu_wreg64_ext_t wreg64_ext; + amdgpu_rreg_t port_rreg; + amdgpu_wreg_t port_wreg; +}; + +struct amdgpu_reg_smn_ext { + amdgpu_reg_get_smn_base64_t get_smn_base; +}; + +struct amdgpu_reg_access { + struct amdgpu_reg_ind smc; + struct amdgpu_reg_ind uvd_ctx; + struct amdgpu_reg_ind didt; + struct amdgpu_reg_ind gc_cac; + struct amdgpu_reg_ind se_cac; + struct amdgpu_reg_ind_blk audio_endpt; + struct amdgpu_reg_pcie_ind pcie; + struct amdgpu_reg_smn_ext smn; +}; + +void amdgpu_reg_access_init(struct amdgpu_device *adev); +uint32_t amdgpu_reg_smc_rd32(struct amdgpu_device *adev, uint32_t reg); +void amdgpu_reg_smc_wr32(struct amdgpu_device *adev, uint32_t reg, uint32_t v); +uint32_t amdgpu_reg_uvd_ctx_rd32(struct amdgpu_device *adev, uint32_t reg); +void amdgpu_reg_uvd_ctx_wr32(struct amdgpu_device *adev, uint32_t reg, uint32_t v); +uint32_t amdgpu_reg_didt_rd32(struct amdgpu_device *adev, uint32_t reg); +void amdgpu_reg_didt_wr32(struct amdgpu_device *adev, uint32_t reg, uint32_t v); +uint32_t amdgpu_reg_gc_cac_rd32(struct amdgpu_device *adev, uint32_t reg); +void amdgpu_reg_gc_cac_wr32(struct amdgpu_device *adev, uint32_t reg, + uint32_t v); +uint32_t amdgpu_reg_se_cac_rd32(struct amdgpu_device *adev, uint32_t reg); +void amdgpu_reg_se_cac_wr32(struct amdgpu_device *adev, uint32_t reg, + uint32_t v); +uint32_t amdgpu_reg_audio_endpt_rd32(struct amdgpu_device *adev, uint32_t block, + uint32_t reg); +void amdgpu_reg_audio_endpt_wr32(struct amdgpu_device *adev, uint32_t block, + uint32_t reg, uint32_t v); +uint32_t amdgpu_reg_pcie_rd32(struct amdgpu_device *adev, uint32_t reg); +void amdgpu_reg_pcie_wr32(struct amdgpu_device *adev, uint32_t reg, uint32_t v); +uint32_t amdgpu_reg_pcie_ext_rd32(struct amdgpu_device *adev, uint64_t reg); +void amdgpu_reg_pcie_ext_wr32(struct amdgpu_device *adev, uint64_t reg, + uint32_t v); +uint64_t amdgpu_reg_pcie_rd64(struct amdgpu_device *adev, uint32_t reg); +void 
amdgpu_reg_pcie_wr64(struct amdgpu_device *adev, uint32_t reg, uint64_t v); +uint64_t amdgpu_reg_pcie_ext_rd64(struct amdgpu_device *adev, uint64_t reg); +void amdgpu_reg_pcie_ext_wr64(struct amdgpu_device *adev, uint64_t reg, + uint64_t v); +uint32_t amdgpu_reg_pciep_rd32(struct amdgpu_device *adev, uint32_t reg); +void amdgpu_reg_pciep_wr32(struct amdgpu_device *adev, uint32_t reg, + uint32_t v); +uint64_t amdgpu_reg_get_smn_base64(struct amdgpu_device *adev, + enum amd_hw_ip_block_type block, + int die_inst); +uint64_t amdgpu_reg_smn_v1_0_get_base(struct amdgpu_device *adev, + enum amd_hw_ip_block_type block, + int die_inst); + +uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg, + uint32_t acc_flags); +uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev, uint32_t reg, + uint32_t acc_flags, uint32_t xcc_id); +void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, + uint32_t acc_flags); +void amdgpu_device_xcc_wreg(struct amdgpu_device *adev, uint32_t reg, + uint32_t v, uint32_t acc_flags, uint32_t xcc_id); +void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, + uint32_t v, uint32_t xcc_id); +void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, + uint8_t value); +uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset); + +u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev, u32 reg_addr); +u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev, u64 reg_addr); +u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev, u32 reg_addr); +u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev, u64 reg_addr); +void amdgpu_device_indirect_wreg(struct amdgpu_device *adev, u32 reg_addr, + u32 reg_data); +void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev, u64 reg_addr, + u32 reg_data); +void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev, u32 reg_addr, + u64 reg_data); +void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device 
*adev, u64 reg_addr, + u64 reg_data); + +u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev, u32 reg); +void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev, u32 reg, u32 v); + +uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev, uint32_t inst, + uint32_t reg_addr, char reg_name[], + uint32_t expected_value, uint32_t mask); + +#endif /* __AMDGPU_REG_ACCESS_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 7c047f5a1549..66e8a2f7afcf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -90,10 +90,13 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw) ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask; /* Make sure we aren't trying to allocate more space - * than the maximum for one submission + * than the maximum for one submission. Skip for reemit + * since we may be reemitting several submissions. */ - if (WARN_ON_ONCE(ndw > ring->max_dw)) - return -ENOMEM; + if (!ring->reemit) { + if (WARN_ON_ONCE(ndw > ring->max_dw)) + return -ENOMEM; + } ring->count_dw = ndw; ring->wptr_old = ring->wptr; @@ -105,29 +108,6 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw) } /** - * amdgpu_ring_alloc_reemit - allocate space on the ring buffer for reemit - * - * @ring: amdgpu_ring structure holding ring information - * @ndw: number of dwords to allocate in the ring buffer - * - * Allocate @ndw dwords in the ring buffer (all asics). - * doesn't check the max_dw limit as we may be reemitting - * several submissions. 
- */ -static void amdgpu_ring_alloc_reemit(struct amdgpu_ring *ring, unsigned int ndw) -{ - /* Align requested size with padding so unlock_commit can - * pad safely */ - ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask; - - ring->count_dw = ndw; - ring->wptr_old = ring->wptr; - - if (ring->funcs->begin_use) - ring->funcs->begin_use(ring); -} - -/** * amdgpu_ring_insert_nop - insert NOP packets * * @ring: amdgpu_ring structure holding ring information @@ -875,7 +855,6 @@ void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring, int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring, struct amdgpu_fence *guilty_fence) { - unsigned int i; int r; /* verify that the ring is functional */ @@ -883,16 +862,9 @@ int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring, if (r) return r; - /* set an error on all fences from the context */ - if (guilty_fence) - amdgpu_fence_driver_update_timedout_fence_state(guilty_fence); - /* Re-emit the non-guilty commands */ - if (ring->ring_backup_entries_to_copy) { - amdgpu_ring_alloc_reemit(ring, ring->ring_backup_entries_to_copy); - for (i = 0; i < ring->ring_backup_entries_to_copy; i++) - amdgpu_ring_write(ring, ring->ring_backup[i]); - amdgpu_ring_commit(ring); - } + /* set an error on all fences from the context and reemit */ + amdgpu_ring_set_fence_errors_and_reemit(ring, guilty_fence); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index cb0fb1a989d2..ce5af137ee40 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -121,7 +121,6 @@ struct amdgpu_fence_driver { /* sync_seq is protected by ring emission lock */ uint32_t sync_seq; atomic_t last_seq; - u64 signalled_wptr; bool initialized; struct amdgpu_irq_src *irq_src; unsigned irq_type; @@ -146,23 +145,23 @@ struct amdgpu_fence { struct amdgpu_ring *ring; ktime_t start_timestamp; - /* wptr for the total submission for resets */ - u64 wptr; + /* 
location and size of the IB */ + u64 ib_wptr; + unsigned int ib_dw_size; + unsigned int skip_ib_dw_start_offset; + unsigned int skip_ib_dw_end_offset; /* fence context for resets */ u64 context; - /* has this fence been reemitted */ - unsigned int reemitted; - /* wptr for the fence for the submission */ - u64 fence_wptr_start; - u64 fence_wptr_end; + /* idx for ring backups */ + unsigned int backup_idx; }; extern const struct drm_sched_backend_ops amdgpu_sched_ops; void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error); void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring); -void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af); -void amdgpu_fence_save_wptr(struct amdgpu_fence *af); +void amdgpu_ring_set_fence_errors_and_reemit(struct amdgpu_ring *ring, + struct amdgpu_fence *guilty_fence); int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring); int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, @@ -172,8 +171,8 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev); void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev); int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev); void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev); -int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af, - unsigned int flags); +void amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af, + unsigned int flags); int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s, uint32_t timeout); bool amdgpu_fence_process(struct amdgpu_ring *ring); @@ -313,6 +312,7 @@ struct amdgpu_ring { /* backups for resets */ uint32_t *ring_backup; unsigned int ring_backup_entries_to_copy; + bool reemit; unsigned rptr_offs; u64 rptr_gpu_addr; u32 *rptr_cpu_addr; @@ -522,6 +522,17 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, ring->count_dw -= count_dw; } +static inline unsigned int amdgpu_ring_get_dw_distance(struct amdgpu_ring *ring, + u64 
start_wptr, u64 end_wptr) +{ + unsigned int start = start_wptr & ring->buf_mask; + unsigned int end = end_wptr & ring->buf_mask; + + if (end < start) + end += ring->ring_size >> 2; + return end - start; +} + /** * amdgpu_ring_patch_cond_exec - patch dw count of conditional execute * @ring: amdgpu_ring structure @@ -532,18 +543,13 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, static inline void amdgpu_ring_patch_cond_exec(struct amdgpu_ring *ring, unsigned int offset) { - unsigned cur; - if (!ring->funcs->init_cond_exec) return; WARN_ON(offset > ring->buf_mask); WARN_ON(ring->ring[offset] != 0); - cur = (ring->wptr - 1) & ring->buf_mask; - if (cur < offset) - cur += ring->ring_size >> 2; - ring->ring[offset] = cur - offset; + ring->ring[offset] = amdgpu_ring_get_dw_distance(ring, offset, ring->wptr - 1); } int amdgpu_ring_test_helper(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index eeaa56c8d129..b4ab309bf08a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -163,7 +163,8 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, } static struct dma_fence * -amdgpu_ttm_job_submit(struct amdgpu_device *adev, struct amdgpu_job *job, u32 num_dw) +amdgpu_ttm_job_submit(struct amdgpu_device *adev, struct amdgpu_ttm_buffer_entity *entity, + struct amdgpu_job *job, u32 num_dw) { struct amdgpu_ring *ring; @@ -171,6 +172,8 @@ amdgpu_ttm_job_submit(struct amdgpu_device *adev, struct amdgpu_job *job, u32 nu amdgpu_ring_pad_ib(ring, &job->ibs[0]); WARN_ON(job->ibs[0].length_dw > num_dw); + lockdep_assert_held(&entity->lock); + return amdgpu_job_submit(job); } @@ -228,9 +231,7 @@ static int amdgpu_ttm_map_buffer(struct amdgpu_ttm_buffer_entity *entity, *size = min(*size, (uint64_t)num_pages * PAGE_SIZE - offset); - *addr = adev->gmc.gart_start; - *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE * - 
AMDGPU_GPU_PAGE_SIZE; + *addr = amdgpu_compute_gart_address(&adev->gmc, entity, window); *addr += offset; num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); @@ -248,7 +249,7 @@ static int amdgpu_ttm_map_buffer(struct amdgpu_ttm_buffer_entity *entity, src_addr += job->ibs[0].gpu_addr; dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo); - dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8; + dst_addr += (entity->gart_window_offs[window] >> AMDGPU_GPU_PAGE_SHIFT) * 8; amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, num_bytes, 0); @@ -269,7 +270,7 @@ static int amdgpu_ttm_map_buffer(struct amdgpu_ttm_buffer_entity *entity, amdgpu_gart_map_vram_range(adev, pa, 0, num_pages, flags, cpu_addr); } - dma_fence_put(amdgpu_ttm_job_submit(adev, job, num_dw)); + dma_fence_put(amdgpu_ttm_job_submit(adev, entity, job, num_dw)); return 0; } @@ -313,7 +314,7 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, amdgpu_res_first(src->mem, src->offset, size, &src_mm); amdgpu_res_first(dst->mem, dst->offset, size, &dst_mm); - mutex_lock(&adev->mman.gtt_window_lock); + mutex_lock(&entity->lock); while (src_mm.remaining) { uint64_t from, to, cur_size, tiling_flags; uint32_t num_type, data_format, max_com, write_compress_disable; @@ -368,7 +369,7 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, amdgpu_res_next(&dst_mm, cur_size); } error: - mutex_unlock(&adev->mman.gtt_window_lock); + mutex_unlock(&entity->lock); *f = fence; return r; } @@ -1580,7 +1581,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo, if (r) goto out; - mutex_lock(&adev->mman.gtt_window_lock); + mutex_lock(&adev->mman.default_entity.lock); amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm); src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) + src_mm.start; @@ -1591,8 +1592,8 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo, amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, 
PAGE_SIZE, 0); - fence = amdgpu_ttm_job_submit(adev, job, num_dw); - mutex_unlock(&adev->mman.gtt_window_lock); + fence = amdgpu_ttm_job_submit(adev, &adev->mman.default_entity, job, num_dw); + mutex_unlock(&adev->mman.default_entity.lock); if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout)) r = -ETIMEDOUT; @@ -1908,7 +1909,7 @@ static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev) } /** - * amdgpu_ttm_mmio_remap_bo_init - Allocate the singleton MMIO_REMAP BO + * amdgpu_ttm_alloc_mmio_remap_bo - Allocate the singleton MMIO_REMAP BO * @adev: amdgpu device * * Allocates a global BO with backing AMDGPU_PL_MMIO_REMAP when the @@ -2013,6 +2014,50 @@ static void amdgpu_ttm_free_mmio_remap_bo(struct amdgpu_device *adev) adev->rmmio_remap.bo = NULL; } +/* + * amdgpu_ttm_buffer_entity_init - set up a TTM buffer entity + * + * Initializes the scheduler entity and the entity lock, then reserves + * num_gart_windows * AMDGPU_GTT_MAX_TRANSFER_SIZE pages from @mgr and records + * the byte offset of each window in entity->gart_window_offs[]. + * + * Returns 0 on success or a negative error code; the scheduler entity is not + * left initialized on failure. + */ +static int amdgpu_ttm_buffer_entity_init(struct amdgpu_gtt_mgr *mgr, + struct amdgpu_ttm_buffer_entity *entity, + enum drm_sched_priority prio, + struct drm_gpu_scheduler **scheds, + int num_schedulers, + u32 num_gart_windows) +{ + int i, r, num_pages; + + /* Validate first so this error path has nothing to unwind. */ + if (ARRAY_SIZE(entity->gart_window_offs) < num_gart_windows) + return -EINVAL; + + r = drm_sched_entity_init(&entity->base, prio, scheds, num_schedulers, NULL); + if (r) + return r; + + mutex_init(&entity->lock); + + if (num_gart_windows == 0) + return 0; + + num_pages = num_gart_windows * AMDGPU_GTT_MAX_TRANSFER_SIZE; + r = amdgpu_gtt_mgr_alloc_entries(mgr, &entity->gart_node, num_pages, + DRM_MM_INSERT_BEST); + if (r) { + drm_sched_entity_destroy(&entity->base); + return r; + } + + for (i = 0; i < num_gart_windows; i++) { + entity->gart_window_offs[i] = + amdgpu_gtt_node_to_byte_offset(&entity->gart_node) + + i * AMDGPU_GTT_MAX_TRANSFER_SIZE * PAGE_SIZE; + } + + return 0; +} + +static void amdgpu_ttm_buffer_entity_fini(struct amdgpu_gtt_mgr *mgr, + struct amdgpu_ttm_buffer_entity *entity) +{ + amdgpu_gtt_mgr_free_entries(mgr, &entity->gart_node); + drm_sched_entity_destroy(&entity->base); +} + /* * amdgpu_ttm_init - Init the memory management (ttm) as
well as various * gtt/vram related fields. @@ -2027,8 +2072,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) uint64_t gtt_size; int r; - mutex_init(&adev->mman.gtt_window_lock); - dma_set_max_seg_size(adev->dev, UINT_MAX); /* No others user of address space so set it to 0 */ r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev, @@ -2313,42 +2356,49 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) struct amdgpu_ring *ring; struct drm_gpu_scheduler *sched; + if (!adev->mman.buffer_funcs_ring || !adev->mman.buffer_funcs_ring->sched.ready) { + dev_warn(adev->dev, "Not enabling DMA transfers for in kernel use"); + return; + } + ring = adev->mman.buffer_funcs_ring; sched = &ring->sched; - r = drm_sched_entity_init(&adev->mman.default_entity.base, - DRM_SCHED_PRIORITY_KERNEL, &sched, - 1, NULL); - if (r) { + r = amdgpu_ttm_buffer_entity_init(&adev->mman.gtt_mgr, + &adev->mman.default_entity, + DRM_SCHED_PRIORITY_KERNEL, + &sched, 1, 0); + if (r < 0) { dev_err(adev->dev, - "Failed setting up TTM BO move entity (%d)\n", - r); + "Failed setting up TTM entity (%d)\n", r); return; } - r = drm_sched_entity_init(&adev->mman.clear_entity.base, - DRM_SCHED_PRIORITY_NORMAL, &sched, - 1, NULL); - if (r) { + r = amdgpu_ttm_buffer_entity_init(&adev->mman.gtt_mgr, + &adev->mman.clear_entity, + DRM_SCHED_PRIORITY_NORMAL, + &sched, 1, 1); + if (r < 0) { dev_err(adev->dev, - "Failed setting up TTM BO clear entity (%d)\n", - r); - goto error_free_entity; + "Failed setting up TTM BO clear entity (%d)\n", r); + goto error_free_default_entity; } - r = drm_sched_entity_init(&adev->mman.move_entity.base, - DRM_SCHED_PRIORITY_NORMAL, &sched, - 1, NULL); - if (r) { + r = amdgpu_ttm_buffer_entity_init(&adev->mman.gtt_mgr, + &adev->mman.move_entity, + DRM_SCHED_PRIORITY_NORMAL, + &sched, 1, 2); + if (r < 0) { dev_err(adev->dev, - "Failed setting up TTM BO move entity (%d)\n", - r); - drm_sched_entity_destroy(&adev->mman.clear_entity.base); - goto 
error_free_entity; + "Failed setting up TTM BO move entity (%d)\n", r); + goto error_free_clear_entity; } } else { - drm_sched_entity_destroy(&adev->mman.default_entity.base); - drm_sched_entity_destroy(&adev->mman.clear_entity.base); - drm_sched_entity_destroy(&adev->mman.move_entity.base); + amdgpu_ttm_buffer_entity_fini(&adev->mman.gtt_mgr, + &adev->mman.default_entity); + amdgpu_ttm_buffer_entity_fini(&adev->mman.gtt_mgr, + &adev->mman.clear_entity); + amdgpu_ttm_buffer_entity_fini(&adev->mman.gtt_mgr, + &adev->mman.move_entity); /* Drop all the old fences since re-creating the scheduler entities * will allocate new contexts. */ @@ -2365,8 +2415,12 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) return; -error_free_entity: - drm_sched_entity_destroy(&adev->mman.default_entity.base); +error_free_clear_entity: + amdgpu_ttm_buffer_entity_fini(&adev->mman.gtt_mgr, + &adev->mman.clear_entity); +error_free_default_entity: + amdgpu_ttm_buffer_entity_fini(&adev->mman.gtt_mgr, + &adev->mman.default_entity); } static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, @@ -2440,7 +2494,7 @@ int amdgpu_copy_buffer(struct amdgpu_device *adev, byte_count -= cur_size_in_bytes; } - *fence = amdgpu_ttm_job_submit(adev, job, num_dw); + *fence = amdgpu_ttm_job_submit(adev, entity, job, num_dw); return 0; @@ -2483,7 +2537,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_device *adev, byte_count -= cur_size; } - *fence = amdgpu_ttm_job_submit(adev, job, num_dw); + *fence = amdgpu_ttm_job_submit(adev, entity, job, num_dw); return 0; } @@ -2503,6 +2557,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, struct dma_fence **fence) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct amdgpu_ttm_buffer_entity *entity; struct amdgpu_res_cursor cursor; u64 addr; int r = 0; @@ -2513,11 +2568,12 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, if (!fence) return -EINVAL; + entity = &adev->mman.clear_entity; *fence = 
dma_fence_get_stub(); amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor); - mutex_lock(&adev->mman.gtt_window_lock); + mutex_lock(&entity->lock); while (cursor.remaining) { struct dma_fence *next = NULL; u64 size; @@ -2530,13 +2586,12 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, /* Never clear more than 256MiB at once to avoid timeouts */ size = min(cursor.size, 256ULL << 20); - r = amdgpu_ttm_map_buffer(&adev->mman.clear_entity, - &bo->tbo, bo->tbo.resource, &cursor, - 1, false, &size, &addr); + r = amdgpu_ttm_map_buffer(entity, &bo->tbo, bo->tbo.resource, &cursor, + 0, false, &size, &addr); if (r) goto err; - r = amdgpu_ttm_fill_mem(adev, &adev->mman.clear_entity, 0, addr, size, resv, + r = amdgpu_ttm_fill_mem(adev, entity, 0, addr, size, resv, &next, true, AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER); if (r) @@ -2548,7 +2603,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, amdgpu_res_next(&cursor, size); } err: - mutex_unlock(&adev->mman.gtt_window_lock); + mutex_unlock(&entity->lock); return r; } @@ -2573,7 +2628,7 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity, amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &dst); - mutex_lock(&adev->mman.gtt_window_lock); + mutex_lock(&entity->lock); while (dst.remaining) { struct dma_fence *next; uint64_t cur_size, to; @@ -2582,7 +2637,7 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity, cur_size = min(dst.size, 256ULL << 20); r = amdgpu_ttm_map_buffer(entity, &bo->tbo, bo->tbo.resource, &dst, - 1, false, &cur_size, &to); + 0, false, &cur_size, &to); if (r) goto error; @@ -2598,7 +2653,7 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity, amdgpu_res_next(&dst, cur_size); } error: - mutex_unlock(&adev->mman.gtt_window_lock); + mutex_unlock(&entity->lock); if (f) *f = dma_fence_get(fence); dma_fence_put(fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 143201ecea3f..bf101215757e 100644 
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -29,6 +29,7 @@ #include <drm/ttm/ttm_placement.h> #include "amdgpu_vram_mgr.h" #include "amdgpu_hmm.h" +#include "amdgpu_gmc.h" #define AMDGPU_PL_GDS (TTM_PL_PRIV + 0) #define AMDGPU_PL_GWS (TTM_PL_PRIV + 1) @@ -38,8 +39,7 @@ #define AMDGPU_PL_MMIO_REMAP (TTM_PL_PRIV + 5) #define __AMDGPU_PL_NUM (TTM_PL_PRIV + 6) -#define AMDGPU_GTT_MAX_TRANSFER_SIZE 512 -#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 +#define AMDGPU_GTT_MAX_TRANSFER_SIZE 1024 extern const struct attribute_group amdgpu_vram_mgr_attr_group; extern const struct attribute_group amdgpu_gtt_mgr_attr_group; @@ -54,6 +54,9 @@ struct amdgpu_gtt_mgr { struct amdgpu_ttm_buffer_entity { struct drm_sched_entity base; + struct mutex lock; + struct drm_mm_node gart_node; + u64 gart_window_offs[2]; }; struct amdgpu_mman { @@ -67,8 +70,7 @@ struct amdgpu_mman { struct amdgpu_ring *buffer_funcs_ring; bool buffer_funcs_enabled; - struct mutex gtt_window_lock; - + /* @default_entity: for workarounds, has no gart windows */ struct amdgpu_ttm_buffer_entity default_entity; struct amdgpu_ttm_buffer_entity clear_entity; struct amdgpu_ttm_buffer_entity move_entity; @@ -205,6 +207,27 @@ static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, } #endif +/** + * amdgpu_compute_gart_address() - Returns GART address of an entity's window + * @gmc: The &struct amdgpu_gmc instance to use + * @entity: The &struct amdgpu_ttm_buffer_entity owning the GART window + * @index: The window to use (must be 0 or 1) + */ +static inline u64 amdgpu_compute_gart_address(struct amdgpu_gmc *gmc, + struct amdgpu_ttm_buffer_entity *entity, + int index) +{ + return gmc->gart_start + entity->gart_window_offs[index]; +} + +/** + * amdgpu_gtt_node_to_byte_offset() - Returns a byte offset of a gtt node + */ +static inline u64 amdgpu_gtt_node_to_byte_offset(const struct drm_mm_node *gtt_node) +{ + return gtt_node->start * (u64)PAGE_SIZE; +} + void 
amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct amdgpu_hmm_range *range); int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo, uint64_t *user_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 9d67b770bcc2..7c450350847d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -446,8 +446,7 @@ static int amdgpu_userq_wait_for_last_fence(struct amdgpu_usermode_queue *queue) return ret; } -static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue, - int queue_id) +static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue) { struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr; struct amdgpu_device *adev = uq_mgr->adev; @@ -461,7 +460,6 @@ static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue, uq_funcs->mqd_destroy(queue); amdgpu_userq_fence_driver_free(queue); /* Use interrupt-safe locking since IRQ handlers may access these XArrays */ - xa_erase_irq(&uq_mgr->userq_xa, (unsigned long)queue_id); xa_erase_irq(&adev->userq_doorbell_xa, queue->doorbell_index); queue->userq_mgr = NULL; list_del(&queue->userq_va_list); @@ -470,12 +468,6 @@ static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue, up_read(&adev->reset_domain->sem); } -static struct amdgpu_usermode_queue * -amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid) -{ - return xa_load(&uq_mgr->userq_xa, qid); -} - void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_eviction_fence_mgr *evf_mgr) @@ -625,22 +617,13 @@ unref_bo: } static int -amdgpu_userq_destroy(struct drm_file *filp, int queue_id) +amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) { - struct amdgpu_fpriv *fpriv = filp->driver_priv; - struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; struct amdgpu_device *adev = uq_mgr->adev; - struct amdgpu_usermode_queue *queue; int r = 0; 
cancel_delayed_work_sync(&uq_mgr->resume_work); mutex_lock(&uq_mgr->userq_mutex); - queue = amdgpu_userq_find(uq_mgr, queue_id); - if (!queue) { - drm_dbg_driver(adev_to_drm(uq_mgr->adev), "Invalid queue id to destroy\n"); - mutex_unlock(&uq_mgr->userq_mutex); - return -EINVAL; - } amdgpu_userq_wait_for_last_fence(queue); /* Cancel any pending hang detection work and cleanup */ if (queue->hang_detect_fence) { @@ -672,7 +655,7 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id) drm_warn(adev_to_drm(uq_mgr->adev), "trying to destroy a HW mapping userq\n"); queue->state = AMDGPU_USERQ_STATE_HUNG; } - amdgpu_userq_cleanup(queue, queue_id); + amdgpu_userq_cleanup(queue); mutex_unlock(&uq_mgr->userq_mutex); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); @@ -680,6 +663,37 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id) return r; } +static void amdgpu_userq_kref_destroy(struct kref *kref) +{ + int r; + struct amdgpu_usermode_queue *queue = + container_of(kref, struct amdgpu_usermode_queue, refcount); + struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr; + + r = amdgpu_userq_destroy(uq_mgr, queue); + if (r) + drm_file_err(uq_mgr->file, "Failed to destroy usermode queue %d\n", r); +} + +struct amdgpu_usermode_queue *amdgpu_userq_get(struct amdgpu_userq_mgr *uq_mgr, u32 qid) +{ + struct amdgpu_usermode_queue *queue; + + xa_lock(&uq_mgr->userq_xa); + queue = xa_load(&uq_mgr->userq_xa, qid); + if (queue) + kref_get(&queue->refcount); + xa_unlock(&uq_mgr->userq_xa); + + return queue; +} + +void amdgpu_userq_put(struct amdgpu_usermode_queue *queue) +{ + if (queue) + kref_put(&queue->refcount, amdgpu_userq_kref_destroy); +} + static int amdgpu_userq_priority_permit(struct drm_file *filp, int priority) { @@ -834,6 +848,9 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) goto unlock; } + /* drop this refcount during queue destroy */ + kref_init(&queue->refcount); + /* Wait for mode-1 reset to complete */ 
down_read(&adev->reset_domain->sem); r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, GFP_KERNEL)); @@ -985,7 +1002,9 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { union drm_amdgpu_userq *args = data; - int r; + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_usermode_queue *queue; + int r = 0; if (!amdgpu_userq_enabled(dev)) return -ENOTSUPP; @@ -1000,11 +1019,16 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, drm_file_err(filp, "Failed to create usermode queue\n"); break; - case AMDGPU_USERQ_OP_FREE: - r = amdgpu_userq_destroy(filp, args->in.queue_id); - if (r) - drm_file_err(filp, "Failed to destroy usermode queue\n"); + case AMDGPU_USERQ_OP_FREE: { + xa_lock(&fpriv->userq_mgr.userq_xa); + queue = __xa_erase(&fpriv->userq_mgr.userq_xa, args->in.queue_id); + xa_unlock(&fpriv->userq_mgr.userq_xa); + if (!queue) + return -ENOENT; + + amdgpu_userq_put(queue); break; + } default: drm_dbg_driver(dev, "Invalid user queue op specified: %d\n", args->in.op); @@ -1023,16 +1047,23 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr) /* Resume all the queues for this process */ xa_for_each(&uq_mgr->userq_xa, queue_id, queue) { + queue = amdgpu_userq_get(uq_mgr, queue_id); + if (!queue) + continue; + if (!amdgpu_userq_buffer_vas_mapped(queue)) { drm_file_err(uq_mgr->file, "trying restore queue without va mapping\n"); queue->state = AMDGPU_USERQ_STATE_INVALID_VA; + amdgpu_userq_put(queue); continue; } r = amdgpu_userq_restore_helper(queue); if (r) ret = r; + + amdgpu_userq_put(queue); } if (ret) @@ -1266,9 +1297,13 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr) amdgpu_userq_detect_and_reset_queues(uq_mgr); /* Try to unmap all the queues in this process ctx */ xa_for_each(&uq_mgr->userq_xa, queue_id, queue) { + queue = amdgpu_userq_get(uq_mgr, queue_id); + if (!queue) + continue; r = amdgpu_userq_preempt_helper(queue); if (r) ret = r; + amdgpu_userq_put(queue); } if (ret) 
@@ -1301,16 +1336,24 @@ amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr) int ret; xa_for_each(&uq_mgr->userq_xa, queue_id, queue) { + queue = amdgpu_userq_get(uq_mgr, queue_id); + if (!queue) + continue; + struct dma_fence *f = queue->last_fence; - if (!f || dma_fence_is_signaled(f)) + if (!f || dma_fence_is_signaled(f)) { + amdgpu_userq_put(queue); continue; + } ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100)); if (ret <= 0) { drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n", f->context, f->seqno); + amdgpu_userq_put(queue); return -ETIMEDOUT; } + amdgpu_userq_put(queue); } return 0; @@ -1361,20 +1404,23 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *f void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr) { struct amdgpu_usermode_queue *queue; - unsigned long queue_id; + unsigned long queue_id = 0; + + for (;;) { + xa_lock(&userq_mgr->userq_xa); + queue = xa_find(&userq_mgr->userq_xa, &queue_id, ULONG_MAX, + XA_PRESENT); + if (queue) + __xa_erase(&userq_mgr->userq_xa, queue_id); + xa_unlock(&userq_mgr->userq_xa); - cancel_delayed_work_sync(&userq_mgr->resume_work); + if (!queue) + break; - mutex_lock(&userq_mgr->userq_mutex); - amdgpu_userq_detect_and_reset_queues(userq_mgr); - xa_for_each(&userq_mgr->userq_xa, queue_id, queue) { - amdgpu_userq_wait_for_last_fence(queue); - amdgpu_userq_unmap_helper(queue); - amdgpu_userq_cleanup(queue, queue_id); + amdgpu_userq_put(queue); } xa_destroy(&userq_mgr->userq_xa); - mutex_unlock(&userq_mgr->userq_mutex); mutex_destroy(&userq_mgr->userq_mutex); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h index 5845d8959034..54e1997b3cc0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -74,6 +74,7 @@ struct amdgpu_usermode_queue { struct dentry *debugfs_queue; struct delayed_work hang_detect_work; struct dma_fence *hang_detect_fence; + 
struct kref refcount; struct list_head userq_va_list; }; @@ -81,6 +82,8 @@ struct amdgpu_usermode_queue { struct amdgpu_userq_funcs { int (*mqd_create)(struct amdgpu_usermode_queue *queue, struct drm_amdgpu_userq_in *args); + int (*mqd_update)(struct amdgpu_usermode_queue *queue, + struct drm_amdgpu_userq_in *args); void (*mqd_destroy)(struct amdgpu_usermode_queue *uq); int (*unmap)(struct amdgpu_usermode_queue *queue); int (*map)(struct amdgpu_usermode_queue *queue); @@ -112,6 +115,9 @@ struct amdgpu_db_info { struct amdgpu_userq_obj *db_obj; }; +struct amdgpu_usermode_queue *amdgpu_userq_get(struct amdgpu_userq_mgr *uq_mgr, u32 qid); +void amdgpu_userq_put(struct amdgpu_usermode_queue *queue); + int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 7e9cf1868cc9..d8ce7b3733e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -461,33 +461,31 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = drm_to_adev(dev); + struct drm_amdgpu_userq_signal *args = data; + const unsigned int num_write_bo_handles = args->num_bo_write_handles; + const unsigned int num_read_bo_handles = args->num_bo_read_handles; struct amdgpu_fpriv *fpriv = filp->driver_priv; struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr; - struct drm_amdgpu_userq_signal *args = data; - struct drm_gem_object **gobj_write = NULL; - struct drm_gem_object **gobj_read = NULL; - struct amdgpu_usermode_queue *queue; + struct drm_gem_object **gobj_write, **gobj_read; + u32 *syncobj_handles, num_syncobj_handles; struct amdgpu_userq_fence *userq_fence; + struct amdgpu_usermode_queue *queue = NULL; struct drm_syncobj **syncobj = NULL; - u32 
*bo_handles_write, num_write_bo_handles; - u32 *syncobj_handles, num_syncobj_handles; - u32 *bo_handles_read, num_read_bo_handles; - int r, i, entry, rentry, wentry; struct dma_fence *fence; struct drm_exec exec; + int r, i, entry; u64 wptr; if (!amdgpu_userq_enabled(dev)) return -ENOTSUPP; - if (args->num_syncobj_handles > AMDGPU_USERQ_MAX_HANDLES || - args->num_bo_write_handles > AMDGPU_USERQ_MAX_HANDLES || + if (args->num_bo_write_handles > AMDGPU_USERQ_MAX_HANDLES || args->num_bo_read_handles > AMDGPU_USERQ_MAX_HANDLES) return -EINVAL; num_syncobj_handles = args->num_syncobj_handles; - syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles), - size_mul(sizeof(u32), num_syncobj_handles)); + syncobj_handles = memdup_array_user(u64_to_user_ptr(args->syncobj_handles), + num_syncobj_handles, sizeof(u32)); if (IS_ERR(syncobj_handles)) return PTR_ERR(syncobj_handles); @@ -506,54 +504,22 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, } } - num_read_bo_handles = args->num_bo_read_handles; - bo_handles_read = memdup_user(u64_to_user_ptr(args->bo_read_handles), - sizeof(u32) * num_read_bo_handles); - if (IS_ERR(bo_handles_read)) { - r = PTR_ERR(bo_handles_read); + r = drm_gem_objects_lookup(filp, + u64_to_user_ptr(args->bo_read_handles), + num_read_bo_handles, + &gobj_read); + if (r) goto free_syncobj; - } - /* Array of pointers to the GEM read objects */ - gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL); - if (!gobj_read) { - r = -ENOMEM; - goto free_bo_handles_read; - } - - for (rentry = 0; rentry < num_read_bo_handles; rentry++) { - gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]); - if (!gobj_read[rentry]) { - r = -ENOENT; - goto put_gobj_read; - } - } - - num_write_bo_handles = args->num_bo_write_handles; - bo_handles_write = memdup_user(u64_to_user_ptr(args->bo_write_handles), - sizeof(u32) * num_write_bo_handles); - if (IS_ERR(bo_handles_write)) { - r = 
PTR_ERR(bo_handles_write); + r = drm_gem_objects_lookup(filp, + u64_to_user_ptr(args->bo_write_handles), + num_write_bo_handles, + &gobj_write); + if (r) goto put_gobj_read; - } - - /* Array of pointers to the GEM write objects */ - gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL); - if (!gobj_write) { - r = -ENOMEM; - goto free_bo_handles_write; - } - - for (wentry = 0; wentry < num_write_bo_handles; wentry++) { - gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]); - if (!gobj_write[wentry]) { - r = -ENOENT; - goto put_gobj_write; - } - } /* Retrieve the user queue */ - queue = xa_load(&userq_mgr->userq_xa, args->queue_id); + queue = amdgpu_userq_get(userq_mgr, args->queue_id); if (!queue) { r = -ENOENT; goto put_gobj_write; @@ -629,17 +595,13 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, exec_fini: drm_exec_fini(&exec); put_gobj_write: - while (wentry-- > 0) - drm_gem_object_put(gobj_write[wentry]); + for (i = 0; i < num_write_bo_handles; i++) + drm_gem_object_put(gobj_write[i]); kfree(gobj_write); -free_bo_handles_write: - kfree(bo_handles_write); put_gobj_read: - while (rentry-- > 0) - drm_gem_object_put(gobj_read[rentry]); + for (i = 0; i < num_read_bo_handles; i++) + drm_gem_object_put(gobj_read[i]); kfree(gobj_read); -free_bo_handles_read: - kfree(bo_handles_read); free_syncobj: while (entry-- > 0) if (syncobj[entry]) @@ -648,98 +610,73 @@ free_syncobj: free_syncobj_handles: kfree(syncobj_handles); + if (queue) + amdgpu_userq_put(queue); + return r; } int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { - u32 *syncobj_handles, *timeline_points, *timeline_handles, *bo_handles_read, *bo_handles_write; - u32 num_syncobj, num_read_bo_handles, num_write_bo_handles; - struct drm_amdgpu_userq_fence_info *fence_info = NULL; struct drm_amdgpu_userq_wait *wait_info = data; + const unsigned int num_write_bo_handles = wait_info->num_bo_write_handles; + 
const unsigned int num_read_bo_handles = wait_info->num_bo_read_handles; + struct drm_amdgpu_userq_fence_info *fence_info = NULL; struct amdgpu_fpriv *fpriv = filp->driver_priv; struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr; - struct amdgpu_usermode_queue *waitq; - struct drm_gem_object **gobj_write; - struct drm_gem_object **gobj_read; + struct drm_gem_object **gobj_write, **gobj_read; + u32 *timeline_points, *timeline_handles; + struct amdgpu_usermode_queue *waitq = NULL; + u32 *syncobj_handles, num_syncobj; struct dma_fence **fences = NULL; u16 num_points, num_fences = 0; - int r, i, rentry, wentry, cnt; struct drm_exec exec; + int r, i, cnt; if (!amdgpu_userq_enabled(dev)) return -ENOTSUPP; - if (wait_info->num_syncobj_handles > AMDGPU_USERQ_MAX_HANDLES || - wait_info->num_bo_write_handles > AMDGPU_USERQ_MAX_HANDLES || + if (wait_info->num_bo_write_handles > AMDGPU_USERQ_MAX_HANDLES || wait_info->num_bo_read_handles > AMDGPU_USERQ_MAX_HANDLES) return -EINVAL; - num_read_bo_handles = wait_info->num_bo_read_handles; - bo_handles_read = memdup_user(u64_to_user_ptr(wait_info->bo_read_handles), - size_mul(sizeof(u32), num_read_bo_handles)); - if (IS_ERR(bo_handles_read)) - return PTR_ERR(bo_handles_read); - - num_write_bo_handles = wait_info->num_bo_write_handles; - bo_handles_write = memdup_user(u64_to_user_ptr(wait_info->bo_write_handles), - size_mul(sizeof(u32), num_write_bo_handles)); - if (IS_ERR(bo_handles_write)) { - r = PTR_ERR(bo_handles_write); - goto free_bo_handles_read; - } - num_syncobj = wait_info->num_syncobj_handles; - syncobj_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_handles), - size_mul(sizeof(u32), num_syncobj)); - if (IS_ERR(syncobj_handles)) { - r = PTR_ERR(syncobj_handles); - goto free_bo_handles_write; - } + syncobj_handles = memdup_array_user(u64_to_user_ptr(wait_info->syncobj_handles), + num_syncobj, sizeof(u32)); + if (IS_ERR(syncobj_handles)) + return PTR_ERR(syncobj_handles); + num_points = 
wait_info->num_syncobj_timeline_handles; - timeline_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_handles), - sizeof(u32) * num_points); + timeline_handles = memdup_array_user(u64_to_user_ptr(wait_info->syncobj_timeline_handles), + num_points, sizeof(u32)); if (IS_ERR(timeline_handles)) { r = PTR_ERR(timeline_handles); goto free_syncobj_handles; } - timeline_points = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_points), - sizeof(u32) * num_points); + timeline_points = memdup_array_user(u64_to_user_ptr(wait_info->syncobj_timeline_points), + num_points, sizeof(u32)); + if (IS_ERR(timeline_points)) { r = PTR_ERR(timeline_points); goto free_timeline_handles; } - gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL); - if (!gobj_read) { - r = -ENOMEM; + r = drm_gem_objects_lookup(filp, + u64_to_user_ptr(wait_info->bo_read_handles), + num_read_bo_handles, + &gobj_read); + if (r) goto free_timeline_points; - } - - for (rentry = 0; rentry < num_read_bo_handles; rentry++) { - gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]); - if (!gobj_read[rentry]) { - r = -ENOENT; - goto put_gobj_read; - } - } - gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL); - if (!gobj_write) { - r = -ENOMEM; + r = drm_gem_objects_lookup(filp, + u64_to_user_ptr(wait_info->bo_write_handles), + num_write_bo_handles, + &gobj_write); + if (r) goto put_gobj_read; - } - - for (wentry = 0; wentry < num_write_bo_handles; wentry++) { - gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]); - if (!gobj_write[wentry]) { - r = -ENOENT; - goto put_gobj_write; - } - } drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, (num_read_bo_handles + num_write_bo_handles)); @@ -926,7 +863,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, */ num_fences = dma_fence_dedup_array(fences, num_fences); - waitq = xa_load(&userq_mgr->userq_xa, wait_info->waitq_id); + waitq = 
amdgpu_userq_get(userq_mgr, wait_info->waitq_id); if (!waitq) { r = -EINVAL; goto free_fences; @@ -983,43 +920,25 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, r = -EFAULT; goto free_fences; } - - kfree(fences); - kfree(fence_info); } - drm_exec_fini(&exec); - for (i = 0; i < num_read_bo_handles; i++) - drm_gem_object_put(gobj_read[i]); - kfree(gobj_read); - - for (i = 0; i < num_write_bo_handles; i++) - drm_gem_object_put(gobj_write[i]); - kfree(gobj_write); - - kfree(timeline_points); - kfree(timeline_handles); - kfree(syncobj_handles); - kfree(bo_handles_write); - kfree(bo_handles_read); - - return 0; - free_fences: - while (num_fences-- > 0) - dma_fence_put(fences[num_fences]); - kfree(fences); + if (fences) { + while (num_fences-- > 0) + dma_fence_put(fences[num_fences]); + kfree(fences); + } free_fence_info: kfree(fence_info); exec_fini: drm_exec_fini(&exec); put_gobj_write: - while (wentry-- > 0) - drm_gem_object_put(gobj_write[wentry]); + for (i = 0; i < num_write_bo_handles; i++) + drm_gem_object_put(gobj_write[i]); kfree(gobj_write); put_gobj_read: - while (rentry-- > 0) - drm_gem_object_put(gobj_read[rentry]); + for (i = 0; i < num_read_bo_handles; i++) + drm_gem_object_put(gobj_read[i]); kfree(gobj_read); free_timeline_points: kfree(timeline_points); @@ -1027,10 +946,9 @@ free_timeline_handles: kfree(timeline_handles); free_syncobj_handles: kfree(syncobj_handles); -free_bo_handles_write: - kfree(bo_handles_write); -free_bo_handles_read: - kfree(bo_handles_read); + + if (waitq) + amdgpu_userq_put(waitq); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index a7d8f1ce6ac2..eb4a15db2ef2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -451,24 +451,6 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp) } /** - * amdgpu_vce_required_gart_pages() - gets number of GART pages required by VCE - * - * 
@adev: amdgpu_device pointer - * - * Returns how many GART pages we need before GTT for the VCE IP block. - * For VCE1, see vce_v1_0_ensure_vcpu_bo_32bit_addr for details. - * For VCE2+, this is not needed so return zero. - */ -u32 amdgpu_vce_required_gart_pages(struct amdgpu_device *adev) -{ - /* VCE IP block not added yet, so can't use amdgpu_ip_version */ - if (adev->family == AMDGPU_FAMILY_SI) - return 512; - - return 0; -} - -/** * amdgpu_vce_get_create_msg - generate a VCE create msg * * @ring: ring we should submit the msg to diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index 1c3464ce5037..778c714c8385 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -52,6 +52,7 @@ struct amdgpu_vce { uint32_t srbm_soft_reset; unsigned num_rings; uint32_t keyselect; + struct drm_mm_node gart_node; }; int amdgpu_vce_early_init(struct amdgpu_device *adev); @@ -61,7 +62,6 @@ int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring) int amdgpu_vce_suspend(struct amdgpu_device *adev); int amdgpu_vce_resume(struct amdgpu_device *adev); void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); -u32 amdgpu_vce_required_gart_pages(struct amdgpu_device *adev); int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, struct amdgpu_job *job, struct amdgpu_ib *ib); int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8b095087feb4..dcd49b0fb6e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -764,12 +764,9 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, * @need_pipe_sync: is pipe sync needed * * Emit a VM flush when it is necessary. - * - * Returns: - * 0 on success, errno otherwise. 
*/ -int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, - bool need_pipe_sync) +void amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, + bool need_pipe_sync) { struct amdgpu_device *adev = ring->adev; struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id]; @@ -783,8 +780,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool cleaner_shader_needed = false; bool pasid_mapping_needed = false; struct dma_fence *fence = NULL; - unsigned int patch; - int r; + unsigned int patch = 0; if (amdgpu_vmid_had_gpu_reset(adev, id)) { gds_switch_needed = true; @@ -812,9 +808,20 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync && !cleaner_shader_needed) - return 0; + return; amdgpu_ring_ib_begin(ring); + + /* There is no matching insert_end for this on purpose for the vm flush. + * The IB portion of the submission has both. Having multiple + * insert_start sequences is ok, but you can only have one insert_end + * per submission based on the way VCN FW works. For JPEG + * you can as many insert_start and insert_end sequences as you like as + * long as the rest of the packets come between start and end sequences. 
+ */ + if (ring->funcs->insert_start) + ring->funcs->insert_start(ring); + if (ring->funcs->init_cond_exec) patch = amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr); @@ -845,9 +852,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, } if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) { - r = amdgpu_fence_emit(ring, job->hw_vm_fence, 0); - if (r) - return r; + amdgpu_fence_emit(ring, job->hw_vm_fence, 0); fence = &job->hw_vm_fence->base; /* get a ref for the job */ dma_fence_get(fence); @@ -892,7 +897,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, } amdgpu_ring_ib_end(ring); - return 0; } /** @@ -3210,3 +3214,20 @@ void amdgpu_vm_print_task_info(struct amdgpu_device *adev, task_info->process_name, task_info->tgid, task_info->task.comm, task_info->task.pid); } + +void amdgpu_sdma_set_vm_pte_scheds(struct amdgpu_device *adev, + const struct amdgpu_vm_pte_funcs *vm_pte_funcs) +{ + struct drm_gpu_scheduler *sched; + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (adev->sdma.has_page_queue) + sched = &adev->sdma.instance[i].page.sched; + else + sched = &adev->sdma.instance[i].ring.sched; + adev->vm_manager.vm_pte_scheds[i] = sched; + } + adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; + adev->vm_manager.vm_pte_funcs = vm_pte_funcs; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index a914ceec90aa..46628b0e699b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -513,7 +513,7 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket, int (*callback)(void *p, struct amdgpu_bo *bo), void *param); -int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync); +void amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync); int amdgpu_vm_update_pdes(struct 
amdgpu_device *adev, struct amdgpu_vm *vm, bool immediate); int amdgpu_vm_clear_freed(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 11e56df1d91b..e36c287b3289 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -338,7 +338,7 @@ static u32 xgmi_v6_4_get_link_status(struct amdgpu_device *adev, int global_link if (!(adev->aid_mask & BIT(i))) return U32_MAX; - addr += adev->asic_funcs->encode_ext_smn_addressing(i); + addr += amdgpu_reg_get_smn_base64(adev, XGMI_HWIP, i); return RREG32_PCIE_EXT(addr); } @@ -347,6 +347,9 @@ int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev, int global_link_num) { u32 xgmi_state_reg_val; + if (amdgpu_sriov_vf(adev)) + return AMDGPU_XGMI_LINK_NA; + if (adev->gmc.xgmi.num_physical_nodes <= 1) return -EINVAL; @@ -1290,7 +1293,10 @@ static void amdgpu_xgmi_legacy_reset_ras_error_count(struct amdgpu_device *adev) static void __xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst, u64 mca_base) { - WREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS, 0ULL); + uint64_t smn_base = + amdgpu_reg_get_smn_base64(adev, XGMI_HWIP, xgmi_inst); + + WREG64_MCA(smn_base, mca_base, ACA_REG_IDX_STATUS, 0ULL); } static void xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst) @@ -1500,6 +1506,7 @@ static void __xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, struct a u64 mca_base, struct ras_err_data *err_data) { int xgmi_inst = mcm_info->die_id; + uint64_t smn_base; u64 status = 0; status = RREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS); @@ -1516,8 +1523,8 @@ static void __xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, struct a default: break; } - - WREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS, 0ULL); + smn_base = amdgpu_reg_get_smn_base64(adev, XGMI_HWIP, xgmi_inst); + WREG64_MCA(smn_base, mca_base, ACA_REG_IDX_STATUS, 0ULL); } static void 
xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, int xgmi_inst, struct ras_err_data *err_data) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index cffb2f805de2..a841f342a3eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -470,14 +470,23 @@ struct amd_sriov_ras_chk_criti { uint32_t hit; }; +union amd_sriov_ras_host_push { + struct amd_sriov_ras_telemetry_error_count error_count; + struct amd_sriov_ras_cper_dump cper_dump; + struct amd_sriov_ras_chk_criti chk_criti; +}; + +#define AMD_SRIOV_UNIRAS_BLOCKS_BUF_SIZE 4096 +#define AMD_SRIOV_UNIRAS_CMD_MAX_SIZE (4096 * 13) +struct amd_sriov_uniras_shared_mem { + uint8_t blocks_ecc_buf[AMD_SRIOV_UNIRAS_BLOCKS_BUF_SIZE]; + uint8_t cmd_buf[AMD_SRIOV_UNIRAS_CMD_MAX_SIZE]; +}; + struct amdsriov_ras_telemetry { struct amd_sriov_ras_telemetry_header header; - - union { - struct amd_sriov_ras_telemetry_error_count error_count; - struct amd_sriov_ras_cper_dump cper_dump; - struct amd_sriov_ras_chk_criti chk_criti; - } body; + union amd_sriov_ras_host_push body; + struct amd_sriov_uniras_shared_mem uniras_shared_mem; }; /* version data stored in MAILBOX_MSGBUF_RCV_DW1 for future expansion */ @@ -510,6 +519,10 @@ _Static_assert(AMD_SRIOV_MSG_RESERVE_UCODE % 4 == 0, _Static_assert(AMD_SRIOV_MSG_RESERVE_UCODE > AMD_SRIOV_UCODE_ID__MAX, "AMD_SRIOV_MSG_RESERVE_UCODE must be bigger than AMD_SRIOV_UCODE_ID__MAX"); +_Static_assert( + sizeof(struct amdsriov_ras_telemetry) <= AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10, +"amdsriov_ras_telemetry must be " stringification(AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1) " KB"); + #undef _stringification #undef stringification #endif diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index d9842aa25283..72ea37dbfea8 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ 
-58,25 +58,6 @@ void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT << 1; } -/* Fixed pattern for smn addressing on different AIDs: - * bit[34]: indicate cross AID access - * bit[33:32]: indicate target AID id - * AID id range is 0 ~ 3 as maximum AID number is 4. - */ -u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id) -{ - u64 ext_offset; - - /* local routing and bit[34:32] will be zeros */ - if (ext_id == 0) - return 0; - - /* Initiated from host, accessing to all non-zero aids are cross traffic */ - ext_offset = ((u64)(ext_id & 0x3) << 32) | (1ULL << 34); - - return ext_offset; -} - static enum amdgpu_gfx_partition __aqua_vanjaram_calc_xcp_mode(struct amdgpu_xcp_mgr *xcp_mgr) { @@ -590,7 +571,7 @@ static void aqua_read_smn_ext(struct amdgpu_device *adev, uint64_t smn_addr, int i) { regdata->addr = - smn_addr + adev->asic_funcs->encode_ext_smn_addressing(i); + smn_addr + amdgpu_reg_get_smn_base64(adev, XGMI_HWIP, i); regdata->value = RREG32_PCIE_EXT(regdata->addr); } diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index e4ce3029d3fb..6e37961f6be5 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -1462,8 +1462,6 @@ static void atom_get_vbios_pn(struct atom_context *ctx) ctx->vbios_pn[count] = 0; } - - drm_info(ctx->card->dev, "ATOM BIOS: %s\n", ctx->vbios_pn); } static void atom_get_vbios_version(struct atom_context *ctx) @@ -1520,6 +1518,30 @@ static void atom_get_vbios_build(struct atom_context *ctx) strscpy(ctx->build_num, str, len); } +static inline void atom_print_vbios_info(struct atom_context *ctx) +{ + char vbios_info[256]; + int off = 0; + + if (ctx->vbios_pn[0]) + off += scnprintf(vbios_info + off, sizeof(vbios_info) - off, + "%s", ctx->vbios_pn); + if (ctx->build_num[0]) + off += scnprintf(vbios_info + off, sizeof(vbios_info) - off, + "%sbuild: %s", off ? 
", " : "", + ctx->build_num); + if (ctx->vbios_ver_str[0]) + off += scnprintf(vbios_info + off, sizeof(vbios_info) - off, + "%sver: %s", off ? ", " : "", + ctx->vbios_ver_str); + if (ctx->date[0]) + off += scnprintf(vbios_info + off, sizeof(vbios_info) - off, + "%s%.10s", off ? ", " : "", + ctx->date); + if (off) + drm_info(ctx->card->dev, "ATOM BIOS: %s\n", vbios_info); +} + struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios) { int base; @@ -1582,6 +1604,8 @@ struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios) atom_get_vbios_version(ctx); atom_get_vbios_build(ctx); + atom_print_vbios_info(ctx); + return ctx; } diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index c081784a19c4..29954c7d61b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -154,11 +154,11 @@ static u32 cik_pcie_rreg(struct amdgpu_device *adev, u32 reg) unsigned long flags; u32 r; - spin_lock_irqsave(&adev->pcie_idx_lock, flags); + spin_lock_irqsave(&adev->reg.pcie.lock, flags); WREG32(mmPCIE_INDEX, reg); (void)RREG32(mmPCIE_INDEX); r = RREG32(mmPCIE_DATA); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.pcie.lock, flags); return r; } @@ -166,12 +166,12 @@ static void cik_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { unsigned long flags; - spin_lock_irqsave(&adev->pcie_idx_lock, flags); + spin_lock_irqsave(&adev->reg.pcie.lock, flags); WREG32(mmPCIE_INDEX, reg); (void)RREG32(mmPCIE_INDEX); WREG32(mmPCIE_DATA, v); (void)RREG32(mmPCIE_DATA); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.pcie.lock, flags); } static u32 cik_smc_rreg(struct amdgpu_device *adev, u32 reg) @@ -179,10 +179,10 @@ static u32 cik_smc_rreg(struct amdgpu_device *adev, u32 reg) unsigned long flags; u32 r; - spin_lock_irqsave(&adev->smc_idx_lock, flags); + spin_lock_irqsave(&adev->reg.smc.lock, flags); 
WREG32(mmSMC_IND_INDEX_0, (reg)); r = RREG32(mmSMC_IND_DATA_0); - spin_unlock_irqrestore(&adev->smc_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.smc.lock, flags); return r; } @@ -190,10 +190,10 @@ static void cik_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { unsigned long flags; - spin_lock_irqsave(&adev->smc_idx_lock, flags); + spin_lock_irqsave(&adev->reg.smc.lock, flags); WREG32(mmSMC_IND_INDEX_0, (reg)); WREG32(mmSMC_IND_DATA_0, (v)); - spin_unlock_irqrestore(&adev->smc_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.smc.lock, flags); } static u32 cik_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg) @@ -201,10 +201,10 @@ static u32 cik_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg) unsigned long flags; u32 r; - spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags); + spin_lock_irqsave(&adev->reg.uvd_ctx.lock, flags); WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff)); r = RREG32(mmUVD_CTX_DATA); - spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.uvd_ctx.lock, flags); return r; } @@ -212,10 +212,10 @@ static void cik_uvd_ctx_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { unsigned long flags; - spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags); + spin_lock_irqsave(&adev->reg.uvd_ctx.lock, flags); WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff)); WREG32(mmUVD_CTX_DATA, (v)); - spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.uvd_ctx.lock, flags); } static u32 cik_didt_rreg(struct amdgpu_device *adev, u32 reg) @@ -223,10 +223,10 @@ static u32 cik_didt_rreg(struct amdgpu_device *adev, u32 reg) unsigned long flags; u32 r; - spin_lock_irqsave(&adev->didt_idx_lock, flags); + spin_lock_irqsave(&adev->reg.didt.lock, flags); WREG32(mmDIDT_IND_INDEX, (reg)); r = RREG32(mmDIDT_IND_DATA); - spin_unlock_irqrestore(&adev->didt_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.didt.lock, flags); return r; } @@ -234,10 +234,10 @@ static void cik_didt_wreg(struct amdgpu_device 
*adev, u32 reg, u32 v) { unsigned long flags; - spin_lock_irqsave(&adev->didt_idx_lock, flags); + spin_lock_irqsave(&adev->reg.didt.lock, flags); WREG32(mmDIDT_IND_INDEX, (reg)); WREG32(mmDIDT_IND_DATA, (v)); - spin_unlock_irqrestore(&adev->didt_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.didt.lock, flags); } static const u32 bonaire_golden_spm_registers[] = @@ -1027,7 +1027,7 @@ static bool cik_read_bios_from_rom(struct amdgpu_device *adev, dw_ptr = (u32 *)bios; length_dw = ALIGN(length_bytes, 4) / 4; /* take the smc lock since we are using the smc index */ - spin_lock_irqsave(&adev->smc_idx_lock, flags); + spin_lock_irqsave(&adev->reg.smc.lock, flags); /* set rom index to 0 */ WREG32(mmSMC_IND_INDEX_0, ixROM_INDEX); WREG32(mmSMC_IND_DATA_0, 0); @@ -1035,7 +1035,7 @@ static bool cik_read_bios_from_rom(struct amdgpu_device *adev, WREG32(mmSMC_IND_INDEX_0, ixROM_DATA); for (i = 0; i < length_dw; i++) dw_ptr[i] = RREG32(mmSMC_IND_DATA_0); - spin_unlock_irqrestore(&adev->smc_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.smc.lock, flags); return true; } @@ -1984,14 +1984,14 @@ static int cik_common_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - adev->smc_rreg = &cik_smc_rreg; - adev->smc_wreg = &cik_smc_wreg; - adev->pcie_rreg = &cik_pcie_rreg; - adev->pcie_wreg = &cik_pcie_wreg; - adev->uvd_ctx_rreg = &cik_uvd_ctx_rreg; - adev->uvd_ctx_wreg = &cik_uvd_ctx_wreg; - adev->didt_rreg = &cik_didt_rreg; - adev->didt_wreg = &cik_didt_wreg; + adev->reg.smc.rreg = cik_smc_rreg; + adev->reg.smc.wreg = cik_smc_wreg; + adev->reg.pcie.rreg = &cik_pcie_rreg; + adev->reg.pcie.wreg = &cik_pcie_wreg; + adev->reg.uvd_ctx.rreg = &cik_uvd_ctx_rreg; + adev->reg.uvd_ctx.wreg = &cik_uvd_ctx_wreg; + adev->reg.didt.rreg = &cik_didt_rreg; + adev->reg.didt.wreg = &cik_didt_wreg; adev->asic_funcs = &cik_asic_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 
9e8715b4739d..22780c09177d 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -53,7 +53,6 @@ static const u32 sdma_offsets[SDMA_MAX_INSTANCE] = static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev); static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev); static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev); -static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev); static int cik_sdma_soft_reset(struct amdgpu_ip_block *ip_block); u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev); @@ -919,6 +918,14 @@ static void cik_enable_sdma_mgls(struct amdgpu_device *adev, } } +static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = { + .copy_pte_num_dw = 7, + .copy_pte = cik_sdma_vm_copy_pte, + + .write_pte = cik_sdma_vm_write_pte, + .set_pte_pde = cik_sdma_vm_set_pte_pde, +}; + static int cik_sdma_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -933,7 +940,7 @@ static int cik_sdma_early_init(struct amdgpu_ip_block *ip_block) cik_sdma_set_ring_funcs(adev); cik_sdma_set_irq_funcs(adev); cik_sdma_set_buffer_funcs(adev); - cik_sdma_set_vm_pte_funcs(adev); + amdgpu_sdma_set_vm_pte_scheds(adev, &cik_sdma_vm_pte_funcs); return 0; } @@ -1337,26 +1344,6 @@ static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev) adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; } -static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = { - .copy_pte_num_dw = 7, - .copy_pte = cik_sdma_vm_copy_pte, - - .write_pte = cik_sdma_vm_write_pte, - .set_pte_pde = cik_sdma_vm_set_pte_pde, -}; - -static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev) -{ - unsigned i; - - adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs; - for (i = 0; i < adev->sdma.num_instances; i++) { - adev->vm_manager.vm_pte_scheds[i] = - &adev->sdma.instance[i].ring.sched; - } - adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; -} - 
const struct amdgpu_ip_block_version cik_sdma_ip_block = { .type = AMD_IP_BLOCK_TYPE_SDMA, diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index a7ffe10eea1b..f1052acea5ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -175,10 +175,10 @@ static u32 dce_v10_0_audio_endpt_rreg(struct amdgpu_device *adev, unsigned long flags; u32 r; - spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags); + spin_lock_irqsave(&adev->reg.audio_endpt.lock, flags); WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg); r = RREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset); - spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.audio_endpt.lock, flags); return r; } @@ -188,10 +188,10 @@ static void dce_v10_0_audio_endpt_wreg(struct amdgpu_device *adev, { unsigned long flags; - spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags); + spin_lock_irqsave(&adev->reg.audio_endpt.lock, flags); WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg); WREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset, v); - spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.audio_endpt.lock, flags); } static u32 dce_v10_0_vblank_get_counter(struct amdgpu_device *adev, int crtc) @@ -2750,8 +2750,8 @@ static int dce_v10_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - adev->audio_endpt_rreg = &dce_v10_0_audio_endpt_rreg; - adev->audio_endpt_wreg = &dce_v10_0_audio_endpt_wreg; + adev->reg.audio_endpt.rreg = &dce_v10_0_audio_endpt_rreg; + adev->reg.audio_endpt.wreg = &dce_v10_0_audio_endpt_wreg; dce_v10_0_set_display_funcs(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index a72e20db5363..c153a6e1e22a 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -138,10 +138,10 @@ 
static u32 dce_v6_0_audio_endpt_rreg(struct amdgpu_device *adev, unsigned long flags; u32 r; - spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags); + spin_lock_irqsave(&adev->reg.audio_endpt.lock, flags); WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg); r = RREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset); - spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.audio_endpt.lock, flags); return r; } @@ -151,11 +151,11 @@ static void dce_v6_0_audio_endpt_wreg(struct amdgpu_device *adev, { unsigned long flags; - spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags); + spin_lock_irqsave(&adev->reg.audio_endpt.lock, flags); WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg | AZALIA_F0_CODEC_ENDPOINT_INDEX__AZALIA_ENDPOINT_REG_WRITE_EN_MASK); WREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset, v); - spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.audio_endpt.lock, flags); } static u32 dce_v6_0_vblank_get_counter(struct amdgpu_device *adev, int crtc) @@ -2697,8 +2697,8 @@ static int dce_v6_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - adev->audio_endpt_rreg = &dce_v6_0_audio_endpt_rreg; - adev->audio_endpt_wreg = &dce_v6_0_audio_endpt_wreg; + adev->reg.audio_endpt.rreg = &dce_v6_0_audio_endpt_rreg; + adev->reg.audio_endpt.wreg = &dce_v6_0_audio_endpt_wreg; dce_v6_0_set_display_funcs(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 4221c7b7c506..a85a9e32fde4 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -126,10 +126,10 @@ static u32 dce_v8_0_audio_endpt_rreg(struct amdgpu_device *adev, unsigned long flags; u32 r; - spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags); + spin_lock_irqsave(&adev->reg.audio_endpt.lock, flags); WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg); r = 
RREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset); - spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.audio_endpt.lock, flags); return r; } @@ -139,10 +139,10 @@ static void dce_v8_0_audio_endpt_wreg(struct amdgpu_device *adev, { unsigned long flags; - spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags); + spin_lock_irqsave(&adev->reg.audio_endpt.lock, flags); WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg); WREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset, v); - spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.audio_endpt.lock, flags); } static u32 dce_v8_0_vblank_get_counter(struct amdgpu_device *adev, int crtc) @@ -2655,8 +2655,8 @@ static int dce_v8_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - adev->audio_endpt_rreg = &dce_v8_0_audio_endpt_rreg; - adev->audio_endpt_wreg = &dce_v8_0_audio_endpt_wreg; + adev->reg.audio_endpt.rreg = &dce_v8_0_audio_endpt_rreg; + adev->reg.audio_endpt.wreg = &dce_v8_0_audio_endpt_wreg; dce_v8_0_set_display_funcs(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index 621aeca53880..7e7e6c389895 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -51,7 +51,7 @@ static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev, address = adev->nbio.funcs->get_pcie_index_offset(adev); data = adev->nbio.funcs->get_pcie_data_offset(adev); - spin_lock_irqsave(&adev->pcie_idx_lock, flags); + spin_lock_irqsave(&adev->reg.pcie.lock, flags); WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3); WREG32(data, ficaa_val); @@ -61,7 +61,7 @@ static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev, WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3); ficadh_val = RREG32(data); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + 
spin_unlock_irqrestore(&adev->reg.pcie.lock, flags); return (((ficadh_val & 0xFFFFFFFFFFFFFFFF) << 32) | ficadl_val); } @@ -74,7 +74,7 @@ static void df_v3_6_set_fica(struct amdgpu_device *adev, uint32_t ficaa_val, address = adev->nbio.funcs->get_pcie_index_offset(adev); data = adev->nbio.funcs->get_pcie_data_offset(adev); - spin_lock_irqsave(&adev->pcie_idx_lock, flags); + spin_lock_irqsave(&adev->reg.pcie.lock, flags); WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3); WREG32(data, ficaa_val); @@ -84,7 +84,7 @@ static void df_v3_6_set_fica(struct amdgpu_device *adev, uint32_t ficaa_val, WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3); WREG32(data, ficadh_val); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.pcie.lock, flags); } /* @@ -102,12 +102,12 @@ static void df_v3_6_perfmon_rreg(struct amdgpu_device *adev, address = adev->nbio.funcs->get_pcie_index_offset(adev); data = adev->nbio.funcs->get_pcie_data_offset(adev); - spin_lock_irqsave(&adev->pcie_idx_lock, flags); + spin_lock_irqsave(&adev->reg.pcie.lock, flags); WREG32(address, lo_addr); *lo_val = RREG32(data); WREG32(address, hi_addr); *hi_val = RREG32(data); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.pcie.lock, flags); } /* @@ -124,12 +124,12 @@ static void df_v3_6_perfmon_wreg(struct amdgpu_device *adev, uint32_t lo_addr, address = adev->nbio.funcs->get_pcie_index_offset(adev); data = adev->nbio.funcs->get_pcie_data_offset(adev); - spin_lock_irqsave(&adev->pcie_idx_lock, flags); + spin_lock_irqsave(&adev->reg.pcie.lock, flags); WREG32(address, lo_addr); WREG32(data, lo_val); WREG32(address, hi_addr); WREG32(data, hi_val); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.pcie.lock, flags); } /* same as perfmon_wreg but return status on write value check */ @@ -143,7 +143,7 @@ static int df_v3_6_perfmon_arm_with_status(struct amdgpu_device 
*adev, address = adev->nbio.funcs->get_pcie_index_offset(adev); data = adev->nbio.funcs->get_pcie_data_offset(adev); - spin_lock_irqsave(&adev->pcie_idx_lock, flags); + spin_lock_irqsave(&adev->reg.pcie.lock, flags); WREG32(address, lo_addr); WREG32(data, lo_val); WREG32(address, hi_addr); @@ -153,7 +153,7 @@ static int df_v3_6_perfmon_arm_with_status(struct amdgpu_device *adev, lo_val_rb = RREG32(data); WREG32(address, hi_addr); hi_val_rb = RREG32(data); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.pcie.lock, flags); if (!(lo_val == lo_val_rb && hi_val == hi_val_rb)) return -EBUSY; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 427975b5a1d9..b1a1b8a10a08 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4238,6 +4238,37 @@ static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) return gfx_v11_0_cp_gfx_start(adev); } +static void gfx_v11_0_compute_mqd_set_cu_mask(struct amdgpu_device *adev, + struct v11_compute_mqd *mqd, + struct amdgpu_mqd_prop *prop) +{ + uint32_t se_mask[8] = {0}; + uint32_t wa_mask; + bool has_wa_flag = prop->cu_flags & (AMDGPU_UPDATE_FLAG_DBG_WA_ENABLE | + AMDGPU_UPDATE_FLAG_DBG_WA_DISABLE); + + if (!has_wa_flag && (!prop->cu_mask || !prop->cu_mask_count)) + return; + + if (has_wa_flag) { + wa_mask = (prop->cu_flags & AMDGPU_UPDATE_FLAG_DBG_WA_ENABLE) ? 
+ 0xffff : 0xffffffff; + mqd->compute_static_thread_mgmt_se0 = wa_mask; + mqd->compute_static_thread_mgmt_se1 = wa_mask; + mqd->compute_static_thread_mgmt_se2 = wa_mask; + mqd->compute_static_thread_mgmt_se3 = wa_mask; + return; + } + + amdgpu_gfx_mqd_symmetrically_map_cu_mask(adev, prop->cu_mask, + prop->cu_mask_count, se_mask); + + mqd->compute_static_thread_mgmt_se0 = se_mask[0]; + mqd->compute_static_thread_mgmt_se1 = se_mask[1]; + mqd->compute_static_thread_mgmt_se2 = se_mask[2]; + mqd->compute_static_thread_mgmt_se3 = se_mask[3]; +} + static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, struct amdgpu_mqd_prop *prop) { @@ -4372,6 +4403,8 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, /* set UQ fenceaddress */ mqd->fence_address_lo = lower_32_bits(prop->fence_address); mqd->fence_address_hi = upper_32_bits(prop->fence_address); + /* set CU mask */ + gfx_v11_0_compute_mqd_set_cu_mask(adev, mqd, prop); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 79ea1af363a5..a418ae609c36 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -3109,6 +3109,37 @@ static int gfx_v12_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) return gfx_v12_0_cp_gfx_start(adev); } +static void gfx_v12_0_compute_mqd_set_cu_mask(struct amdgpu_device *adev, + struct v12_compute_mqd *mqd, + struct amdgpu_mqd_prop *prop) +{ + uint32_t se_mask[8] = {0}; + uint32_t wa_mask; + bool has_wa_flag = prop->cu_flags & (AMDGPU_UPDATE_FLAG_DBG_WA_ENABLE | + AMDGPU_UPDATE_FLAG_DBG_WA_DISABLE); + + if (!has_wa_flag && (!prop->cu_mask || !prop->cu_mask_count)) + return; + + if (has_wa_flag) { + wa_mask = (prop->cu_flags & AMDGPU_UPDATE_FLAG_DBG_WA_ENABLE) ? 
+ 0xffff : 0xffffffff; + mqd->compute_static_thread_mgmt_se0 = wa_mask; + mqd->compute_static_thread_mgmt_se1 = wa_mask; + mqd->compute_static_thread_mgmt_se2 = wa_mask; + mqd->compute_static_thread_mgmt_se3 = wa_mask; + return; + } + + amdgpu_gfx_mqd_symmetrically_map_cu_mask(adev, prop->cu_mask, + prop->cu_mask_count, se_mask); + + mqd->compute_static_thread_mgmt_se0 = se_mask[0]; + mqd->compute_static_thread_mgmt_se1 = se_mask[1]; + mqd->compute_static_thread_mgmt_se2 = se_mask[2]; + mqd->compute_static_thread_mgmt_se3 = se_mask[3]; +} + static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, struct amdgpu_mqd_prop *prop) { @@ -3242,6 +3273,8 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, /* set UQ fenceaddress */ mqd->fence_address_lo = lower_32_bits(prop->fence_address); mqd->fence_address_hi = upper_32_bits(prop->fence_address); + /* set CU mask */ + gfx_v12_0_compute_mqd_set_cu_mask(adev, mqd, prop); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c index eb9725ae1607..557d15b90ad2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c @@ -1405,7 +1405,7 @@ static void gfx_v12_1_xcc_init_compute_vmid(struct amdgpu_device *adev, /* * Configure apertures: * LDS: 0x20000000'00000000 - 0x20000001'00000000 (4GB) - * Scratch: 0x10000000'00000000 - 0x10000001'00000000 (4GB) + * Scratch: 0x10000000'00000000 - 0x11ffffff'ffffffff (128PB 57-bit) */ sh_mem_bases = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, (adev->gmc.private_aperture_start >> 58)); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index b9671fc39e2a..da4a0cf4aad0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -654,9 +654,15 @@ static int gmc_v12_0_early_init(struct amdgpu_ip_block *ip_block) adev->gmc.shared_aperture_start = 0x2000000000000000ULL; 
adev->gmc.shared_aperture_end = adev->gmc.shared_aperture_start + (4ULL << 30) - 1; + adev->gmc.private_aperture_start = 0x1000000000000000ULL; - adev->gmc.private_aperture_end = - adev->gmc.private_aperture_start + (4ULL << 30) - 1; + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 1, 0)) + adev->gmc.private_aperture_end = + adev->gmc.private_aperture_start + (1ULL << 57) - 1; + else + adev->gmc.private_aperture_end = + adev->gmc.private_aperture_start + (4ULL << 30) - 1; + adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c index ef6e550ce7c3..dc8865c5879c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c @@ -345,9 +345,7 @@ static void gmc_v12_1_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, return; } - mutex_lock(&adev->mman.gtt_window_lock); gmc_v12_1_flush_vm_hub(adev, vmid, vmhub, 0); - mutex_unlock(&adev->mman.gtt_window_lock); return; } diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index f17c3839aea1..7ce1a1b95606 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -283,10 +283,10 @@ static u32 nv_didt_rreg(struct amdgpu_device *adev, u32 reg) address = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_INDEX); data = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_DATA); - spin_lock_irqsave(&adev->didt_idx_lock, flags); + spin_lock_irqsave(&adev->reg.didt.lock, flags); WREG32(address, (reg)); r = RREG32(data); - spin_unlock_irqrestore(&adev->didt_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.didt.lock, flags); return r; } @@ -297,10 +297,10 @@ static void nv_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v) address = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_INDEX); data = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_DATA); - spin_lock_irqsave(&adev->didt_idx_lock, flags); + spin_lock_irqsave(&adev->reg.didt.lock, flags); WREG32(address, (reg)); 
WREG32(data, (v)); - spin_unlock_irqrestore(&adev->didt_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.didt.lock, flags); } static u32 nv_get_config_memsize(struct amdgpu_device *adev) @@ -635,21 +635,15 @@ static int nv_common_early_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; adev->nbio.funcs->set_reg_remap(adev); - adev->smc_rreg = NULL; - adev->smc_wreg = NULL; - adev->pcie_rreg = &amdgpu_device_indirect_rreg; - adev->pcie_wreg = &amdgpu_device_indirect_wreg; - adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64; - adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64; - adev->pciep_rreg = amdgpu_device_pcie_port_rreg; - adev->pciep_wreg = amdgpu_device_pcie_port_wreg; - - /* TODO: will add them during VCN v2 implementation */ - adev->uvd_ctx_rreg = NULL; - adev->uvd_ctx_wreg = NULL; - - adev->didt_rreg = &nv_didt_rreg; - adev->didt_wreg = &nv_didt_wreg; + adev->reg.pcie.rreg = &amdgpu_device_indirect_rreg; + adev->reg.pcie.wreg = &amdgpu_device_indirect_wreg; + adev->reg.pcie.rreg64 = &amdgpu_device_indirect_rreg64; + adev->reg.pcie.wreg64 = &amdgpu_device_indirect_wreg64; + adev->reg.pcie.port_rreg = &amdgpu_device_pcie_port_rreg; + adev->reg.pcie.port_wreg = &amdgpu_device_pcie_port_wreg; + + adev->reg.didt.rreg = &nv_didt_rreg; + adev->reg.didt.wreg = &nv_didt_wreg; adev->asic_funcs = &nv_asic_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index d1e1a4369521..a0c84f81c0c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -166,7 +166,7 @@ static void psp_v13_0_bootloader_print_status(struct psp_context *psp, bl_status_reg = (SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_92) << 2) + - adev->asic_funcs->encode_ext_smn_addressing(i); + amdgpu_reg_get_smn_base64(adev, MP0_HWIP, i); at += snprintf(bl_status_msg + at, PSP13_BL_STATUS_SIZE - at, " status(%02i): 0x%08x", i, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c index 723ddae17644..73a709773e85 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c @@ -69,12 +69,12 @@ static int psp_v15_0_0_ring_stop(struct psp_context *psp, 0x80000000, 0x80000000, false); } else { /* Write the ring destroy command*/ - WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64, + WREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_64, GFX_CTRL_CMD_ID_DESTROY_RINGS); /* there might be handshake issue with hardware which needs delay */ mdelay(20); /* Wait for response flag (bit 31) */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_64), 0x80000000, 0x80000000, false); } @@ -116,7 +116,7 @@ static int psp_v15_0_0_ring_create(struct psp_context *psp, } else { /* Wait for sOS ready for ring creation */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_64), 0x80000000, 0x80000000, false); if (ret) { DRM_ERROR("Failed to wait for trust OS ready for ring creation\n"); @@ -125,23 +125,23 @@ static int psp_v15_0_0_ring_create(struct psp_context *psp, /* Write low address of the ring to C2PMSG_69 */ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_69, psp_ring_reg); + WREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_69, psp_ring_reg); /* Write high address of the ring to C2PMSG_70 */ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_70, psp_ring_reg); + WREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_70, psp_ring_reg); /* Write size of ring to C2PMSG_71 */ psp_ring_reg = ring->ring_size; - WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_71, psp_ring_reg); + WREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_71, psp_ring_reg); /* Write the ring initialization command to C2PMSG_64 */ 
psp_ring_reg = ring_type; psp_ring_reg = psp_ring_reg << 16; - WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64, psp_ring_reg); + WREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_64, psp_ring_reg); /* there might be handshake issue with hardware which needs delay */ mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_64), 0x80000000, 0x8000FFFF, false); } @@ -174,7 +174,7 @@ static uint32_t psp_v15_0_0_ring_get_wptr(struct psp_context *psp) if (amdgpu_sriov_vf(adev)) data = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102); else - data = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67); + data = RREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_67); return data; } @@ -188,7 +188,7 @@ static void psp_v15_0_0_ring_set_wptr(struct psp_context *psp, uint32_t value) WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD); } else - WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67, value); + WREG32_SOC15(MP0, 0, regMPASP_PCRU1_MPASP_C2PMSG_67, value); } static const struct psp_funcs psp_v15_0_0_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 92ce580647cd..0090ace49024 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -51,7 +51,6 @@ static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev); -static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev); MODULE_FIRMWARE("amdgpu/topaz_sdma.bin"); @@ -809,6 +808,14 @@ static void sdma_v2_4_ring_emit_wreg(struct amdgpu_ring *ring, amdgpu_ring_write(ring, val); } +static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = { + .copy_pte_num_dw = 7, + .copy_pte = sdma_v2_4_vm_copy_pte, + + .write_pte = 
sdma_v2_4_vm_write_pte, + .set_pte_pde = sdma_v2_4_vm_set_pte_pde, +}; + static int sdma_v2_4_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -822,7 +829,7 @@ static int sdma_v2_4_early_init(struct amdgpu_ip_block *ip_block) sdma_v2_4_set_ring_funcs(adev); sdma_v2_4_set_buffer_funcs(adev); - sdma_v2_4_set_vm_pte_funcs(adev); + amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v2_4_vm_pte_funcs); sdma_v2_4_set_irq_funcs(adev); return 0; @@ -1232,26 +1239,6 @@ static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev) adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; } -static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = { - .copy_pte_num_dw = 7, - .copy_pte = sdma_v2_4_vm_copy_pte, - - .write_pte = sdma_v2_4_vm_write_pte, - .set_pte_pde = sdma_v2_4_vm_set_pte_pde, -}; - -static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev) -{ - unsigned i; - - adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs; - for (i = 0; i < adev->sdma.num_instances; i++) { - adev->vm_manager.vm_pte_scheds[i] = - &adev->sdma.instance[i].ring.sched; - } - adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; -} - const struct amdgpu_ip_block_version sdma_v2_4_ip_block = { .type = AMD_IP_BLOCK_TYPE_SDMA, .major = 2, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 1c076bd1cf73..2526d393162a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -51,7 +51,6 @@ static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev); -static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev); MODULE_FIRMWARE("amdgpu/tonga_sdma.bin"); @@ -1082,6 +1081,14 @@ static void sdma_v3_0_ring_emit_wreg(struct amdgpu_ring *ring, amdgpu_ring_write(ring, val); } +static const struct 
amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = { + .copy_pte_num_dw = 7, + .copy_pte = sdma_v3_0_vm_copy_pte, + + .write_pte = sdma_v3_0_vm_write_pte, + .set_pte_pde = sdma_v3_0_vm_set_pte_pde, +}; + static int sdma_v3_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -1102,7 +1109,7 @@ static int sdma_v3_0_early_init(struct amdgpu_ip_block *ip_block) sdma_v3_0_set_ring_funcs(adev); sdma_v3_0_set_buffer_funcs(adev); - sdma_v3_0_set_vm_pte_funcs(adev); + amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v3_0_vm_pte_funcs); sdma_v3_0_set_irq_funcs(adev); return 0; @@ -1674,26 +1681,6 @@ static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev) adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; } -static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = { - .copy_pte_num_dw = 7, - .copy_pte = sdma_v3_0_vm_copy_pte, - - .write_pte = sdma_v3_0_vm_write_pte, - .set_pte_pde = sdma_v3_0_vm_set_pte_pde, -}; - -static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev) -{ - unsigned i; - - adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs; - for (i = 0; i < adev->sdma.num_instances; i++) { - adev->vm_manager.vm_pte_scheds[i] = - &adev->sdma.instance[i].ring.sched; - } - adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; -} - const struct amdgpu_ip_block_version sdma_v3_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_SDMA, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index f38004e6064e..44f0f23e1148 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -129,7 +129,6 @@ static const struct amdgpu_hwip_reg_entry sdma_reg_list_4_0[] = { static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev); -static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev); 
static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev); @@ -1751,6 +1750,14 @@ static bool sdma_v4_0_fw_support_paging_queue(struct amdgpu_device *adev) } } +static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = { + .copy_pte_num_dw = 7, + .copy_pte = sdma_v4_0_vm_copy_pte, + + .write_pte = sdma_v4_0_vm_write_pte, + .set_pte_pde = sdma_v4_0_vm_set_pte_pde, +}; + static int sdma_v4_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -1769,7 +1776,7 @@ static int sdma_v4_0_early_init(struct amdgpu_ip_block *ip_block) sdma_v4_0_set_ring_funcs(adev); sdma_v4_0_set_buffer_funcs(adev); - sdma_v4_0_set_vm_pte_funcs(adev); + amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v4_0_vm_pte_funcs); sdma_v4_0_set_irq_funcs(adev); sdma_v4_0_set_ras_funcs(adev); @@ -2597,48 +2604,37 @@ static void sdma_v4_0_emit_fill_buffer(struct amdgpu_ib *ib, } static const struct amdgpu_buffer_funcs sdma_v4_0_buffer_funcs = { - .copy_max_bytes = 0x400000, + .copy_max_bytes = 1 << 22, .copy_num_dw = 7, .emit_copy_buffer = sdma_v4_0_emit_copy_buffer, - .fill_max_bytes = 0x400000, + .fill_max_bytes = 1 << 22, + .fill_num_dw = 5, + .emit_fill_buffer = sdma_v4_0_emit_fill_buffer, +}; + +static const struct amdgpu_buffer_funcs sdma_v4_4_buffer_funcs = { + .copy_max_bytes = 1 << 30, + .copy_num_dw = 7, + .emit_copy_buffer = sdma_v4_0_emit_copy_buffer, + + .fill_max_bytes = 1 << 30, .fill_num_dw = 5, .emit_fill_buffer = sdma_v4_0_emit_fill_buffer, }; static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev) { - adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs; + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >= IP_VERSION(4, 4, 0)) + adev->mman.buffer_funcs = &sdma_v4_4_buffer_funcs; + else + adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs; if (adev->sdma.has_page_queue) adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page; else adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; } -static const struct amdgpu_vm_pte_funcs 
sdma_v4_0_vm_pte_funcs = { - .copy_pte_num_dw = 7, - .copy_pte = sdma_v4_0_vm_copy_pte, - - .write_pte = sdma_v4_0_vm_write_pte, - .set_pte_pde = sdma_v4_0_vm_set_pte_pde, -}; - -static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev) -{ - struct drm_gpu_scheduler *sched; - unsigned i; - - adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs; - for (i = 0; i < adev->sdma.num_instances; i++) { - if (adev->sdma.has_page_queue) - sched = &adev->sdma.instance[i].page.sched; - else - sched = &adev->sdma.instance[i].ring.sched; - adev->vm_manager.vm_pte_scheds[i] = sched; - } - adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; -} - static void sdma_v4_0_get_ras_error_count(uint32_t value, uint32_t instance, uint32_t *sec_count) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index a1443990d5c6..78bdfed0a7fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -104,7 +104,6 @@ static const struct amdgpu_hwip_reg_entry sdma_reg_list_4_4_2[] = { static void sdma_v4_4_2_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_set_buffer_funcs(struct amdgpu_device *adev); -static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev); @@ -1347,6 +1346,14 @@ static const struct amdgpu_sdma_funcs sdma_v4_4_2_sdma_funcs = { .soft_reset_kernel_queue = &sdma_v4_4_2_soft_reset_engine, }; +static const struct amdgpu_vm_pte_funcs sdma_v4_4_2_vm_pte_funcs = { + .copy_pte_num_dw = 7, + .copy_pte = sdma_v4_4_2_vm_copy_pte, + + .write_pte = sdma_v4_4_2_vm_write_pte, + .set_pte_pde = sdma_v4_4_2_vm_set_pte_pde, +}; + static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -1362,7 
+1369,7 @@ static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block) sdma_v4_4_2_set_ring_funcs(adev); sdma_v4_4_2_set_buffer_funcs(adev); - sdma_v4_4_2_set_vm_pte_funcs(adev); + amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v4_4_2_vm_pte_funcs); sdma_v4_4_2_set_irq_funcs(adev); sdma_v4_4_2_set_ras_funcs(adev); return 0; @@ -2298,11 +2305,11 @@ static void sdma_v4_4_2_emit_fill_buffer(struct amdgpu_ib *ib, } static const struct amdgpu_buffer_funcs sdma_v4_4_2_buffer_funcs = { - .copy_max_bytes = 0x400000, + .copy_max_bytes = 1 << 30, .copy_num_dw = 7, .emit_copy_buffer = sdma_v4_4_2_emit_copy_buffer, - .fill_max_bytes = 0x400000, + .fill_max_bytes = 1 << 30, .fill_num_dw = 5, .emit_fill_buffer = sdma_v4_4_2_emit_fill_buffer, }; @@ -2316,30 +2323,6 @@ static void sdma_v4_4_2_set_buffer_funcs(struct amdgpu_device *adev) adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; } -static const struct amdgpu_vm_pte_funcs sdma_v4_4_2_vm_pte_funcs = { - .copy_pte_num_dw = 7, - .copy_pte = sdma_v4_4_2_vm_copy_pte, - - .write_pte = sdma_v4_4_2_vm_write_pte, - .set_pte_pde = sdma_v4_4_2_vm_set_pte_pde, -}; - -static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev) -{ - struct drm_gpu_scheduler *sched; - unsigned i; - - adev->vm_manager.vm_pte_funcs = &sdma_v4_4_2_vm_pte_funcs; - for (i = 0; i < adev->sdma.num_instances; i++) { - if (adev->sdma.has_page_queue) - sched = &adev->sdma.instance[i].page.sched; - else - sched = &adev->sdma.instance[i].ring.sched; - adev->vm_manager.vm_pte_scheds[i] = sched; - } - adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; -} - /** * sdma_v4_4_2_update_reset_mask - update reset mask for SDMA * @adev: Pointer to the AMDGPU device structure diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index e3a035c9fece..52f4e9e099cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -110,7 +110,6 @@ static const struct 
amdgpu_hwip_reg_entry sdma_reg_list_5_0[] = { static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev); -static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev); static int sdma_v5_0_stop_queue(struct amdgpu_ring *ring); static int sdma_v5_0_restore_queue(struct amdgpu_ring *ring); @@ -1357,6 +1356,13 @@ static const struct amdgpu_sdma_funcs sdma_v5_0_sdma_funcs = { .soft_reset_kernel_queue = &sdma_v5_0_soft_reset_engine, }; +static const struct amdgpu_vm_pte_funcs sdma_v5_0_vm_pte_funcs = { + .copy_pte_num_dw = 7, + .copy_pte = sdma_v5_0_vm_copy_pte, + .write_pte = sdma_v5_0_vm_write_pte, + .set_pte_pde = sdma_v5_0_vm_set_pte_pde, +}; + static int sdma_v5_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -1368,7 +1374,7 @@ static int sdma_v5_0_early_init(struct amdgpu_ip_block *ip_block) sdma_v5_0_set_ring_funcs(adev); sdma_v5_0_set_buffer_funcs(adev); - sdma_v5_0_set_vm_pte_funcs(adev); + amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v5_0_vm_pte_funcs); sdma_v5_0_set_irq_funcs(adev); sdma_v5_0_set_mqd_funcs(adev); @@ -2052,27 +2058,6 @@ static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev) } } -static const struct amdgpu_vm_pte_funcs sdma_v5_0_vm_pte_funcs = { - .copy_pte_num_dw = 7, - .copy_pte = sdma_v5_0_vm_copy_pte, - .write_pte = sdma_v5_0_vm_write_pte, - .set_pte_pde = sdma_v5_0_vm_set_pte_pde, -}; - -static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev) -{ - unsigned i; - - if (adev->vm_manager.vm_pte_funcs == NULL) { - adev->vm_manager.vm_pte_funcs = &sdma_v5_0_vm_pte_funcs; - for (i = 0; i < adev->sdma.num_instances; i++) { - adev->vm_manager.vm_pte_scheds[i] = - &adev->sdma.instance[i].ring.sched; - } - adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; - } -} - const struct amdgpu_ip_block_version sdma_v5_0_ip_block = { 
.type = AMD_IP_BLOCK_TYPE_SDMA, .major = 5, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index feebaa8cd9b1..b4fb90cc8f7d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -111,7 +111,6 @@ static const struct amdgpu_hwip_reg_entry sdma_reg_list_5_2[] = { static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev); -static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev); static int sdma_v5_2_stop_queue(struct amdgpu_ring *ring); static int sdma_v5_2_restore_queue(struct amdgpu_ring *ring); @@ -1248,6 +1247,13 @@ static void sdma_v5_2_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); } +static const struct amdgpu_vm_pte_funcs sdma_v5_2_vm_pte_funcs = { + .copy_pte_num_dw = 7, + .copy_pte = sdma_v5_2_vm_copy_pte, + .write_pte = sdma_v5_2_vm_write_pte, + .set_pte_pde = sdma_v5_2_vm_set_pte_pde, +}; + static int sdma_v5_2_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -1259,7 +1265,7 @@ static int sdma_v5_2_early_init(struct amdgpu_ip_block *ip_block) sdma_v5_2_set_ring_funcs(adev); sdma_v5_2_set_buffer_funcs(adev); - sdma_v5_2_set_vm_pte_funcs(adev); + amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v5_2_vm_pte_funcs); sdma_v5_2_set_irq_funcs(adev); sdma_v5_2_set_mqd_funcs(adev); @@ -2039,11 +2045,11 @@ static void sdma_v5_2_emit_fill_buffer(struct amdgpu_ib *ib, } static const struct amdgpu_buffer_funcs sdma_v5_2_buffer_funcs = { - .copy_max_bytes = 0x400000, + .copy_max_bytes = 1 << 30, .copy_num_dw = 7, .emit_copy_buffer = sdma_v5_2_emit_copy_buffer, - .fill_max_bytes = 0x400000, + .fill_max_bytes = 1 << 30, /* HW supports 1 << 30, but PAL uses 1 << 22 */ .fill_num_dw = 5, .emit_fill_buffer = sdma_v5_2_emit_fill_buffer, }; @@ 
-2056,27 +2062,6 @@ static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev) } } -static const struct amdgpu_vm_pte_funcs sdma_v5_2_vm_pte_funcs = { - .copy_pte_num_dw = 7, - .copy_pte = sdma_v5_2_vm_copy_pte, - .write_pte = sdma_v5_2_vm_write_pte, - .set_pte_pde = sdma_v5_2_vm_set_pte_pde, -}; - -static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev) -{ - unsigned i; - - if (adev->vm_manager.vm_pte_funcs == NULL) { - adev->vm_manager.vm_pte_funcs = &sdma_v5_2_vm_pte_funcs; - for (i = 0; i < adev->sdma.num_instances; i++) { - adev->vm_manager.vm_pte_scheds[i] = - &adev->sdma.instance[i].ring.sched; - } - adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; - } -} - const struct amdgpu_ip_block_version sdma_v5_2_ip_block = { .type = AMD_IP_BLOCK_TYPE_SDMA, .major = 5, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index b40126f5d3ef..b005672f2f96 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -120,7 +120,6 @@ static const struct amdgpu_hwip_reg_entry sdma_reg_list_6_0[] = { static void sdma_v6_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v6_0_set_buffer_funcs(struct amdgpu_device *adev); -static void sdma_v6_0_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v6_0_set_irq_funcs(struct amdgpu_device *adev); static int sdma_v6_0_start(struct amdgpu_device *adev); @@ -1280,6 +1279,13 @@ static void sdma_v6_0_get_csa_info(struct amdgpu_device *adev, csa_info->alignment = SDMA6_CSA_ALIGNMENT; } +static const struct amdgpu_vm_pte_funcs sdma_v6_0_vm_pte_funcs = { + .copy_pte_num_dw = 7, + .copy_pte = sdma_v6_0_vm_copy_pte, + .write_pte = sdma_v6_0_vm_write_pte, + .set_pte_pde = sdma_v6_0_vm_set_pte_pde, +}; + static int sdma_v6_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -1308,7 +1314,7 @@ static int sdma_v6_0_early_init(struct amdgpu_ip_block *ip_block) 
sdma_v6_0_set_ring_funcs(adev); sdma_v6_0_set_buffer_funcs(adev); - sdma_v6_0_set_vm_pte_funcs(adev); + amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v6_0_vm_pte_funcs); sdma_v6_0_set_irq_funcs(adev); sdma_v6_0_set_mqd_funcs(adev); sdma_v6_0_set_ras_funcs(adev); @@ -1878,11 +1884,11 @@ static void sdma_v6_0_emit_fill_buffer(struct amdgpu_ib *ib, } static const struct amdgpu_buffer_funcs sdma_v6_0_buffer_funcs = { - .copy_max_bytes = 0x400000, + .copy_max_bytes = 1 << 30, .copy_num_dw = 7, .emit_copy_buffer = sdma_v6_0_emit_copy_buffer, - .fill_max_bytes = 0x400000, + .fill_max_bytes = 1 << 30, .fill_num_dw = 5, .emit_fill_buffer = sdma_v6_0_emit_fill_buffer, }; @@ -1893,25 +1899,6 @@ static void sdma_v6_0_set_buffer_funcs(struct amdgpu_device *adev) adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; } -static const struct amdgpu_vm_pte_funcs sdma_v6_0_vm_pte_funcs = { - .copy_pte_num_dw = 7, - .copy_pte = sdma_v6_0_vm_copy_pte, - .write_pte = sdma_v6_0_vm_write_pte, - .set_pte_pde = sdma_v6_0_vm_set_pte_pde, -}; - -static void sdma_v6_0_set_vm_pte_funcs(struct amdgpu_device *adev) -{ - unsigned i; - - adev->vm_manager.vm_pte_funcs = &sdma_v6_0_vm_pte_funcs; - for (i = 0; i < adev->sdma.num_instances; i++) { - adev->vm_manager.vm_pte_scheds[i] = - &adev->sdma.instance[i].ring.sched; - } - adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; -} - const struct amdgpu_ip_block_version sdma_v6_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_SDMA, .major = 6, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 8d16ef257bcb..5679a94d0815 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -119,7 +119,6 @@ static const struct amdgpu_hwip_reg_entry sdma_reg_list_7_0[] = { static void sdma_v7_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v7_0_set_buffer_funcs(struct amdgpu_device *adev); -static void sdma_v7_0_set_vm_pte_funcs(struct amdgpu_device *adev); 
static void sdma_v7_0_set_irq_funcs(struct amdgpu_device *adev); static int sdma_v7_0_start(struct amdgpu_device *adev); @@ -1264,6 +1263,13 @@ static void sdma_v7_0_get_csa_info(struct amdgpu_device *adev, csa_info->alignment = SDMA7_CSA_ALIGNMENT; } +static const struct amdgpu_vm_pte_funcs sdma_v7_0_vm_pte_funcs = { + .copy_pte_num_dw = 8, + .copy_pte = sdma_v7_0_vm_copy_pte, + .write_pte = sdma_v7_0_vm_write_pte, + .set_pte_pde = sdma_v7_0_vm_set_pte_pde, +}; + static int sdma_v7_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -1294,7 +1300,7 @@ static int sdma_v7_0_early_init(struct amdgpu_ip_block *ip_block) sdma_v7_0_set_ring_funcs(adev); sdma_v7_0_set_buffer_funcs(adev); - sdma_v7_0_set_vm_pte_funcs(adev); + amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v7_0_vm_pte_funcs); sdma_v7_0_set_irq_funcs(adev); sdma_v7_0_set_mqd_funcs(adev); adev->sdma.get_csa_info = &sdma_v7_0_get_csa_info; @@ -1829,10 +1835,10 @@ static void sdma_v7_0_emit_fill_buffer(struct amdgpu_ib *ib, } static const struct amdgpu_buffer_funcs sdma_v7_0_buffer_funcs = { - .copy_max_bytes = 0x400000, + .copy_max_bytes = 1 << 30, .copy_num_dw = 8, .emit_copy_buffer = sdma_v7_0_emit_copy_buffer, - .fill_max_bytes = 0x400000, + .fill_max_bytes = 1 << 30, .fill_num_dw = 5, .emit_fill_buffer = sdma_v7_0_emit_fill_buffer, }; @@ -1843,25 +1849,6 @@ static void sdma_v7_0_set_buffer_funcs(struct amdgpu_device *adev) adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; } -static const struct amdgpu_vm_pte_funcs sdma_v7_0_vm_pte_funcs = { - .copy_pte_num_dw = 8, - .copy_pte = sdma_v7_0_vm_copy_pte, - .write_pte = sdma_v7_0_vm_write_pte, - .set_pte_pde = sdma_v7_0_vm_set_pte_pde, -}; - -static void sdma_v7_0_set_vm_pte_funcs(struct amdgpu_device *adev) -{ - unsigned i; - - adev->vm_manager.vm_pte_funcs = &sdma_v7_0_vm_pte_funcs; - for (i = 0; i < adev->sdma.num_instances; i++) { - adev->vm_manager.vm_pte_scheds[i] = - &adev->sdma.instance[i].ring.sched; 
- } - adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; -} - const struct amdgpu_ip_block_version sdma_v7_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_SDMA, .major = 7, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c index 0824cba48f2e..03bf1f86098f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c @@ -110,7 +110,6 @@ static const struct amdgpu_hwip_reg_entry sdma_reg_list_7_1[] = { static void sdma_v7_1_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v7_1_set_buffer_funcs(struct amdgpu_device *adev); -static void sdma_v7_1_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v7_1_set_irq_funcs(struct amdgpu_device *adev); static int sdma_v7_1_inst_start(struct amdgpu_device *adev, uint32_t inst_mask); @@ -1248,6 +1247,13 @@ static void sdma_v7_1_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); } +static const struct amdgpu_vm_pte_funcs sdma_v7_1_vm_pte_funcs = { + .copy_pte_num_dw = 8, + .copy_pte = sdma_v7_1_vm_copy_pte, + .write_pte = sdma_v7_1_vm_write_pte, + .set_pte_pde = sdma_v7_1_vm_set_pte_pde, +}; + static int sdma_v7_1_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -1261,7 +1267,7 @@ static int sdma_v7_1_early_init(struct amdgpu_ip_block *ip_block) sdma_v7_1_set_ring_funcs(adev); sdma_v7_1_set_buffer_funcs(adev); - sdma_v7_1_set_vm_pte_funcs(adev); + amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v7_1_vm_pte_funcs); sdma_v7_1_set_irq_funcs(adev); sdma_v7_1_set_mqd_funcs(adev); @@ -1739,10 +1745,10 @@ static void sdma_v7_1_emit_fill_buffer(struct amdgpu_ib *ib, } static const struct amdgpu_buffer_funcs sdma_v7_1_buffer_funcs = { - .copy_max_bytes = 0x400000, + .copy_max_bytes = 1 << 30, .copy_num_dw = 8, .emit_copy_buffer = sdma_v7_1_emit_copy_buffer, - .fill_max_bytes = 0x400000, + .fill_max_bytes = 1 << 30, .fill_num_dw = 5, 
.emit_fill_buffer = sdma_v7_1_emit_fill_buffer, }; @@ -1753,25 +1759,6 @@ static void sdma_v7_1_set_buffer_funcs(struct amdgpu_device *adev) adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; } -static const struct amdgpu_vm_pte_funcs sdma_v7_1_vm_pte_funcs = { - .copy_pte_num_dw = 8, - .copy_pte = sdma_v7_1_vm_copy_pte, - .write_pte = sdma_v7_1_vm_write_pte, - .set_pte_pde = sdma_v7_1_vm_set_pte_pde, -}; - -static void sdma_v7_1_set_vm_pte_funcs(struct amdgpu_device *adev) -{ - unsigned i; - - adev->vm_manager.vm_pte_funcs = &sdma_v7_1_vm_pte_funcs; - for (i = 0; i < adev->sdma.num_instances; i++) { - adev->vm_manager.vm_pte_scheds[i] = - &adev->sdma.instance[i].ring.sched; - } - adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; -} - const struct amdgpu_ip_block_version sdma_v7_1_ip_block = { .type = AMD_IP_BLOCK_TYPE_SDMA, .major = 7, diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 509d43b238f3..c26cb3e8bff6 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1027,11 +1027,11 @@ static u32 si_pcie_rreg(struct amdgpu_device *adev, u32 reg) unsigned long flags; u32 r; - spin_lock_irqsave(&adev->pcie_idx_lock, flags); + spin_lock_irqsave(&adev->reg.pcie.lock, flags); WREG32(AMDGPU_PCIE_INDEX, reg); (void)RREG32(AMDGPU_PCIE_INDEX); r = RREG32(AMDGPU_PCIE_DATA); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.pcie.lock, flags); return r; } @@ -1039,12 +1039,12 @@ static void si_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { unsigned long flags; - spin_lock_irqsave(&adev->pcie_idx_lock, flags); + spin_lock_irqsave(&adev->reg.pcie.lock, flags); WREG32(AMDGPU_PCIE_INDEX, reg); (void)RREG32(AMDGPU_PCIE_INDEX); WREG32(AMDGPU_PCIE_DATA, v); (void)RREG32(AMDGPU_PCIE_DATA); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.pcie.lock, flags); } static u32 si_pciep_rreg(struct amdgpu_device 
*adev, u32 reg) @@ -1052,11 +1052,11 @@ static u32 si_pciep_rreg(struct amdgpu_device *adev, u32 reg) unsigned long flags; u32 r; - spin_lock_irqsave(&adev->pcie_idx_lock, flags); + spin_lock_irqsave(&adev->reg.pcie.lock, flags); WREG32(PCIE_PORT_INDEX, ((reg) & 0xff)); (void)RREG32(PCIE_PORT_INDEX); r = RREG32(PCIE_PORT_DATA); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.pcie.lock, flags); return r; } @@ -1064,12 +1064,12 @@ static void si_pciep_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { unsigned long flags; - spin_lock_irqsave(&adev->pcie_idx_lock, flags); + spin_lock_irqsave(&adev->reg.pcie.lock, flags); WREG32(PCIE_PORT_INDEX, ((reg) & 0xff)); (void)RREG32(PCIE_PORT_INDEX); WREG32(PCIE_PORT_DATA, (v)); (void)RREG32(PCIE_PORT_DATA); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.pcie.lock, flags); } static u32 si_smc_rreg(struct amdgpu_device *adev, u32 reg) @@ -1077,10 +1077,10 @@ static u32 si_smc_rreg(struct amdgpu_device *adev, u32 reg) unsigned long flags; u32 r; - spin_lock_irqsave(&adev->smc_idx_lock, flags); + spin_lock_irqsave(&adev->reg.smc.lock, flags); WREG32(mmSMC_IND_INDEX_0, (reg)); r = RREG32(mmSMC_IND_DATA_0); - spin_unlock_irqrestore(&adev->smc_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.smc.lock, flags); return r; } @@ -1088,10 +1088,10 @@ static void si_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { unsigned long flags; - spin_lock_irqsave(&adev->smc_idx_lock, flags); + spin_lock_irqsave(&adev->reg.smc.lock, flags); WREG32(mmSMC_IND_INDEX_0, (reg)); WREG32(mmSMC_IND_DATA_0, (v)); - spin_unlock_irqrestore(&adev->smc_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.smc.lock, flags); } static u32 si_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg) @@ -1099,10 +1099,10 @@ static u32 si_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg) unsigned long flags; u32 r; - spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags); + 
spin_lock_irqsave(&adev->reg.uvd_ctx.lock, flags); WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff)); r = RREG32(mmUVD_CTX_DATA); - spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.uvd_ctx.lock, flags); return r; } @@ -1110,10 +1110,10 @@ static void si_uvd_ctx_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { unsigned long flags; - spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags); + spin_lock_irqsave(&adev->reg.uvd_ctx.lock, flags); WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff)); WREG32(mmUVD_CTX_DATA, (v)); - spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.uvd_ctx.lock, flags); } static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = { @@ -2037,16 +2037,14 @@ static int si_common_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - adev->smc_rreg = &si_smc_rreg; - adev->smc_wreg = &si_smc_wreg; - adev->pcie_rreg = &si_pcie_rreg; - adev->pcie_wreg = &si_pcie_wreg; - adev->pciep_rreg = &si_pciep_rreg; - adev->pciep_wreg = &si_pciep_wreg; - adev->uvd_ctx_rreg = si_uvd_ctx_rreg; - adev->uvd_ctx_wreg = si_uvd_ctx_wreg; - adev->didt_rreg = NULL; - adev->didt_wreg = NULL; + adev->reg.smc.rreg = si_smc_rreg; + adev->reg.smc.wreg = si_smc_wreg; + adev->reg.pcie.rreg = &si_pcie_rreg; + adev->reg.pcie.wreg = &si_pcie_wreg; + adev->reg.pcie.port_rreg = &si_pciep_rreg; + adev->reg.pcie.port_wreg = &si_pciep_wreg; + adev->reg.uvd_ctx.rreg = &si_uvd_ctx_rreg; + adev->reg.uvd_ctx.wreg = &si_uvd_ctx_wreg; adev->asic_funcs = &si_asic_funcs; @@ -2382,10 +2380,10 @@ static inline u32 si_pif_phy0_rreg(struct amdgpu_device *adev, u32 reg) unsigned long flags; u32 r; - spin_lock_irqsave(&adev->pcie_idx_lock, flags); + spin_lock_irqsave(&adev->reg.pcie.lock, flags); WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff)); r = RREG32(EVERGREEN_PIF_PHY0_DATA); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.pcie.lock, 
flags); return r; } @@ -2393,10 +2391,10 @@ static inline void si_pif_phy0_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { unsigned long flags; - spin_lock_irqsave(&adev->pcie_idx_lock, flags); + spin_lock_irqsave(&adev->reg.pcie.lock, flags); WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff)); WREG32(EVERGREEN_PIF_PHY0_DATA, (v)); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.pcie.lock, flags); } static inline u32 si_pif_phy1_rreg(struct amdgpu_device *adev, u32 reg) @@ -2404,10 +2402,10 @@ static inline u32 si_pif_phy1_rreg(struct amdgpu_device *adev, u32 reg) unsigned long flags; u32 r; - spin_lock_irqsave(&adev->pcie_idx_lock, flags); + spin_lock_irqsave(&adev->reg.pcie.lock, flags); WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff)); r = RREG32(EVERGREEN_PIF_PHY1_DATA); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.pcie.lock, flags); return r; } @@ -2415,10 +2413,10 @@ static inline void si_pif_phy1_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { unsigned long flags; - spin_lock_irqsave(&adev->pcie_idx_lock, flags); + spin_lock_irqsave(&adev->reg.pcie.lock, flags); WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff)); WREG32(EVERGREEN_PIF_PHY1_DATA, (v)); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.pcie.lock, flags); } static void si_program_aspm(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 74fcaa340d9b..3e58feb2d5e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -37,7 +37,6 @@ const u32 sdma_offsets[SDMA_MAX_INSTANCE] = static void si_dma_set_ring_funcs(struct amdgpu_device *adev); static void si_dma_set_buffer_funcs(struct amdgpu_device *adev); -static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev); static void si_dma_set_irq_funcs(struct amdgpu_device *adev); /** @@ -473,6 +472,14 @@ 
static void si_dma_ring_emit_wreg(struct amdgpu_ring *ring, amdgpu_ring_write(ring, val); } +static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = { + .copy_pte_num_dw = 5, + .copy_pte = si_dma_vm_copy_pte, + + .write_pte = si_dma_vm_write_pte, + .set_pte_pde = si_dma_vm_set_pte_pde, +}; + static int si_dma_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -481,7 +488,7 @@ static int si_dma_early_init(struct amdgpu_ip_block *ip_block) si_dma_set_ring_funcs(adev); si_dma_set_buffer_funcs(adev); - si_dma_set_vm_pte_funcs(adev); + amdgpu_sdma_set_vm_pte_scheds(adev, &si_dma_vm_pte_funcs); si_dma_set_irq_funcs(adev); return 0; @@ -830,26 +837,6 @@ static void si_dma_set_buffer_funcs(struct amdgpu_device *adev) adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; } -static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = { - .copy_pte_num_dw = 5, - .copy_pte = si_dma_vm_copy_pte, - - .write_pte = si_dma_vm_write_pte, - .set_pte_pde = si_dma_vm_set_pte_pde, -}; - -static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev) -{ - unsigned i; - - adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs; - for (i = 0; i < adev->sdma.num_instances; i++) { - adev->vm_manager.vm_pte_scheds[i] = - &adev->sdma.instance[i].ring.sched; - } - adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; -} - const struct amdgpu_ip_block_version si_dma_ip_block = { .type = AMD_IP_BLOCK_TYPE_SDMA, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 4e037a6978f0..b456e4541d9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -245,10 +245,10 @@ static u32 soc15_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg) address = SOC15_REG_OFFSET(UVD, 0, mmUVD_CTX_INDEX); data = SOC15_REG_OFFSET(UVD, 0, mmUVD_CTX_DATA); - spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags); + spin_lock_irqsave(&adev->reg.uvd_ctx.lock, flags); WREG32(address, ((reg) & 
0x1ff)); r = RREG32(data); - spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.uvd_ctx.lock, flags); return r; } @@ -259,10 +259,10 @@ static void soc15_uvd_ctx_wreg(struct amdgpu_device *adev, u32 reg, u32 v) address = SOC15_REG_OFFSET(UVD, 0, mmUVD_CTX_INDEX); data = SOC15_REG_OFFSET(UVD, 0, mmUVD_CTX_DATA); - spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags); + spin_lock_irqsave(&adev->reg.uvd_ctx.lock, flags); WREG32(address, ((reg) & 0x1ff)); WREG32(data, (v)); - spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.uvd_ctx.lock, flags); } static u32 soc15_didt_rreg(struct amdgpu_device *adev, u32 reg) @@ -273,10 +273,10 @@ static u32 soc15_didt_rreg(struct amdgpu_device *adev, u32 reg) address = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_INDEX); data = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_DATA); - spin_lock_irqsave(&adev->didt_idx_lock, flags); + spin_lock_irqsave(&adev->reg.didt.lock, flags); WREG32(address, (reg)); r = RREG32(data); - spin_unlock_irqrestore(&adev->didt_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.didt.lock, flags); return r; } @@ -287,10 +287,10 @@ static void soc15_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v) address = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_INDEX); data = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_DATA); - spin_lock_irqsave(&adev->didt_idx_lock, flags); + spin_lock_irqsave(&adev->reg.didt.lock, flags); WREG32(address, (reg)); WREG32(data, (v)); - spin_unlock_irqrestore(&adev->didt_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.didt.lock, flags); } static u32 soc15_gc_cac_rreg(struct amdgpu_device *adev, u32 reg) @@ -298,10 +298,10 @@ static u32 soc15_gc_cac_rreg(struct amdgpu_device *adev, u32 reg) unsigned long flags; u32 r; - spin_lock_irqsave(&adev->gc_cac_idx_lock, flags); + spin_lock_irqsave(&adev->reg.gc_cac.lock, flags); WREG32_SOC15(GC, 0, mmGC_CAC_IND_INDEX, (reg)); r = RREG32_SOC15(GC, 0, mmGC_CAC_IND_DATA); - 
spin_unlock_irqrestore(&adev->gc_cac_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.gc_cac.lock, flags); return r; } @@ -309,10 +309,10 @@ static void soc15_gc_cac_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { unsigned long flags; - spin_lock_irqsave(&adev->gc_cac_idx_lock, flags); + spin_lock_irqsave(&adev->reg.gc_cac.lock, flags); WREG32_SOC15(GC, 0, mmGC_CAC_IND_INDEX, (reg)); WREG32_SOC15(GC, 0, mmGC_CAC_IND_DATA, (v)); - spin_unlock_irqrestore(&adev->gc_cac_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.gc_cac.lock, flags); } static u32 soc15_se_cac_rreg(struct amdgpu_device *adev, u32 reg) @@ -320,10 +320,10 @@ static u32 soc15_se_cac_rreg(struct amdgpu_device *adev, u32 reg) unsigned long flags; u32 r; - spin_lock_irqsave(&adev->se_cac_idx_lock, flags); + spin_lock_irqsave(&adev->reg.se_cac.lock, flags); WREG32_SOC15(GC, 0, mmSE_CAC_IND_INDEX, (reg)); r = RREG32_SOC15(GC, 0, mmSE_CAC_IND_DATA); - spin_unlock_irqrestore(&adev->se_cac_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.se_cac.lock, flags); return r; } @@ -331,10 +331,10 @@ static void soc15_se_cac_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { unsigned long flags; - spin_lock_irqsave(&adev->se_cac_idx_lock, flags); + spin_lock_irqsave(&adev->reg.se_cac.lock, flags); WREG32_SOC15(GC, 0, mmSE_CAC_IND_INDEX, (reg)); WREG32_SOC15(GC, 0, mmSE_CAC_IND_DATA, (v)); - spin_unlock_irqrestore(&adev->se_cac_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.se_cac.lock, flags); } static u32 soc15_get_config_memsize(struct amdgpu_device *adev) @@ -952,7 +952,6 @@ static const struct amdgpu_asic_funcs aqua_vanjaram_asic_funcs = .get_pcie_replay_count = &amdgpu_nbio_get_pcie_replay_count, .supports_baco = &soc15_supports_baco, .query_video_codecs = &soc15_query_video_codecs, - .encode_ext_smn_addressing = &aqua_vanjaram_encode_ext_smn_addressing, .get_reg_state = &aqua_vanjaram_get_reg_state, }; @@ -961,24 +960,22 @@ static int soc15_common_early_init(struct amdgpu_ip_block 
*ip_block) struct amdgpu_device *adev = ip_block->adev; adev->nbio.funcs->set_reg_remap(adev); - adev->smc_rreg = NULL; - adev->smc_wreg = NULL; - adev->pcie_rreg = &amdgpu_device_indirect_rreg; - adev->pcie_wreg = &amdgpu_device_indirect_wreg; - adev->pcie_rreg_ext = &amdgpu_device_indirect_rreg_ext; - adev->pcie_wreg_ext = &amdgpu_device_indirect_wreg_ext; - adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64; - adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64; - adev->pcie_rreg64_ext = &amdgpu_device_indirect_rreg64_ext; - adev->pcie_wreg64_ext = &amdgpu_device_indirect_wreg64_ext; - adev->uvd_ctx_rreg = &soc15_uvd_ctx_rreg; - adev->uvd_ctx_wreg = &soc15_uvd_ctx_wreg; - adev->didt_rreg = &soc15_didt_rreg; - adev->didt_wreg = &soc15_didt_wreg; - adev->gc_cac_rreg = &soc15_gc_cac_rreg; - adev->gc_cac_wreg = &soc15_gc_cac_wreg; - adev->se_cac_rreg = &soc15_se_cac_rreg; - adev->se_cac_wreg = &soc15_se_cac_wreg; + adev->reg.pcie.rreg = &amdgpu_device_indirect_rreg; + adev->reg.pcie.wreg = &amdgpu_device_indirect_wreg; + adev->reg.pcie.rreg_ext = &amdgpu_device_indirect_rreg_ext; + adev->reg.pcie.wreg_ext = &amdgpu_device_indirect_wreg_ext; + adev->reg.pcie.rreg64 = &amdgpu_device_indirect_rreg64; + adev->reg.pcie.wreg64 = &amdgpu_device_indirect_wreg64; + adev->reg.pcie.rreg64_ext = &amdgpu_device_indirect_rreg64_ext; + adev->reg.pcie.wreg64_ext = &amdgpu_device_indirect_wreg64_ext; + adev->reg.uvd_ctx.rreg = &soc15_uvd_ctx_rreg; + adev->reg.uvd_ctx.wreg = &soc15_uvd_ctx_wreg; + adev->reg.didt.rreg = &soc15_didt_rreg; + adev->reg.didt.wreg = &soc15_didt_wreg; + adev->reg.gc_cac.rreg = &soc15_gc_cac_rreg; + adev->reg.gc_cac.wreg = &soc15_gc_cac_wreg; + adev->reg.se_cac.rreg = &soc15_se_cac_rreg; + adev->reg.se_cac.wreg = &soc15_se_cac_wreg; adev->rev_id = amdgpu_device_get_rev_id(adev); adev->external_rev_id = 0xFF; @@ -1200,6 +1197,7 @@ static int soc15_common_early_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(9, 4, 4): case IP_VERSION(9, 5, 0): 
adev->asic_funcs = &aqua_vanjaram_asic_funcs; + adev->reg.smn.get_smn_base = &amdgpu_reg_smn_v1_0_get_base; adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS | AMD_CG_SUPPORT_SDMA_MGCG | diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index c8ac11a9cdef..46a6477b677b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -118,7 +118,6 @@ int vega10_reg_base_init(struct amdgpu_device *adev); int vega20_reg_base_init(struct amdgpu_device *adev); int arct_reg_base_init(struct amdgpu_device *adev); int aldebaran_reg_base_init(struct amdgpu_device *adev); -u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id); int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev); ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev, enum amdgpu_reg_state reg_state, void *buf, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index 242b24f73c17..a7b5a95ebebb 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -195,19 +195,22 @@ __RREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, AMDGPU_REGS_RLC, ip##_HWIP, inst) /* inst equals to ext for some IPs */ -#define RREG32_SOC15_EXT(ip, inst, reg, ext) \ - RREG32_PCIE_EXT((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) * 4 \ - + adev->asic_funcs->encode_ext_smn_addressing(ext)) \ - -#define WREG32_SOC15_EXT(ip, inst, reg, ext, value) \ - WREG32_PCIE_EXT((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) * 4 \ - + adev->asic_funcs->encode_ext_smn_addressing(ext), \ - value) \ - -#define RREG64_MCA(ext, mca_base, idx) \ - RREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) + mca_base + (idx * 8)) - -#define WREG64_MCA(ext, mca_base, idx, val) \ - WREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) + mca_base + (idx * 8), val) 
+#define RREG32_SOC15_EXT(ip, inst, reg, ext) \ + RREG32_PCIE_EXT((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + \ + reg) * 4 + \ + amdgpu_reg_get_smn_base64(adev, ip##_HWIP, inst)) + +#define WREG32_SOC15_EXT(ip, inst, reg, ext, value) \ + WREG32_PCIE_EXT( \ + (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) * \ + 4 + \ + amdgpu_reg_get_smn_base64(adev, ip##_HWIP, inst), \ + value) + +#define RREG64_MCA(smn_base, mca_base, idx) \ + RREG64_PCIE_EXT(smn_base + mca_base + (idx * 8)) + +#define WREG64_MCA(smn_base, mca_base, idx, val) \ + WREG64_PCIE_EXT(smn_base + mca_base + (idx * 8), val) #endif diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 8122a5cacf07..fbd1d97f33ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -229,10 +229,10 @@ static u32 soc21_didt_rreg(struct amdgpu_device *adev, u32 reg) address = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_INDEX); data = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_DATA); - spin_lock_irqsave(&adev->didt_idx_lock, flags); + spin_lock_irqsave(&adev->reg.didt.lock, flags); WREG32(address, (reg)); r = RREG32(data); - spin_unlock_irqrestore(&adev->didt_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.didt.lock, flags); return r; } @@ -243,10 +243,10 @@ static void soc21_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v) address = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_INDEX); data = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_DATA); - spin_lock_irqsave(&adev->didt_idx_lock, flags); + spin_lock_irqsave(&adev->reg.didt.lock, flags); WREG32(address, (reg)); WREG32(data, (v)); - spin_unlock_irqrestore(&adev->didt_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.didt.lock, flags); } static u32 soc21_get_config_memsize(struct amdgpu_device *adev) @@ -589,21 +589,15 @@ static int soc21_common_early_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; adev->nbio.funcs->set_reg_remap(adev); - adev->smc_rreg = NULL; - 
adev->smc_wreg = NULL; - adev->pcie_rreg = &amdgpu_device_indirect_rreg; - adev->pcie_wreg = &amdgpu_device_indirect_wreg; - adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64; - adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64; - adev->pciep_rreg = amdgpu_device_pcie_port_rreg; - adev->pciep_wreg = amdgpu_device_pcie_port_wreg; - - /* TODO: will add them during VCN v2 implementation */ - adev->uvd_ctx_rreg = NULL; - adev->uvd_ctx_wreg = NULL; - - adev->didt_rreg = &soc21_didt_rreg; - adev->didt_wreg = &soc21_didt_wreg; + adev->reg.pcie.rreg = &amdgpu_device_indirect_rreg; + adev->reg.pcie.wreg = &amdgpu_device_indirect_wreg; + adev->reg.pcie.rreg64 = &amdgpu_device_indirect_rreg64; + adev->reg.pcie.wreg64 = &amdgpu_device_indirect_wreg64; + adev->reg.pcie.port_rreg = &amdgpu_device_pcie_port_rreg; + adev->reg.pcie.port_wreg = &amdgpu_device_pcie_port_wreg; + + adev->reg.didt.rreg = &soc21_didt_rreg; + adev->reg.didt.wreg = &soc21_didt_wreg; adev->asic_funcs = &soc21_asic_funcs; @@ -858,7 +852,9 @@ static int soc21_common_early_init(struct amdgpu_ip_block *ip_block) AMD_CG_SUPPORT_IH_CG | AMD_CG_SUPPORT_BIF_MGCG | AMD_CG_SUPPORT_BIF_LS; - adev->pg_flags = AMD_PG_SUPPORT_VCN | + adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_JPEG_DPG | AMD_PG_SUPPORT_JPEG | AMD_PG_SUPPORT_GFX_PG; adev->external_rev_id = adev->rev_id + 0x1; diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c index ecb6c3fcfbd1..308f32daa780 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc24.c +++ b/drivers/gpu/drm/amd/amdgpu/soc24.c @@ -362,18 +362,12 @@ static int soc24_common_early_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; adev->nbio.funcs->set_reg_remap(adev); - adev->smc_rreg = NULL; - adev->smc_wreg = NULL; - adev->pcie_rreg = &amdgpu_device_indirect_rreg; - adev->pcie_wreg = &amdgpu_device_indirect_wreg; - adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64; - adev->pcie_wreg64 = 
&amdgpu_device_indirect_wreg64; - adev->pciep_rreg = amdgpu_device_pcie_port_rreg; - adev->pciep_wreg = amdgpu_device_pcie_port_wreg; - adev->uvd_ctx_rreg = NULL; - adev->uvd_ctx_wreg = NULL; - adev->didt_rreg = NULL; - adev->didt_wreg = NULL; + adev->reg.pcie.rreg = &amdgpu_device_indirect_rreg; + adev->reg.pcie.wreg = &amdgpu_device_indirect_wreg; + adev->reg.pcie.rreg64 = &amdgpu_device_indirect_rreg64; + adev->reg.pcie.wreg64 = &amdgpu_device_indirect_wreg64; + adev->reg.pcie.port_rreg = &amdgpu_device_pcie_port_rreg; + adev->reg.pcie.port_wreg = &amdgpu_device_pcie_port_wreg; adev->asic_funcs = &soc24_asic_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/soc_v1_0.c b/drivers/gpu/drm/amd/amdgpu/soc_v1_0.c index 59ab952d5cce..26e7566a5479 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/soc_v1_0.c @@ -250,22 +250,16 @@ static int soc_v1_0_common_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - adev->smc_rreg = NULL; - adev->smc_wreg = NULL; - adev->pcie_rreg = &amdgpu_device_indirect_rreg; - adev->pcie_wreg = &amdgpu_device_indirect_wreg; - adev->pcie_rreg_ext = &amdgpu_device_indirect_rreg_ext; - adev->pcie_wreg_ext = &amdgpu_device_indirect_wreg_ext; - adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64; - adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64; - adev->pciep_rreg = amdgpu_device_pcie_port_rreg; - adev->pciep_wreg = amdgpu_device_pcie_port_wreg; - adev->pcie_rreg64_ext = &amdgpu_device_indirect_rreg64_ext; - adev->pcie_wreg64_ext = &amdgpu_device_indirect_wreg64_ext; - adev->uvd_ctx_rreg = NULL; - adev->uvd_ctx_wreg = NULL; - adev->didt_rreg = NULL; - adev->didt_wreg = NULL; + adev->reg.pcie.rreg = &amdgpu_device_indirect_rreg; + adev->reg.pcie.wreg = &amdgpu_device_indirect_wreg; + adev->reg.pcie.rreg_ext = &amdgpu_device_indirect_rreg_ext; + adev->reg.pcie.wreg_ext = &amdgpu_device_indirect_wreg_ext; + adev->reg.pcie.rreg64 = &amdgpu_device_indirect_rreg64; + 
adev->reg.pcie.wreg64 = &amdgpu_device_indirect_wreg64; + adev->reg.pcie.port_rreg = &amdgpu_device_pcie_port_rreg; + adev->reg.pcie.port_wreg = &amdgpu_device_pcie_port_wreg; + adev->reg.pcie.rreg64_ext = &amdgpu_device_indirect_rreg64_ext; + adev->reg.pcie.wreg64_ext = &amdgpu_device_indirect_wreg64_ext; adev->asic_funcs = &soc_v1_0_asic_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c index 9ae424618556..5b7b46d242c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c @@ -47,11 +47,6 @@ #define VCE_V1_0_DATA_SIZE (7808 * (AMDGPU_MAX_VCE_HANDLES + 1)) #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02 -#define VCE_V1_0_GART_PAGE_START \ - (AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS) -#define VCE_V1_0_GART_ADDR_START \ - (VCE_V1_0_GART_PAGE_START * AMDGPU_GPU_PAGE_SIZE) - static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev); static void vce_v1_0_set_irq_funcs(struct amdgpu_device *adev); @@ -535,27 +530,29 @@ static int vce_v1_0_early_init(struct amdgpu_ip_block *ip_block) */ static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device *adev) { - u64 gpu_addr = amdgpu_bo_gpu_offset(adev->vce.vcpu_bo); u64 bo_size = amdgpu_bo_size(adev->vce.vcpu_bo); u64 max_vcpu_bo_addr = 0xffffffff - bo_size; u64 num_pages = ALIGN(bo_size, AMDGPU_GPU_PAGE_SIZE) / AMDGPU_GPU_PAGE_SIZE; u64 pa = amdgpu_gmc_vram_pa(adev, adev->vce.vcpu_bo); u64 flags = AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_VALID; + u64 vce_gart_start_offs; + int r; - /* - * Check if the VCPU BO already has a 32-bit address. - * Eg. if MC is configured to put VRAM in the low address range. 
- */ - if (gpu_addr <= max_vcpu_bo_addr) - return 0; + r = amdgpu_gtt_mgr_alloc_entries(&adev->mman.gtt_mgr, + &adev->vce.gart_node, num_pages, + DRM_MM_INSERT_LOW); + if (r) + return r; + + vce_gart_start_offs = amdgpu_gtt_node_to_byte_offset(&adev->vce.gart_node); /* Check if we can map the VCPU BO in GART to a 32-bit address. */ - if (adev->gmc.gart_start + VCE_V1_0_GART_ADDR_START > max_vcpu_bo_addr) + if (adev->gmc.gart_start + vce_gart_start_offs > max_vcpu_bo_addr) return -EINVAL; - amdgpu_gart_map_vram_range(adev, pa, VCE_V1_0_GART_PAGE_START, + amdgpu_gart_map_vram_range(adev, pa, adev->vce.gart_node.start, num_pages, flags, adev->gart.ptr); - adev->vce.gpu_addr = adev->gmc.gart_start + VCE_V1_0_GART_ADDR_START; + adev->vce.gpu_addr = adev->gmc.gart_start + vce_gart_start_offs; if (adev->vce.gpu_addr > max_vcpu_bo_addr) return -EINVAL; @@ -610,7 +607,11 @@ static int vce_v1_0_sw_fini(struct amdgpu_ip_block *ip_block) if (r) return r; - return amdgpu_vce_sw_fini(adev); + r = amdgpu_vce_sw_fini(adev); + + amdgpu_gtt_mgr_free_entries(&adev->mman.gtt_mgr, &adev->vce.gart_node); + + return r; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 6a574b6c8e63..a256320b92f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -299,11 +299,11 @@ static u32 vi_pcie_rreg(struct amdgpu_device *adev, u32 reg) unsigned long flags; u32 r; - spin_lock_irqsave(&adev->pcie_idx_lock, flags); + spin_lock_irqsave(&adev->reg.pcie.lock, flags); WREG32_NO_KIQ(mmPCIE_INDEX, reg); (void)RREG32_NO_KIQ(mmPCIE_INDEX); r = RREG32_NO_KIQ(mmPCIE_DATA); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.pcie.lock, flags); return r; } @@ -311,12 +311,12 @@ static void vi_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { unsigned long flags; - spin_lock_irqsave(&adev->pcie_idx_lock, flags); + spin_lock_irqsave(&adev->reg.pcie.lock, flags); WREG32_NO_KIQ(mmPCIE_INDEX, reg); 
(void)RREG32_NO_KIQ(mmPCIE_INDEX); WREG32_NO_KIQ(mmPCIE_DATA, v); (void)RREG32_NO_KIQ(mmPCIE_DATA); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.pcie.lock, flags); } static u32 vi_smc_rreg(struct amdgpu_device *adev, u32 reg) @@ -324,10 +324,10 @@ static u32 vi_smc_rreg(struct amdgpu_device *adev, u32 reg) unsigned long flags; u32 r; - spin_lock_irqsave(&adev->smc_idx_lock, flags); + spin_lock_irqsave(&adev->reg.smc.lock, flags); WREG32_NO_KIQ(mmSMC_IND_INDEX_11, (reg)); r = RREG32_NO_KIQ(mmSMC_IND_DATA_11); - spin_unlock_irqrestore(&adev->smc_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.smc.lock, flags); return r; } @@ -335,10 +335,10 @@ static void vi_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { unsigned long flags; - spin_lock_irqsave(&adev->smc_idx_lock, flags); + spin_lock_irqsave(&adev->reg.smc.lock, flags); WREG32_NO_KIQ(mmSMC_IND_INDEX_11, (reg)); WREG32_NO_KIQ(mmSMC_IND_DATA_11, (v)); - spin_unlock_irqrestore(&adev->smc_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.smc.lock, flags); } /* smu_8_0_d.h */ @@ -350,10 +350,10 @@ static u32 cz_smc_rreg(struct amdgpu_device *adev, u32 reg) unsigned long flags; u32 r; - spin_lock_irqsave(&adev->smc_idx_lock, flags); + spin_lock_irqsave(&adev->reg.smc.lock, flags); WREG32(mmMP0PUB_IND_INDEX, (reg)); r = RREG32(mmMP0PUB_IND_DATA); - spin_unlock_irqrestore(&adev->smc_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.smc.lock, flags); return r; } @@ -361,10 +361,10 @@ static void cz_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { unsigned long flags; - spin_lock_irqsave(&adev->smc_idx_lock, flags); + spin_lock_irqsave(&adev->reg.smc.lock, flags); WREG32(mmMP0PUB_IND_INDEX, (reg)); WREG32(mmMP0PUB_IND_DATA, (v)); - spin_unlock_irqrestore(&adev->smc_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.smc.lock, flags); } static u32 vi_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg) @@ -372,10 +372,10 @@ static u32 
vi_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg) unsigned long flags; u32 r; - spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags); + spin_lock_irqsave(&adev->reg.uvd_ctx.lock, flags); WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff)); r = RREG32(mmUVD_CTX_DATA); - spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.uvd_ctx.lock, flags); return r; } @@ -383,10 +383,10 @@ static void vi_uvd_ctx_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { unsigned long flags; - spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags); + spin_lock_irqsave(&adev->reg.uvd_ctx.lock, flags); WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff)); WREG32(mmUVD_CTX_DATA, (v)); - spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.uvd_ctx.lock, flags); } static u32 vi_didt_rreg(struct amdgpu_device *adev, u32 reg) @@ -394,10 +394,10 @@ static u32 vi_didt_rreg(struct amdgpu_device *adev, u32 reg) unsigned long flags; u32 r; - spin_lock_irqsave(&adev->didt_idx_lock, flags); + spin_lock_irqsave(&adev->reg.didt.lock, flags); WREG32(mmDIDT_IND_INDEX, (reg)); r = RREG32(mmDIDT_IND_DATA); - spin_unlock_irqrestore(&adev->didt_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.didt.lock, flags); return r; } @@ -405,10 +405,10 @@ static void vi_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { unsigned long flags; - spin_lock_irqsave(&adev->didt_idx_lock, flags); + spin_lock_irqsave(&adev->reg.didt.lock, flags); WREG32(mmDIDT_IND_INDEX, (reg)); WREG32(mmDIDT_IND_DATA, (v)); - spin_unlock_irqrestore(&adev->didt_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.didt.lock, flags); } static u32 vi_gc_cac_rreg(struct amdgpu_device *adev, u32 reg) @@ -416,10 +416,10 @@ static u32 vi_gc_cac_rreg(struct amdgpu_device *adev, u32 reg) unsigned long flags; u32 r; - spin_lock_irqsave(&adev->gc_cac_idx_lock, flags); + spin_lock_irqsave(&adev->reg.gc_cac.lock, flags); WREG32(mmGC_CAC_IND_INDEX, (reg)); r = RREG32(mmGC_CAC_IND_DATA); - 
spin_unlock_irqrestore(&adev->gc_cac_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.gc_cac.lock, flags); return r; } @@ -427,10 +427,10 @@ static void vi_gc_cac_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { unsigned long flags; - spin_lock_irqsave(&adev->gc_cac_idx_lock, flags); + spin_lock_irqsave(&adev->reg.gc_cac.lock, flags); WREG32(mmGC_CAC_IND_INDEX, (reg)); WREG32(mmGC_CAC_IND_DATA, (v)); - spin_unlock_irqrestore(&adev->gc_cac_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.gc_cac.lock, flags); } @@ -649,7 +649,7 @@ static bool vi_read_bios_from_rom(struct amdgpu_device *adev, dw_ptr = (u32 *)bios; length_dw = ALIGN(length_bytes, 4) / 4; /* take the smc lock since we are using the smc index */ - spin_lock_irqsave(&adev->smc_idx_lock, flags); + spin_lock_irqsave(&adev->reg.smc.lock, flags); /* set rom index to 0 */ WREG32(mmSMC_IND_INDEX_11, ixROM_INDEX); WREG32(mmSMC_IND_DATA_11, 0); @@ -657,7 +657,7 @@ static bool vi_read_bios_from_rom(struct amdgpu_device *adev, WREG32(mmSMC_IND_INDEX_11, ixROM_DATA); for (i = 0; i < length_dw; i++) dw_ptr[i] = RREG32(mmSMC_IND_DATA_11); - spin_unlock_irqrestore(&adev->smc_idx_lock, flags); + spin_unlock_irqrestore(&adev->reg.smc.lock, flags); return true; } @@ -1454,20 +1454,20 @@ static int vi_common_early_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; if (adev->flags & AMD_IS_APU) { - adev->smc_rreg = &cz_smc_rreg; - adev->smc_wreg = &cz_smc_wreg; + adev->reg.smc.rreg = cz_smc_rreg; + adev->reg.smc.wreg = cz_smc_wreg; } else { - adev->smc_rreg = &vi_smc_rreg; - adev->smc_wreg = &vi_smc_wreg; + adev->reg.smc.rreg = vi_smc_rreg; + adev->reg.smc.wreg = vi_smc_wreg; } - adev->pcie_rreg = &vi_pcie_rreg; - adev->pcie_wreg = &vi_pcie_wreg; - adev->uvd_ctx_rreg = &vi_uvd_ctx_rreg; - adev->uvd_ctx_wreg = &vi_uvd_ctx_wreg; - adev->didt_rreg = &vi_didt_rreg; - adev->didt_wreg = &vi_didt_wreg; - adev->gc_cac_rreg = &vi_gc_cac_rreg; - adev->gc_cac_wreg = &vi_gc_cac_wreg; + 
adev->reg.pcie.rreg = &vi_pcie_rreg; + adev->reg.pcie.wreg = &vi_pcie_wreg; + adev->reg.uvd_ctx.rreg = &vi_uvd_ctx_rreg; + adev->reg.uvd_ctx.wreg = &vi_uvd_ctx_wreg; + adev->reg.didt.rreg = &vi_didt_rreg; + adev->reg.didt.wreg = &vi_didt_wreg; + adev->reg.gc_cac.rreg = &vi_gc_cac_rreg; + adev->reg.gc_cac.wreg = &vi_gc_cac_wreg; adev->asic_funcs = &vi_asic_funcs; |
