author    Linus Torvalds <torvalds@linux-foundation.org>  2026-04-15 08:45:00 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2026-04-15 08:45:00 -0700
commit    4a57e0913e8c7fff407e97909f4ae48caa84d612
tree      96c5d9056a7f1dcaaca5f00749a298a60967b01b /drivers/gpu/drm/amd/amdgpu
parent    afac4c66d1aa6396ce44d94fe895d7b61e085fd4
parent    83e8d8bbffa8161e94f3aeee4dd09a35062a78c8
Merge tag 'drm-next-2026-04-15' of https://gitlab.freedesktop.org/drm/kernel
Pull drm updates from Dave Airlie:
 "Highlights:

   - new DRM RAS infrastructure using netlink

   - amdgpu: enable DC on CIK APUs, and more IP enablement, and more
     user queue work

   - xe: purgeable BO support, and new hw enablement

   - dma-buf: add revocable operations

  Full summary:

  mm:
   - two-pass MMU interval notifiers
   - add gpu active/reclaim per-node stat counters

  math:
   - provide __KERNEL_DIV_ROUND_CLOSEST() in UAPI
   - implement DIV_ROUND_CLOSEST() with __KERNEL_DIV_ROUND_CLOSEST()

  rust:
   - shared tag with driver-core: register macro and io infra
   - core: rework DMA coherent API
   - core: add interop::list to interop with C linked lists
   - core: add more num::Bounded operations
   - core: enable generic_arg_infer and add EMSGSIZE
   - workqueue: add ARef<T> support for work and delayed work
   - add GPU buddy allocator abstraction
   - add DRM shmem GEM helper abstraction
   - allow drm::Device to dispatch work and delayed work items to
     driver private data
   - add dma_resv_lock helper and raw accessors

  core:
   - introduce DRM RAS infrastructure over netlink
   - add connector panel_type property
   - fourcc: add ARM interleaved 64k modifier
   - colorop: add destroy helper
   - suballoc: split into alloc and init helpers
   - mode: provide DRM_ARGB_GET*() macros for reading color components

  edid:
   - provide drm_output_color_Format

  dma-buf:
   - provide revoke mechanism for shared buffers
   - rename move_notify to invalidate_mappings
   - always enable move_notify
   - protect dma_fence_ops with RCU and improve locking
   - clean pages with helpers

  atomic:
   - allocate drm_private_state via callback
   - helper: use system_percpu_wq

  buddy:
   - make buddy allocator available to gpu level
   - add kernel-doc for buddy allocator
   - improve aligned allocation

  ttm:
   - fix fence signalling
   - improve tests and docs
   - improve handling of gfp_retry_mayfail
   - use per-node stat counters to track memory allocations
   - port pool to use list_lru
   - drop NUMA specific pools
   - make pool shrinker numa aware
   - track allocated pages per numa node

  coreboot:
   - cleanup coreboot framebuffer support

  sched:
   - fix race condition in drm_sched_fini

  pagemap:
   - enable THP support
   - pass pagemap_addr by reference

  gem-shmem:
   - Track page accessed/dirty status across mmap/vmap

  gpusvm:
   - reenable device to device migration
   - fix unbalanced unlock

  bridge:
   - anx7625: Support USB-C plus DT bindings
   - connector: Fix EDID detection
   - dw-hdmi-qp: Support Vendor-Specific and SDP Infoframes; improve others
   - fsl-ldb: Fix visual artifacts plus related DT property
     'enable-termination-resistor'
   - imx8qxp-pixel-link: Improve bridge reference handling
   - lt9611: Support Port-B-only input plus DT bindings
   - tda998x: Support DRM_BRIDGE_ATTACH_NO_CONNECTOR; Clean up
   - Support TH1520 HDMI plus DT bindings
   - waveshare-dsi: Fix register and attach; Support 1..4 DSI lanes
     plus DT bindings
   - anx7625: Fix USB Type-C handling
   - cdns-mhdp8546-core: Handle HDCP state in bridge atomic_check
   - Support Lontium LT8713SX DP MST bridge plus DT bindings
   - analogix_dp: Use DP helpers for link training

  panel:
   - panel-jdi-lt070me05000: Use mipi-dsi multi functions
   - panel-edp: Support AUO B116XAT04.1 (HW: 1A); Support CMN
     N116BCL-EAK (C2); Support FriendlyELEC plus DT changes
   - panel-edp: Fix timings for BOE NV140WUM-N64
   - ilitek-ili9882t: Allow GPIO calls to sleep
   - jadard: Support TAIGUAN XTI05101-01A
   - lxd: Support LXD M9189A plus DT bindings
   - mantix: Fix pixel clock; Clean up
   - motorola: Support Motorola Atrix 4G and Droid X2 plus DT bindings
   - novatek: Support Novatek/Tianma NT37700F plus DT bindings
   - simple: Support EDT ET057023UDBA plus DT bindings; Support
     Powertip PH800480T032-ZHC19 plus DT bindings; Support Waveshare 13.3"
   - novatek-nt36672a: Use mipi_dsi_*_multi() functions
   - panel-edp: Support BOE NV153WUM-N42, CMN N153JCA-ELK, CSW MNF307QS3-2
   - support Himax HX83121A plus DT bindings
   - support JuTouch JT070TM041 plus DT bindings
   - support Samsung S6E8FC0 plus DT bindings
   - himax-hx83102c: support Samsung S6E8FC0 plus DT bindings; support
     backlight
   - ili9806e: support Rocktech RK050HR345-CT106A plus DT bindings
   - simple: support Tianma TM050RDH03 plus DT bindings

  amdgpu:
   - enable DC by default on CIK APUs
   - userq fence ioctl param size fixes
   - set panel_type to OLED for eDP
   - refactor DC i2c code
   - FAMS2 update
   - rework ttm handling to allow multiple engines
   - DC DCE 6.x cleanup
   - DC support for NUTMEG/TRAVIS DP bridge
   - DCN 4.2 support
   - GC12 idle power fix for compute
   - use struct drm_edid in non-DC code
   - enable NV12/P010 support on primary planes
   - support newer IP discovery tables
   - VCN/JPEG 5.0.2 support
   - GC/MES 12.1 updates
   - USERQ fixes
   - add DC idle state manager
   - eDP DSC seamless boot

  amdkfd:
   - GC 12.1 updates
   - non 4K page fixes

  xe:
   - basic Xe3p_LPG and NVL-P enabling patches
   - allow VM_BIND decompress support
   - add purgeable buffer object support
   - add xe_vm_get_property_ioctl
   - restrict multi-lrc to VCS/VECS engines
   - allow disabling VM overcommit in fault mode
   - dGPU memory optimizations
   - Workaround cleanups and simplification
   - Allow VFs VRAM quota changes using sysfs
   - convert GT stats to per-cpu counters
   - pagefault refactors
   - enable multi-queue on xe3p_xpc
   - disable DCC on PTL
   - make MMIO communication more robust
   - disable D3Cold for BMG on specific platforms
   - vfio: improve FLR sync for Xe VFIO

  i915/display:
   - C10/C20/LT PHY PLL divider verification
   - use trans push mechanism to generate PSR frame change on LNL+
   - refactor DP DSC slice config
   - VGA decode refactoring
   - refactor DPT, gen2-4 overlay, masked field register macro helpers
   - refactor stolen memory allocation decisions
   - prepare for UHBR DP tunnels
   - refactor LT PHY PLL to use DPLL framework
   - implement register polling/waiting in display code
   - add shared stepping header between i915 and display

  i915:
   - fix potential overflow of shmem scatterlist length

  nouveau:
   - provide Z cull info to userspace
   - initial GA100 support
   - shutdown on PCI device shutdown

  nova-core:
   - harden GSP command queue
   - add support for large RPCs
   - simplify GSP sequencer and message handling
   - refactor falcon firmware handling
   - convert to new register macro
   - convert to new DMA coherent API
   - use checked arithmetic
   - add debugfs support for gsp-rm log buffers
   - fix aux device registration for multi-GPU

  msm:
   - CI:
      - Uprev mesa
      - Restore CI jobs for Qualcomm APQ8016 and APQ8096 devices
   - Core:
      - Switched to of_get_available_child_by_name()
   - DPU:
      - Fixes for DSC panels
      - Fixed brownout because of the frequency / OPP mismatch
      - Quad pipe preparation (not enabled yet)
      - Switched to virtual planes by default
      - Dropped VBIF_NRT support
      - Added support for Eliza platform
      - Reworked alpha handling
      - Switched to correct CWB definitions on Eliza
      - Dropped dummy INTF_0 on MSM8953
      - Corrected INTFs related to DP-MST
   - DP:
      - Removed debug prints looking into PHY internals
   - DSI:
      - Fixes for DSC panels
      - RGB101010 support
      - Support for SC8280XP
      - Moved PHY bindings from display/ to phy/
   - GPU:
      - Preemption support for x2-85 and a840
      - IFPC support for a840
      - SKU detection support for x2-85 and a840
      - Expose AQE support (VK ray-pipeline)
      - Avoid locking in VM_BIND fence signaling path
      - Fix to avoid reclaim in GPU snapshot path
      - Disallow foreign mapping of _NO_SHARE BOs
   - HDMI:
      - Fixed infoframes programming
   - MDP5:
      - Dropped support for MSM8974v1
      - Dropped now unused code for MSM8974 v1 and SDM660 / MSM8998

  panthor:
   - add tracepoints for power and IRQs
   - fix fence handling
   - extend timestamp query with flags
   - support various sources for timestamp queries

  tyr:
   - fix names and model/versions

  rockchip:
   - vop2: use drm logging function
   - rk3576 displayport support
   - support CRTC background color

  atmel-hlcdc:
   - support sama5d65 LCD controller

  tilcdc:
   - use DT bindings schema
   - use managed DRM interfaces
   - support DRM_BRIDGE_ATTACH_NO_CONNECTOR

  verisilicon:
   - support DC8200 + DT bindings

  virtgpu:
   - support PRIME import with 3D enabled

  komeda:
   - fix integer overflow in AFBC checks

  mcde:
   - improve bridge handling

  gma500:
   - use drm client buffer for fbdev framebuffer

  amdxdna:
   - add sensors ioctls
   - provide NPU power estimate
   - support column utilization sensor
   - allow forcing DMA through IOMMU IOVA
   - support per-BO mem usage queries
   - refactor GEM implementation

  ivpu:
   - update boot API to v3.29.4
   - limit per-user number of doorbells/contexts
   - perform engine reset on TDR error

  loongson:
   - replace custom code with drm_gem_ttm_dumb_map_offset()

  imx:
   - support planes behind the primary plane
   - fix bus-format selection

  vkms:
   - support CRTC background color

  v3d:
   - improve handling of struct v3d_stats

  komeda:
   - support Arm China Linlon D6 plus DT bindings

  imagination:
   - improve power-off sequence
   - support context-reset notification from firmware

  mediatek:
   - mtk_dsi: enable hs clock during pre-enable
   - Remove all conflicting aperture devices during probe
   - Add support for mt8167 display blocks"

* tag 'drm-next-2026-04-15' of https://gitlab.freedesktop.org/drm/kernel: (1735 commits)
  drm/ttm/tests: Remove checks from ttm_pool_free_no_dma_alloc
  drm/ttm/tests: fix lru_count ASSERT
  drm/vram: remove DRM_VRAM_MM_FILE_OPERATIONS from docs
  drm/fb-helper: Fix a locking bug in an error path
  dma-fence: correct kernel-doc function parameter @flags
  ttm/pool: track allocated_pages per numa node.
  ttm/pool: make pool shrinker NUMA aware (v2)
  ttm/pool: drop numa specific pools
  ttm/pool: port to list_lru. (v2)
  drm/ttm: use gpu mm stats to track gpu memory allocations. (v4)
  mm: add gpu active/reclaim per-node stat counters (v2)
  gpu: nova-core: fix missing colon in SEC2 boot debug message
  gpu: nova-core: vbios: use from_le_bytes() for PCI ROM header parsing
  gpu: nova-core: bitfield: fix broken Default implementation
  gpu: nova-core: falcon: pad firmware DMA object size to required block alignment
  gpu: nova-core: gsp: fix undefined behavior in command queue code
  drm/shmem_helper: Make sure PMD entries get the writeable upgrade
  accel/ivpu: Trigger recovery on TDR with OS scheduling
  drm/msm: Use of_get_available_child_by_name()
  dt-bindings: display/msm: move DSI PHY bindings to phy/ subdir
  ...
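As a minimal sketch of the round-to-nearest semantics behind the DIV_ROUND_CLOSEST() rework listed above (unsigned operands only; the real <linux/math.h> macro also handles signed values):

    /* Round-to-nearest division for unsigned operands; plain integer
     * division truncates toward zero instead. */
    #define EXAMPLE_DIV_ROUND_CLOSEST(x, d)  (((x) + ((d) / 2)) / (d))

    /* EXAMPLE_DIV_ROUND_CLOSEST(7, 2) == 4, while 7 / 2 == 3 */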
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/Makefile | 6
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 161
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12_1.c | 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 459
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h | 4
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 56
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 19
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 9
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 13
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 173
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h | 4
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c | 110
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h | 8
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 835
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 572
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h | 6
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 21
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 14
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 30
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c | 237
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h | 55
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 118
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c | 1
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 98
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 49
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 3
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 45
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 6
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 56
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 19
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 5
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 6
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h | 26
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 7
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 7
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 59
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 11
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_reg_access.c | 958
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_reg_access.h | 163
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h | 12
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 50
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 42
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 4
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 230
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 40
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 18
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 11
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 3
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 222
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 6
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 714
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 18
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 8
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 9
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 4
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c | 6
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 77
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 79
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h | 20
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 27
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 28
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 21
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/atom.c | 28
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/cik.c | 52
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 31
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 16
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 16
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 16
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/df_v3_6.c | 20
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 49
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 33
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c | 189
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gfx_v12_1_pkt.h | 223
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.c | 193
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 88
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c | 33
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 143
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/ih_v7_0.c | 36
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.c | 840
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.h | 111
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/lsdma_v7_1.c | 99
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/lsdma_v7_1.h | 31
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/mes_v12_1.c | 396
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c | 49
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c | 18
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c | 17
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c | 18
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c | 10
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c | 55
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c | 16
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c | 195
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/nv.c | 32
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 31
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 31
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 54
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 39
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 31
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 35
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 33
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 33
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c | 54
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/si.c | 66
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/si_dma.c | 31
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/soc15.c | 68
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/soc15.h | 1
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/soc15_common.h | 31
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/soc21.c | 32
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/soc24.c | 20
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/soc_v1_0.c | 145
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/soc_v1_0.h | 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 113
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 5
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/vce_v1_0.c | 33
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 19
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.c | 1219
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.h | 29
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/vi.c | 76
127 files changed, 7523 insertions(+), 3870 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 8e22882b66aa..6a7e9bfec59e 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -47,7 +47,7 @@ subdir-ccflags-$(CONFIG_DRM_AMDGPU_WERROR) += -Werror
amdgpu-y := amdgpu_drv.o
# add KMS driver
-amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
+amdgpu-y += amdgpu_device.o amdgpu_reg_access.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
amdgpu_atombios.o atombios_crtc.o amdgpu_connectors.o \
atom.o amdgpu_fence.o amdgpu_ttm.o amdgpu_object.o amdgpu_gart.o \
amdgpu_encoders.o amdgpu_display.o amdgpu_i2c.o \
@@ -86,7 +86,7 @@ amdgpu-y += \
nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o soc24.o \
sienna_cichlid.o smu_v13_0_10.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o \
nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o lsdma_v7_0.o hdp_v7_0.o nbif_v6_3_1.o \
- cyan_skillfish_reg_init.o soc_v1_0.o
+ cyan_skillfish_reg_init.o soc_v1_0.o lsdma_v7_1.o
# add DF block
amdgpu-y += \
@@ -213,6 +213,7 @@ amdgpu-y += \
vcn_v4_0_5.o \
vcn_v5_0_0.o \
vcn_v5_0_1.o \
+ vcn_v5_0_2.o \
amdgpu_jpeg.o \
jpeg_v1_0.o \
jpeg_v2_0.o \
@@ -223,6 +224,7 @@ amdgpu-y += \
jpeg_v4_0_5.o \
jpeg_v5_0_0.o \
jpeg_v5_0_1.o \
+ jpeg_v5_0_2.o \
jpeg_v5_3_0.o
# add VPE block
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 447e734c362b..49e7881750fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -81,6 +81,7 @@
#include "amdgpu_sdma.h"
#include "amdgpu_lsdma.h"
#include "amdgpu_nbio.h"
+#include "amdgpu_reg_access.h"
#include "amdgpu_hdp.h"
#include "amdgpu_dm.h"
#include "amdgpu_virt.h"
@@ -217,9 +218,7 @@ extern struct amdgpu_watchdog_timer amdgpu_watchdog_timer;
extern int amdgpu_async_gfx_ring;
extern int amdgpu_mcbp;
extern int amdgpu_discovery;
-extern int amdgpu_mes;
extern int amdgpu_mes_log_enable;
-extern int amdgpu_mes_kiq;
extern int amdgpu_uni_mes;
extern int amdgpu_noretry;
extern int amdgpu_force_asic_type;
@@ -328,6 +327,7 @@ struct kfd_vm_fault_info;
struct amdgpu_hive_info;
struct amdgpu_reset_context;
struct amdgpu_reset_control;
+struct amdgpu_coredump_info;
enum amdgpu_cp_irq {
AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP = 0,
@@ -680,21 +680,6 @@ void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device);
/*
* Core structure, functions and helpers.
*/
-typedef uint32_t (*amdgpu_rreg_t)(struct amdgpu_device*, uint32_t);
-typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
-
-typedef uint32_t (*amdgpu_rreg_ext_t)(struct amdgpu_device*, uint64_t);
-typedef void (*amdgpu_wreg_ext_t)(struct amdgpu_device*, uint64_t, uint32_t);
-
-typedef uint64_t (*amdgpu_rreg64_t)(struct amdgpu_device*, uint32_t);
-typedef void (*amdgpu_wreg64_t)(struct amdgpu_device*, uint32_t, uint64_t);
-
-typedef uint64_t (*amdgpu_rreg64_ext_t)(struct amdgpu_device*, uint64_t);
-typedef void (*amdgpu_wreg64_ext_t)(struct amdgpu_device*, uint64_t, uint64_t);
-
-typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
-typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t);
-
struct amdgpu_mmio_remap {
u32 reg_offset;
resource_size_t bus_addr;
@@ -705,6 +690,7 @@ enum amdgpu_uid_type {
AMDGPU_UID_TYPE_XCD,
AMDGPU_UID_TYPE_AID,
AMDGPU_UID_TYPE_SOC,
+ AMDGPU_UID_TYPE_MID,
AMDGPU_UID_TYPE_MAX
};
@@ -791,6 +777,12 @@ struct amd_powerplay {
(rid == 0x01) || \
(rid == 0x10))))
+enum amdgpu_mqd_update_flag {
+ AMDGPU_UPDATE_FLAG_DBG_WA_ENABLE = 1,
+ AMDGPU_UPDATE_FLAG_DBG_WA_DISABLE = 2,
+ AMDGPU_UPDATE_FLAG_IS_GWS = 4, /* quirk for gfx9 IP */
+};
+
struct amdgpu_mqd_prop {
uint64_t mqd_gpu_addr;
uint64_t hqd_base_gpu_addr;
@@ -811,6 +803,10 @@ struct amdgpu_mqd_prop {
uint64_t fence_address;
bool tmz_queue;
bool kernel_queue;
+ uint32_t *cu_mask;
+ uint32_t cu_mask_count;
+ uint32_t cu_flags;
+ bool is_user_cu_masked;
};
struct amdgpu_mqd {
@@ -906,42 +902,8 @@ struct amdgpu_device {
/* protects concurrent MM_INDEX/DATA based register access */
spinlock_t mmio_idx_lock;
struct amdgpu_mmio_remap rmmio_remap;
- /* protects concurrent SMC based register access */
- spinlock_t smc_idx_lock;
- amdgpu_rreg_t smc_rreg;
- amdgpu_wreg_t smc_wreg;
- /* protects concurrent PCIE register access */
- spinlock_t pcie_idx_lock;
- amdgpu_rreg_t pcie_rreg;
- amdgpu_wreg_t pcie_wreg;
- amdgpu_rreg_t pciep_rreg;
- amdgpu_wreg_t pciep_wreg;
- amdgpu_rreg_ext_t pcie_rreg_ext;
- amdgpu_wreg_ext_t pcie_wreg_ext;
- amdgpu_rreg64_t pcie_rreg64;
- amdgpu_wreg64_t pcie_wreg64;
- amdgpu_rreg64_ext_t pcie_rreg64_ext;
- amdgpu_wreg64_ext_t pcie_wreg64_ext;
- /* protects concurrent UVD register access */
- spinlock_t uvd_ctx_idx_lock;
- amdgpu_rreg_t uvd_ctx_rreg;
- amdgpu_wreg_t uvd_ctx_wreg;
- /* protects concurrent DIDT register access */
- spinlock_t didt_idx_lock;
- amdgpu_rreg_t didt_rreg;
- amdgpu_wreg_t didt_wreg;
- /* protects concurrent gc_cac register access */
- spinlock_t gc_cac_idx_lock;
- amdgpu_rreg_t gc_cac_rreg;
- amdgpu_wreg_t gc_cac_wreg;
- /* protects concurrent se_cac register access */
- spinlock_t se_cac_idx_lock;
- amdgpu_rreg_t se_cac_rreg;
- amdgpu_wreg_t se_cac_wreg;
- /* protects concurrent ENDPOINT (audio) register access */
- spinlock_t audio_endpt_idx_lock;
- amdgpu_block_rreg_t audio_endpt_rreg;
- amdgpu_block_wreg_t audio_endpt_wreg;
+ /* Indirect register access blocks */
+ struct amdgpu_reg_access reg;
struct amdgpu_doorbell doorbell;
/* clock/pll info */
@@ -1187,6 +1149,11 @@ struct amdgpu_device {
struct amdgpu_reset_domain *reset_domain;
+#ifdef CONFIG_DEV_COREDUMP
+ struct amdgpu_coredump_info *coredump;
+ struct work_struct coredump_work;
+#endif
+
struct mutex benchmark_mutex;
bool scpm_enabled;
@@ -1297,42 +1264,6 @@ size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
void *buf, size_t size, bool write);
-uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
- uint32_t inst, uint32_t reg_addr, char reg_name[],
- uint32_t expected_value, uint32_t mask);
-uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
- uint32_t reg, uint32_t acc_flags);
-u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
- u64 reg_addr);
-uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
- uint32_t reg, uint32_t acc_flags,
- uint32_t xcc_id);
-void amdgpu_device_wreg(struct amdgpu_device *adev,
- uint32_t reg, uint32_t v,
- uint32_t acc_flags);
-void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
- u64 reg_addr, u32 reg_data);
-void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
- uint32_t reg, uint32_t v,
- uint32_t acc_flags,
- uint32_t xcc_id);
-void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
- uint32_t reg, uint32_t v, uint32_t xcc_id);
-void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value);
-uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset);
-
-u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
- u32 reg_addr);
-u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
- u32 reg_addr);
-u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
- u64 reg_addr);
-void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
- u32 reg_addr, u32 reg_data);
-void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
- u32 reg_addr, u64 reg_data);
-void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
- u64 reg_addr, u64 reg_data);
u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev);
bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev,
enum amd_asic_type asic_type);
@@ -1372,28 +1303,30 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
#define RREG32_XCC(reg, inst) amdgpu_device_xcc_rreg(adev, (reg), 0, inst)
#define WREG32_XCC(reg, v, inst) amdgpu_device_xcc_wreg(adev, (reg), (v), 0, inst)
-#define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
-#define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))
-#define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))
-#define WREG32_PCIE_PORT(reg, v) adev->pciep_wreg(adev, (reg), (v))
-#define RREG32_PCIE_EXT(reg) adev->pcie_rreg_ext(adev, (reg))
-#define WREG32_PCIE_EXT(reg, v) adev->pcie_wreg_ext(adev, (reg), (v))
-#define RREG64_PCIE(reg) adev->pcie_rreg64(adev, (reg))
-#define WREG64_PCIE(reg, v) adev->pcie_wreg64(adev, (reg), (v))
-#define RREG64_PCIE_EXT(reg) adev->pcie_rreg64_ext(adev, (reg))
-#define WREG64_PCIE_EXT(reg, v) adev->pcie_wreg64_ext(adev, (reg), (v))
-#define RREG32_SMC(reg) adev->smc_rreg(adev, (reg))
-#define WREG32_SMC(reg, v) adev->smc_wreg(adev, (reg), (v))
-#define RREG32_UVD_CTX(reg) adev->uvd_ctx_rreg(adev, (reg))
-#define WREG32_UVD_CTX(reg, v) adev->uvd_ctx_wreg(adev, (reg), (v))
-#define RREG32_DIDT(reg) adev->didt_rreg(adev, (reg))
-#define WREG32_DIDT(reg, v) adev->didt_wreg(adev, (reg), (v))
-#define RREG32_GC_CAC(reg) adev->gc_cac_rreg(adev, (reg))
-#define WREG32_GC_CAC(reg, v) adev->gc_cac_wreg(adev, (reg), (v))
-#define RREG32_SE_CAC(reg) adev->se_cac_rreg(adev, (reg))
-#define WREG32_SE_CAC(reg, v) adev->se_cac_wreg(adev, (reg), (v))
-#define RREG32_AUDIO_ENDPT(block, reg) adev->audio_endpt_rreg(adev, (block), (reg))
-#define WREG32_AUDIO_ENDPT(block, reg, v) adev->audio_endpt_wreg(adev, (block), (reg), (v))
+#define RREG32_PCIE(reg) amdgpu_reg_pcie_rd32(adev, (reg))
+#define WREG32_PCIE(reg, v) amdgpu_reg_pcie_wr32(adev, (reg), (v))
+#define RREG32_PCIE_PORT(reg) amdgpu_reg_pciep_rd32(adev, (reg))
+#define WREG32_PCIE_PORT(reg, v) amdgpu_reg_pciep_wr32(adev, (reg), (v))
+#define RREG32_PCIE_EXT(reg) amdgpu_reg_pcie_ext_rd32(adev, (reg))
+#define WREG32_PCIE_EXT(reg, v) amdgpu_reg_pcie_ext_wr32(adev, (reg), (v))
+#define RREG64_PCIE(reg) amdgpu_reg_pcie_rd64(adev, (reg))
+#define WREG64_PCIE(reg, v) amdgpu_reg_pcie_wr64(adev, (reg), (v))
+#define RREG64_PCIE_EXT(reg) amdgpu_reg_pcie_ext_rd64(adev, (reg))
+#define WREG64_PCIE_EXT(reg, v) amdgpu_reg_pcie_ext_wr64(adev, (reg), (v))
+#define RREG32_SMC(reg) amdgpu_reg_smc_rd32(adev, (reg))
+#define WREG32_SMC(reg, v) amdgpu_reg_smc_wr32(adev, (reg), (v))
+#define RREG32_UVD_CTX(reg) amdgpu_reg_uvd_ctx_rd32(adev, (reg))
+#define WREG32_UVD_CTX(reg, v) amdgpu_reg_uvd_ctx_wr32(adev, (reg), (v))
+#define RREG32_DIDT(reg) amdgpu_reg_didt_rd32(adev, (reg))
+#define WREG32_DIDT(reg, v) amdgpu_reg_didt_wr32(adev, (reg), (v))
+#define RREG32_GC_CAC(reg) amdgpu_reg_gc_cac_rd32(adev, (reg))
+#define WREG32_GC_CAC(reg, v) amdgpu_reg_gc_cac_wr32(adev, (reg), (v))
+#define RREG32_SE_CAC(reg) amdgpu_reg_se_cac_rd32(adev, (reg))
+#define WREG32_SE_CAC(reg, v) amdgpu_reg_se_cac_wr32(adev, (reg), (v))
+#define RREG32_AUDIO_ENDPT(block, reg) \
+ amdgpu_reg_audio_endpt_rd32(adev, (block), (reg))
+#define WREG32_AUDIO_ENDPT(block, reg, v) \
+ amdgpu_reg_audio_endpt_wr32(adev, (block), (reg), (v))
#define WREG32_P(reg, val, mask) \
do { \
uint32_t tmp_ = RREG32(reg); \
@@ -1523,10 +1456,6 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
void amdgpu_device_halt(struct amdgpu_device *adev);
-u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
- u32 reg);
-void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
- u32 reg, u32 v);
struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev);
struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
struct dma_fence *gang);
@@ -1536,6 +1465,8 @@ struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev);
ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring);
ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset);
+void amdgpu_sdma_set_vm_pte_scheds(struct amdgpu_device *adev,
+ const struct amdgpu_vm_pte_funcs *vm_pte_funcs);
/* atpx handler */
#if defined(CONFIG_VGA_SWITCHEROO)
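The amdgpu.h hunks above keep the RREG32_PCIE()/WREG32_PCIE() call sites unchanged while routing them through the new amdgpu_reg_* helpers backed by struct amdgpu_reg_access. A hypothetical read-modify-write through those macros; the register offset and bit are made up for illustration:

    /* Requires "amdgpu.h"; the macros expect a local 'adev' in scope. */
    static void example_set_pcie_bit(struct amdgpu_device *adev)
    {
            u32 tmp;

            tmp = RREG32_PCIE(0x1234);  /* now amdgpu_reg_pcie_rd32(adev, 0x1234) */
            tmp |= BIT(0);
            WREG32_PCIE(0x1234, tmp);   /* now amdgpu_reg_pcie_wr32(adev, 0x1234, tmp) */
    }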
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12_1.c
index 965c7e688535..bcb180f9d3ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12_1.c
@@ -330,7 +330,7 @@ static uint32_t kgd_gfx_v12_1_set_address_watch(struct amdgpu_device *adev,
watch_address_cntl = 0;
watch_address_low = lower_32_bits(watch_address);
- watch_address_high = upper_32_bits(watch_address) & 0xffff;
+ watch_address_high = upper_32_bits(watch_address) & 0x1ffffff;
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
TCP_WATCH0_CNTL,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index 7f4751e5caaf..cd9aa5b45e94 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -373,249 +373,280 @@ int amdgpu_atomfirmware_get_uma_carveout_info(struct amdgpu_device *adev,
return -ENODEV;
}
-int
-amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
+int amdgpu_atomfirmware_get_integrated_system_info(struct amdgpu_device *adev,
int *vram_width, int *vram_type,
int *vram_vendor)
{
struct amdgpu_mode_info *mode_info = &adev->mode_info;
- int index, i = 0;
+ int index;
u16 data_offset, size;
union igp_info *igp_info;
- union vram_info *vram_info;
- union umc_info *umc_info;
- union vram_module *vram_module;
u8 frev, crev;
u8 mem_type;
- u8 mem_vendor;
u32 mem_channel_number;
u32 mem_channel_width;
- u32 module_id;
- if (adev->flags & AMD_IS_APU)
- index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
integratedsysteminfo);
- else {
- switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
- case IP_VERSION(12, 0, 0):
- case IP_VERSION(12, 0, 1):
- index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, umc_info);
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context,
+ index, &size,
+ &frev, &crev, &data_offset)) {
+ igp_info = (union igp_info *)
+ (mode_info->atom_context->bios + data_offset);
+ switch (frev) {
+ case 1:
+ switch (crev) {
+ case 11:
+ case 12:
+ mem_channel_number = igp_info->v11.umachannelnumber;
+ if (!mem_channel_number)
+ mem_channel_number = 1;
+ mem_type = igp_info->v11.memorytype;
+ if (mem_type == LpDdr5MemType)
+ mem_channel_width = 32;
+ else
+ mem_channel_width = 64;
+ if (vram_width)
+ *vram_width = mem_channel_number * mem_channel_width;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ case 2:
+ switch (crev) {
+ case 1:
+ case 2:
+ mem_channel_number = igp_info->v21.umachannelnumber;
+ if (!mem_channel_number)
+ mem_channel_number = 1;
+ mem_type = igp_info->v21.memorytype;
+ if (mem_type == LpDdr5MemType)
+ mem_channel_width = 32;
+ else
+ mem_channel_width = 64;
+ if (vram_width)
+ *vram_width = mem_channel_number * mem_channel_width;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ break;
+ case 3:
+ mem_channel_number = igp_info->v23.umachannelnumber;
+ if (!mem_channel_number)
+ mem_channel_number = 1;
+ mem_type = igp_info->v23.memorytype;
+ if (mem_type == LpDdr5MemType)
+ mem_channel_width = 32;
+ else
+ mem_channel_width = 64;
+ if (vram_width)
+ *vram_width = mem_channel_number * mem_channel_width;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ break;
+ default:
+ return -EINVAL;
+ }
break;
default:
- index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, vram_info);
+ return -EINVAL;
}
+ } else {
+ return -EINVAL;
}
+ return 0;
+}
+
+int amdgpu_atomfirmware_get_umc_info(struct amdgpu_device *adev,
+ int *vram_width, int *vram_type,
+ int *vram_vendor)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index;
+ u16 data_offset, size;
+ union umc_info *umc_info;
+ u8 frev, crev;
+ u8 mem_type;
+ u8 mem_vendor;
+ u32 mem_channel_number;
+ u32 mem_channel_width;
+
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, umc_info);
+
if (amdgpu_atom_parse_data_header(mode_info->atom_context,
index, &size,
&frev, &crev, &data_offset)) {
- if (adev->flags & AMD_IS_APU) {
- igp_info = (union igp_info *)
- (mode_info->atom_context->bios + data_offset);
- switch (frev) {
- case 1:
- switch (crev) {
- case 11:
- case 12:
- mem_channel_number = igp_info->v11.umachannelnumber;
- if (!mem_channel_number)
- mem_channel_number = 1;
- mem_type = igp_info->v11.memorytype;
- if (mem_type == LpDdr5MemType)
- mem_channel_width = 32;
- else
- mem_channel_width = 64;
- if (vram_width)
- *vram_width = mem_channel_number * mem_channel_width;
- if (vram_type)
- *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
- break;
- default:
- return -EINVAL;
- }
- break;
- case 2:
- switch (crev) {
- case 1:
- case 2:
- mem_channel_number = igp_info->v21.umachannelnumber;
- if (!mem_channel_number)
- mem_channel_number = 1;
- mem_type = igp_info->v21.memorytype;
- if (mem_type == LpDdr5MemType)
- mem_channel_width = 32;
- else
- mem_channel_width = 64;
- if (vram_width)
- *vram_width = mem_channel_number * mem_channel_width;
- if (vram_type)
- *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
- break;
- case 3:
- mem_channel_number = igp_info->v23.umachannelnumber;
- if (!mem_channel_number)
- mem_channel_number = 1;
- mem_type = igp_info->v23.memorytype;
- if (mem_type == LpDdr5MemType)
- mem_channel_width = 32;
- else
- mem_channel_width = 64;
- if (vram_width)
- *vram_width = mem_channel_number * mem_channel_width;
- if (vram_type)
- *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
- break;
- default:
- return -EINVAL;
- }
+ umc_info = (union umc_info *)(mode_info->atom_context->bios + data_offset);
+
+ if (frev == 4) {
+ switch (crev) {
+ case 0:
+ mem_channel_number = le32_to_cpu(umc_info->v40.channel_num);
+ mem_type = le32_to_cpu(umc_info->v40.vram_type);
+ mem_channel_width = le32_to_cpu(umc_info->v40.channel_width);
+ mem_vendor = RREG32(adev->bios_scratch_reg_offset + 4) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
break;
default:
return -EINVAL;
}
} else {
- switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
- case IP_VERSION(12, 0, 0):
- case IP_VERSION(12, 0, 1):
- umc_info = (union umc_info *)(mode_info->atom_context->bios + data_offset);
-
- if (frev == 4) {
- switch (crev) {
- case 0:
- mem_channel_number = le32_to_cpu(umc_info->v40.channel_num);
- mem_type = le32_to_cpu(umc_info->v40.vram_type);
- mem_channel_width = le32_to_cpu(umc_info->v40.channel_width);
- mem_vendor = RREG32(adev->bios_scratch_reg_offset + 4) & 0xF;
- if (vram_vendor)
- *vram_vendor = mem_vendor;
- if (vram_type)
- *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
- if (vram_width)
- *vram_width = mem_channel_number * (1 << mem_channel_width);
- break;
- default:
- return -EINVAL;
- }
- } else
- return -EINVAL;
+ return -EINVAL;
+ }
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
+ int *vram_width, int *vram_type,
+ int *vram_vendor)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index, i = 0;
+ u16 data_offset, size;
+ union vram_info *vram_info;
+ union vram_module *vram_module;
+ u8 frev, crev;
+ u8 mem_type;
+ u8 mem_vendor;
+ u32 mem_channel_number;
+ u32 mem_channel_width;
+ u32 module_id;
+
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, vram_info);
+
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context,
+ index, &size,
+ &frev, &crev, &data_offset)) {
+ vram_info = (union vram_info *)
+ (mode_info->atom_context->bios + data_offset);
+
+ module_id = (RREG32(adev->bios_scratch_reg_offset + 4) & 0x00ff0000) >> 16;
+ if (frev == 3) {
+ switch (crev) {
+ /* v30 */
+ case 0:
+ vram_module = (union vram_module *)vram_info->v30.vram_module;
+ mem_vendor = (vram_module->v30.dram_vendor_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ mem_type = vram_info->v30.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_info->v30.channel_num;
+ mem_channel_width = vram_info->v30.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * 16;
break;
default:
- vram_info = (union vram_info *)
- (mode_info->atom_context->bios + data_offset);
-
- module_id = (RREG32(adev->bios_scratch_reg_offset + 4) & 0x00ff0000) >> 16;
- if (frev == 3) {
- switch (crev) {
- /* v30 */
- case 0:
- vram_module = (union vram_module *)vram_info->v30.vram_module;
- mem_vendor = (vram_module->v30.dram_vendor_id) & 0xF;
- if (vram_vendor)
- *vram_vendor = mem_vendor;
- mem_type = vram_info->v30.memory_type;
- if (vram_type)
- *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
- mem_channel_number = vram_info->v30.channel_num;
- mem_channel_width = vram_info->v30.channel_width;
- if (vram_width)
- *vram_width = mem_channel_number * 16;
- break;
- default:
- return -EINVAL;
- }
- } else if (frev == 2) {
- switch (crev) {
- /* v23 */
- case 3:
- if (module_id > vram_info->v23.vram_module_num)
- module_id = 0;
- vram_module = (union vram_module *)vram_info->v23.vram_module;
- while (i < module_id) {
- vram_module = (union vram_module *)
- ((u8 *)vram_module + vram_module->v9.vram_module_size);
- i++;
- }
- mem_type = vram_module->v9.memory_type;
- if (vram_type)
- *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
- mem_channel_number = vram_module->v9.channel_num;
- mem_channel_width = vram_module->v9.channel_width;
- if (vram_width)
- *vram_width = mem_channel_number * (1 << mem_channel_width);
- mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
- if (vram_vendor)
- *vram_vendor = mem_vendor;
- break;
- /* v24 */
- case 4:
- if (module_id > vram_info->v24.vram_module_num)
- module_id = 0;
- vram_module = (union vram_module *)vram_info->v24.vram_module;
- while (i < module_id) {
- vram_module = (union vram_module *)
- ((u8 *)vram_module + vram_module->v10.vram_module_size);
- i++;
- }
- mem_type = vram_module->v10.memory_type;
- if (vram_type)
- *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
- mem_channel_number = vram_module->v10.channel_num;
- mem_channel_width = vram_module->v10.channel_width;
- if (vram_width)
- *vram_width = mem_channel_number * (1 << mem_channel_width);
- mem_vendor = (vram_module->v10.vender_rev_id) & 0xF;
- if (vram_vendor)
- *vram_vendor = mem_vendor;
- break;
- /* v25 */
- case 5:
- if (module_id > vram_info->v25.vram_module_num)
- module_id = 0;
- vram_module = (union vram_module *)vram_info->v25.vram_module;
- while (i < module_id) {
- vram_module = (union vram_module *)
- ((u8 *)vram_module + vram_module->v11.vram_module_size);
- i++;
- }
- mem_type = vram_module->v11.memory_type;
- if (vram_type)
- *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
- mem_channel_number = vram_module->v11.channel_num;
- mem_channel_width = vram_module->v11.channel_width;
- if (vram_width)
- *vram_width = mem_channel_number * (1 << mem_channel_width);
- mem_vendor = (vram_module->v11.vender_rev_id) & 0xF;
- if (vram_vendor)
- *vram_vendor = mem_vendor;
- break;
- /* v26 */
- case 6:
- if (module_id > vram_info->v26.vram_module_num)
- module_id = 0;
- vram_module = (union vram_module *)vram_info->v26.vram_module;
- while (i < module_id) {
- vram_module = (union vram_module *)
- ((u8 *)vram_module + vram_module->v9.vram_module_size);
- i++;
- }
- mem_type = vram_module->v9.memory_type;
- if (vram_type)
- *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
- mem_channel_number = vram_module->v9.channel_num;
- mem_channel_width = vram_module->v9.channel_width;
- if (vram_width)
- *vram_width = mem_channel_number * (1 << mem_channel_width);
- mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
- if (vram_vendor)
- *vram_vendor = mem_vendor;
- break;
- default:
- return -EINVAL;
- }
- } else {
- /* invalid frev */
- return -EINVAL;
+ return -EINVAL;
+ }
+ } else if (frev == 2) {
+ switch (crev) {
+ /* v23 */
+ case 3:
+ if (module_id > vram_info->v23.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v23.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v9.vram_module_size);
+ i++;
}
+ mem_type = vram_module->v9.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v9.channel_num;
+ mem_channel_width = vram_module->v9.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ /* v24 */
+ case 4:
+ if (module_id > vram_info->v24.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v24.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v10.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v10.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v10.channel_num;
+ mem_channel_width = vram_module->v10.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v10.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ /* v25 */
+ case 5:
+ if (module_id > vram_info->v25.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v25.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v11.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v11.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v11.channel_num;
+ mem_channel_width = vram_module->v11.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v11.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ /* v26 */
+ case 6:
+ if (module_id > vram_info->v26.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v26.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v9.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v9.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v9.channel_num;
+ mem_channel_width = vram_module->v9.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ default:
+ return -EINVAL;
}
+ } else {
+ /* invalid frev */
+ return -EINVAL;
}
+
+ } else {
+ return -EINVAL;
}
return 0;
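With amdgpu_atomfirmware_get_vram_info() split three ways above, a caller is expected to pick the right helper itself. A sketch of that dispatch, mirroring the branching the old monolithic function did internally (the wrapper function is hypothetical):

    /* APU -> integrated system info table, GC 12.0.x -> UMC info table,
     * everything else -> VRAM info table. */
    static int example_query_vram(struct amdgpu_device *adev,
                                  int *width, int *type, int *vendor)
    {
            if (adev->flags & AMD_IS_APU)
                    return amdgpu_atomfirmware_get_integrated_system_info(adev,
                                    width, type, vendor);

            switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
            case IP_VERSION(12, 0, 0):
            case IP_VERSION(12, 0, 1):
                    return amdgpu_atomfirmware_get_umc_info(adev, width, type, vendor);
            default:
                    return amdgpu_atomfirmware_get_vram_info(adev, width, type, vendor);
            }
    }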
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
index 67c8d105729b..0760e4510513 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
@@ -30,6 +30,10 @@ uint32_t amdgpu_atomfirmware_query_firmware_capability(struct amdgpu_device *ade
bool amdgpu_atomfirmware_gpu_virtualization_supported(struct amdgpu_device *adev);
void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev);
int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_get_integrated_system_info(struct amdgpu_device *adev,
+ int *vram_width, int *vram_type, int *vram_vendor);
+int amdgpu_atomfirmware_get_umc_info(struct amdgpu_device *adev,
+ int *vram_width, int *vram_type, int *vram_vendor);
int amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
int *vram_width, int *vram_type, int *vram_vendor);
int amdgpu_atomfirmware_get_uma_carveout_info(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 1cbba9803d31..6f3c68cde75e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -35,6 +35,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
struct dma_fence *fence;
int i, r;
+ mutex_lock(&adev->mman.default_entity.lock);
stime = ktime_get();
for (i = 0; i < n; i++) {
r = amdgpu_copy_buffer(adev, &adev->mman.default_entity,
@@ -49,6 +50,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
}
exit_do_move:
+ mutex_unlock(&adev->mman.default_entity.lock);
etime = ktime_get();
*time_ms = ktime_ms_delta(etime, stime);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index d1bf2e150c1a..b04fa9fd90b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -246,10 +246,10 @@ amdgpu_connector_find_encoder(struct drm_connector *connector,
return NULL;
}
-static struct edid *
+static const struct drm_edid *
amdgpu_connector_get_hardcoded_edid(struct amdgpu_device *adev)
{
- return drm_edid_duplicate(drm_edid_raw(adev->mode_info.bios_hardcoded_edid));
+ return drm_edid_dup(adev->mode_info.bios_hardcoded_edid);
}
static void amdgpu_connector_get_edid(struct drm_connector *connector)
@@ -268,8 +268,8 @@ static void amdgpu_connector_get_edid(struct drm_connector *connector)
if ((amdgpu_connector_encoder_get_dp_bridge_encoder_id(connector) !=
ENCODER_OBJECT_ID_NONE) &&
amdgpu_connector->ddc_bus->has_aux) {
- amdgpu_connector->edid = drm_get_edid(connector,
- &amdgpu_connector->ddc_bus->aux.ddc);
+ amdgpu_connector->edid = drm_edid_read_ddc(connector,
+ &amdgpu_connector->ddc_bus->aux.ddc);
} else if ((connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) ||
(connector->connector_type == DRM_MODE_CONNECTOR_eDP)) {
struct amdgpu_connector_atom_dig *dig = amdgpu_connector->con_priv;
@@ -277,14 +277,14 @@ static void amdgpu_connector_get_edid(struct drm_connector *connector)
if ((dig->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT ||
dig->dp_sink_type == CONNECTOR_OBJECT_ID_eDP) &&
amdgpu_connector->ddc_bus->has_aux)
- amdgpu_connector->edid = drm_get_edid(connector,
- &amdgpu_connector->ddc_bus->aux.ddc);
+ amdgpu_connector->edid = drm_edid_read_ddc(connector,
+ &amdgpu_connector->ddc_bus->aux.ddc);
else if (amdgpu_connector->ddc_bus)
- amdgpu_connector->edid = drm_get_edid(connector,
- &amdgpu_connector->ddc_bus->adapter);
+ amdgpu_connector->edid = drm_edid_read_ddc(connector,
+ &amdgpu_connector->ddc_bus->adapter);
} else if (amdgpu_connector->ddc_bus) {
- amdgpu_connector->edid = drm_get_edid(connector,
- &amdgpu_connector->ddc_bus->adapter);
+ amdgpu_connector->edid = drm_edid_read_ddc(connector,
+ &amdgpu_connector->ddc_bus->adapter);
}
if (!amdgpu_connector->edid) {
@@ -292,30 +292,22 @@ static void amdgpu_connector_get_edid(struct drm_connector *connector)
if (((connector->connector_type == DRM_MODE_CONNECTOR_LVDS) ||
(connector->connector_type == DRM_MODE_CONNECTOR_eDP))) {
amdgpu_connector->edid = amdgpu_connector_get_hardcoded_edid(adev);
- drm_connector_update_edid_property(connector, amdgpu_connector->edid);
+ drm_edid_connector_update(connector, amdgpu_connector->edid);
}
}
}
-static void amdgpu_connector_free_edid(struct drm_connector *connector)
-{
- struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
-
- kfree(amdgpu_connector->edid);
- amdgpu_connector->edid = NULL;
-}
-
static int amdgpu_connector_ddc_get_modes(struct drm_connector *connector)
{
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
int ret;
if (amdgpu_connector->edid) {
- drm_connector_update_edid_property(connector, amdgpu_connector->edid);
- ret = drm_add_edid_modes(connector, amdgpu_connector->edid);
+ drm_edid_connector_update(connector, amdgpu_connector->edid);
+ ret = drm_edid_connector_add_modes(connector);
return ret;
}
- drm_connector_update_edid_property(connector, NULL);
+ drm_edid_connector_update(connector, NULL);
return 0;
}
@@ -754,7 +746,7 @@ static void amdgpu_connector_destroy(struct drm_connector *connector)
{
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
- amdgpu_connector_free_edid(connector);
+ drm_edid_free(amdgpu_connector->edid);
kfree(amdgpu_connector->con_priv);
drm_connector_unregister(connector);
drm_connector_cleanup(connector);
@@ -873,7 +865,7 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force)
dret = amdgpu_display_ddc_probe(amdgpu_connector, false);
if (dret) {
amdgpu_connector->detected_by_load = false;
- amdgpu_connector_free_edid(connector);
+ drm_edid_free(amdgpu_connector->edid);
amdgpu_connector_get_edid(connector);
if (!amdgpu_connector->edid) {
@@ -883,13 +875,13 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force)
ret = connector_status_connected;
} else {
amdgpu_connector->use_digital =
- !!(amdgpu_connector->edid->input & DRM_EDID_INPUT_DIGITAL);
+ drm_edid_is_digital(amdgpu_connector->edid);
/* some oems have boards with separate digital and analog connectors
* with a shared ddc line (often vga + hdmi)
*/
if (amdgpu_connector->use_digital && amdgpu_connector->shared_ddc) {
- amdgpu_connector_free_edid(connector);
+ drm_edid_free(amdgpu_connector->edid);
ret = connector_status_disconnected;
} else {
ret = connector_status_connected;
@@ -984,7 +976,7 @@ static void amdgpu_connector_shared_ddc(enum drm_connector_status *status,
/* hpd is our only option in this case */
if (!amdgpu_display_hpd_sense(adev,
amdgpu_connector->hpd.hpd)) {
- amdgpu_connector_free_edid(connector);
+ drm_edid_free(amdgpu_connector->edid);
*status = connector_status_disconnected;
}
}
@@ -1053,7 +1045,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
}
if (dret) {
amdgpu_connector->detected_by_load = false;
- amdgpu_connector_free_edid(connector);
+ drm_edid_free(amdgpu_connector->edid);
amdgpu_connector_get_edid(connector);
if (!amdgpu_connector->edid) {
@@ -1063,13 +1055,13 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
broken_edid = true; /* defer use_digital to later */
} else {
amdgpu_connector->use_digital =
- !!(amdgpu_connector->edid->input & DRM_EDID_INPUT_DIGITAL);
+ drm_edid_is_digital(amdgpu_connector->edid);
/* some oems have boards with separate digital and analog connectors
* with a shared ddc line (often vga + hdmi)
*/
if ((!amdgpu_connector->use_digital) && amdgpu_connector->shared_ddc) {
- amdgpu_connector_free_edid(connector);
+ drm_edid_free(amdgpu_connector->edid);
ret = connector_status_disconnected;
} else {
ret = connector_status_connected;
@@ -1239,6 +1231,8 @@ static enum drm_mode_status amdgpu_connector_dvi_mode_valid(struct drm_connector
case CONNECTOR_OBJECT_ID_HDMI_TYPE_B:
max_digital_pixel_clock_khz = max_dvi_single_link_pixel_clock * 2;
break;
+ default:
+ return MODE_BAD;
}
/* When the display EDID claims that it's an HDMI display,
@@ -1417,7 +1411,7 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
goto out;
}
- amdgpu_connector_free_edid(connector);
+ drm_edid_free(amdgpu_connector->edid);
if ((connector->connector_type == DRM_MODE_CONNECTOR_eDP) ||
(connector->connector_type == DRM_MODE_CONNECTOR_LVDS)) {
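The connector changes above migrate from the raw struct edid API (drm_get_edid()/drm_add_edid_modes()/kfree()) to the opaque struct drm_edid one. A minimal sketch of that lifecycle using existing DRM core helpers; the wrapper and its arguments are illustrative:

    #include <drm/drm_edid.h>

    static int example_get_modes(struct drm_connector *connector,
                                 struct i2c_adapter *adapter)
    {
            const struct drm_edid *drm_edid;
            int num_modes = 0;

            drm_edid = drm_edid_read_ddc(connector, adapter);  /* NULL on failure */
            drm_edid_connector_update(connector, drm_edid);    /* also updates the EDID property */
            if (drm_edid)
                    num_modes = drm_edid_connector_add_modes(connector);
            drm_edid_free(drm_edid);                           /* NULL-safe */
            return num_modes;
    }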
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 24e4b4fc9156..b24d5d21be5f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -84,13 +84,6 @@ static int amdgpu_cs_job_idx(struct amdgpu_cs_parser *p,
if (r)
return r;
- /*
- * Abort if there is no run queue associated with this entity.
- * Possibly because of disabled HW IP.
- */
- if (entity->rq == NULL)
- return -EINVAL;
-
/* Check if we can add this IB to some existing job */
for (i = 0; i < p->gang_size; ++i)
if (p->entities[i] == entity)
@@ -915,9 +908,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
goto out_free_user_pages;
amdgpu_bo_list_for_each_entry(e, p->bo_list) {
- /* One fence for TTM and one for each CS job */
r = drm_exec_prepare_obj(&p->exec, &e->bo->tbo.base,
- 1 + p->gang_size);
+ TTM_NUM_MOVE_FENCES + p->gang_size);
drm_exec_retry_on_contention(&p->exec);
if (unlikely(r))
goto out_free_user_pages;
@@ -927,7 +919,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
if (p->uf_bo) {
r = drm_exec_prepare_obj(&p->exec, &p->uf_bo->tbo.base,
- 1 + p->gang_size);
+ TTM_NUM_MOVE_FENCES + p->gang_size);
drm_exec_retry_on_contention(&p->exec);
if (unlikely(r))
goto out_free_user_pages;
@@ -1747,6 +1739,13 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
struct drm_amdgpu_fence *fences;
int r;
+ /*
+ * fence_count must be non-zero; dma_fence_wait_any_timeout()
+ * does not accept an empty fence array.
+ */
+ if (!wait->in.fence_count)
+ return -EINVAL;
+
/* Get the fences from userspace */
fences = memdup_array_user(u64_to_user_ptr(wait->in.fences),
wait->in.fence_count,
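The new fence_count check above guards dma_fence_wait_any_timeout(), which WARNs and returns -EINVAL when handed an empty fence array. A minimal sketch of the pattern; the wrapper is illustrative:

    #include <linux/dma-fence.h>
    #include <linux/sched.h>        /* MAX_SCHEDULE_TIMEOUT */

    static long example_wait_any(struct dma_fence **fences, u32 fence_count)
    {
            u32 first_signaled;

            if (!fence_count)       /* mirrors the ioctl-level check */
                    return -EINVAL;

            return dma_fence_wait_any_timeout(fences, fence_count,
                                              true /* interruptible */,
                                              MAX_SCHEDULE_TIMEOUT,
                                              &first_signaled);
    }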
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index f2c038c91c70..7af86a32c0c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -231,13 +231,19 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
} else {
struct amdgpu_fpriv *fpriv;
- fpriv = container_of(ctx->ctx_mgr, struct amdgpu_fpriv, ctx_mgr);
+ /* TODO: Stop using fpriv here, we only need the xcp_id. */
+ fpriv = container_of(ctx->mgr, struct amdgpu_fpriv, ctx_mgr);
r = amdgpu_xcp_select_scheds(adev, hw_ip, hw_prio, fpriv,
&num_scheds, &scheds);
if (r)
goto error_free_entity;
}
+ if (num_scheds == 0) {
+ r = -EINVAL;
+ goto error_free_entity;
+ }
+
/* disable load balance if the hw engine retains context among dependent jobs */
if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
hw_ip == AMDGPU_HW_IP_VCN_DEC ||
@@ -348,7 +354,6 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
else
ctx->stable_pstate = current_stable_pstate;
- ctx->ctx_mgr = &(fpriv->ctx_mgr);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 090dfe86f75b..cf8d700a22fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -44,20 +44,19 @@ struct amdgpu_ctx_entity {
struct amdgpu_ctx {
struct kref refcount;
- struct amdgpu_ctx_mgr *mgr;
+ spinlock_t ring_lock;
unsigned reset_counter;
unsigned reset_counter_query;
- uint64_t generation;
- spinlock_t ring_lock;
- struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];
- bool preamble_presented;
int32_t init_priority;
int32_t override_priority;
+ uint32_t stable_pstate;
atomic_t guilty;
+ bool preamble_presented;
+ uint64_t generation;
unsigned long ras_counter_ce;
unsigned long ras_counter_ue;
- uint32_t stable_pstate;
- struct amdgpu_ctx_mgr *ctx_mgr;
+ struct amdgpu_ctx_mgr *mgr;
+ struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];
};
struct amdgpu_ctx_mgr {
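[The reshuffle of struct amdgpu_ctx above groups members by size, which on LP64 targets can close padding holes between narrow fields and 8-byte-aligned ones. A minimal illustration of why ordering matters — generic struct, sizes assume an LP64 ABI:]

#include <stdint.h>

struct padded {				/* 32 bytes on LP64 */
	uint8_t  flag;			/* 1 byte + 7 bytes padding */
	uint64_t generation;		/* needs 8-byte alignment */
	uint32_t count;			/* 4 bytes + 4 bytes padding */
	void    *ptr;
};

struct packed_by_hand {			/* 24 bytes on LP64 */
	uint64_t generation;
	void    *ptr;
	uint32_t count;
	uint8_t  flag;			/* only 3 bytes trailing padding */
};

[Tools such as pahole can confirm the resulting layout on a compiled object.]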
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index f7467af2e102..092fd3309099 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -39,6 +39,7 @@
#include "amdgpu_reset.h"
#include "amdgpu_psp_ta.h"
+#include "amdgpu_userq.h"
#if defined(CONFIG_DEBUG_FS)
@@ -617,6 +618,110 @@ out:
}
/**
+ * amdgpu_debugfs_regs_pcie64_read - Read from a 64-bit PCIE register
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ */
+static ssize_t amdgpu_debugfs_regs_pcie64_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x7 || *pos & 0x7)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ uint64_t value;
+
+ value = RREG64_PCIE_EXT(*pos);
+
+ r = put_user(value, (uint64_t *)buf);
+ if (r)
+ goto out;
+
+ result += 8;
+ buf += 8;
+ *pos += 8;
+ size -= 8;
+ }
+
+ r = result;
+out:
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
+ return r;
+}
+
+/**
+ * amdgpu_debugfs_regs_pcie64_write - Write to a 64-bit PCIE register
+ *
+ * @f: open file handle
+ * @buf: User buffer to write data from
+ * @size: Number of bytes to write
+ * @pos: Offset to seek to
+ */
+static ssize_t amdgpu_debugfs_regs_pcie64_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x7 || *pos & 0x7)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ uint64_t value;
+
+ r = get_user(value, (uint64_t *)buf);
+ if (r)
+ goto out;
+
+ WREG64_PCIE_EXT(*pos, value);
+
+ result += 8;
+ buf += 8;
+ *pos += 8;
+ size -= 8;
+ }
+
+ r = result;
+out:
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
+ return r;
+}
+
+/**
* amdgpu_debugfs_regs_didt_read - Read from a DIDT register
*
* @f: open file handle
@@ -638,7 +743,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
- if (!adev->didt_rreg)
+ if (!adev->reg.didt.rreg)
return -EOPNOTSUPP;
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
@@ -696,7 +801,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
- if (!adev->didt_wreg)
+ if (!adev->reg.didt.wreg)
return -EOPNOTSUPP;
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
@@ -752,7 +857,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
ssize_t result = 0;
int r;
- if (!adev->smc_rreg)
+ if (!adev->reg.smc.rreg)
return -EOPNOTSUPP;
if (size & 0x3 || *pos & 0x3)
@@ -810,7 +915,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
ssize_t result = 0;
int r;
- if (!adev->smc_wreg)
+ if (!adev->reg.smc.wreg)
return -EOPNOTSUPP;
if (size & 0x3 || *pos & 0x3)
@@ -1524,6 +1629,12 @@ static const struct file_operations amdgpu_debugfs_regs_pcie_fops = {
.write = amdgpu_debugfs_regs_pcie_write,
.llseek = default_llseek
};
+static const struct file_operations amdgpu_debugfs_regs_pcie64_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_regs_pcie64_read,
+ .write = amdgpu_debugfs_regs_pcie64_write,
+ .llseek = default_llseek
+};
static const struct file_operations amdgpu_debugfs_regs_smc_fops = {
.owner = THIS_MODULE,
.read = amdgpu_debugfs_regs_smc_read,
@@ -1586,6 +1697,7 @@ static const struct file_operations *debugfs_regs[] = {
&amdgpu_debugfs_gprwave_fops,
&amdgpu_debugfs_regs_didt_fops,
&amdgpu_debugfs_regs_pcie_fops,
+ &amdgpu_debugfs_regs_pcie64_fops,
&amdgpu_debugfs_regs_smc_fops,
&amdgpu_debugfs_gca_config_fops,
&amdgpu_debugfs_sensors_fops,
@@ -1603,6 +1715,7 @@ static const char * const debugfs_regs_names[] = {
"amdgpu_gprwave",
"amdgpu_regs_didt",
"amdgpu_regs_pcie",
+ "amdgpu_regs_pcie64",
"amdgpu_regs_smc",
"amdgpu_gca_config",
"amdgpu_sensors",
@@ -2156,6 +2269,53 @@ static const struct file_operations amdgpu_pt_info_fops = {
.release = single_release,
};
+static int amdgpu_mqd_info_read(struct seq_file *m, void *unused)
+{
+ struct amdgpu_usermode_queue *queue = m->private;
+ struct amdgpu_bo *bo;
+ int r;
+
+ if (!queue || !queue->mqd.obj)
+ return -EINVAL;
+
+ bo = amdgpu_bo_ref(queue->mqd.obj);
+ r = amdgpu_bo_reserve(bo, true);
+ if (r) {
+ amdgpu_bo_unref(&bo);
+ return -EINVAL;
+ }
+
+ seq_printf(m, "queue_type: %d\n", queue->queue_type);
+ seq_printf(m, "mqd_gpu_address: 0x%llx\n", amdgpu_bo_gpu_offset(queue->mqd.obj));
+
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
+
+ return 0;
+}
+
+static int amdgpu_mqd_info_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, amdgpu_mqd_info_read, inode->i_private);
+}
+
+static const struct file_operations amdgpu_mqd_info_fops = {
+ .owner = THIS_MODULE,
+ .open = amdgpu_mqd_info_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+void amdgpu_debugfs_userq_init(struct drm_file *file, struct amdgpu_usermode_queue *queue, int qid)
+{
+ char queue_name[32];
+
+ scnprintf(queue_name, sizeof(queue_name), "queue_%d", qid);
+ queue->debugfs_queue = debugfs_create_dir(queue_name, file->debugfs_client);
+ debugfs_create_file("mqd_info", 0444, queue->debugfs_queue, queue, &amdgpu_mqd_info_fops);
+}
+
void amdgpu_debugfs_vm_init(struct drm_file *file)
{
debugfs_create_file("vm_pagetable_info", 0444, file->debugfs_client, file,
@@ -2174,4 +2334,9 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
void amdgpu_debugfs_vm_init(struct drm_file *file)
{
}
+void amdgpu_debugfs_userq_init(struct drm_file *file,
+ struct amdgpu_usermode_queue *queue,
+ int qid)
+{
+}
#endif
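[The pcie64 handlers above follow the usual shape for fixed-width debugfs register files: reject sizes and offsets that are not naturally aligned, then stream one word at a time with put_user()/get_user(). A trimmed sketch of the read side — the register accessor is a hypothetical stub and the error handling is simplified:]

#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/uaccess.h>

/* Hypothetical stand-in for a 64-bit register read such as RREG64_PCIE_EXT(). */
static u64 example_reg_read(loff_t offset)
{
	return 0xdeadbeefULL + offset;
}

static ssize_t example_regs_read(struct file *f, char __user *buf,
				 size_t size, loff_t *pos)
{
	ssize_t result = 0;

	/* Only whole, 8-byte-aligned accesses make sense here. */
	if (size & 0x7 || *pos & 0x7)
		return -EINVAL;

	while (size) {
		u64 value = example_reg_read(*pos);

		if (put_user(value, (u64 __user *)buf))
			return result ? result : -EFAULT;

		buf += 8;
		*pos += 8;
		size -= 8;
		result += 8;
	}

	return result;
}

static const struct file_operations example_regs_fops = {
	.owner	= THIS_MODULE,
	.read	= example_regs_read,
	.llseek	= default_llseek,
};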
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
index e7b3c38e5186..e88b4a1e564c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
@@ -25,6 +25,7 @@
/*
* Debugfs
*/
+struct amdgpu_usermode_queue;
int amdgpu_debugfs_regs_init(struct amdgpu_device *adev);
int amdgpu_debugfs_init(struct amdgpu_device *adev);
@@ -34,4 +35,7 @@ void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
void amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev);
void amdgpu_debugfs_vm_init(struct drm_file *file);
+void amdgpu_debugfs_userq_init(struct drm_file *file,
+ struct amdgpu_usermode_queue *queue,
+ int qid);
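[Note the bare "struct amdgpu_usermode_queue;" line above: a forward declaration of an incomplete type is enough for prototypes that only pass pointers, so this header avoids pulling in the userq header. A generic sketch of the pattern:]

/* widget_api.h -- sketch: pointer parameters need only a declaration. */
struct widget;				/* incomplete type */

void widget_api_register(struct widget *w);	/* OK: pointer only */

/* sizeof(struct widget) or w->field would fail to compile here; only
 * a .c file that includes the full definition may do that.
 */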
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
index c38e7371bafc..fddf4e1252bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
@@ -32,8 +32,16 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
bool vram_lost, struct amdgpu_job *job)
{
}
+void amdgpu_coredump_init(struct amdgpu_device *adev)
+{
+}
+void amdgpu_coredump_fini(struct amdgpu_device *adev)
+{
+}
#else
+#define AMDGPU_CORE_DUMP_SIZE_MAX (256 * 1024 * 1024)
+
const char *hw_ip_names[MAX_HWIP] = {
[GC_HWIP] = "GC",
[HDP_HWIP] = "HDP",
@@ -187,20 +195,22 @@ static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev,
drm_printf(p, "VPE feature version: %u, fw version: 0x%08x\n",
adev->vpe.feature_version, adev->vpe.fw_version);
- drm_printf(p, "\nVBIOS Information\n");
- drm_printf(p, "vbios name : %s\n", ctx->name);
- drm_printf(p, "vbios pn : %s\n", ctx->vbios_pn);
- drm_printf(p, "vbios version : %d\n", ctx->version);
- drm_printf(p, "vbios ver_str : %s\n", ctx->vbios_ver_str);
- drm_printf(p, "vbios date : %s\n", ctx->date);
+ if (adev->bios) {
+ drm_printf(p, "\nVBIOS Information\n");
+ drm_printf(p, "vbios name : %s\n", ctx->name);
+ drm_printf(p, "vbios pn : %s\n", ctx->vbios_pn);
+ drm_printf(p, "vbios version : %d\n", ctx->version);
+ drm_printf(p, "vbios ver_str : %s\n", ctx->vbios_ver_str);
+ drm_printf(p, "vbios date : %s\n", ctx->date);
+ } else {
+ drm_printf(p, "\nVBIOS Information: NA\n");
+ }
}
static ssize_t
-amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
- void *data, size_t datalen)
+amdgpu_devcoredump_format(char *buffer, size_t count, struct amdgpu_coredump_info *coredump)
{
struct drm_printer p;
- struct amdgpu_coredump_info *coredump = data;
struct drm_print_iterator iter;
struct amdgpu_vm_fault_info *fault_info;
struct amdgpu_ip_block *ip_block;
@@ -208,7 +218,6 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
iter.data = buffer;
iter.offset = 0;
- iter.start = offset;
iter.remain = count;
p = drm_coredump_printer(&iter);
@@ -261,6 +270,8 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
}
}
+ amdgpu_discovery_dump(coredump->adev, &p);
+
/* IP firmware information */
drm_printf(&p, "\nIP Firmwares\n");
amdgpu_devcoredump_fw_info(coredump->adev, &p);
@@ -320,9 +331,63 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
return count - iter.remain;
}
+static ssize_t
+amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
+ void *data, size_t datalen)
+{
+ struct amdgpu_coredump_info *coredump = data;
+ ssize_t byte_copied;
+
+ if (!coredump)
+ return -ENODEV;
+
+ if (!coredump->formatted)
+ return -ENODEV;
+
+ if (offset >= coredump->formatted_size)
+ return 0;
+
+ byte_copied = count < coredump->formatted_size - offset ? count :
+ coredump->formatted_size - offset;
+ memcpy(buffer, coredump->formatted + offset, byte_copied);
+
+ return byte_copied;
+}
+
static void amdgpu_devcoredump_free(void *data)
{
- kfree(data);
+ struct amdgpu_coredump_info *coredump = data;
+
+ kvfree(coredump->formatted);
+ kvfree(data);
+}
+
+static void amdgpu_devcoredump_deferred_work(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, typeof(*adev), coredump_work);
+ struct amdgpu_coredump_info *coredump = adev->coredump;
+
+ /* Do a one-time preparation of the coredump output because
+ * repeatedly calling drm_coredump_printer is very slow.
+ */
+ coredump->formatted_size = amdgpu_devcoredump_format(
+ NULL, AMDGPU_CORE_DUMP_SIZE_MAX, coredump);
+ coredump->formatted = kvzalloc(coredump->formatted_size, GFP_KERNEL);
+ if (!coredump->formatted) {
+ amdgpu_devcoredump_free(coredump);
+ goto end;
+ }
+
+ amdgpu_devcoredump_format(coredump->formatted, coredump->formatted_size, coredump);
+
+ /* If there's an existing coredump for this device, the free function will be
+ * called immediately, so the coredump might be invalid after the call to dev_coredumpm.
+ */
+ dev_coredumpm(coredump->adev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT,
+ amdgpu_devcoredump_read, amdgpu_devcoredump_free);
+
+end:
+ adev->coredump = NULL;
}
void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
@@ -332,6 +397,10 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
struct amdgpu_coredump_info *coredump;
struct drm_sched_job *s_job;
+ /* No need to generate a new coredump if there's one in progress already. */
+ if (work_pending(&adev->coredump_work))
+ return;
+
coredump = kzalloc_obj(*coredump, GFP_NOWAIT);
if (!coredump)
return;
@@ -358,11 +427,26 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
ktime_get_ts64(&coredump->reset_time);
- dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT,
- amdgpu_devcoredump_read, amdgpu_devcoredump_free);
+ /* Update the current coredump pointer (no lock needed, this function can only be called
+ * from a single thread)
+ */
+ adev->coredump = coredump;
+ /* Kick off coredump formatting to a worker thread. */
+ queue_work(system_unbound_wq, &adev->coredump_work);
drm_info(dev, "AMDGPU device coredump file has been created\n");
drm_info(dev, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
dev->primary->index);
}
+
+void amdgpu_coredump_init(struct amdgpu_device *adev)
+{
+ INIT_WORK(&adev->coredump_work, amdgpu_devcoredump_deferred_work);
+}
+
+void amdgpu_coredump_fini(struct amdgpu_device *adev)
+{
+ /* Finish deferred coredump formatting before HW/IP teardown. */
+ flush_work(&adev->coredump_work);
+}
#endif
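[The rework above turns devcoredump reads into a plain bounds-checked memcpy() by formatting once in a worker: a first formatting pass with a NULL buffer measures the output, a kvzalloc() sized from that measurement receives the real pass, and dev_coredumpm() is registered only afterwards. A condensed sketch of the measure-then-fill half of that pattern — generic names, not the driver's helpers:]

#include <linux/slab.h>

struct example_info { int status; unsigned int frames; };

/* With buf == NULL this only measures, mirroring the first pass above. */
static int example_format(char *buf, size_t cap, const struct example_info *info)
{
	return snprintf(buf, buf ? cap : 0, "status: %d\nframes: %u\n",
			info->status, info->frames);
}

static char *example_prepare(const struct example_info *info, size_t *len)
{
	int need = example_format(NULL, 0, info);
	char *out = kvzalloc(need + 1, GFP_KERNEL);

	if (out) {
		example_format(out, need + 1, info);
		*len = need;
	}
	return out;	/* later reads memcpy() from this at the given offset */
}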
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
index ef9772c6bcc9..f8f2f4df129b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
@@ -35,12 +35,20 @@ struct amdgpu_coredump_info {
struct amdgpu_device *adev;
struct amdgpu_task_info reset_task_info;
struct timespec64 reset_time;
+
bool skip_vram_check;
bool reset_vram_lost;
struct amdgpu_ring *ring;
+ /* Readable form of the devcoredump, generated once to speed up
+ * reading it (see drm_coredump_printer's documentation).
+ */
+ ssize_t formatted_size;
+ char *formatted;
};
#endif
void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
bool vram_lost, struct amdgpu_job *job);
+void amdgpu_coredump_init(struct amdgpu_device *adev);
+void amdgpu_coredump_fini(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6d8531f9b882..9c936519bb2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -847,558 +847,6 @@ bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
}
/**
- * amdgpu_device_rreg - read a memory mapped IO or indirect register
- *
- * @adev: amdgpu_device pointer
- * @reg: dword aligned register offset
- * @acc_flags: access flags which require special behavior
- *
- * Returns the 32 bit value from the offset specified.
- */
-uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
- uint32_t reg, uint32_t acc_flags)
-{
- uint32_t ret;
-
- if (amdgpu_device_skip_hw_access(adev))
- return 0;
-
- if ((reg * 4) < adev->rmmio_size) {
- if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
- amdgpu_sriov_runtime(adev) &&
- down_read_trylock(&adev->reset_domain->sem)) {
- ret = amdgpu_kiq_rreg(adev, reg, 0);
- up_read(&adev->reset_domain->sem);
- } else {
- ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
- }
- } else {
- ret = adev->pcie_rreg(adev, reg * 4);
- }
-
- trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
-
- return ret;
-}
-
-/*
- * MMIO register read with bytes helper functions
- * @offset:bytes offset from MMIO start
- */
-
-/**
- * amdgpu_mm_rreg8 - read a memory mapped IO register
- *
- * @adev: amdgpu_device pointer
- * @offset: byte aligned register offset
- *
- * Returns the 8 bit value from the offset specified.
- */
-uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
-{
- if (amdgpu_device_skip_hw_access(adev))
- return 0;
-
- if (offset < adev->rmmio_size)
- return (readb(adev->rmmio + offset));
- BUG();
-}
-
-
-/**
- * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
- *
- * @adev: amdgpu_device pointer
- * @reg: dword aligned register offset
- * @acc_flags: access flags which require special behavior
- * @xcc_id: xcc accelerated compute core id
- *
- * Returns the 32 bit value from the offset specified.
- */
-uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
- uint32_t reg, uint32_t acc_flags,
- uint32_t xcc_id)
-{
- uint32_t ret, rlcg_flag;
-
- if (amdgpu_device_skip_hw_access(adev))
- return 0;
-
- if ((reg * 4) < adev->rmmio_size) {
- if (amdgpu_sriov_vf(adev) &&
- !amdgpu_sriov_runtime(adev) &&
- adev->gfx.rlc.rlcg_reg_access_supported &&
- amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
- GC_HWIP, false,
- &rlcg_flag)) {
- ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, GET_INST(GC, xcc_id));
- } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
- amdgpu_sriov_runtime(adev) &&
- down_read_trylock(&adev->reset_domain->sem)) {
- ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
- up_read(&adev->reset_domain->sem);
- } else {
- ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
- }
- } else {
- ret = adev->pcie_rreg(adev, reg * 4);
- }
-
- return ret;
-}
-
-/*
- * MMIO register write with bytes helper functions
- * @offset:bytes offset from MMIO start
- * @value: the value want to be written to the register
- */
-
-/**
- * amdgpu_mm_wreg8 - read a memory mapped IO register
- *
- * @adev: amdgpu_device pointer
- * @offset: byte aligned register offset
- * @value: 8 bit value to write
- *
- * Writes the value specified to the offset specified.
- */
-void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
-{
- if (amdgpu_device_skip_hw_access(adev))
- return;
-
- if (offset < adev->rmmio_size)
- writeb(value, adev->rmmio + offset);
- else
- BUG();
-}
-
-/**
- * amdgpu_device_wreg - write to a memory mapped IO or indirect register
- *
- * @adev: amdgpu_device pointer
- * @reg: dword aligned register offset
- * @v: 32 bit value to write to the register
- * @acc_flags: access flags which require special behavior
- *
- * Writes the value specified to the offset specified.
- */
-void amdgpu_device_wreg(struct amdgpu_device *adev,
- uint32_t reg, uint32_t v,
- uint32_t acc_flags)
-{
- if (amdgpu_device_skip_hw_access(adev))
- return;
-
- if ((reg * 4) < adev->rmmio_size) {
- if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
- amdgpu_sriov_runtime(adev) &&
- down_read_trylock(&adev->reset_domain->sem)) {
- amdgpu_kiq_wreg(adev, reg, v, 0);
- up_read(&adev->reset_domain->sem);
- } else {
- writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
- }
- } else {
- adev->pcie_wreg(adev, reg * 4, v);
- }
-
- trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
-}
-
-/**
- * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
- *
- * @adev: amdgpu_device pointer
- * @reg: mmio/rlc register
- * @v: value to write
- * @xcc_id: xcc accelerated compute core id
- *
- * this function is invoked only for the debugfs register access
- */
-void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
- uint32_t reg, uint32_t v,
- uint32_t xcc_id)
-{
- if (amdgpu_device_skip_hw_access(adev))
- return;
-
- if (amdgpu_sriov_fullaccess(adev) &&
- adev->gfx.rlc.funcs &&
- adev->gfx.rlc.funcs->is_rlcg_access_range) {
- if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
- return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
- } else if ((reg * 4) >= adev->rmmio_size) {
- adev->pcie_wreg(adev, reg * 4, v);
- } else {
- writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
- }
-}
-
-/**
- * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
- *
- * @adev: amdgpu_device pointer
- * @reg: dword aligned register offset
- * @v: 32 bit value to write to the register
- * @acc_flags: access flags which require special behavior
- * @xcc_id: xcc accelerated compute core id
- *
- * Writes the value specified to the offset specified.
- */
-void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
- uint32_t reg, uint32_t v,
- uint32_t acc_flags, uint32_t xcc_id)
-{
- uint32_t rlcg_flag;
-
- if (amdgpu_device_skip_hw_access(adev))
- return;
-
- if ((reg * 4) < adev->rmmio_size) {
- if (amdgpu_sriov_vf(adev) &&
- !amdgpu_sriov_runtime(adev) &&
- adev->gfx.rlc.rlcg_reg_access_supported &&
- amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
- GC_HWIP, true,
- &rlcg_flag)) {
- amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, GET_INST(GC, xcc_id));
- } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
- amdgpu_sriov_runtime(adev) &&
- down_read_trylock(&adev->reset_domain->sem)) {
- amdgpu_kiq_wreg(adev, reg, v, xcc_id);
- up_read(&adev->reset_domain->sem);
- } else {
- writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
- }
- } else {
- adev->pcie_wreg(adev, reg * 4, v);
- }
-}
-
-/**
- * amdgpu_device_indirect_rreg - read an indirect register
- *
- * @adev: amdgpu_device pointer
- * @reg_addr: indirect register address to read from
- *
- * Returns the value of indirect register @reg_addr
- */
-u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
- u32 reg_addr)
-{
- unsigned long flags, pcie_index, pcie_data;
- void __iomem *pcie_index_offset;
- void __iomem *pcie_data_offset;
- u32 r;
-
- pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
- pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
-
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
- pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
- pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
-
- writel(reg_addr, pcie_index_offset);
- readl(pcie_index_offset);
- r = readl(pcie_data_offset);
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
-
- return r;
-}
-
-u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
- u64 reg_addr)
-{
- unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
- u32 r;
- void __iomem *pcie_index_offset;
- void __iomem *pcie_index_hi_offset;
- void __iomem *pcie_data_offset;
-
- if (unlikely(!adev->nbio.funcs)) {
- pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
- pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
- } else {
- pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
- pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
- }
-
- if (reg_addr >> 32) {
- if (unlikely(!adev->nbio.funcs))
- pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
- else
- pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
- } else {
- pcie_index_hi = 0;
- }
-
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
- pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
- pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
- if (pcie_index_hi != 0)
- pcie_index_hi_offset = (void __iomem *)adev->rmmio +
- pcie_index_hi * 4;
-
- writel(reg_addr, pcie_index_offset);
- readl(pcie_index_offset);
- if (pcie_index_hi != 0) {
- writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
- readl(pcie_index_hi_offset);
- }
- r = readl(pcie_data_offset);
-
- /* clear the high bits */
- if (pcie_index_hi != 0) {
- writel(0, pcie_index_hi_offset);
- readl(pcie_index_hi_offset);
- }
-
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
-
- return r;
-}
-
-/**
- * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
- *
- * @adev: amdgpu_device pointer
- * @reg_addr: indirect register address to read from
- *
- * Returns the value of indirect register @reg_addr
- */
-u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
- u32 reg_addr)
-{
- unsigned long flags, pcie_index, pcie_data;
- void __iomem *pcie_index_offset;
- void __iomem *pcie_data_offset;
- u64 r;
-
- pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
- pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
-
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
- pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
- pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
-
- /* read low 32 bits */
- writel(reg_addr, pcie_index_offset);
- readl(pcie_index_offset);
- r = readl(pcie_data_offset);
- /* read high 32 bits */
- writel(reg_addr + 4, pcie_index_offset);
- readl(pcie_index_offset);
- r |= ((u64)readl(pcie_data_offset) << 32);
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
-
- return r;
-}
-
-u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
- u64 reg_addr)
-{
- unsigned long flags, pcie_index, pcie_data;
- unsigned long pcie_index_hi = 0;
- void __iomem *pcie_index_offset;
- void __iomem *pcie_index_hi_offset;
- void __iomem *pcie_data_offset;
- u64 r;
-
- pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
- pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
- if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
- pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
-
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
- pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
- pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
- if (pcie_index_hi != 0)
- pcie_index_hi_offset = (void __iomem *)adev->rmmio +
- pcie_index_hi * 4;
-
- /* read low 32 bits */
- writel(reg_addr, pcie_index_offset);
- readl(pcie_index_offset);
- if (pcie_index_hi != 0) {
- writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
- readl(pcie_index_hi_offset);
- }
- r = readl(pcie_data_offset);
- /* read high 32 bits */
- writel(reg_addr + 4, pcie_index_offset);
- readl(pcie_index_offset);
- if (pcie_index_hi != 0) {
- writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
- readl(pcie_index_hi_offset);
- }
- r |= ((u64)readl(pcie_data_offset) << 32);
-
- /* clear the high bits */
- if (pcie_index_hi != 0) {
- writel(0, pcie_index_hi_offset);
- readl(pcie_index_hi_offset);
- }
-
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
-
- return r;
-}
-
-/**
- * amdgpu_device_indirect_wreg - write an indirect register address
- *
- * @adev: amdgpu_device pointer
- * @reg_addr: indirect register offset
- * @reg_data: indirect register data
- *
- */
-void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
- u32 reg_addr, u32 reg_data)
-{
- unsigned long flags, pcie_index, pcie_data;
- void __iomem *pcie_index_offset;
- void __iomem *pcie_data_offset;
-
- pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
- pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
-
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
- pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
- pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
-
- writel(reg_addr, pcie_index_offset);
- readl(pcie_index_offset);
- writel(reg_data, pcie_data_offset);
- readl(pcie_data_offset);
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
-}
-
-void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
- u64 reg_addr, u32 reg_data)
-{
- unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
- void __iomem *pcie_index_offset;
- void __iomem *pcie_index_hi_offset;
- void __iomem *pcie_data_offset;
-
- pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
- pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
- if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
- pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
- else
- pcie_index_hi = 0;
-
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
- pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
- pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
- if (pcie_index_hi != 0)
- pcie_index_hi_offset = (void __iomem *)adev->rmmio +
- pcie_index_hi * 4;
-
- writel(reg_addr, pcie_index_offset);
- readl(pcie_index_offset);
- if (pcie_index_hi != 0) {
- writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
- readl(pcie_index_hi_offset);
- }
- writel(reg_data, pcie_data_offset);
- readl(pcie_data_offset);
-
- /* clear the high bits */
- if (pcie_index_hi != 0) {
- writel(0, pcie_index_hi_offset);
- readl(pcie_index_hi_offset);
- }
-
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
-}
-
-/**
- * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
- *
- * @adev: amdgpu_device pointer
- * @reg_addr: indirect register offset
- * @reg_data: indirect register data
- *
- */
-void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
- u32 reg_addr, u64 reg_data)
-{
- unsigned long flags, pcie_index, pcie_data;
- void __iomem *pcie_index_offset;
- void __iomem *pcie_data_offset;
-
- pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
- pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
-
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
- pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
- pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
-
- /* write low 32 bits */
- writel(reg_addr, pcie_index_offset);
- readl(pcie_index_offset);
- writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
- readl(pcie_data_offset);
- /* write high 32 bits */
- writel(reg_addr + 4, pcie_index_offset);
- readl(pcie_index_offset);
- writel((u32)(reg_data >> 32), pcie_data_offset);
- readl(pcie_data_offset);
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
-}
-
-void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
- u64 reg_addr, u64 reg_data)
-{
- unsigned long flags, pcie_index, pcie_data;
- unsigned long pcie_index_hi = 0;
- void __iomem *pcie_index_offset;
- void __iomem *pcie_index_hi_offset;
- void __iomem *pcie_data_offset;
-
- pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
- pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
- if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
- pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
-
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
- pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
- pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
- if (pcie_index_hi != 0)
- pcie_index_hi_offset = (void __iomem *)adev->rmmio +
- pcie_index_hi * 4;
-
- /* write low 32 bits */
- writel(reg_addr, pcie_index_offset);
- readl(pcie_index_offset);
- if (pcie_index_hi != 0) {
- writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
- readl(pcie_index_hi_offset);
- }
- writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
- readl(pcie_data_offset);
- /* write high 32 bits */
- writel(reg_addr + 4, pcie_index_offset);
- readl(pcie_index_offset);
- if (pcie_index_hi != 0) {
- writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
- readl(pcie_index_hi_offset);
- }
- writel((u32)(reg_data >> 32), pcie_data_offset);
- readl(pcie_data_offset);
-
- /* clear the high bits */
- if (pcie_index_hi != 0) {
- writel(0, pcie_index_hi_offset);
- readl(pcie_index_hi_offset);
- }
-
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
-}
-
-/**
* amdgpu_device_get_rev_id - query device rev_id
*
* @adev: amdgpu_device pointer
@@ -1410,149 +858,6 @@ u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
return adev->nbio.funcs->get_rev_id(adev);
}
-/**
- * amdgpu_invalid_rreg - dummy reg read function
- *
- * @adev: amdgpu_device pointer
- * @reg: offset of register
- *
- * Dummy register read function. Used for register blocks
- * that certain asics don't have (all asics).
- * Returns the value in the register.
- */
-static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
-{
- dev_err(adev->dev, "Invalid callback to read register 0x%04X\n", reg);
- BUG();
- return 0;
-}
-
-static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
-{
- dev_err(adev->dev, "Invalid callback to read register 0x%llX\n", reg);
- BUG();
- return 0;
-}
-
-/**
- * amdgpu_invalid_wreg - dummy reg write function
- *
- * @adev: amdgpu_device pointer
- * @reg: offset of register
- * @v: value to write to the register
- *
- * Dummy register read function. Used for register blocks
- * that certain asics don't have (all asics).
- */
-static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
-{
- dev_err(adev->dev,
- "Invalid callback to write register 0x%04X with 0x%08X\n", reg,
- v);
- BUG();
-}
-
-static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
-{
- dev_err(adev->dev,
- "Invalid callback to write register 0x%llX with 0x%08X\n", reg,
- v);
- BUG();
-}
-
-/**
- * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
- *
- * @adev: amdgpu_device pointer
- * @reg: offset of register
- *
- * Dummy register read function. Used for register blocks
- * that certain asics don't have (all asics).
- * Returns the value in the register.
- */
-static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
-{
- dev_err(adev->dev, "Invalid callback to read 64 bit register 0x%04X\n",
- reg);
- BUG();
- return 0;
-}
-
-static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
-{
- dev_err(adev->dev, "Invalid callback to read register 0x%llX\n", reg);
- BUG();
- return 0;
-}
-
-/**
- * amdgpu_invalid_wreg64 - dummy reg write function
- *
- * @adev: amdgpu_device pointer
- * @reg: offset of register
- * @v: value to write to the register
- *
- * Dummy register read function. Used for register blocks
- * that certain asics don't have (all asics).
- */
-static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
-{
- dev_err(adev->dev,
- "Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
- reg, v);
- BUG();
-}
-
-static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
-{
- dev_err(adev->dev,
- "Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
- reg, v);
- BUG();
-}
-
-/**
- * amdgpu_block_invalid_rreg - dummy reg read function
- *
- * @adev: amdgpu_device pointer
- * @block: offset of instance
- * @reg: offset of register
- *
- * Dummy register read function. Used for register blocks
- * that certain asics don't have (all asics).
- * Returns the value in the register.
- */
-static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
- uint32_t block, uint32_t reg)
-{
- dev_err(adev->dev,
- "Invalid callback to read register 0x%04X in block 0x%04X\n",
- reg, block);
- BUG();
- return 0;
-}
-
-/**
- * amdgpu_block_invalid_wreg - dummy reg write function
- *
- * @adev: amdgpu_device pointer
- * @block: offset of instance
- * @reg: offset of register
- * @v: value to write to the register
- *
- * Dummy register read function. Used for register blocks
- * that certain asics don't have (all asics).
- */
-static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
- uint32_t block,
- uint32_t reg, uint32_t v)
-{
- dev_err(adev->dev,
- "Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
- reg, block, v);
- BUG();
-}
-
static uint32_t amdgpu_device_get_vbios_flags(struct amdgpu_device *adev)
{
if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
@@ -3158,9 +2463,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
if (r)
goto init_failed;
- if (adev->mman.buffer_funcs_ring &&
- adev->mman.buffer_funcs_ring->sched.ready)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
/* Don't init kfd if whole hive need to be reset during init */
if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
@@ -4059,8 +3362,7 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
r = amdgpu_device_ip_resume_phase2(adev);
- if (adev->mman.buffer_funcs_ring->sched.ready)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
if (r)
return r;
@@ -4120,17 +3422,6 @@ bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev,
case CHIP_VERDE:
case CHIP_OLAND:
return amdgpu_dc != 0 && IS_ENABLED(CONFIG_DRM_AMD_DC_SI);
- case CHIP_KAVERI:
- case CHIP_KABINI:
- case CHIP_MULLINS:
- /*
- * We have systems in the wild with these ASICs that require
- * TRAVIS and NUTMEG support which is not supported with DC.
- *
- * Fallback to the non-DC driver here by default so as not to
- * cause regressions.
- */
- return amdgpu_dc > 0;
default:
return amdgpu_dc != 0;
#else
@@ -4417,26 +3708,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
- adev->smc_rreg = &amdgpu_invalid_rreg;
- adev->smc_wreg = &amdgpu_invalid_wreg;
- adev->pcie_rreg = &amdgpu_invalid_rreg;
- adev->pcie_wreg = &amdgpu_invalid_wreg;
- adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
- adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
- adev->pciep_rreg = &amdgpu_invalid_rreg;
- adev->pciep_wreg = &amdgpu_invalid_wreg;
- adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
- adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
- adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
- adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
- adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
- adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
- adev->didt_rreg = &amdgpu_invalid_rreg;
- adev->didt_wreg = &amdgpu_invalid_wreg;
- adev->gc_cac_rreg = &amdgpu_invalid_rreg;
- adev->gc_cac_wreg = &amdgpu_invalid_wreg;
- adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
- adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
+ amdgpu_reg_access_init(adev);
dev_info(
adev->dev,
@@ -4481,13 +3753,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
return r;
spin_lock_init(&adev->mmio_idx_lock);
- spin_lock_init(&adev->smc_idx_lock);
- spin_lock_init(&adev->pcie_idx_lock);
- spin_lock_init(&adev->uvd_ctx_idx_lock);
- spin_lock_init(&adev->didt_idx_lock);
- spin_lock_init(&adev->gc_cac_idx_lock);
- spin_lock_init(&adev->se_cac_idx_lock);
- spin_lock_init(&adev->audio_endpt_idx_lock);
spin_lock_init(&adev->mm_stats.lock);
spin_lock_init(&adev->virt.rlcg_reg_lock);
spin_lock_init(&adev->wb.lock);
@@ -4525,6 +3790,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
INIT_WORK(&adev->userq_reset_work, amdgpu_userq_reset_work);
+ amdgpu_coredump_init(adev);
+
adev->gfx.gfx_off_req_count = 1;
adev->gfx.gfx_off_residency = 0;
adev->gfx.gfx_off_entrycount = 0;
@@ -4622,9 +3889,21 @@ int amdgpu_device_init(struct amdgpu_device *adev,
amdgpu_gmc_noretry_set(adev);
/* Need to get xgmi info early to decide the reset behavior*/
if (adev->gmc.xgmi.supported) {
- r = adev->gfxhub.funcs->get_xgmi_info(adev);
- if (r)
- return r;
+ if (adev->gfxhub.funcs &&
+ adev->gfxhub.funcs->get_xgmi_info) {
+ r = adev->gfxhub.funcs->get_xgmi_info(adev);
+ if (r)
+ return r;
+ }
+ }
+
+ if (adev->gmc.xgmi.connected_to_cpu) {
+ if (adev->mmhub.funcs &&
+ adev->mmhub.funcs->get_xgmi_info) {
+ r = adev->mmhub.funcs->get_xgmi_info(adev);
+ if (r)
+ return r;
+ }
}
/* enable PCIE atomic ops */
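[Both XGMI hunks above replace unconditional callback invocations with checks on the ops table and the individual hook, so variants that never populate them no longer crash. A generic sketch of guarding optional ops:]

struct example_ops {
	int (*get_info)(void *ctx);	/* optional hook */
};

struct example_dev {
	const struct example_ops *ops;	/* may be NULL on some variants */
	void *ctx;
};

static int example_get_info(struct example_dev *dev)
{
	/* A missing table or hook means "nothing to do", not a crash. */
	if (dev->ops && dev->ops->get_info)
		return dev->ops->get_info(dev->ctx);

	return 0;
}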
@@ -4946,6 +4225,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
if (pci_dev_is_disconnected(adev->pdev))
amdgpu_amdkfd_device_fini_sw(adev);
+ amdgpu_coredump_fini(adev);
amdgpu_device_ip_fini_early(adev);
amdgpu_irq_fini_hw(adev);
@@ -5216,8 +4496,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
return 0;
unwind_evict:
- if (adev->mman.buffer_funcs_ring->sched.ready)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
amdgpu_fence_driver_hw_init(adev);
unwind_userq:
@@ -5951,8 +5230,7 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
if (r)
goto out;
- if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
- amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
+ amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
r = amdgpu_device_ip_resume_phase3(tmp_adev);
if (r)
@@ -6330,7 +5608,7 @@ static void amdgpu_device_halt_activities(struct amdgpu_device *adev,
if (!amdgpu_ring_sched_ready(ring))
continue;
- drm_sched_stop(&ring->sched, job ? &job->base : NULL);
+ drm_sched_wqueue_stop(&ring->sched);
if (need_emergency_restart)
amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
@@ -6414,7 +5692,7 @@ static int amdgpu_device_sched_resume(struct list_head *device_list,
if (!amdgpu_ring_sched_ready(ring))
continue;
- drm_sched_start(&ring->sched, 0);
+ drm_sched_wqueue_start(&ring->sched);
}
if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
@@ -7397,39 +6675,6 @@ void amdgpu_device_halt(struct amdgpu_device *adev)
pci_wait_for_pending_transaction(pdev);
}
-u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
- u32 reg)
-{
- unsigned long flags, address, data;
- u32 r;
-
- address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
- data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
-
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
- WREG32(address, reg * 4);
- (void)RREG32(address);
- r = RREG32(data);
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
- return r;
-}
-
-void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
- u32 reg, u32 v)
-{
- unsigned long flags, address, data;
-
- address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
- data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
-
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
- WREG32(address, reg * 4);
- (void)RREG32(address);
- WREG32(data, v);
- (void)RREG32(data);
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
-}
-
/**
* amdgpu_device_get_gang - return a reference to the current gang
* @adev: amdgpu_device pointer
@@ -7612,36 +6857,6 @@ bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
}
}
-uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
- uint32_t inst, uint32_t reg_addr, char reg_name[],
- uint32_t expected_value, uint32_t mask)
-{
- uint32_t ret = 0;
- uint32_t old_ = 0;
- uint32_t tmp_ = RREG32(reg_addr);
- uint32_t loop = adev->usec_timeout;
-
- while ((tmp_ & (mask)) != (expected_value)) {
- if (old_ != tmp_) {
- loop = adev->usec_timeout;
- old_ = tmp_;
- } else
- udelay(1);
- tmp_ = RREG32(reg_addr);
- loop--;
- if (!loop) {
- dev_warn(
- adev->dev,
- "Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn",
- inst, reg_name, (uint32_t)expected_value,
- (uint32_t)(tmp_ & (mask)));
- ret = -ETIMEDOUT;
- break;
- }
- }
- return ret;
-}
-
ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring)
{
ssize_t size = 0;
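[Every indirect accessor deleted above is a variation on one index/data sequence: take an IRQ-safe spinlock, write the target address to the index register, read the index back to post the write, then touch the data register. A generic sketch of that core — hypothetical offsets, not amdgpu's:]

#include <linux/io.h>
#include <linux/spinlock.h>

#define EX_INDEX	0x38		/* hypothetical index register */
#define EX_DATA		0x3c		/* hypothetical data register */

static DEFINE_SPINLOCK(ex_idx_lock);

static u32 example_indirect_rreg(void __iomem *mmio, u32 reg_addr)
{
	unsigned long flags;
	u32 val;

	spin_lock_irqsave(&ex_idx_lock, flags);
	writel(reg_addr, mmio + EX_INDEX);
	readl(mmio + EX_INDEX);		/* read back to post the write */
	val = readl(mmio + EX_DATA);
	spin_unlock_irqrestore(&ex_idx_lock, flags);

	return val;
}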
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index af3d2fd61cf3..8ec5465c3349 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -87,6 +87,7 @@
#include "sdma_v7_1.h"
#include "lsdma_v6_0.h"
#include "lsdma_v7_0.h"
+#include "lsdma_v7_1.h"
#include "vcn_v2_0.h"
#include "jpeg_v2_0.h"
#include "vcn_v3_0.h"
@@ -111,8 +112,10 @@
#include "smuio_v15_0_8.h"
#include "vcn_v5_0_0.h"
#include "vcn_v5_0_1.h"
+#include "vcn_v5_0_2.h"
#include "jpeg_v5_0_0.h"
#include "jpeg_v5_0_1.h"
+#include "jpeg_v5_0_2.h"
#include "jpeg_v5_3_0.h"
#include "amdgpu_ras_mgr.h"
@@ -132,6 +135,7 @@ MODULE_FIRMWARE("amdgpu/picasso_ip_discovery.bin");
MODULE_FIRMWARE("amdgpu/arcturus_ip_discovery.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_ip_discovery.bin");
+/* Note: These registers are consistent across all the SoCs */
#define mmIP_DISCOVERY_VERSION 0x16A00
#define mmRCC_CONFIG_MEMSIZE 0xde3
#define mmMP0_SMN_C2PMSG_33 0x16061
@@ -139,6 +143,10 @@ MODULE_FIRMWARE("amdgpu/aldebaran_ip_discovery.bin");
#define mmMM_INDEX_HI 0x6
#define mmMM_DATA 0x1
+#define mmDRIVER_SCRATCH_0 0x94
+#define mmDRIVER_SCRATCH_1 0x95
+#define mmDRIVER_SCRATCH_2 0x96
+
static const char *hw_id_names[HW_ID_MAX] = {
[MP1_HWID] = "MP1",
[MP2_HWID] = "MP2",
@@ -253,39 +261,12 @@ static int hw_id_map[MAX_HWIP] = {
[ATU_HWIP] = ATU_HWID,
};
-static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, uint8_t *binary)
-{
- u64 tmr_offset, tmr_size, pos;
- void *discv_regn;
- int ret;
-
- ret = amdgpu_acpi_get_tmr_info(adev, &tmr_offset, &tmr_size);
- if (ret)
- return ret;
-
- pos = tmr_offset + tmr_size - DISCOVERY_TMR_OFFSET;
-
- /* This region is read-only and reserved from system use */
- discv_regn = memremap(pos, adev->discovery.size, MEMREMAP_WC);
- if (discv_regn) {
- memcpy(binary, discv_regn, adev->discovery.size);
- memunmap(discv_regn);
- return 0;
- }
-
- return -ENOENT;
-}
-
-#define IP_DISCOVERY_V2 2
-#define IP_DISCOVERY_V4 4
-
-static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
- uint8_t *binary)
+static int amdgpu_discovery_get_tmr_info(struct amdgpu_device *adev,
+ bool *is_tmr_in_sysmem)
{
- bool sz_valid = true;
- uint64_t vram_size;
- int i, ret = 0;
- u32 msg;
+ u64 vram_size, tmr_offset, tmr_size;
+ u32 msg, tmr_offset_lo, tmr_offset_hi;
+ int i, ret;
if (!amdgpu_sriov_vf(adev)) {
/* It can take up to two second for IFWI init to complete on some dGPUs,
@@ -305,51 +286,102 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
}
vram_size = RREG32(mmRCC_CONFIG_MEMSIZE);
- if (!vram_size || vram_size == U32_MAX)
- sz_valid = false;
+ if (vram_size == U32_MAX)
+ return -ENXIO;
+ else if (!vram_size)
+ *is_tmr_in_sysmem = true;
else
- vram_size <<= 20;
+ *is_tmr_in_sysmem = false;
- /*
- * If in VRAM, discovery TMR is marked for reservation. If it is in system mem,
- * then it is not required to be reserved.
- */
- if (sz_valid) {
- if (amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled) {
- /* For SRIOV VFs with dynamic critical region enabled,
- * we will get the IPD binary via below call.
- * If dynamic critical is disabled, fall through to normal seq.
- */
- if (amdgpu_virt_get_dynamic_data_info(adev,
- AMD_SRIOV_MSG_IPD_TABLE_ID, binary,
- &adev->discovery.size)) {
- dev_err(adev->dev,
- "failed to read discovery info from dynamic critical region.");
- ret = -EINVAL;
- goto exit;
- }
+ /* init the default tmr size and offset */
+ adev->discovery.size = DISCOVERY_TMR_SIZE;
+ if (vram_size)
+ adev->discovery.offset = (vram_size << 20) - DISCOVERY_TMR_OFFSET;
+
+ if (amdgpu_sriov_vf(adev)) {
+ if (adev->virt.is_dynamic_crit_regn_enabled) {
+ adev->discovery.offset =
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].offset;
+ adev->discovery.size =
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb << 10;
+ if (!adev->discovery.offset || !adev->discovery.size)
+ return -EINVAL;
} else {
- uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
+ goto out;
+ }
+ } else {
+ tmr_size = RREG32(mmDRIVER_SCRATCH_2);
+ if (tmr_size) {
+ /* It's preferred to transition to the PSP mailbox register
+ * interface for both bare-metal and passthrough if available.
+ */
+ adev->discovery.size = (u32)tmr_size;
+ tmr_offset_lo = RREG32(mmDRIVER_SCRATCH_0);
+ tmr_offset_hi = RREG32(mmDRIVER_SCRATCH_1);
+ adev->discovery.offset = ((u64)le32_to_cpu(tmr_offset_hi) << 32 |
+ le32_to_cpu(tmr_offset_lo));
+ } else if (!vram_size) {
+ /* fall back to the ACPI approach to query the TMR offset if vram_size is 0 */
+ ret = amdgpu_acpi_get_tmr_info(adev, &tmr_offset, &tmr_size);
+ if (ret)
+ return ret;
+ adev->discovery.size = DISCOVERY_TMR_SIZE;
+ adev->discovery.offset = tmr_offset + tmr_size - DISCOVERY_TMR_OFFSET;
+ }
+ }
+out:
+ adev->discovery.bin = kzalloc(adev->discovery.size, GFP_KERNEL);
+ if (!adev->discovery.bin)
+ return -ENOMEM;
+ adev->discovery.debugfs_blob.data = adev->discovery.bin;
+ adev->discovery.debugfs_blob.size = adev->discovery.size;
- amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
- adev->discovery.size, false);
+ return 0;
+}
+
+static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, uint8_t *binary)
+{
+ void *discv_regn;
+
+ /* This region is read-only and reserved from system use */
+ discv_regn = memremap(adev->discovery.offset, adev->discovery.size, MEMREMAP_WC);
+ if (discv_regn) {
+ memcpy(binary, discv_regn, adev->discovery.size);
+ memunmap(discv_regn);
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+#define IP_DISCOVERY_V2 2
+#define IP_DISCOVERY_V4 4
+
+static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
+ uint8_t *binary,
+ bool is_tmr_in_sysmem)
+{
+ int ret = 0;
+
+ if (!is_tmr_in_sysmem) {
+ if (amdgpu_sriov_vf(adev) &&
+ amdgpu_sriov_xgmi_connected_to_cpu(adev)) {
+ ret = amdgpu_discovery_read_binary_from_sysmem(adev, binary);
+ } else {
+ amdgpu_device_vram_access(adev, adev->discovery.offset,
+ (uint32_t *)binary,
+ adev->discovery.size, false);
adev->discovery.reserve_tmr = true;
}
} else {
ret = amdgpu_discovery_read_binary_from_sysmem(adev, binary);
}
- if (ret)
- dev_err(adev->dev,
- "failed to read discovery info from memory, vram size read: %llx",
- vram_size);
-exit:
return ret;
}
static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev,
- uint8_t *binary,
- const char *fw_name)
+ uint8_t *binary,
+ const char *fw_name)
{
const struct firmware *fw;
int r;
@@ -431,14 +463,12 @@ static void amdgpu_discovery_harvest_config_quirk(struct amdgpu_device *adev)
}
static int amdgpu_discovery_verify_npsinfo(struct amdgpu_device *adev,
- struct binary_header *bhdr)
+ struct table_info *info)
{
uint8_t *discovery_bin = adev->discovery.bin;
- struct table_info *info;
uint16_t checksum;
uint16_t offset;
- info = &bhdr->table_list[NPS_INFO];
offset = le16_to_cpu(info->offset);
checksum = le16_to_cpu(info->checksum);
@@ -491,23 +521,130 @@ static const char *amdgpu_discovery_get_fw_name(struct amdgpu_device *adev)
}
}
-static int amdgpu_discovery_init(struct amdgpu_device *adev)
+static int amdgpu_discovery_get_table_info(struct amdgpu_device *adev,
+ struct table_info **info,
+ uint16_t table_id)
{
+ struct binary_header *bhdr =
+ (struct binary_header *)adev->discovery.bin;
+ struct binary_header_v2 *bhdrv2;
+
+ switch (bhdr->version_major) {
+ case 2:
+ bhdrv2 = (struct binary_header_v2 *)adev->discovery.bin;
+ *info = &bhdrv2->table_list[table_id];
+ break;
+ case 1:
+ *info = &bhdr->table_list[table_id];
+ break;
+ default:
+ dev_err(adev->dev, "Invalid ip discovery table version\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int amdgpu_discovery_table_check(struct amdgpu_device *adev,
+ uint8_t *discovery_bin,
+ uint16_t table_id)
+{
+ int r, act_val, exp_val, table_size;
+ uint16_t offset, checksum;
struct table_info *info;
+ bool check_table = true;
+ char *table_name;
+
+ r = amdgpu_discovery_get_table_info(adev, &info, table_id);
+ if (r)
+ return r;
+ offset = le16_to_cpu(info->offset);
+ checksum = le16_to_cpu(info->checksum);
+
+ switch (table_id) {
+ case IP_DISCOVERY: {
+ struct ip_discovery_header *ihdr =
+ (struct ip_discovery_header *)(discovery_bin + offset);
+ act_val = le32_to_cpu(ihdr->signature);
+ exp_val = DISCOVERY_TABLE_SIGNATURE;
+ table_size = le16_to_cpu(ihdr->size);
+ table_name = "data table";
+ break;
+ }
+ case GC: {
+ struct gpu_info_header *ghdr =
+ (struct gpu_info_header *)(discovery_bin + offset);
+ act_val = le32_to_cpu(ghdr->table_id);
+ exp_val = GC_TABLE_ID;
+ table_size = le16_to_cpu(ghdr->size);
+ table_name = "gc table";
+ break;
+ }
+ case HARVEST_INFO: {
+ struct harvest_info_header *hhdr =
+ (struct harvest_info_header *)(discovery_bin + offset);
+ act_val = le32_to_cpu(hhdr->signature);
+ exp_val = HARVEST_TABLE_SIGNATURE;
+ table_size = sizeof(struct harvest_table);
+ table_name = "harvest table";
+ break;
+ }
+ case VCN_INFO: {
+ struct vcn_info_header *vhdr =
+ (struct vcn_info_header *)(discovery_bin + offset);
+ act_val = le32_to_cpu(vhdr->table_id);
+ exp_val = VCN_INFO_TABLE_ID;
+ table_size = le32_to_cpu(vhdr->size_bytes);
+ table_name = "vcn table";
+ break;
+ }
+ case MALL_INFO: {
+ struct mall_info_header *mhdr =
+ (struct mall_info_header *)(discovery_bin + offset);
+ act_val = le32_to_cpu(mhdr->table_id);
+ exp_val = MALL_INFO_TABLE_ID;
+ table_size = le32_to_cpu(mhdr->size_bytes);
+ table_name = "mall table";
+ check_table = false;
+ break;
+ }
+ default:
+ dev_err(adev->dev, "invalid ip discovery table id %d specified\n", table_id);
+ check_table = false;
+ break;
+ }
+
+ if (check_table && offset) {
+ if (act_val != exp_val) {
+ dev_err(adev->dev, "invalid ip discovery %s signature\n", table_name);
+ return -EINVAL;
+ }
+
+ if (!amdgpu_discovery_verify_checksum(adev, discovery_bin + offset,
+ table_size, checksum)) {
+ dev_err(adev->dev, "invalid ip discovery %s checksum\n", table_name);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int amdgpu_discovery_init(struct amdgpu_device *adev)
+{
struct binary_header *bhdr;
uint8_t *discovery_bin;
const char *fw_name;
uint16_t offset;
uint16_t size;
uint16_t checksum;
+ uint16_t table_id;
+ bool is_tmr_in_sysmem;
int r;
- adev->discovery.bin = kzalloc(DISCOVERY_TMR_SIZE, GFP_KERNEL);
- if (!adev->discovery.bin)
- return -ENOMEM;
- adev->discovery.size = DISCOVERY_TMR_SIZE;
- adev->discovery.debugfs_blob.data = adev->discovery.bin;
- adev->discovery.debugfs_blob.size = adev->discovery.size;
+ r = amdgpu_discovery_get_tmr_info(adev, &is_tmr_in_sysmem);
+ if (r)
+ return r;
discovery_bin = adev->discovery.bin;
/* Read from file if it is the preferred option */
@@ -520,7 +657,8 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
goto out;
} else {
drm_dbg(&adev->ddev, "use ip discovery information from memory");
- r = amdgpu_discovery_read_binary_from_mem(adev, discovery_bin);
+ r = amdgpu_discovery_read_binary_from_mem(adev, discovery_bin,
+ is_tmr_in_sysmem);
if (r)
goto out;
}
@@ -547,118 +685,10 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
goto out;
}
- info = &bhdr->table_list[IP_DISCOVERY];
- offset = le16_to_cpu(info->offset);
- checksum = le16_to_cpu(info->checksum);
-
- if (offset) {
- struct ip_discovery_header *ihdr =
- (struct ip_discovery_header *)(discovery_bin + offset);
- if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) {
- dev_err(adev->dev, "invalid ip discovery data table signature\n");
- r = -EINVAL;
- goto out;
- }
-
- if (!amdgpu_discovery_verify_checksum(adev, discovery_bin + offset,
- le16_to_cpu(ihdr->size),
- checksum)) {
- dev_err(adev->dev, "invalid ip discovery data table checksum\n");
- r = -EINVAL;
- goto out;
- }
- }
-
- info = &bhdr->table_list[GC];
- offset = le16_to_cpu(info->offset);
- checksum = le16_to_cpu(info->checksum);
-
- if (offset) {
- struct gpu_info_header *ghdr =
- (struct gpu_info_header *)(discovery_bin + offset);
-
- if (le32_to_cpu(ghdr->table_id) != GC_TABLE_ID) {
- dev_err(adev->dev, "invalid ip discovery gc table id\n");
- r = -EINVAL;
- goto out;
- }
-
- if (!amdgpu_discovery_verify_checksum(adev, discovery_bin + offset,
- le32_to_cpu(ghdr->size),
- checksum)) {
- dev_err(adev->dev, "invalid gc data table checksum\n");
- r = -EINVAL;
- goto out;
- }
- }
-
- info = &bhdr->table_list[HARVEST_INFO];
- offset = le16_to_cpu(info->offset);
- checksum = le16_to_cpu(info->checksum);
-
- if (offset) {
- struct harvest_info_header *hhdr =
- (struct harvest_info_header *)(discovery_bin + offset);
-
- if (le32_to_cpu(hhdr->signature) != HARVEST_TABLE_SIGNATURE) {
- dev_err(adev->dev, "invalid ip discovery harvest table signature\n");
- r = -EINVAL;
- goto out;
- }
-
- if (!amdgpu_discovery_verify_checksum(adev,
- discovery_bin + offset,
- sizeof(struct harvest_table), checksum)) {
- dev_err(adev->dev, "invalid harvest data table checksum\n");
- r = -EINVAL;
- goto out;
- }
- }
-
- info = &bhdr->table_list[VCN_INFO];
- offset = le16_to_cpu(info->offset);
- checksum = le16_to_cpu(info->checksum);
-
- if (offset) {
- struct vcn_info_header *vhdr =
- (struct vcn_info_header *)(discovery_bin + offset);
-
- if (le32_to_cpu(vhdr->table_id) != VCN_INFO_TABLE_ID) {
- dev_err(adev->dev, "invalid ip discovery vcn table id\n");
- r = -EINVAL;
- goto out;
- }
-
- if (!amdgpu_discovery_verify_checksum(adev,
- discovery_bin + offset,
- le32_to_cpu(vhdr->size_bytes), checksum)) {
- dev_err(adev->dev, "invalid vcn data table checksum\n");
- r = -EINVAL;
- goto out;
- }
- }
-
- info = &bhdr->table_list[MALL_INFO];
- offset = le16_to_cpu(info->offset);
- checksum = le16_to_cpu(info->checksum);
-
- if (0 && offset) {
- struct mall_info_header *mhdr =
- (struct mall_info_header *)(discovery_bin + offset);
-
- if (le32_to_cpu(mhdr->table_id) != MALL_INFO_TABLE_ID) {
- dev_err(adev->dev, "invalid ip discovery mall table id\n");
- r = -EINVAL;
- goto out;
- }
-
- if (!amdgpu_discovery_verify_checksum(adev,
- discovery_bin + offset,
- le32_to_cpu(mhdr->size_bytes), checksum)) {
- dev_err(adev->dev, "invalid mall data table checksum\n");
- r = -EINVAL;
+ for (table_id = 0; table_id <= MALL_INFO; table_id++) {
+ r = amdgpu_discovery_table_check(adev, discovery_bin, table_id);
+ if (r)
goto out;
- }
}
return 0;
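[The loop above collapses five nearly identical validation blocks: each case of amdgpu_discovery_table_check() supplies an expected signature, a size, and a printable name, and one shared tail does the comparison and the checksum. A condensed sketch of that table-driven shape — hypothetical ids and values:]

#include <stdint.h>
#include <stdio.h>

struct table_check {
	const char *name;
	uint32_t exp_sig;
	uint32_t act_sig;	/* would come from the parsed header */
};

static int check_tables(const struct table_check *t, size_t n)
{
	for (size_t i = 0; i < n; i++) {
		if (t[i].act_sig != t[i].exp_sig) {
			fprintf(stderr, "invalid %s signature\n", t[i].name);
			return -1;	/* -EINVAL in the kernel version */
		}
	}
	return 0;
}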
@@ -770,14 +800,15 @@ static void amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev,
uint32_t *umc_harvest_count)
{
uint8_t *discovery_bin = adev->discovery.bin;
- struct binary_header *bhdr;
+ struct table_info *info;
struct harvest_table *harvest_info;
u16 offset;
int i;
- uint32_t umc_harvest_config = 0;
+ u64 umc_harvest_config = 0;
- bhdr = (struct binary_header *)discovery_bin;
- offset = le16_to_cpu(bhdr->table_list[HARVEST_INFO].offset);
+ if (amdgpu_discovery_get_table_info(adev, &info, HARVEST_INFO))
+ return;
+ offset = le16_to_cpu(info->offset);
if (!offset) {
dev_err(adev->dev, "invalid harvest table offset\n");
@@ -830,7 +861,7 @@ static void amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev,
}
}
- adev->umc.active_mask = ((1 << adev->umc.node_inst_num) - 1) &
+ adev->umc.active_mask = ((1ULL << adev->umc.node_inst_num) - 1ULL) &
~umc_harvest_config;
}
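The 1ULL change above matters once node_inst_num can reach 32: shifting a plain int by 32 or more is undefined behavior in C, so the old mask computation broke on parts with that many UMC node instances. A standalone illustration with a hypothetical instance count:

#include <stdio.h>

int main(void)
{
	unsigned int node_inst_num = 32;	/* assumed large-node part */

	/* old: (1 << 32) - 1 with a 32-bit int is undefined behavior */

	/* new: 64-bit arithmetic keeps all the bits */
	unsigned long long mask = (1ULL << node_inst_num) - 1ULL;

	printf("active mask: 0x%llx\n", mask);	/* prints 0xffffffff */
	return 0;
}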
@@ -1195,13 +1226,8 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev,
ip_hw_instance->num_instance);
ip_hw_instance->num_base_addresses = ip->num_base_address;
- for (kk = 0; kk < ip_hw_instance->num_base_addresses; kk++) {
- if (reg_base_64)
- ip_hw_instance->base_addr[kk] =
- lower_32_bits(le64_to_cpu(ip->base_address_64[kk])) & 0x3FFFFFFF;
- else
- ip_hw_instance->base_addr[kk] = ip->base_address[kk];
- }
+ for (kk = 0; kk < ip_hw_instance->num_base_addresses; kk++)
+ ip_hw_instance->base_addr[kk] = ip->base_address[kk];
kobject_init(&ip_hw_instance->kobj, &ip_hw_instance_ktype);
ip_hw_instance->kobj.kset = &ip_hw_id->hw_id_kset;
@@ -1224,7 +1250,7 @@ static int amdgpu_discovery_sysfs_recurse(struct amdgpu_device *adev)
{
struct ip_discovery_top *ip_top = adev->discovery.ip_top;
uint8_t *discovery_bin = adev->discovery.bin;
- struct binary_header *bhdr;
+ struct table_info *info;
struct ip_discovery_header *ihdr;
struct die_header *dhdr;
struct kset *die_kset = &ip_top->die_kset;
@@ -1232,10 +1258,12 @@ static int amdgpu_discovery_sysfs_recurse(struct amdgpu_device *adev)
size_t ip_offset;
int ii, res;
- bhdr = (struct binary_header *)discovery_bin;
+ res = amdgpu_discovery_get_table_info(adev, &info, IP_DISCOVERY);
+ if (res)
+ return res;
ihdr = (struct ip_discovery_header
*)(discovery_bin +
- le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
+ le16_to_cpu(info->offset));
num_dies = le16_to_cpu(ihdr->num_dies);
DRM_DEBUG("number of dies: %d\n", num_dies);
@@ -1366,6 +1394,9 @@ static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev)
struct list_head *el, *tmp;
struct kset *die_kset;
+ if (!ip_top)
+ return;
+
die_kset = &ip_top->die_kset;
spin_lock(&die_kset->list_lock);
list_for_each_prev_safe(el, tmp, &die_kset->list) {
@@ -1379,12 +1410,58 @@ static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev)
kobject_put(&ip_top->kobj);
}
+/* devcoredump support */
+void amdgpu_discovery_dump(struct amdgpu_device *adev, struct drm_printer *p)
+{
+ struct ip_discovery_top *ip_top = adev->discovery.ip_top;
+ struct ip_die_entry *ip_die_entry;
+ struct list_head *el_die, *el_hw_id, *el_hw_inst;
+ struct ip_hw_id *hw_id;
+ struct kset *die_kset;
+ struct ip_hw_instance *ip_inst;
+ int i = 0, j;
+
+ if (!ip_top)
+ return;
+
+ die_kset = &ip_top->die_kset;
+
+ drm_printf(p, "\nHW IP Discovery\n");
+
+ spin_lock(&die_kset->list_lock);
+ list_for_each(el_die, &die_kset->list) {
+ drm_printf(p, "die %d\n", i++);
+ ip_die_entry = to_ip_die_entry(list_to_kobj(el_die));
+
+ list_for_each(el_hw_id, &ip_die_entry->ip_kset.list) {
+ hw_id = to_ip_hw_id(list_to_kobj(el_hw_id));
+ drm_printf(p, "hw_id %d %s\n", hw_id->hw_id, hw_id_names[hw_id->hw_id]);
+
+ list_for_each(el_hw_inst, &hw_id->hw_id_kset.list) {
+ ip_inst = to_ip_hw_instance(list_to_kobj(el_hw_inst));
+ drm_printf(p, "\tinstance %d\n", ip_inst->num_instance);
+ drm_printf(p, "\tmajor %d\n", ip_inst->major);
+ drm_printf(p, "\tminor %d\n", ip_inst->minor);
+ drm_printf(p, "\trevision %d\n", ip_inst->revision);
+ drm_printf(p, "\tharvest 0x%01X\n", ip_inst->harvest);
+ drm_printf(p, "\tnum_base_addresses %d\n",
+ ip_inst->num_base_addresses);
+ for (j = 0; j < ip_inst->num_base_addresses; j++)
+ drm_printf(p, "\tbase_addr[%d] 0x%08X\n",
+ j, ip_inst->base_addr[j]);
+ }
+ }
+ }
+ spin_unlock(&die_kset->list_lock);
+}
+
+
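Because amdgpu_discovery_dump() only needs a struct drm_printer, the same walker can feed a devcoredump buffer, the kernel log, or a seq_file. A minimal usage sketch assuming an initialized adev (the wrapper function name is illustrative):

#include <drm/drm_print.h>

/* Sketch: emit the IP discovery hierarchy into the kernel log. */
static void example_dump_discovery(struct amdgpu_device *adev)
{
	struct drm_printer p = drm_info_printer(adev->dev);

	amdgpu_discovery_dump(adev, &p);
}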
/* ================================================== */
static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
{
uint8_t num_base_address, subrev, variant;
- struct binary_header *bhdr;
+ struct table_info *info;
struct ip_discovery_header *ihdr;
struct die_header *dhdr;
uint8_t *discovery_bin;
@@ -1409,10 +1486,12 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
adev->sdma.sdma_mask = 0;
adev->vcn.inst_mask = 0;
adev->jpeg.inst_mask = 0;
- bhdr = (struct binary_header *)discovery_bin;
+ r = amdgpu_discovery_get_table_info(adev, &info, IP_DISCOVERY);
+ if (r)
+ return r;
ihdr = (struct ip_discovery_header
*)(discovery_bin +
- le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
+ le16_to_cpu(info->offset));
num_dies = le16_to_cpu(ihdr->num_dies);
DRM_DEBUG("number of dies: %d\n", num_dies);
@@ -1585,14 +1664,15 @@ static void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
{
uint8_t *discovery_bin = adev->discovery.bin;
struct ip_discovery_header *ihdr;
- struct binary_header *bhdr;
+ struct table_info *info;
int vcn_harvest_count = 0;
int umc_harvest_count = 0;
- uint16_t offset, ihdr_ver;
+ uint16_t ihdr_ver;
- bhdr = (struct binary_header *)discovery_bin;
- offset = le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset);
- ihdr = (struct ip_discovery_header *)(discovery_bin + offset);
+ if (amdgpu_discovery_get_table_info(adev, &info, IP_DISCOVERY))
+ return;
+ ihdr = (struct ip_discovery_header *)(discovery_bin +
+ le16_to_cpu(info->offset));
ihdr_ver = le16_to_cpu(ihdr->version);
/*
* Harvest table does not fit Navi1x and legacy GPUs,
@@ -1640,7 +1720,7 @@ union gc_info {
static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
{
uint8_t *discovery_bin = adev->discovery.bin;
- struct binary_header *bhdr;
+ struct table_info *info;
union gc_info *gc_info;
u16 offset;
@@ -1649,8 +1729,9 @@ static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
return -EINVAL;
}
- bhdr = (struct binary_header *)discovery_bin;
- offset = le16_to_cpu(bhdr->table_list[GC].offset);
+ if (amdgpu_discovery_get_table_info(adev, &info, GC))
+ return -EINVAL;
+ offset = le16_to_cpu(info->offset);
if (!offset)
return 0;
@@ -1749,7 +1830,7 @@ union mall_info {
static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev)
{
uint8_t *discovery_bin = adev->discovery.bin;
- struct binary_header *bhdr;
+ struct table_info *info;
union mall_info *mall_info;
u32 u, mall_size_per_umc, m_s_present, half_use;
u64 mall_size;
@@ -1760,8 +1841,9 @@ static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev)
return -EINVAL;
}
- bhdr = (struct binary_header *)discovery_bin;
- offset = le16_to_cpu(bhdr->table_list[MALL_INFO].offset);
+ if (amdgpu_discovery_get_table_info(adev, &info, MALL_INFO))
+ return -EINVAL;
+ offset = le16_to_cpu(info->offset);
if (!offset)
return 0;
@@ -1806,7 +1888,7 @@ union vcn_info {
static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev)
{
uint8_t *discovery_bin = adev->discovery.bin;
- struct binary_header *bhdr;
+ struct table_info *info;
union vcn_info *vcn_info;
u16 offset;
int v;
@@ -1826,8 +1908,9 @@ static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev)
return -EINVAL;
}
- bhdr = (struct binary_header *)discovery_bin;
- offset = le16_to_cpu(bhdr->table_list[VCN_INFO].offset);
+ if (amdgpu_discovery_get_table_info(adev, &info, VCN_INFO))
+ return -EINVAL;
+ offset = le16_to_cpu(info->offset);
if (!offset)
return 0;
@@ -1864,14 +1947,26 @@ static int amdgpu_discovery_refresh_nps_info(struct amdgpu_device *adev,
uint64_t vram_size, pos, offset;
struct nps_info_header *nhdr;
struct binary_header bhdr;
+ struct binary_header_v2 bhdrv2;
uint16_t checksum;
vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
pos = vram_size - DISCOVERY_TMR_OFFSET;
amdgpu_device_vram_access(adev, pos, &bhdr, sizeof(bhdr), false);
- offset = le16_to_cpu(bhdr.table_list[NPS_INFO].offset);
- checksum = le16_to_cpu(bhdr.table_list[NPS_INFO].checksum);
+ switch (bhdr.version_major) {
+ case 2:
+ amdgpu_device_vram_access(adev, pos, &bhdrv2, sizeof(bhdrv2), false);
+ offset = le16_to_cpu(bhdrv2.table_list[NPS_INFO].offset);
+ checksum = le16_to_cpu(bhdrv2.table_list[NPS_INFO].checksum);
+ break;
+ case 1:
+ offset = le16_to_cpu(bhdr.table_list[NPS_INFO].offset);
+ checksum = le16_to_cpu(bhdr.table_list[NPS_INFO].checksum);
+ break;
+ default:
+ return -EINVAL;
+ }
amdgpu_device_vram_access(adev, (pos + offset), nps_data,
sizeof(*nps_data), false);
@@ -1889,12 +1984,11 @@ static int amdgpu_discovery_refresh_nps_info(struct amdgpu_device *adev,
int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev,
uint32_t *nps_type,
- struct amdgpu_gmc_memrange **ranges,
+ struct amdgpu_gmc_memrange *ranges,
int *range_cnt, bool refresh)
{
uint8_t *discovery_bin = adev->discovery.bin;
- struct amdgpu_gmc_memrange *mem_ranges;
- struct binary_header *bhdr;
+ struct table_info *info;
union nps_info *nps_info;
union nps_info nps_data;
u16 offset;
@@ -1915,14 +2009,15 @@ int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev,
return -EINVAL;
}
- bhdr = (struct binary_header *)discovery_bin;
- offset = le16_to_cpu(bhdr->table_list[NPS_INFO].offset);
+ if (amdgpu_discovery_get_table_info(adev, &info, NPS_INFO))
+ return -EINVAL;
+ offset = le16_to_cpu(info->offset);
if (!offset)
return -ENOENT;
/* If verification fails, return as if NPS table doesn't exist */
- if (amdgpu_discovery_verify_npsinfo(adev, bhdr))
+ if (amdgpu_discovery_verify_npsinfo(adev, info))
return -ENOENT;
nps_info = (union nps_info *)(discovery_bin + offset);
@@ -1930,20 +2025,22 @@ int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev,
switch (le16_to_cpu(nps_info->v1.header.version_major)) {
case 1:
- mem_ranges = kvzalloc_objs(*mem_ranges, nps_info->v1.count);
- if (!mem_ranges)
- return -ENOMEM;
*nps_type = nps_info->v1.nps_type;
+ if (*range_cnt < nps_info->v1.count) {
+ dev_dbg(adev->dev,
+ "not enough space for nps ranges: %d < %d\n",
+ *range_cnt, nps_info->v1.count);
+ return -ENOSPC;
+ }
*range_cnt = nps_info->v1.count;
for (i = 0; i < *range_cnt; i++) {
- mem_ranges[i].base_address =
+ ranges[i].base_address =
nps_info->v1.instance_info[i].base_address;
- mem_ranges[i].limit_address =
+ ranges[i].limit_address =
nps_info->v1.instance_info[i].limit_address;
- mem_ranges[i].nid_mask = -1;
- mem_ranges[i].flags = 0;
+ ranges[i].nid_mask = -1;
+ ranges[i].flags = 0;
}
- *ranges = mem_ranges;
break;
default:
dev_err(adev->dev, "Unhandled NPS info table %d.%d\n",
@@ -2245,6 +2342,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &smu_v14_0_ip_block);
break;
case IP_VERSION(15, 0, 0):
+ case IP_VERSION(15, 0, 8):
amdgpu_device_ip_block_add(adev, &smu_v15_0_ip_block);
break;
default:
@@ -2298,6 +2396,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(3, 5, 1):
case IP_VERSION(3, 6, 0):
case IP_VERSION(4, 1, 0):
+ case IP_VERSION(4, 2, 0):
/* TODO: Fix IP version. DC code expects version 4.0.1 */
if (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(4, 1, 0))
adev->ip_versions[DCE_HWIP][0] = IP_VERSION(4, 0, 1);
@@ -2561,6 +2660,10 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &vcn_v5_0_1_ip_block);
amdgpu_device_ip_block_add(adev, &jpeg_v5_0_1_ip_block);
break;
+ case IP_VERSION(5, 0, 2):
+ amdgpu_device_ip_block_add(adev, &vcn_v5_0_2_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v5_0_2_ip_block);
+ break;
default:
dev_err(adev->dev,
"Failed to add vcn/jpeg ip block(UVD_HWIP:0x%x)\n",
@@ -3225,6 +3328,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(7, 0, 1):
adev->lsdma.funcs = &lsdma_v7_0_funcs;
break;
+ case IP_VERSION(7, 1, 0):
+ adev->lsdma.funcs = &lsdma_v7_1_funcs;
+ break;
default:
break;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
index 4ce04486cc31..0ff1a7923eed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
@@ -30,10 +30,12 @@
#define DISCOVERY_TMR_OFFSET (64 << 10)
struct ip_discovery_top;
+struct drm_printer;
struct amdgpu_discovery_info {
struct debugfs_blob_wrapper debugfs_blob;
struct ip_discovery_top *ip_top;
+ uint64_t offset;
uint32_t size;
uint8_t *bin;
bool reserve_tmr;
@@ -44,7 +46,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev);
int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev,
uint32_t *nps_type,
- struct amdgpu_gmc_memrange **ranges,
+ struct amdgpu_gmc_memrange *ranges,
int *range_cnt, bool refresh);
+void amdgpu_discovery_dump(struct amdgpu_device *adev, struct drm_printer *p);
+
#endif /* __AMDGPU_DISCOVERY__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index bef9dce2e7ea..f5cd68542442 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -1738,21 +1738,6 @@ bool amdgpu_crtc_get_scanout_position(struct drm_crtc *crtc,
stime, etime, mode);
}
-static bool
-amdgpu_display_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj)
-{
- struct drm_device *dev = adev_to_drm(adev);
- struct drm_fb_helper *fb_helper = dev->fb_helper;
-
- if (!fb_helper || !fb_helper->buffer)
- return false;
-
- if (gem_to_amdgpu_bo(fb_helper->buffer->gem) != robj)
- return false;
-
- return true;
-}
-
int amdgpu_display_suspend_helper(struct amdgpu_device *adev)
{
struct drm_device *dev = adev_to_drm(adev);
@@ -1775,7 +1760,6 @@ int amdgpu_display_suspend_helper(struct amdgpu_device *adev)
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct drm_framebuffer *fb = crtc->primary->fb;
- struct amdgpu_bo *robj;
if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
@@ -1790,8 +1774,9 @@ int amdgpu_display_suspend_helper(struct amdgpu_device *adev)
if (!fb || !fb->obj[0])
continue;
- robj = gem_to_amdgpu_bo(fb->obj[0]);
- if (!amdgpu_display_robj_is_fb(adev, robj)) {
+ if (!drm_fb_helper_gem_is_fb(dev->fb_helper, fb->obj[0])) {
+ struct amdgpu_bo *robj = gem_to_amdgpu_bo(fb->obj[0]);
+
r = amdgpu_bo_reserve(robj, true);
if (r == 0) {
amdgpu_bo_unpin(robj);
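drm_fb_helper_gem_is_fb() is introduced elsewhere in this series; judging from the amdgpu helper it replaces, it presumably reduces to the comparison sketched below (an assumption about the helper, not its actual body):

static bool drm_fb_helper_gem_is_fb_sketch(struct drm_fb_helper *fb_helper,
					   struct drm_gem_object *gem)
{
	if (!fb_helper || !fb_helper->buffer)
		return false;

	return fb_helper->buffer->gem == gem;
}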
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 656c267dbe58..b33c300e26e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -134,13 +134,9 @@ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach)
* notifiers are disabled, only allow pinning in VRAM when move
* notifiers are enabled.
*/
- if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) {
- domains &= ~AMDGPU_GEM_DOMAIN_VRAM;
- } else {
- list_for_each_entry(attach, &dmabuf->attachments, node)
- if (!attach->peer2peer)
- domains &= ~AMDGPU_GEM_DOMAIN_VRAM;
- }
+ list_for_each_entry(attach, &dmabuf->attachments, node)
+ if (!attach->peer2peer)
+ domains &= ~AMDGPU_GEM_DOMAIN_VRAM;
if (domains & AMDGPU_GEM_DOMAIN_VRAM)
bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
@@ -456,7 +452,7 @@ error:
}
/**
- * amdgpu_dma_buf_move_notify - &attach.move_notify implementation
+ * amdgpu_dma_buf_move_notify - &attach.invalidate_mappings implementation
*
* @attach: the DMA-buf attachment
*
@@ -534,7 +530,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops = {
.allow_peer2peer = true,
- .move_notify = amdgpu_dma_buf_move_notify
+ .invalidate_mappings = amdgpu_dma_buf_move_notify
};
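For context, these importer ops are the ones handed to dma_buf_dynamic_attach(); only the callback's name changes with the core rename. A minimal sketch, where dmabuf, dev and bo stand in for the importer's real objects:

	struct dma_buf_attachment *attach;

	attach = dma_buf_dynamic_attach(dmabuf, dev,
					&amdgpu_dma_buf_attach_ops, bo);
	if (IS_ERR(attach))
		return PTR_ERR(attach);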
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index c91638e65174..8ed637f92322 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -223,9 +223,7 @@ uint amdgpu_dc_visual_confirm;
int amdgpu_async_gfx_ring = 1;
int amdgpu_mcbp = -1;
int amdgpu_discovery = -1;
-int amdgpu_mes;
int amdgpu_mes_log_enable = 0;
-int amdgpu_mes_kiq;
int amdgpu_uni_mes = 1;
int amdgpu_noretry = -1;
int amdgpu_force_asic_type = -1;
@@ -691,15 +689,6 @@ MODULE_PARM_DESC(discovery,
module_param_named(discovery, amdgpu_discovery, int, 0444);
/**
- * DOC: mes (int)
- * Enable Micro Engine Scheduler. This is a new hw scheduling engine for gfx, sdma, and compute.
- * (0 = disabled (default), 1 = enabled)
- */
-MODULE_PARM_DESC(mes,
- "Enable Micro Engine Scheduler (0 = disabled (default), 1 = enabled)");
-module_param_named(mes, amdgpu_mes, int, 0444);
-
-/**
* DOC: mes_log_enable (int)
* Enable Micro Engine Scheduler log. This is used to enable/disable MES internal log.
* (0 = disabled (default), 1 = enabled)
@@ -709,15 +698,6 @@ MODULE_PARM_DESC(mes_log_enable,
module_param_named(mes_log_enable, amdgpu_mes_log_enable, int, 0444);
/**
- * DOC: mes_kiq (int)
- * Enable Micro Engine Scheduler KIQ. This is a new engine pipe for kiq.
- * (0 = disabled (default), 1 = enabled)
- */
-MODULE_PARM_DESC(mes_kiq,
- "Enable Micro Engine Scheduler KIQ (0 = disabled (default), 1 = enabled)");
-module_param_named(mes_kiq, amdgpu_mes_kiq, int, 0444);
-
-/**
* DOC: uni_mes (int)
* Enable Unified Micro Engine Scheduler. This is a new engine pipe for unified scheduler.
* (0 = disabled (default), 1 = enabled)
@@ -859,8 +839,8 @@ module_param_named_unsafe(no_queue_eviction_on_vm_fault, amdgpu_no_queue_evictio
/**
* DOC: mtype_local (int)
*/
-int amdgpu_mtype_local;
-MODULE_PARM_DESC(mtype_local, "MTYPE for local memory (0 = MTYPE_RW (default), 1 = MTYPE_NC, 2 = MTYPE_CC)");
+int amdgpu_mtype_local = -1;
+MODULE_PARM_DESC(mtype_local, "MTYPE for local memory (default: ASIC dependent, 0 = MTYPE_RW, 1 = MTYPE_NC, 2 = MTYPE_CC)");
module_param_named_unsafe(mtype_local, amdgpu_mtype_local, int, 0444);
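With the default moved to -1, the driver now selects an ASIC-dependent MTYPE on its own; a specific type can still be forced at module load, for example with amdgpu.mtype_local=2 on the kernel command line to request MTYPE_CC.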
/**
@@ -2976,9 +2956,11 @@ static int amdgpu_drm_release(struct inode *inode, struct file *filp)
int idx;
if (fpriv && drm_dev_enter(dev, &idx)) {
- fpriv->evf_mgr.fd_closing = true;
- amdgpu_eviction_fence_destroy(&fpriv->evf_mgr);
+ amdgpu_evf_mgr_shutdown(&fpriv->evf_mgr);
+ amdgpu_userq_mgr_cancel_resume(&fpriv->userq_mgr);
+ amdgpu_evf_mgr_flush_suspend(&fpriv->evf_mgr);
amdgpu_userq_mgr_fini(&fpriv->userq_mgr);
+ amdgpu_evf_mgr_fini(&fpriv->evf_mgr);
drm_dev_exit(idx);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
index 3b588c7740ec..4c5e38dea4c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
@@ -25,9 +25,6 @@
#include <drm/drm_exec.h>
#include "amdgpu.h"
-#define work_to_evf_mgr(w, name) container_of(w, struct amdgpu_eviction_fence_mgr, name)
-#define evf_mgr_to_fpriv(e) container_of(e, struct amdgpu_fpriv, evf_mgr)
-
static const char *
amdgpu_eviction_fence_get_driver_name(struct dma_fence *fence)
{
@@ -43,127 +40,93 @@ amdgpu_eviction_fence_get_timeline_name(struct dma_fence *f)
return ef->timeline_name;
}
-int
-amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
- struct drm_exec *exec)
+static bool amdgpu_eviction_fence_enable_signaling(struct dma_fence *f)
{
- struct amdgpu_eviction_fence *old_ef, *new_ef;
- struct drm_gem_object *obj;
- unsigned long index;
- int ret;
-
- if (evf_mgr->ev_fence &&
- !dma_fence_is_signaled(&evf_mgr->ev_fence->base))
- return 0;
- /*
- * Steps to replace eviction fence:
- * * lock all objects in exec (caller)
- * * create a new eviction fence
- * * update new eviction fence in evf_mgr
- * * attach the new eviction fence to BOs
- * * release the old fence
- * * unlock the objects (caller)
- */
- new_ef = amdgpu_eviction_fence_create(evf_mgr);
- if (!new_ef) {
- DRM_ERROR("Failed to create new eviction fence\n");
- return -ENOMEM;
- }
-
- /* Update the eviction fence now */
- spin_lock(&evf_mgr->ev_fence_lock);
- old_ef = evf_mgr->ev_fence;
- evf_mgr->ev_fence = new_ef;
- spin_unlock(&evf_mgr->ev_fence_lock);
+ struct amdgpu_eviction_fence *ev_fence = to_ev_fence(f);
- /* Attach the new fence */
- drm_exec_for_each_locked_object(exec, index, obj) {
- struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
-
- if (!bo)
- continue;
- ret = amdgpu_eviction_fence_attach(evf_mgr, bo);
- if (ret) {
- DRM_ERROR("Failed to attch new eviction fence\n");
- goto free_err;
- }
- }
-
- /* Free old fence */
- if (old_ef)
- dma_fence_put(&old_ef->base);
- return 0;
-
-free_err:
- kfree(new_ef);
- return ret;
+ schedule_work(&ev_fence->evf_mgr->suspend_work);
+ return true;
}
+static const struct dma_fence_ops amdgpu_eviction_fence_ops = {
+ .get_driver_name = amdgpu_eviction_fence_get_driver_name,
+ .get_timeline_name = amdgpu_eviction_fence_get_timeline_name,
+ .enable_signaling = amdgpu_eviction_fence_enable_signaling,
+};
+
static void
amdgpu_eviction_fence_suspend_worker(struct work_struct *work)
{
- struct amdgpu_eviction_fence_mgr *evf_mgr = work_to_evf_mgr(work, suspend_work.work);
- struct amdgpu_fpriv *fpriv = evf_mgr_to_fpriv(evf_mgr);
+ struct amdgpu_eviction_fence_mgr *evf_mgr =
+ container_of(work, struct amdgpu_eviction_fence_mgr,
+ suspend_work);
+ struct amdgpu_fpriv *fpriv =
+ container_of(evf_mgr, struct amdgpu_fpriv, evf_mgr);
struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
- struct amdgpu_eviction_fence *ev_fence;
+ struct dma_fence *ev_fence;
+ bool cookie;
mutex_lock(&uq_mgr->userq_mutex);
- spin_lock(&evf_mgr->ev_fence_lock);
- ev_fence = evf_mgr->ev_fence;
- if (ev_fence)
- dma_fence_get(&ev_fence->base);
- else
- goto unlock;
- spin_unlock(&evf_mgr->ev_fence_lock);
- amdgpu_userq_evict(uq_mgr, ev_fence);
+ /*
+ * This is intentionally after taking the userq_mutex since we do
+ * allocate memory while holding this lock, but only after ensuring that
+ * the eviction fence is signaled.
+ */
+ cookie = dma_fence_begin_signalling();
- mutex_unlock(&uq_mgr->userq_mutex);
- dma_fence_put(&ev_fence->base);
- return;
+ ev_fence = amdgpu_evf_mgr_get_fence(evf_mgr);
+ amdgpu_userq_evict(uq_mgr);
+
+ /*
+ * Signaling the eviction fence must be done while holding the
+ * userq_mutex. Otherwise we won't resume the queues before issuing the
+ * next fence.
+ */
+ dma_fence_signal(ev_fence);
+ dma_fence_end_signalling(cookie);
+ dma_fence_put(ev_fence);
+
+ if (!evf_mgr->shutdown)
+ schedule_delayed_work(&uq_mgr->resume_work, 0);
-unlock:
- spin_unlock(&evf_mgr->ev_fence_lock);
mutex_unlock(&uq_mgr->userq_mutex);
}
-static bool amdgpu_eviction_fence_enable_signaling(struct dma_fence *f)
+int amdgpu_evf_mgr_attach_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_bo *bo)
{
- struct amdgpu_eviction_fence_mgr *evf_mgr;
- struct amdgpu_eviction_fence *ev_fence;
-
- if (!f)
- return true;
-
- ev_fence = to_ev_fence(f);
- evf_mgr = ev_fence->evf_mgr;
+ struct dma_fence *ev_fence = amdgpu_evf_mgr_get_fence(evf_mgr);
+ struct ttm_operation_ctx ctx = { false, false };
+ struct dma_resv *resv = bo->tbo.base.resv;
+ int ret;
- schedule_delayed_work(&evf_mgr->suspend_work, 0);
- return true;
-}
+ if (!dma_fence_is_signaled(ev_fence)) {
-static const struct dma_fence_ops amdgpu_eviction_fence_ops = {
- .get_driver_name = amdgpu_eviction_fence_get_driver_name,
- .get_timeline_name = amdgpu_eviction_fence_get_timeline_name,
- .enable_signaling = amdgpu_eviction_fence_enable_signaling,
-};
+ amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (!ret)
+ dma_resv_add_fence(resv, ev_fence,
+ DMA_RESV_USAGE_BOOKKEEP);
+ } else {
+ ret = 0;
+ }
-void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr,
- struct amdgpu_eviction_fence *ev_fence)
-{
- spin_lock(&evf_mgr->ev_fence_lock);
- dma_fence_signal(&ev_fence->base);
- spin_unlock(&evf_mgr->ev_fence_lock);
+ dma_fence_put(ev_fence);
+ return ret;
}
-struct amdgpu_eviction_fence *
-amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr)
+int amdgpu_evf_mgr_rearm(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct drm_exec *exec)
{
struct amdgpu_eviction_fence *ev_fence;
+ struct drm_gem_object *obj;
+ unsigned long index;
+ /* Create and initialize a new eviction fence */
ev_fence = kzalloc_obj(*ev_fence);
if (!ev_fence)
- return NULL;
+ return -ENOMEM;
ev_fence->evf_mgr = evf_mgr;
get_task_comm(ev_fence->timeline_name, current);
@@ -171,56 +134,22 @@ amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr)
dma_fence_init64(&ev_fence->base, &amdgpu_eviction_fence_ops,
&ev_fence->lock, evf_mgr->ev_fence_ctx,
atomic_inc_return(&evf_mgr->ev_fence_seq));
- return ev_fence;
-}
-
-void amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr)
-{
- struct amdgpu_eviction_fence *ev_fence;
-
- /* Wait for any pending work to execute */
- flush_delayed_work(&evf_mgr->suspend_work);
-
- spin_lock(&evf_mgr->ev_fence_lock);
- ev_fence = evf_mgr->ev_fence;
- spin_unlock(&evf_mgr->ev_fence_lock);
-
- if (!ev_fence)
- return;
-
- dma_fence_wait(&ev_fence->base, false);
-
- /* Last unref of ev_fence */
- dma_fence_put(&ev_fence->base);
-}
-int amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr,
- struct amdgpu_bo *bo)
-{
- struct amdgpu_eviction_fence *ev_fence;
- struct dma_resv *resv = bo->tbo.base.resv;
- int ret;
+ /* Remember it for newly added BOs */
+ dma_fence_put(evf_mgr->ev_fence);
+ evf_mgr->ev_fence = &ev_fence->base;
- if (!resv)
- return 0;
+ /* And add it to all existing BOs */
+ drm_exec_for_each_locked_object(exec, index, obj) {
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
- ret = dma_resv_reserve_fences(resv, 1);
- if (ret) {
- DRM_DEBUG_DRIVER("Failed to resv fence space\n");
- return ret;
+ amdgpu_evf_mgr_attach_fence(evf_mgr, bo);
}
-
- spin_lock(&evf_mgr->ev_fence_lock);
- ev_fence = evf_mgr->ev_fence;
- if (ev_fence)
- dma_resv_add_fence(resv, &ev_fence->base, DMA_RESV_USAGE_BOOKKEEP);
- spin_unlock(&evf_mgr->ev_fence_lock);
-
return 0;
}
-void amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr,
- struct amdgpu_bo *bo)
+void amdgpu_evf_mgr_detach_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_bo *bo)
{
struct dma_fence *stub = dma_fence_get_stub();
@@ -229,13 +158,31 @@ void amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr,
dma_fence_put(stub);
}
-int amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr)
+void amdgpu_evf_mgr_init(struct amdgpu_eviction_fence_mgr *evf_mgr)
{
- /* This needs to be done one time per open */
atomic_set(&evf_mgr->ev_fence_seq, 0);
evf_mgr->ev_fence_ctx = dma_fence_context_alloc(1);
- spin_lock_init(&evf_mgr->ev_fence_lock);
+ evf_mgr->ev_fence = dma_fence_get_stub();
- INIT_DELAYED_WORK(&evf_mgr->suspend_work, amdgpu_eviction_fence_suspend_worker);
- return 0;
+ INIT_WORK(&evf_mgr->suspend_work, amdgpu_eviction_fence_suspend_worker);
+}
+
+void amdgpu_evf_mgr_shutdown(struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+ evf_mgr->shutdown = true;
+ /* Make sure that the shutdown is visible to the suspend work */
+ flush_work(&evf_mgr->suspend_work);
+}
+
+void amdgpu_evf_mgr_flush_suspend(struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+ dma_fence_wait(rcu_dereference_protected(evf_mgr->ev_fence, true),
+ false);
+ /* Make sure that we are done with the last suspend work */
+ flush_work(&evf_mgr->suspend_work);
+}
+
+void amdgpu_evf_mgr_fini(struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+ dma_fence_put(evf_mgr->ev_fence);
}
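The replacement API splits the old create/destroy pair into explicit lifecycle stages. Pieced together from the amdgpu_kms.c and amdgpu_drv.c hunks in this patch, the intended per-open call order is roughly this sketch:

	/* open: one-time setup (amdgpu_driver_open_kms) */
	amdgpu_evf_mgr_init(&fpriv->evf_mgr);

	/* runtime: re-arm with a fresh fence while all BOs are locked,
	 * e.g. from the userqueue validation path
	 */
	r = amdgpu_evf_mgr_rearm(&fpriv->evf_mgr, &exec);

	/* release (amdgpu_drm_release): ordered teardown */
	amdgpu_evf_mgr_shutdown(&fpriv->evf_mgr);      /* stop re-arming      */
	amdgpu_userq_mgr_cancel_resume(&fpriv->userq_mgr);
	amdgpu_evf_mgr_flush_suspend(&fpriv->evf_mgr); /* wait for last fence */
	amdgpu_userq_mgr_fini(&fpriv->userq_mgr);
	amdgpu_evf_mgr_fini(&fpriv->evf_mgr);          /* drop fence ref      */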
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
index fcd867b7147d..2a750add4e7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
@@ -25,6 +25,8 @@
#ifndef AMDGPU_EV_FENCE_H_
#define AMDGPU_EV_FENCE_H_
+#include <linux/dma-fence.h>
+
struct amdgpu_eviction_fence {
struct dma_fence base;
spinlock_t lock;
@@ -35,35 +37,36 @@ struct amdgpu_eviction_fence {
struct amdgpu_eviction_fence_mgr {
u64 ev_fence_ctx;
atomic_t ev_fence_seq;
- spinlock_t ev_fence_lock;
- struct amdgpu_eviction_fence *ev_fence;
- struct delayed_work suspend_work;
- uint8_t fd_closing;
-};
-
-/* Eviction fence helper functions */
-struct amdgpu_eviction_fence *
-amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr);
-void
-amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr);
-
-int
-amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr,
- struct amdgpu_bo *bo);
+ /*
+ * Only updated while holding the VM resv lock.
+ * Only signaled while holding the userq mutex.
+ */
+ struct dma_fence __rcu *ev_fence;
+ struct work_struct suspend_work;
+ bool shutdown;
+};
-void
-amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr,
- struct amdgpu_bo *bo);
+static inline struct dma_fence *
+amdgpu_evf_mgr_get_fence(struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+ struct dma_fence *ev_fence;
-int
-amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr);
+ rcu_read_lock();
+ ev_fence = dma_fence_get_rcu_safe(&evf_mgr->ev_fence);
+ rcu_read_unlock();
+ return ev_fence;
+}
-void
-amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr,
- struct amdgpu_eviction_fence *ev_fence);
+int amdgpu_evf_mgr_attach_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_bo *bo);
+int amdgpu_evf_mgr_rearm(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct drm_exec *exec);
+void amdgpu_evf_mgr_detach_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_bo *bo);
+void amdgpu_evf_mgr_init(struct amdgpu_eviction_fence_mgr *evf_mgr);
+void amdgpu_evf_mgr_shutdown(struct amdgpu_eviction_fence_mgr *evf_mgr);
+void amdgpu_evf_mgr_flush_suspend(struct amdgpu_eviction_fence_mgr *evf_mgr);
+void amdgpu_evf_mgr_fini(struct amdgpu_eviction_fence_mgr *evf_mgr);
-int
-amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
- struct drm_exec *exec);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 1054d66c54fa..8048a4c04b47 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -89,16 +89,6 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
return seq;
}
-static void amdgpu_fence_save_fence_wptr_start(struct amdgpu_fence *af)
-{
- af->fence_wptr_start = af->ring->wptr;
-}
-
-static void amdgpu_fence_save_fence_wptr_end(struct amdgpu_fence *af)
-{
- af->fence_wptr_end = af->ring->wptr;
-}
-
/**
* amdgpu_fence_emit - emit a fence on the requested ring
*
@@ -107,16 +97,14 @@ static void amdgpu_fence_save_fence_wptr_end(struct amdgpu_fence *af)
* @flags: flags to pass into the subordinate .emit_fence() call
*
* Emits a fence command on the requested ring (all asics).
- * Returns 0 on success, -ENOMEM on failure.
*/
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
- unsigned int flags)
+void amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
+ unsigned int flags)
{
struct amdgpu_device *adev = ring->adev;
struct dma_fence *fence;
struct dma_fence __rcu **ptr;
uint32_t seq;
- int r;
fence = &af->base;
af->ring = ring;
@@ -126,11 +114,9 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
&ring->fence_drv.lock,
adev->fence_context + ring->idx, seq);
- amdgpu_fence_save_fence_wptr_start(af);
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
seq, flags | AMDGPU_FENCE_FLAG_INT);
- amdgpu_fence_save_fence_wptr_end(af);
- amdgpu_fence_save_wptr(af);
+
pm_runtime_get_noresume(adev_to_drm(adev)->dev);
ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
if (unlikely(rcu_dereference_protected(*ptr, 1))) {
@@ -141,10 +127,13 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
rcu_read_unlock();
if (old) {
- r = dma_fence_wait(old, false);
+ /*
+ * dma_fence_wait(old, false) is not interruptible.
+ * It will not return an error in this case.
+ * So we can safely ignore the return value.
+ */
+ dma_fence_wait(old, false);
dma_fence_put(old);
- if (r)
- return r;
}
}
@@ -154,8 +143,6 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
* emitting the fence would mess up the hardware ring buffer.
*/
rcu_assign_pointer(*ptr, dma_fence_get(fence));
-
- return 0;
}
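The wait on the old fence above exists because the fence array holds only num_fences_mask + 1 slots, so seq N reuses the slot of seq N minus the slot count. Illustrative arithmetic with hypothetical sizes:

	/* e.g. 512 fence slots -> num_fences_mask == 511 */
	uint32_t seq  = 1234;
	uint32_t slot = seq & 511;	/* slot 210 */

	/* the slot's previous occupant was seq - 512 == 722 (also slot 210);
	 * the dma_fence_wait() above ensures it signaled before reuse
	 */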
/**
@@ -241,7 +228,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
do {
struct dma_fence *fence, **ptr;
- struct amdgpu_fence *am_fence;
++last_seq;
last_seq &= drv->num_fences_mask;
@@ -254,12 +240,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
if (!fence)
continue;
- /* Save the wptr in the fence driver so we know what the last processed
- * wptr was. This is required for re-emitting the ring state for
- * queues that are reset but are not guilty and thus have no guilty fence.
- */
- am_fence = container_of(fence, struct amdgpu_fence, base);
- drv->signalled_wptr = am_fence->wptr;
dma_fence_signal(fence);
dma_fence_put(fence);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -458,7 +438,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
* @ring: ring to init the fence driver on
*
* Init the fence driver for the requested ring (all asics).
- * Helper function for amdgpu_fence_driver_init().
+ * Helper function for amdgpu_fence_driver_sw_init().
*/
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
{
@@ -708,25 +688,29 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring)
*/
/**
- * amdgpu_fence_driver_update_timedout_fence_state - Update fence state and set errors
+ * amdgpu_ring_set_fence_errors_and_reemit - Set dma_fence errors and reemit
*
- * @af: fence of the ring to update
+ * @ring: the ring to operate on
+ * @guilty_fence: the fence from the context that caused the timeout
*
*/
-void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af)
+void amdgpu_ring_set_fence_errors_and_reemit(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence)
{
struct dma_fence *unprocessed;
struct dma_fence __rcu **ptr;
struct amdgpu_fence *fence;
- struct amdgpu_ring *ring = af->ring;
unsigned long flags;
u32 seq, last_seq;
- bool reemitted = false;
+ unsigned int i;
+ bool is_guilty_fence;
+ bool is_guilty_context;
last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
- /* mark all fences from the guilty context with an error */
+ ring->reemit = true;
+ amdgpu_ring_alloc(ring, ring->ring_backup_entries_to_copy);
spin_lock_irqsave(&ring->fence_drv.lock, flags);
do {
last_seq++;
@@ -738,39 +722,45 @@ void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af)
if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) {
fence = container_of(unprocessed, struct amdgpu_fence, base);
+ is_guilty_fence = fence == guilty_fence;
+ is_guilty_context = fence->context == guilty_fence->context;
- if (fence->reemitted > 1)
- reemitted = true;
- else if (fence == af)
+ /* mark all fences from the guilty context with an error */
+ if (is_guilty_fence)
dma_fence_set_error(&fence->base, -ETIME);
- else if (fence->context == af->context)
+ else if (is_guilty_context)
dma_fence_set_error(&fence->base, -ECANCELED);
+
+ /* reemit the packet stream and update wptrs */
+ fence->ib_wptr = ring->wptr;
+ for (i = 0; i < fence->ib_dw_size; i++) {
+ /* Skip the IB(s) for the guilty context. */
+ if (is_guilty_context &&
+ i >= fence->skip_ib_dw_start_offset &&
+ i < fence->skip_ib_dw_end_offset)
+ amdgpu_ring_write(ring, ring->funcs->nop);
+ else
+ amdgpu_ring_write(ring,
+ ring->ring_backup[fence->backup_idx + i]);
+ }
}
rcu_read_unlock();
} while (last_seq != seq);
spin_unlock_irqrestore(&ring->fence_drv.lock, flags);
-
- if (reemitted) {
- /* if we've already reemitted once then just cancel everything */
- amdgpu_fence_driver_force_completion(af->ring);
- af->ring->ring_backup_entries_to_copy = 0;
- }
-}
-
-void amdgpu_fence_save_wptr(struct amdgpu_fence *af)
-{
- af->wptr = af->ring->wptr;
+ amdgpu_ring_commit(ring);
+ ring->reemit = false;
}
static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring,
- u64 start_wptr, u64 end_wptr)
+ struct amdgpu_fence *af)
{
- unsigned int first_idx = start_wptr & ring->buf_mask;
- unsigned int last_idx = end_wptr & ring->buf_mask;
+ unsigned int first_idx = af->ib_wptr & ring->buf_mask;
+ unsigned int dw_size = af->ib_dw_size;
unsigned int i;
+ af->backup_idx = ring->ring_backup_entries_to_copy;
/* Backup the contents of the ring buffer. */
- for (i = first_idx; i != last_idx; ++i, i &= ring->buf_mask)
+ for (i = first_idx; dw_size > 0; ++i, i &= ring->buf_mask, --dw_size)
ring->ring_backup[ring->ring_backup_entries_to_copy++] = ring->ring[i];
}
@@ -780,12 +770,10 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
struct dma_fence *unprocessed;
struct dma_fence __rcu **ptr;
struct amdgpu_fence *fence;
- u64 wptr;
u32 seq, last_seq;
last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
- wptr = ring->fence_drv.signalled_wptr;
ring->ring_backup_entries_to_copy = 0;
do {
@@ -799,21 +787,7 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
if (unprocessed && !dma_fence_is_signaled(unprocessed)) {
fence = container_of(unprocessed, struct amdgpu_fence, base);
- /* save everything if the ring is not guilty, otherwise
- * just save the content from other contexts.
- */
- if (!fence->reemitted &&
- (!guilty_fence || (fence->context != guilty_fence->context))) {
- amdgpu_ring_backup_unprocessed_command(ring, wptr,
- fence->wptr);
- } else if (!fence->reemitted) {
- /* always save the fence */
- amdgpu_ring_backup_unprocessed_command(ring,
- fence->fence_wptr_start,
- fence->fence_wptr_end);
- }
- wptr = fence->wptr;
- fence->reemitted++;
+ amdgpu_ring_backup_unprocessed_command(ring, fence);
}
rcu_read_unlock();
} while (last_seq != seq);
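Together with amdgpu_ring_set_fence_errors_and_reemit() above, the per-fence ib_wptr/ib_dw_size/backup_idx bookkeeping replaces the old wptr snapshots. A reset handler is presumably expected to use the pair in this order (locking and the hardware queue reset itself elided):

	/* 1) snapshot all unprocessed submissions into ring->ring_backup */
	amdgpu_ring_backup_unprocessed_commands(ring, guilty_fence);

	/* 2) reset the hardware queue (asic-specific, elided here) */

	/* 3) replay the backup: guilty IBs become NOPs, the guilty fence
	 *    gets -ETIME and its context's other fences get -ECANCELED
	 */
	amdgpu_ring_set_fence_errors_and_reemit(ring, guilty_fence);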
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
index 514bd302365f..841e1b3a017e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
@@ -106,6 +106,7 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr)
*fru_addr = FRU_EEPROM_MADDR_8;
return true;
case IP_VERSION(13, 0, 12):
+ case IP_VERSION(15, 0, 8):
if (fru_addr)
*fru_addr = FRU_EEPROM_MADDR_INV;
return true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index a6107109a2b8..5376035d32fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -30,6 +30,7 @@
#include <linux/pagemap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>
+#include <linux/dma-fence-unwrap.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_drv.h>
@@ -106,6 +107,7 @@ amdgpu_gem_update_timeline_node(struct drm_file *filp,
*chain = dma_fence_chain_alloc();
if (!*chain) {
drm_syncobj_put(*syncobj);
+ *syncobj = NULL;
return -ENOMEM;
}
@@ -258,18 +260,15 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
amdgpu_vm_bo_update_shared(abo);
bo_va = amdgpu_vm_bo_find(vm, abo);
- if (!bo_va)
+ if (!bo_va) {
bo_va = amdgpu_vm_bo_add(adev, vm, abo);
- else
+ r = amdgpu_evf_mgr_attach_fence(&fpriv->evf_mgr, abo);
+ if (r)
+ goto out_unlock;
+ } else {
++bo_va->ref_count;
-
- /* attach gfx eviction fence */
- r = amdgpu_eviction_fence_attach(&fpriv->evf_mgr, abo);
- if (r) {
- DRM_DEBUG_DRIVER("Failed to attach eviction fence to BO\n");
- amdgpu_bo_unreserve(abo);
- return r;
}
+
drm_exec_fini(&exec);
/* Validate and add eviction fence to DMABuf imports with dynamic
@@ -337,7 +336,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
}
if (!amdgpu_vm_is_bo_always_valid(vm, bo))
- amdgpu_eviction_fence_detach(&fpriv->evf_mgr, bo);
+ amdgpu_evf_mgr_detach_fence(&fpriv->evf_mgr, bo);
bo_va = amdgpu_vm_bo_find(vm, bo);
if (!bo_va || --bo_va->ref_count)
@@ -744,11 +743,10 @@ amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
struct dma_fence *fence;
int r = 0;
- /* Always start from the VM's existing last update fence. */
- fence = dma_fence_get(vm->last_update);
-
+ /* If the VM is not ready return only a stub. */
if (!amdgpu_vm_ready(vm))
- return fence;
+ return dma_fence_get_stub();
+
/*
* First clean up any freed mappings in the VM.
@@ -757,7 +755,7 @@ amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
* schedules GPU work. If nothing needs clearing, @fence can remain as
* the original vm->last_update.
*/
- r = amdgpu_vm_clear_freed(adev, vm, &fence);
+ r = amdgpu_vm_clear_freed(adev, vm, &vm->last_update);
if (r)
goto error;
@@ -774,47 +772,34 @@ amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
if (r)
goto error;
- /*
- * Decide which fence best represents the last update:
- *
- * MAP/REPLACE:
- * - For always-valid mappings, use vm->last_update.
- * - Otherwise, export bo_va->last_pt_update.
- *
- * UNMAP/CLEAR:
- * Keep the fence returned by amdgpu_vm_clear_freed(). If no work was
- * needed, it can remain as vm->last_pt_update.
- *
- * The VM and BO update fences are always initialized to a valid value.
- * vm->last_update and bo_va->last_pt_update always start as valid fences.
- * and are never expected to be NULL.
- */
- switch (operation) {
- case AMDGPU_VA_OP_MAP:
- case AMDGPU_VA_OP_REPLACE:
+ if ((operation == AMDGPU_VA_OP_MAP ||
+ operation == AMDGPU_VA_OP_REPLACE) &&
+ !amdgpu_vm_is_bo_always_valid(vm, bo_va->base.bo)) {
+
/*
- * For MAP/REPLACE, return the page table update fence for the
- * mapping we just modified. bo_va is expected to be valid here.
+ * For MAP/REPLACE of non per-VM BOs we need to sync to both the
+ * bo_va->last_pt_update and vm->last_update or otherwise we
+ * potentially miss the PDE updates.
*/
- dma_fence_put(fence);
-
- if (amdgpu_vm_is_bo_always_valid(vm, bo_va->base.bo))
- fence = dma_fence_get(vm->last_update);
- else
- fence = dma_fence_get(bo_va->last_pt_update);
- break;
- case AMDGPU_VA_OP_UNMAP:
- case AMDGPU_VA_OP_CLEAR:
- default:
- /* keep @fence as returned by amdgpu_vm_clear_freed() */
- break;
+ fence = dma_fence_unwrap_merge(vm->last_update,
+ bo_va->last_pt_update);
+ if (!fence) {
+ /* As fallback in OOM situations */
+ dma_fence_wait(vm->last_update, false);
+ dma_fence_wait(bo_va->last_pt_update, false);
+ fence = dma_fence_get_stub();
+ }
+ } else {
+ fence = dma_fence_get(vm->last_update);
}
+ return fence;
+
error:
if (r && r != -ERESTARTSYS)
DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
- return fence;
+ return dma_fence_get(vm->last_update);
}
int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
@@ -835,7 +820,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct amdgpu_bo_va *bo_va;
struct drm_syncobj *timeline_syncobj = NULL;
struct dma_fence_chain *timeline_chain = NULL;
- struct dma_fence *fence;
struct drm_exec exec;
uint64_t vm_size;
int r = 0;
@@ -887,6 +871,10 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
+ if (args->flags & AMDGPU_VM_DELAY_UPDATE &&
+ args->vm_timeline_syncobj_out)
+ return -EINVAL;
+
if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
!(args->flags & AMDGPU_VM_PAGE_PRT)) {
gobj = drm_gem_object_lookup(filp, args->handle);
@@ -976,11 +964,13 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
* that represents the last relevant update for this mapping. This
* fence can then be exported to the user-visible VM timeline.
*/
- if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) {
+ if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) &&
+ (!adev->debug_vm || timeline_syncobj)) {
+ struct dma_fence *fence;
+
fence = amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
args->operation);
-
- if (timeline_syncobj && fence) {
+ if (timeline_syncobj) {
if (!args->vm_timeline_point) {
/* Replace the existing fence when no point is given. */
drm_syncobj_replace_fence(timeline_syncobj,
@@ -991,6 +981,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
timeline_chain,
fence,
args->vm_timeline_point);
+ timeline_chain = NULL;
}
}
dma_fence_put(fence);
@@ -998,6 +989,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
}
error:
+ dma_fence_chain_free(timeline_chain);
+ if (timeline_syncobj)
+ drm_syncobj_put(timeline_syncobj);
drm_exec_fini(&exec);
error_put_gobj:
drm_gem_object_put(gobj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 77578ecc6782..cab3196a87fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -503,6 +503,55 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id)
&ring->mqd_ptr);
}
+void amdgpu_gfx_mqd_symmetrically_map_cu_mask(struct amdgpu_device *adev, const uint32_t *cu_mask,
+ uint32_t cu_mask_count, uint32_t *se_mask)
+{
+ struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
+ struct amdgpu_gfx_config *gfx_info = &adev->gfx.config;
+ uint32_t cu_per_sh[8][4] = {0};
+ int i, se, sh, cu, cu_bitmap_sh_mul;
+ int xcc_inst = ffs(adev->gfx.xcc_mask) - 1;
+ bool wgp_mode_req = amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0);
+ int cu_inc = wgp_mode_req ? 2 : 1;
+ uint32_t en_mask = wgp_mode_req ? 0x3 : 0x1;
+ int num_xcc, inc, inst = 0;
+
+ if (xcc_inst < 0)
+ xcc_inst = 0;
+
+ num_xcc = hweight16(adev->gfx.xcc_mask);
+ if (!num_xcc)
+ num_xcc = 1;
+
+ inc = cu_inc * num_xcc;
+
+ cu_bitmap_sh_mul = 2;
+
+ for (se = 0; se < gfx_info->max_shader_engines; se++)
+ for (sh = 0; sh < gfx_info->max_sh_per_se; sh++)
+ cu_per_sh[se][sh] = hweight32(
+ cu_info->bitmap[xcc_inst][se % 4][sh + (se / 4) *
+ cu_bitmap_sh_mul]);
+
+ for (i = 0; i < gfx_info->max_shader_engines; i++)
+ se_mask[i] = 0;
+
+ i = inst;
+ for (cu = 0; cu < 16; cu += cu_inc) {
+ for (sh = 0; sh < gfx_info->max_sh_per_se; sh++) {
+ for (se = 0; se < gfx_info->max_shader_engines; se++) {
+ if (cu_per_sh[se][sh] > cu) {
+ if ((i / 32) < cu_mask_count && (cu_mask[i / 32] & (1 << (i % 32))))
+ se_mask[se] |= en_mask << (cu + sh * 16);
+ i += inc;
+ if (i >= cu_mask_count * 32)
+ return;
+ }
+ }
+ }
+ }
+}
+
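A hedged usage sketch: MQD setup code converts a user-supplied CU bitmap into per-shader-engine enable masks, assuming cu_mask_count is in dwords as the bounds checks above suggest (values are illustrative):

	uint32_t cu_mask[4] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff };
	uint32_t se_mask[8] = { 0 };

	amdgpu_gfx_mqd_symmetrically_map_cu_mask(adev, cu_mask,
						 ARRAY_SIZE(cu_mask), se_mask);
	/* se_mask[se] now holds the enable bits to program into the
	 * per-SE static thread management fields of the MQD
	 */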
int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
{
struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 585cc8e81bb2..2785eda6fea5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -463,6 +463,7 @@ struct amdgpu_gfx {
struct amdgpu_irq_src cp_ecc_error_irq;
struct amdgpu_irq_src sq_irq;
struct amdgpu_irq_src rlc_gc_fed_irq;
+ struct amdgpu_irq_src rlc_poison_irq;
struct sq_work sq_work;
/* gfx status */
@@ -583,6 +584,8 @@ int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
unsigned mqd_size, int xcc_id);
void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id);
+void amdgpu_gfx_mqd_symmetrically_map_cu_mask(struct amdgpu_device *adev, const uint32_t *cu_mask,
+ uint32_t cu_mask_count, uint32_t *se_mask);
int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id);
int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id);
int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 5179fa008626..ec74f3971732 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -34,6 +34,7 @@
#include "amdgpu_ras.h"
#include "amdgpu_reset.h"
#include "amdgpu_xgmi.h"
+#include "amdgpu_atomfirmware.h"
#include <drm/drm_drv.h>
#include <drm/ttm/ttm_tt.h>
@@ -742,7 +743,7 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
* translation. Avoid this by doing the invalidation from the SDMA
* itself at least for GART.
*/
- mutex_lock(&adev->mman.gtt_window_lock);
+ mutex_lock(&adev->mman.default_entity.lock);
r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.default_entity.base,
AMDGPU_FENCE_OWNER_UNDEFINED,
16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
@@ -755,7 +756,7 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
fence = amdgpu_job_submit(job);
- mutex_unlock(&adev->mman.gtt_window_lock);
+ mutex_unlock(&adev->mman.default_entity.lock);
dma_fence_wait(fence, false);
dma_fence_put(fence);
@@ -763,7 +764,7 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
return;
error_alloc:
- mutex_unlock(&adev->mman.gtt_window_lock);
+ mutex_unlock(&adev->mman.default_entity.lock);
dev_err(adev->dev, "Error flushing GPU TLB using the SDMA (%d)!\n", r);
}
@@ -1374,18 +1375,18 @@ int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev,
struct amdgpu_mem_partition_info *mem_ranges,
uint8_t *exp_ranges)
{
- struct amdgpu_gmc_memrange *ranges;
+ struct amdgpu_gmc_memrange ranges[AMDGPU_MAX_MEM_RANGES];
int range_cnt, ret, i, j;
uint32_t nps_type;
bool refresh;
if (!mem_ranges || !exp_ranges)
return -EINVAL;
-
+ range_cnt = AMDGPU_MAX_MEM_RANGES;
refresh = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) &&
(adev->gmc.reset_flags & AMDGPU_GMC_INIT_RESET_NPS);
- ret = amdgpu_discovery_get_nps_info(adev, &nps_type, &ranges,
- &range_cnt, refresh);
+ ret = amdgpu_discovery_get_nps_info(adev, &nps_type, ranges, &range_cnt,
+ refresh);
if (ret)
return ret;
@@ -1446,8 +1447,6 @@ int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev,
if (!*exp_ranges)
*exp_ranges = range_cnt;
err:
- kvfree(ranges);
-
return ret;
}
@@ -1749,3 +1748,31 @@ int amdgpu_gmc_init_mem_ranges(struct amdgpu_device *adev)
return 0;
}
+
+int amdgpu_gmc_get_vram_info(struct amdgpu_device *adev,
+ int *vram_width, int *vram_type, int *vram_vendor)
+{
+ int ret = 0;
+
+ if (adev->flags & AMD_IS_APU)
+ return amdgpu_atomfirmware_get_integrated_system_info(adev,
+ vram_width, vram_type, vram_vendor);
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ return amdgpu_atomfirmware_get_umc_info(adev,
+ vram_width, vram_type, vram_vendor);
+ case IP_VERSION(9, 5, 0):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 4, 3):
+ ret = amdgpu_atomfirmware_get_umc_info(adev,
+ vram_width, vram_type, vram_vendor);
+ if (vram_width && !ret)
+ *vram_width *= hweight32(adev->aid_mask);
+ return ret;
+ default:
+ return amdgpu_atomfirmware_get_vram_info(adev,
+ vram_width, vram_type, vram_vendor);
+ }
+ return 0;
+}
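A sketch of the intended consumer: per-generation gmc code can now call the helper instead of open-coding the APU/UMC/VBIOS dispatch (the assignment targets follow the existing amdgpu_gmc fields; the surrounding function is illustrative):

	int vram_width = 0, vram_type = 0, vram_vendor = 0;
	int r;

	r = amdgpu_gmc_get_vram_info(adev, &vram_width, &vram_type,
				     &vram_vendor);
	if (r)
		return r;

	adev->gmc.vram_width = vram_width;	/* already scaled by aid_mask
						 * for GC 9.4.3/9.4.4/9.5.0 */
	adev->gmc.vram_type = vram_type;
	adev->gmc.vram_vendor = vram_vendor;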
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 0e8a52d96573..32e73e8ba778 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -94,6 +94,10 @@ enum amdgpu_memory_partition {
#define AMDGPU_GMC9_FAULT_SOURCE_DATA_WRITE 0x20
#define AMDGPU_GMC9_FAULT_SOURCE_DATA_EXE 0x10
+#define AMDGPU_GMC121_FAULT_SOURCE_DATA_READ 0x400000
+#define AMDGPU_GMC121_FAULT_SOURCE_DATA_WRITE 0x200000
+#define AMDGPU_GMC121_FAULT_SOURCE_DATA_EXE 0x100000
+
/*
* GMC page fault information
*/
@@ -478,4 +482,6 @@ amdgpu_gmc_query_memory_partition(struct amdgpu_device *adev);
int amdgpu_gmc_init_mem_ranges(struct amdgpu_device *adev);
void amdgpu_gmc_init_sw_mem_ranges(struct amdgpu_device *adev,
struct amdgpu_mem_partition_info *mem_ranges);
+int amdgpu_gmc_get_vram_info(struct amdgpu_device *adev,
+ int *vram_width, int *vram_type, int *vram_vendor);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index ac276bb53c7c..620fddde4c4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -324,17 +324,13 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size)
{
struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
struct ttm_resource_manager *man = &mgr->manager;
- uint64_t start, size;
man->use_tt = true;
man->func = &amdgpu_gtt_mgr_func;
ttm_resource_manager_init(man, &adev->mman.bdev, gtt_size);
- start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
- start += amdgpu_vce_required_gart_pages(adev);
- size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
- drm_mm_init(&mgr->mm, start, size);
+ drm_mm_init(&mgr->mm, 0, adev->gmc.gart_size >> PAGE_SHIFT);
spin_lock_init(&mgr->lock);
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 3a7bab87b5d8..63f62c670df5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -129,6 +129,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
struct amdgpu_ib *ib = &ibs[0];
struct dma_fence *tmp = NULL;
struct amdgpu_fence *af;
+ struct amdgpu_fence *vm_af;
bool need_ctx_switch;
struct amdgpu_vm *vm;
uint64_t fence_ctx;
@@ -215,25 +216,28 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
dma_fence_put(tmp);
}
- if ((ib->flags & AMDGPU_IB_FLAG_EMIT_MEM_SYNC) && ring->funcs->emit_mem_sync)
- ring->funcs->emit_mem_sync(ring);
+ if (job) {
+ vm_af = job->hw_vm_fence;
+ /* VM sequence */
+ vm_af->ib_wptr = ring->wptr;
+ amdgpu_vm_flush(ring, job, need_pipe_sync);
+ vm_af->ib_dw_size =
+ amdgpu_ring_get_dw_distance(ring, vm_af->ib_wptr, ring->wptr);
+ }
- if (ring->funcs->emit_wave_limit &&
- ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
- ring->funcs->emit_wave_limit(ring, true);
+ /* IB sequence */
+ af->ib_wptr = ring->wptr;
+ amdgpu_ring_ib_begin(ring);
if (ring->funcs->insert_start)
ring->funcs->insert_start(ring);
- if (job) {
- r = amdgpu_vm_flush(ring, job, need_pipe_sync);
- if (r) {
- amdgpu_ring_undo(ring);
- goto free_fence;
- }
- }
+ if ((ib->flags & AMDGPU_IB_FLAG_EMIT_MEM_SYNC) && ring->funcs->emit_mem_sync)
+ ring->funcs->emit_mem_sync(ring);
- amdgpu_ring_ib_begin(ring);
+ if (ring->funcs->emit_wave_limit &&
+ ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
+ ring->funcs->emit_wave_limit(ring, true);
if (ring->funcs->emit_gfx_shadow && adev->gfx.cp_gfx_shadow)
amdgpu_ring_emit_gfx_shadow(ring, shadow_va, csa_va, gds_va,
@@ -243,6 +247,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
cond_exec = amdgpu_ring_init_cond_exec(ring,
ring->cond_exe_gpu_addr);
+ /* Record where the skippable IB range starts for guilty contexts */
+ af->skip_ib_dw_start_offset =
+ amdgpu_ring_get_dw_distance(ring, af->ib_wptr, ring->wptr);
amdgpu_device_flush_hdp(adev, ring);
if (need_ctx_switch)
@@ -281,6 +288,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
amdgpu_ring_emit_frame_cntl(ring, false, secure);
amdgpu_device_invalidate_hdp(adev, ring);
+ /* Record where the skippable IB range ends for guilty contexts */
+ af->skip_ib_dw_end_offset =
+ amdgpu_ring_get_dw_distance(ring, af->ib_wptr, ring->wptr);
if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE)
fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY;
@@ -297,14 +307,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr);
}
- r = amdgpu_fence_emit(ring, af, fence_flags);
- if (r) {
- dev_err(adev->dev, "failed to emit fence (%d)\n", r);
- if (job && job->vmid)
- amdgpu_vmid_reset(adev, ring->vm_hub, job->vmid);
- amdgpu_ring_undo(ring);
- goto free_fence;
- }
+ amdgpu_fence_emit(ring, af, fence_flags);
*f = &af->base;
/* get a ref for the job */
if (job)
@@ -323,15 +326,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
ring->funcs->emit_wave_limit(ring, false);
- /* Save the wptr associated with this fence.
- * This must be last for resets to work properly
- * as we need to save the wptr associated with this
- * fence so we know what rings contents to backup
- * after we reset the queue.
- */
- amdgpu_fence_save_wptr(af);
-
amdgpu_ring_ib_end(ring);
+
+ af->ib_dw_size = amdgpu_ring_get_dw_distance(ring, af->ib_wptr, ring->wptr);
+
amdgpu_ring_commit(ring);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index d7c1ffbf7626..07771721af9d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -92,7 +92,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
struct drm_wedge_task_info *info = NULL;
struct amdgpu_task_info *ti = NULL;
struct amdgpu_device *adev = ring->adev;
- enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_RESET;
int idx, r;
if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
@@ -147,8 +146,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
ring->sched.name);
drm_dev_wedged_event(adev_to_drm(adev),
DRM_WEDGE_RECOVERY_NONE, info);
- /* This is needed to add the job back to the pending list */
- status = DRM_GPU_SCHED_STAT_NO_HANG;
goto exit;
}
dev_err(adev->dev, "Ring %s reset failed\n", ring->sched.name);
@@ -184,7 +181,8 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
exit:
amdgpu_vm_put_task_info(ti);
drm_dev_exit(idx);
- return status;
+ /* This is needed to add the job back to the pending list */
+ return DRM_GPU_SCHED_STAT_NO_HANG;
}
int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
@@ -289,9 +287,10 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
unsigned i;
/* Check if any fences were initialized */
- if (job->base.s_fence && job->base.s_fence->finished.ops)
+ if (job->base.s_fence &&
+ dma_fence_was_initialized(&job->base.s_fence->finished))
f = &job->base.s_fence->finished;
- else if (job->hw_fence && job->hw_fence->base.ops)
+ else if (dma_fence_was_initialized(&job->hw_fence->base))
f = &job->hw_fence->base;
else
f = NULL;
@@ -308,11 +307,11 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
amdgpu_sync_free(&job->explicit_sync);
- if (job->hw_fence->base.ops)
+ if (dma_fence_was_initialized(&job->hw_fence->base))
dma_fence_put(&job->hw_fence->base);
else
kfree(job->hw_fence);
- if (job->hw_vm_fence->base.ops)
+ if (dma_fence_was_initialized(&job->hw_vm_fence->base))
dma_fence_put(&job->hw_vm_fence->base);
else
kfree(job->hw_vm_fence);
@@ -346,11 +345,11 @@ void amdgpu_job_free(struct amdgpu_job *job)
if (job->gang_submit != &job->base.s_fence->scheduled)
dma_fence_put(job->gang_submit);
- if (job->hw_fence->base.ops)
+ if (dma_fence_was_initialized(&job->hw_fence->base))
dma_fence_put(&job->hw_fence->base);
else
kfree(job->hw_fence);
- if (job->hw_vm_fence->base.ops)
+ if (dma_fence_was_initialized(&job->hw_vm_fence->base))
dma_fence_put(&job->hw_vm_fence->base);
else
kfree(job->hw_vm_fence);
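
Editor's note: the teardown hunks above swap the open-coded fence->ops checks for dma_fence_was_initialized(). A toy model of why the distinction matters, assuming — as the removed lines suggest — that the helper reduces to "has dma_fence_init() populated ops yet"; the types here are stand-ins, not the kernel's:

#include <stdbool.h>
#include <stdlib.h>

struct toy_fence {
        const void *ops;        /* NULL until the fence is initialized */
        int refcount;
};

static bool toy_fence_was_initialized(const struct toy_fence *f)
{
        return f->ops != NULL;
}

static void toy_fence_put(struct toy_fence *f)
{
        if (--f->refcount == 0)
                free(f);        /* normal refcounted release */
}

/* Mirrors the amdgpu_job_free_cb() pattern: an initialized fence must be
 * put so outstanding references stay valid; an uninitialized one was
 * never handed to the fence machinery and is simply freed. */
static void toy_job_release_fence(struct toy_fence *f)
{
        if (toy_fence_was_initialized(f))
                toy_fence_put(f);
        else
                free(f);
}

int main(void)
{
        struct toy_fence *f = calloc(1, sizeof(*f));    /* never initialized */

        toy_job_release_fence(f);                       /* kfree()-style path */
        return 0;
}
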
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 7f19554b9ad1..06efce38f323 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1522,10 +1522,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
"Failed to init usermode queue manager (%d), use legacy workload submission only\n",
r);
- r = amdgpu_eviction_fence_init(&fpriv->evf_mgr);
- if (r)
- goto error_vm;
-
+ amdgpu_evf_mgr_init(&fpriv->evf_mgr);
amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev);
file_priv->driver_priv = fpriv;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index bcf2a067dc41..f80e3aca9c78 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -159,9 +159,9 @@ struct amdgpu_mes {
int hung_queue_db_array_size;
int hung_queue_hqd_info_offset;
- struct amdgpu_bo *hung_queue_db_array_gpu_obj[AMDGPU_MAX_MES_PIPES];
- uint64_t hung_queue_db_array_gpu_addr[AMDGPU_MAX_MES_PIPES];
- void *hung_queue_db_array_cpu_addr[AMDGPU_MAX_MES_PIPES];
+ struct amdgpu_bo *hung_queue_db_array_gpu_obj[AMDGPU_MAX_MES_INST_PIPES];
+ uint64_t hung_queue_db_array_gpu_addr[AMDGPU_MAX_MES_INST_PIPES];
+ void *hung_queue_db_array_cpu_addr[AMDGPU_MAX_MES_INST_PIPES];
/* cooperative dispatch */
bool enable_coop_mode;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
index 1ca9d4ed8063..6b8214650e5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
@@ -63,14 +63,40 @@ struct amdgpu_mmhub_funcs {
uint64_t page_table_base);
void (*update_power_gating)(struct amdgpu_device *adev,
bool enable);
+ int (*get_xgmi_info)(struct amdgpu_device *adev);
+};
+
+struct amdgpu_mmhub_client_ids {
+ const char * const (*names)[2];
+ unsigned int size;
};
struct amdgpu_mmhub {
struct ras_common_if *ras_if;
const struct amdgpu_mmhub_funcs *funcs;
struct amdgpu_mmhub_ras *ras;
+ struct amdgpu_mmhub_client_ids client_ids;
};
+static inline void
+amdgpu_mmhub_init_client_info(struct amdgpu_mmhub *mmhub,
+ const char * const (*names)[2],
+ unsigned int size)
+{
+ mmhub->client_ids.names = names;
+ mmhub->client_ids.size = size;
+}
+
+static inline const char *
+amdgpu_mmhub_client_name(struct amdgpu_mmhub *mmhub,
+ u32 cid, bool is_write)
+{
+ if (cid < mmhub->client_ids.size)
+ return mmhub->client_ids.names[cid][is_write];
+
+ return NULL;
+}
+
int amdgpu_mmhub_ras_sw_init(struct amdgpu_device *adev);
#endif
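
Editor's note: a sketch of how an IP-specific mmhub backend might feed the new client-ID table and how a fault handler could query it; the table contents and the example_* function names are invented for illustration, only the two helpers come from the hunk above.

static const char * const example_mmhub_client_ids[][2] = {
        /* [cid][0] = read client name, [cid][1] = write client name */
        { "MP0", "MP0" },
        { "MP1", "MP1" },
        { "VMC", "VMC" },
};

static void example_mmhub_sw_init(struct amdgpu_device *adev)
{
        amdgpu_mmhub_init_client_info(&adev->mmhub,
                                      example_mmhub_client_ids,
                                      ARRAY_SIZE(example_mmhub_client_ids));
}

static void example_report_fault(struct amdgpu_device *adev, u32 cid,
                                 bool is_write)
{
        /* amdgpu_mmhub_client_name() returns NULL for out-of-range IDs,
         * so unknown clients never index past the table. */
        const char *name = amdgpu_mmhub_client_name(&adev->mmhub, cid,
                                                    is_write);

        dev_err(adev->dev, "MMHUB fault from client %u (%s, %s)\n",
                cid, name ? name : "unknown", is_write ? "write" : "read");
}
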
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index e244c12ceb23..51ab1a332615 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -44,6 +44,7 @@
#include <drm/display/drm_dp_mst_helper.h>
#include "modules/inc/mod_freesync.h"
#include "amdgpu_dm_irq_params.h"
+#include "amdgpu_dm_ism.h"
struct amdgpu_bo;
struct amdgpu_device;
@@ -486,6 +487,10 @@ struct amdgpu_crtc {
int deferred_flip_completion;
/* parameters access from DM IRQ handler */
struct dm_irq_params dm_irq_params;
+
+ /* DM idle state manager */
+ struct amdgpu_dm_ism ism;
+
/* pll sharing */
struct amdgpu_atom_ss ss;
bool ss_enabled;
@@ -624,7 +629,7 @@ struct amdgpu_connector {
bool use_digital;
/* we need to mind the EDID between detect
and get modes due to analog/digital/tvencoder */
- struct edid *edid;
+ const struct drm_edid *edid;
void *con_priv;
bool dac_load_detect;
bool detected_by_load; /* if the connection status was determined by load */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index aaa8cdc122c4..b6aabac39b46 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1274,7 +1274,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
if (abo->tbo.base.dma_buf && !drm_gem_is_imported(&abo->tbo.base) &&
old_mem && old_mem->mem_type != TTM_PL_SYSTEM)
- dma_buf_move_notify(abo->tbo.base.dma_buf);
+ dma_buf_invalidate_mappings(abo->tbo.base.dma_buf);
/* move_notify is called before move happens */
trace_amdgpu_bo_move(abo, new_mem ? new_mem->mem_type : -1,
@@ -1325,7 +1325,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
if (r)
goto out;
- r = amdgpu_fill_buffer(&adev->mman.clear_entity, abo, 0, &bo->base._resv,
+ r = amdgpu_fill_buffer(amdgpu_ttm_next_clear_entity(adev),
+ abo, 0, &bo->base._resv,
&fence, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
if (WARN_ON(r))
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 27b67da9fdac..d39b695cd925 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -3096,6 +3096,13 @@ static int psp_load_non_psp_fw(struct psp_context *psp)
*/
continue;
+ /* IMU ucode is part of the IFWI and is loaded by MP0 15.0.8 */
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) ==
+ IP_VERSION(15, 0, 8) &&
+ (ucode->ucode_id == AMDGPU_UCODE_ID_IMU_I ||
+ ucode->ucode_id == AMDGPU_UCODE_ID_IMU_D))
+ continue;
+
psp_print_fw_hdr(psp, ucode);
ret = psp_execute_ip_fw_load(psp, ucode);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index c0336ca9bf6a..6edcb7713299 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -21,6 +21,7 @@
*
*
*/
+#include "amdgpu_reg_access.h"
#include <linux/debugfs.h>
#include <linux/list.h>
#include <linux/module.h>
@@ -42,6 +43,7 @@
#include "amdgpu_reset.h"
#include "amdgpu_psp.h"
#include "amdgpu_ras_mgr.h"
+#include "amdgpu_virt_ras_cmd.h"
#ifdef CONFIG_X86_MCE_AMD
#include <asm/mce.h>
@@ -228,19 +230,30 @@ static int amdgpu_check_address_validity(struct amdgpu_device *adev,
return 0;
if (amdgpu_sriov_vf(adev)) {
- if (amdgpu_virt_check_vf_critical_region(adev, address, &hit))
- return -EPERM;
- return hit ? -EACCES : 0;
+ if (amdgpu_uniras_enabled(adev)) {
+ if (amdgpu_virt_ras_check_address_validity(adev, address, &hit))
+ return -EPERM;
+ if (hit)
+ return -EACCES;
+ } else {
+ if (amdgpu_virt_check_vf_critical_region(adev, address, &hit))
+ return -EPERM;
+ return hit ? -EACCES : 0;
+ }
}
if ((address >= adev->gmc.mc_vram_size) ||
(address >= RAS_UMC_INJECT_ADDR_LIMIT))
return -EFAULT;
- if (amdgpu_uniras_enabled(adev))
- count = amdgpu_ras_mgr_lookup_bad_pages_in_a_row(adev, address,
- page_pfns, ARRAY_SIZE(page_pfns));
- else
+ if (amdgpu_uniras_enabled(adev)) {
+ if (amdgpu_sriov_vf(adev))
+ count = amdgpu_virt_ras_convert_retired_address(adev, address,
+ page_pfns, ARRAY_SIZE(page_pfns));
+ else
+ count = amdgpu_ras_mgr_lookup_bad_pages_in_a_row(adev, address,
+ page_pfns, ARRAY_SIZE(page_pfns));
+ } else
count = amdgpu_umc_lookup_bad_pages_in_a_row(adev,
address, page_pfns, ARRAY_SIZE(page_pfns));
@@ -2237,6 +2250,7 @@ static bool amdgpu_ras_aca_is_supported(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 12):
case IP_VERSION(13, 0, 14):
+ case IP_VERSION(13, 0, 15):
ret = true;
break;
default:
@@ -3118,9 +3132,11 @@ static int __amdgpu_ras_convert_rec_array_from_rom(struct amdgpu_device *adev,
enum amdgpu_memory_partition nps)
{
int i = 0;
+ uint64_t chan_idx_v2;
enum amdgpu_memory_partition save_nps;
save_nps = (bps[0].retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK;
+ chan_idx_v2 = bps[0].retired_page & UMC_CHANNEL_IDX_V2;
/*old asics just have pa in eeprom*/
if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12) {
@@ -3132,7 +3148,7 @@ static int __amdgpu_ras_convert_rec_array_from_rom(struct amdgpu_device *adev,
for (i = 0; i < adev->umc.retire_unit; i++)
bps[i].retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT);
- if (save_nps) {
+ if (save_nps || chan_idx_v2) {
if (save_nps == nps) {
if (amdgpu_umc_pages_in_a_row(adev, err_data,
bps[0].retired_page << AMDGPU_GPU_PAGE_SHIFT))
@@ -3176,10 +3192,12 @@ static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev,
enum amdgpu_memory_partition nps)
{
int i = 0;
+ uint64_t chan_idx_v2;
enum amdgpu_memory_partition save_nps;
if (!amdgpu_ras_smu_eeprom_supported(adev)) {
save_nps = (bps->retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK;
+ chan_idx_v2 = bps->retired_page & UMC_CHANNEL_IDX_V2;
bps->retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT);
} else {
/* if pmfw manages eeprom, save_nps is not stored on eeprom,
@@ -3201,16 +3219,19 @@ static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev,
err_data->err_addr[i].mcumc_id = bps->mcumc_id;
}
} else {
- if (bps->address) {
+ if (save_nps || chan_idx_v2) {
if (amdgpu_ras_mca2pa_by_idx(adev, bps, err_data))
return -EINVAL;
} else {
/* for specific old eeprom data, mca address is not stored,
* calc it from pa
*/
- if (amdgpu_umc_pa2mca(adev, bps->retired_page << AMDGPU_GPU_PAGE_SHIFT,
- &(bps->address), AMDGPU_NPS1_PARTITION_MODE))
- return -EINVAL;
+ if (bps->address == 0 &&
+ amdgpu_umc_pa2mca(adev,
+ bps->retired_page << AMDGPU_GPU_PAGE_SHIFT,
+ &(bps->address),
+ AMDGPU_NPS1_PARTITION_MODE))
+ return -EINVAL;
if (amdgpu_ras_mca2pa(adev, bps, err_data))
return -EOPNOTSUPP;
@@ -3980,6 +4001,7 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 12):
case IP_VERSION(13, 0, 14):
+ case IP_VERSION(13, 0, 15):
return true;
default:
return false;
@@ -3993,6 +4015,7 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 10):
case IP_VERSION(13, 0, 12):
case IP_VERSION(13, 0, 14):
+ case IP_VERSION(13, 0, 15):
case IP_VERSION(14, 0, 3):
return true;
default:
@@ -4162,7 +4185,8 @@ init_ras_enabled_flag:
adev->aca.is_enabled =
(amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
- amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14));
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 15));
}
/* bad page feature is not applicable to specific app platform */
@@ -4250,6 +4274,7 @@ static void amdgpu_ras_init_reserved_vram_size(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 15):
con->reserved_pages_in_bytes = AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT;
break;
case IP_VERSION(13, 0, 14):
@@ -5516,11 +5541,11 @@ static void amdgpu_ras_boot_time_error_reporting(struct amdgpu_device *adev,
* is changed. In such case, replace the aqua_vanjaram implementation
* with more common helper */
reg_addr = (mmMP0_SMN_C2PMSG_92 << 2) +
- aqua_vanjaram_encode_ext_smn_addressing(instance);
+ amdgpu_reg_get_smn_base64(adev, MP0_HWIP, instance);
fw_status = amdgpu_device_indirect_rreg_ext(adev, reg_addr);
reg_addr = (mmMP0_SMN_C2PMSG_126 << 2) +
- aqua_vanjaram_encode_ext_smn_addressing(instance);
+ amdgpu_reg_get_smn_base64(adev, MP0_HWIP, instance);
boot_error = amdgpu_device_indirect_rreg_ext(adev, reg_addr);
socket_id = AMDGPU_RAS_GPU_ERR_SOCKET_ID(boot_error);
@@ -5586,7 +5611,7 @@ static bool amdgpu_ras_boot_error_detected(struct amdgpu_device *adev,
int retry_loop;
reg_addr = (mmMP0_SMN_C2PMSG_92 << 2) +
- aqua_vanjaram_encode_ext_smn_addressing(instance);
+ amdgpu_reg_get_smn_base64(adev, MP0_HWIP, instance);
for (retry_loop = 0; retry_loop < AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT; retry_loop++) {
reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr);
@@ -5665,7 +5690,7 @@ int amdgpu_ras_add_critical_region(struct amdgpu_device *adev,
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct amdgpu_vram_mgr_resource *vres;
struct ras_critical_region *region;
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
int ret = 0;
if (!bo || !bo->tbo.resource)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 6fba9d5b29ea..cdf4909592d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -508,6 +508,9 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
control->bad_channel_bitmap = 0;
amdgpu_dpm_send_hbm_bad_channel_flag(adev, control->bad_channel_bitmap);
con->update_channel_flag = false;
+ /* there is no record on eeprom now, clear the counter */
+ if (con->eh_data)
+ con->eh_data->count_saved = 0;
amdgpu_ras_debugfs_set_ret_size(control);
@@ -1555,6 +1558,8 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
unsigned char buf[RAS_TABLE_HEADER_SIZE] = { 0 };
struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ int dev_var = adev->pdev->device & 0xF;
+ uint32_t vram_type = adev->gmc.vram_type;
int res;
if (amdgpu_ras_smu_eeprom_supported(adev))
@@ -1594,6 +1599,12 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
return amdgpu_ras_eeprom_reset_table(control);
}
+ if (!(adev->flags & AMD_IS_APU) && (dev_var == 0x5) &&
+ (vram_type == AMDGPU_VRAM_TYPE_HBM3E) &&
+ (hdr->version < RAS_TABLE_VER_V3)) {
+ return amdgpu_ras_eeprom_reset_table(control);
+ }
+
switch (hdr->version) {
case RAS_TABLE_VER_V2_1:
case RAS_TABLE_VER_V3:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reg_access.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reg_access.c
new file mode 100644
index 000000000000..540040c76058
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reg_access.c
@@ -0,0 +1,958 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/delay.h>
+
+#include "amdgpu.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_trace.h"
+#include "amdgpu_virt.h"
+#include "amdgpu_reg_access.h"
+
+#define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
+#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
+#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)
+
+void amdgpu_reg_access_init(struct amdgpu_device *adev)
+{
+ spin_lock_init(&adev->reg.smc.lock);
+ adev->reg.smc.rreg = NULL;
+ adev->reg.smc.wreg = NULL;
+
+ spin_lock_init(&adev->reg.uvd_ctx.lock);
+ adev->reg.uvd_ctx.rreg = NULL;
+ adev->reg.uvd_ctx.wreg = NULL;
+
+ spin_lock_init(&adev->reg.didt.lock);
+ adev->reg.didt.rreg = NULL;
+ adev->reg.didt.wreg = NULL;
+
+ spin_lock_init(&adev->reg.gc_cac.lock);
+ adev->reg.gc_cac.rreg = NULL;
+ adev->reg.gc_cac.wreg = NULL;
+
+ spin_lock_init(&adev->reg.se_cac.lock);
+ adev->reg.se_cac.rreg = NULL;
+ adev->reg.se_cac.wreg = NULL;
+
+ spin_lock_init(&adev->reg.audio_endpt.lock);
+ adev->reg.audio_endpt.rreg = NULL;
+ adev->reg.audio_endpt.wreg = NULL;
+
+ spin_lock_init(&adev->reg.pcie.lock);
+ adev->reg.pcie.rreg = NULL;
+ adev->reg.pcie.wreg = NULL;
+ adev->reg.pcie.rreg_ext = NULL;
+ adev->reg.pcie.wreg_ext = NULL;
+ adev->reg.pcie.rreg64 = NULL;
+ adev->reg.pcie.wreg64 = NULL;
+ adev->reg.pcie.rreg64_ext = NULL;
+ adev->reg.pcie.wreg64_ext = NULL;
+ adev->reg.pcie.port_rreg = NULL;
+ adev->reg.pcie.port_wreg = NULL;
+}
+
+uint32_t amdgpu_reg_smc_rd32(struct amdgpu_device *adev, uint32_t reg)
+{
+ if (!adev->reg.smc.rreg) {
+ dev_err_once(adev->dev, "SMC register read not supported\n");
+ return 0;
+ }
+ return adev->reg.smc.rreg(adev, reg);
+}
+
+void amdgpu_reg_smc_wr32(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
+{
+ if (!adev->reg.smc.wreg) {
+ dev_err_once(adev->dev, "SMC register write not supported\n");
+ return;
+ }
+ adev->reg.smc.wreg(adev, reg, v);
+}
+
+uint32_t amdgpu_reg_uvd_ctx_rd32(struct amdgpu_device *adev, uint32_t reg)
+{
+ if (!adev->reg.uvd_ctx.rreg) {
+ dev_err_once(adev->dev,
+ "UVD_CTX register read not supported\n");
+ return 0;
+ }
+ return adev->reg.uvd_ctx.rreg(adev, reg);
+}
+
+void amdgpu_reg_uvd_ctx_wr32(struct amdgpu_device *adev, uint32_t reg,
+ uint32_t v)
+{
+ if (!adev->reg.uvd_ctx.wreg) {
+ dev_err_once(adev->dev,
+ "UVD_CTX register write not supported\n");
+ return;
+ }
+ adev->reg.uvd_ctx.wreg(adev, reg, v);
+}
+
+uint32_t amdgpu_reg_didt_rd32(struct amdgpu_device *adev, uint32_t reg)
+{
+ if (!adev->reg.didt.rreg) {
+ dev_err_once(adev->dev, "DIDT register read not supported\n");
+ return 0;
+ }
+ return adev->reg.didt.rreg(adev, reg);
+}
+
+void amdgpu_reg_didt_wr32(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
+{
+ if (!adev->reg.didt.wreg) {
+ dev_err_once(adev->dev, "DIDT register write not supported\n");
+ return;
+ }
+ adev->reg.didt.wreg(adev, reg, v);
+}
+
+uint32_t amdgpu_reg_gc_cac_rd32(struct amdgpu_device *adev, uint32_t reg)
+{
+ if (!adev->reg.gc_cac.rreg) {
+ dev_err_once(adev->dev, "GC_CAC register read not supported\n");
+ return 0;
+ }
+ return adev->reg.gc_cac.rreg(adev, reg);
+}
+
+void amdgpu_reg_gc_cac_wr32(struct amdgpu_device *adev, uint32_t reg,
+ uint32_t v)
+{
+ if (!adev->reg.gc_cac.wreg) {
+ dev_err_once(adev->dev,
+ "GC_CAC register write not supported\n");
+ return;
+ }
+ adev->reg.gc_cac.wreg(adev, reg, v);
+}
+
+uint32_t amdgpu_reg_se_cac_rd32(struct amdgpu_device *adev, uint32_t reg)
+{
+ if (!adev->reg.se_cac.rreg) {
+ dev_err_once(adev->dev, "SE_CAC register read not supported\n");
+ return 0;
+ }
+ return adev->reg.se_cac.rreg(adev, reg);
+}
+
+void amdgpu_reg_se_cac_wr32(struct amdgpu_device *adev, uint32_t reg,
+ uint32_t v)
+{
+ if (!adev->reg.se_cac.wreg) {
+ dev_err_once(adev->dev,
+ "SE_CAC register write not supported\n");
+ return;
+ }
+ adev->reg.se_cac.wreg(adev, reg, v);
+}
+
+uint32_t amdgpu_reg_audio_endpt_rd32(struct amdgpu_device *adev, uint32_t block,
+ uint32_t reg)
+{
+ if (!adev->reg.audio_endpt.rreg) {
+ dev_err_once(adev->dev,
+ "AUDIO_ENDPT register read not supported\n");
+ return 0;
+ }
+ return adev->reg.audio_endpt.rreg(adev, block, reg);
+}
+
+void amdgpu_reg_audio_endpt_wr32(struct amdgpu_device *adev, uint32_t block,
+ uint32_t reg, uint32_t v)
+{
+ if (!adev->reg.audio_endpt.wreg) {
+ dev_err_once(adev->dev,
+ "AUDIO_ENDPT register write not supported\n");
+ return;
+ }
+ adev->reg.audio_endpt.wreg(adev, block, reg, v);
+}
+
+uint32_t amdgpu_reg_pcie_rd32(struct amdgpu_device *adev, uint32_t reg)
+{
+ if (!adev->reg.pcie.rreg) {
+ dev_err_once(adev->dev, "PCIE register read not supported\n");
+ return 0;
+ }
+ return adev->reg.pcie.rreg(adev, reg);
+}
+
+void amdgpu_reg_pcie_wr32(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
+{
+ if (!adev->reg.pcie.wreg) {
+ dev_err_once(adev->dev, "PCIE register write not supported\n");
+ return;
+ }
+ adev->reg.pcie.wreg(adev, reg, v);
+}
+
+uint32_t amdgpu_reg_pcie_ext_rd32(struct amdgpu_device *adev, uint64_t reg)
+{
+ if (!adev->reg.pcie.rreg_ext) {
+ dev_err_once(adev->dev, "PCIE EXT register read not supported\n");
+ return 0;
+ }
+ return adev->reg.pcie.rreg_ext(adev, reg);
+}
+
+void amdgpu_reg_pcie_ext_wr32(struct amdgpu_device *adev, uint64_t reg,
+ uint32_t v)
+{
+ if (!adev->reg.pcie.wreg_ext) {
+ dev_err_once(adev->dev, "PCIE EXT register write not supported\n");
+ return;
+ }
+ adev->reg.pcie.wreg_ext(adev, reg, v);
+}
+
+uint64_t amdgpu_reg_pcie_rd64(struct amdgpu_device *adev, uint32_t reg)
+{
+ if (!adev->reg.pcie.rreg64) {
+ dev_err_once(adev->dev, "PCIE 64-bit register read not supported\n");
+ return 0;
+ }
+ return adev->reg.pcie.rreg64(adev, reg);
+}
+
+void amdgpu_reg_pcie_wr64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
+{
+ if (!adev->reg.pcie.wreg64) {
+ dev_err_once(adev->dev, "PCIE 64-bit register write not supported\n");
+ return;
+ }
+ adev->reg.pcie.wreg64(adev, reg, v);
+}
+
+uint64_t amdgpu_reg_pcie_ext_rd64(struct amdgpu_device *adev, uint64_t reg)
+{
+ if (!adev->reg.pcie.rreg64_ext) {
+ dev_err_once(adev->dev, "PCIE EXT 64-bit register read not supported\n");
+ return 0;
+ }
+ return adev->reg.pcie.rreg64_ext(adev, reg);
+}
+
+void amdgpu_reg_pcie_ext_wr64(struct amdgpu_device *adev, uint64_t reg,
+ uint64_t v)
+{
+ if (!adev->reg.pcie.wreg64_ext) {
+ dev_err_once(adev->dev, "PCIE EXT 64-bit register write not supported\n");
+ return;
+ }
+ adev->reg.pcie.wreg64_ext(adev, reg, v);
+}
+
+uint32_t amdgpu_reg_pciep_rd32(struct amdgpu_device *adev, uint32_t reg)
+{
+ if (!adev->reg.pcie.port_rreg) {
+ dev_err_once(adev->dev, "PCIEP register read not supported\n");
+ return 0;
+ }
+ return adev->reg.pcie.port_rreg(adev, reg);
+}
+
+void amdgpu_reg_pciep_wr32(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
+{
+ if (!adev->reg.pcie.port_wreg) {
+ dev_err_once(adev->dev, "PCIEP register write not supported\n");
+ return;
+ }
+ adev->reg.pcie.port_wreg(adev, reg, v);
+}
+
+static int amdgpu_reg_get_smn_base_version(struct amdgpu_device *adev)
+{
+ struct pci_dev *pdev = adev->pdev;
+ int id;
+
+ if (amdgpu_sriov_vf(adev))
+ return -EOPNOTSUPP;
+
+ id = (pdev->device >> 4) & 0xFFFF;
+ if (id == 0x74A || id == 0x74B || id == 0x75A || id == 0x75B)
+ return 1;
+
+ return -EOPNOTSUPP;
+}
+
+uint64_t amdgpu_reg_get_smn_base64(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ int die_inst)
+{
+ if (!adev->reg.smn.get_smn_base) {
+ int version = amdgpu_reg_get_smn_base_version(adev);
+ switch (version) {
+ case 1:
+ return amdgpu_reg_smn_v1_0_get_base(adev, block,
+ die_inst);
+ default:
+ dev_err_once(
+ adev->dev,
+ "SMN base address query not supported for this device\n");
+ return 0;
+ }
+ }
+ return adev->reg.smn.get_smn_base(adev, block, die_inst);
+}
+
+uint64_t amdgpu_reg_smn_v1_0_get_base(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ int die_inst)
+{
+ uint64_t smn_base;
+
+ if (die_inst == 0)
+ return 0;
+
+ switch (block) {
+ case XGMI_HWIP:
+ case NBIO_HWIP:
+ case MP0_HWIP:
+ case UMC_HWIP:
+ case DF_HWIP:
+ smn_base = ((uint64_t)(die_inst & 0x3) << 32) | (1ULL << 34);
+ break;
+ default:
+ dev_warn_once(
+ adev->dev,
+ "SMN base address query not supported for this block %d\n",
+ block);
+ smn_base = 0;
+ break;
+ }
+
+ return smn_base;
+}
+
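
Editor's note: a worked instance of the v1.0 encoding above, with the arithmetic spelled out (illustrative values only):

/* For die_inst 2, any supported block yields
 *   smn_base = ((2 & 0x3) << 32) | (1ULL << 34)
 *            = 0x200000000 | 0x400000000 = 0x600000000,
 * which callers add to the 32-bit SMN register offset, as in the
 * amdgpu_ras.c hunk earlier in this series:
 *   reg_addr = (mmMP0_SMN_C2PMSG_92 << 2) +
 *              amdgpu_reg_get_smn_base64(adev, MP0_HWIP, 2);
 */
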
+/*
+ * register access helper functions.
+ */
+
+/**
+ * amdgpu_device_rreg - read a memory mapped IO or indirect register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ * @acc_flags: access flags which require special behavior
+ *
+ * Returns the 32 bit value from the offset specified.
+ */
+uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg,
+ uint32_t acc_flags)
+{
+ uint32_t ret;
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ if ((reg * 4) < adev->rmmio_size) {
+ if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
+ amdgpu_sriov_runtime(adev) &&
+ down_read_trylock(&adev->reset_domain->sem)) {
+ ret = amdgpu_kiq_rreg(adev, reg, 0);
+ up_read(&adev->reset_domain->sem);
+ } else {
+ ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
+ }
+ } else {
+ ret = amdgpu_reg_pcie_rd32(adev, reg * 4);
+ }
+
+ trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
+
+ return ret;
+}
+
+/*
+ * MMIO register read helpers taking a byte offset
+ * @offset: byte offset from MMIO start
+ */
+
+/**
+ * amdgpu_mm_rreg8 - read a memory mapped IO register
+ *
+ * @adev: amdgpu_device pointer
+ * @offset: byte aligned register offset
+ *
+ * Returns the 8 bit value from the offset specified.
+ */
+uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
+{
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ if (offset < adev->rmmio_size)
+ return (readb(adev->rmmio + offset));
+ BUG();
+}
+
+/**
+ * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ * @acc_flags: access flags which require special behavior
+ * @xcc_id: xcc accelerated compute core id
+ *
+ * Returns the 32 bit value from the offset specified.
+ */
+uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev, uint32_t reg,
+ uint32_t acc_flags, uint32_t xcc_id)
+{
+ uint32_t ret, rlcg_flag;
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ if ((reg * 4) < adev->rmmio_size) {
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_runtime(adev) &&
+ adev->gfx.rlc.rlcg_reg_access_supported &&
+ amdgpu_virt_get_rlcg_reg_access_flag(
+ adev, acc_flags, GC_HWIP, false, &rlcg_flag)) {
+ ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag,
+ GET_INST(GC, xcc_id));
+ } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
+ amdgpu_sriov_runtime(adev) &&
+ down_read_trylock(&adev->reset_domain->sem)) {
+ ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
+ up_read(&adev->reset_domain->sem);
+ } else {
+ ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
+ }
+ } else {
+ ret = amdgpu_reg_pcie_rd32(adev, reg * 4);
+ }
+
+ return ret;
+}
+
+/*
+ * MMIO register write helpers taking a byte offset
+ * @offset: byte offset from MMIO start
+ * @value: the value to be written to the register
+ */
+
+/**
+ * amdgpu_mm_wreg8 - write a memory mapped IO register
+ *
+ * @adev: amdgpu_device pointer
+ * @offset: byte aligned register offset
+ * @value: 8 bit value to write
+ *
+ * Writes the value specified to the offset specified.
+ */
+void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
+{
+ if (amdgpu_device_skip_hw_access(adev))
+ return;
+
+ if (offset < adev->rmmio_size)
+ writeb(value, adev->rmmio + offset);
+ else
+ BUG();
+}
+
+/**
+ * amdgpu_device_wreg - write to a memory mapped IO or indirect register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ * @v: 32 bit value to write to the register
+ * @acc_flags: access flags which require special behavior
+ *
+ * Writes the value specified to the offset specified.
+ */
+void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
+ uint32_t acc_flags)
+{
+ if (amdgpu_device_skip_hw_access(adev))
+ return;
+
+ if ((reg * 4) < adev->rmmio_size) {
+ if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
+ amdgpu_sriov_runtime(adev) &&
+ down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_kiq_wreg(adev, reg, v, 0);
+ up_read(&adev->reset_domain->sem);
+ } else {
+ writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
+ }
+ } else {
+ amdgpu_reg_pcie_wr32(adev, reg * 4, v);
+ }
+
+ trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
+}
+
+/**
+ * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: mmio/rlc register
+ * @v: value to write
+ * @xcc_id: xcc accelerated compute core id
+ *
+ * This function is invoked only for debugfs register access.
+ */
+void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg,
+ uint32_t v, uint32_t xcc_id)
+{
+ if (amdgpu_device_skip_hw_access(adev))
+ return;
+
+ if (amdgpu_sriov_fullaccess(adev) && adev->gfx.rlc.funcs &&
+ adev->gfx.rlc.funcs->is_rlcg_access_range) {
+ if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
+ return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
+ } else if ((reg * 4) >= adev->rmmio_size) {
+ amdgpu_reg_pcie_wr32(adev, reg * 4, v);
+ } else {
+ writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
+ }
+}
+
+/**
+ * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ * @v: 32 bit value to write to the register
+ * @acc_flags: access flags which require special behavior
+ * @xcc_id: xcc accelerated compute core id
+ *
+ * Writes the value specified to the offset specified.
+ */
+void amdgpu_device_xcc_wreg(struct amdgpu_device *adev, uint32_t reg,
+ uint32_t v, uint32_t acc_flags, uint32_t xcc_id)
+{
+ uint32_t rlcg_flag;
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return;
+
+ if ((reg * 4) < adev->rmmio_size) {
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_runtime(adev) &&
+ adev->gfx.rlc.rlcg_reg_access_supported &&
+ amdgpu_virt_get_rlcg_reg_access_flag(
+ adev, acc_flags, GC_HWIP, true, &rlcg_flag)) {
+ amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag,
+ GET_INST(GC, xcc_id));
+ } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
+ amdgpu_sriov_runtime(adev) &&
+ down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_kiq_wreg(adev, reg, v, xcc_id);
+ up_read(&adev->reset_domain->sem);
+ } else {
+ writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
+ }
+ } else {
+ amdgpu_reg_pcie_wr32(adev, reg * 4, v);
+ }
+}
+
+/**
+ * amdgpu_device_indirect_rreg - read an indirect register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg_addr: indirect register address to read from
+ *
+ * Returns the value of indirect register @reg_addr
+ */
+u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev, u32 reg_addr)
+{
+ unsigned long flags, pcie_index, pcie_data;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_data_offset;
+ u32 r;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->reg.pcie.lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ r = readl(pcie_data_offset);
+ spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
+
+ return r;
+}
+
+u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev, u64 reg_addr)
+{
+ unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
+ u32 r;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_index_hi_offset;
+ void __iomem *pcie_data_offset;
+
+ if (unlikely(!adev->nbio.funcs)) {
+ pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
+ pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
+ } else {
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ }
+
+ if (reg_addr >> 32) {
+ if (unlikely(!adev->nbio.funcs))
+ pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
+ else
+ pcie_index_hi =
+ adev->nbio.funcs->get_pcie_index_hi_offset(
+ adev);
+ } else {
+ pcie_index_hi = 0;
+ }
+
+ spin_lock_irqsave(&adev->reg.pcie.lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+ if (pcie_index_hi != 0)
+ pcie_index_hi_offset =
+ (void __iomem *)adev->rmmio + pcie_index_hi * 4;
+
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ r = readl(pcie_data_offset);
+
+ /* clear the high bits */
+ if (pcie_index_hi != 0) {
+ writel(0, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+
+ spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
+
+ return r;
+}
+
+/**
+ * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg_addr: indirect register address to read from
+ *
+ * Returns the value of indirect register @reg_addr
+ */
+u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev, u32 reg_addr)
+{
+ unsigned long flags, pcie_index, pcie_data;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_data_offset;
+ u64 r;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->reg.pcie.lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+
+ /* read low 32 bits */
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ r = readl(pcie_data_offset);
+ /* read high 32 bits */
+ writel(reg_addr + 4, pcie_index_offset);
+ readl(pcie_index_offset);
+ r |= ((u64)readl(pcie_data_offset) << 32);
+ spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
+
+ return r;
+}
+
+u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev, u64 reg_addr)
+{
+ unsigned long flags, pcie_index, pcie_data;
+ unsigned long pcie_index_hi = 0;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_index_hi_offset;
+ void __iomem *pcie_data_offset;
+ u64 r;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
+ pcie_index_hi =
+ adev->nbio.funcs->get_pcie_index_hi_offset(adev);
+
+ spin_lock_irqsave(&adev->reg.pcie.lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+ if (pcie_index_hi != 0)
+ pcie_index_hi_offset =
+ (void __iomem *)adev->rmmio + pcie_index_hi * 4;
+
+ /* read low 32 bits */
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ r = readl(pcie_data_offset);
+ /* read high 32 bits */
+ writel(reg_addr + 4, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ r |= ((u64)readl(pcie_data_offset) << 32);
+
+ /* clear the high bits */
+ if (pcie_index_hi != 0) {
+ writel(0, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+
+ spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
+
+ return r;
+}
+
+/**
+ * amdgpu_device_indirect_wreg - write an indirect register address
+ *
+ * @adev: amdgpu_device pointer
+ * @reg_addr: indirect register offset
+ * @reg_data: indirect register data
+ *
+ */
+void amdgpu_device_indirect_wreg(struct amdgpu_device *adev, u32 reg_addr,
+ u32 reg_data)
+{
+ unsigned long flags, pcie_index, pcie_data;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_data_offset;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->reg.pcie.lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ writel(reg_data, pcie_data_offset);
+ readl(pcie_data_offset);
+ spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
+}
+
+void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev, u64 reg_addr,
+ u32 reg_data)
+{
+ unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_index_hi_offset;
+ void __iomem *pcie_data_offset;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
+ pcie_index_hi =
+ adev->nbio.funcs->get_pcie_index_hi_offset(adev);
+ else
+ pcie_index_hi = 0;
+
+ spin_lock_irqsave(&adev->reg.pcie.lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+ if (pcie_index_hi != 0)
+ pcie_index_hi_offset =
+ (void __iomem *)adev->rmmio + pcie_index_hi * 4;
+
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ writel(reg_data, pcie_data_offset);
+ readl(pcie_data_offset);
+
+ /* clear the high bits */
+ if (pcie_index_hi != 0) {
+ writel(0, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+
+ spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
+}
+
+/**
+ * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
+ *
+ * @adev: amdgpu_device pointer
+ * @reg_addr: indirect register offset
+ * @reg_data: indirect register data
+ *
+ */
+void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev, u32 reg_addr,
+ u64 reg_data)
+{
+ unsigned long flags, pcie_index, pcie_data;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_data_offset;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->reg.pcie.lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+
+ /* write low 32 bits */
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
+ readl(pcie_data_offset);
+ /* write high 32 bits */
+ writel(reg_addr + 4, pcie_index_offset);
+ readl(pcie_index_offset);
+ writel((u32)(reg_data >> 32), pcie_data_offset);
+ readl(pcie_data_offset);
+ spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
+}
+
+void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev, u64 reg_addr,
+ u64 reg_data)
+{
+ unsigned long flags, pcie_index, pcie_data;
+ unsigned long pcie_index_hi = 0;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_index_hi_offset;
+ void __iomem *pcie_data_offset;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
+ pcie_index_hi =
+ adev->nbio.funcs->get_pcie_index_hi_offset(adev);
+
+ spin_lock_irqsave(&adev->reg.pcie.lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+ if (pcie_index_hi != 0)
+ pcie_index_hi_offset =
+ (void __iomem *)adev->rmmio + pcie_index_hi * 4;
+
+ /* write low 32 bits */
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
+ readl(pcie_data_offset);
+ /* write high 32 bits */
+ writel(reg_addr + 4, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ writel((u32)(reg_data >> 32), pcie_data_offset);
+ readl(pcie_data_offset);
+
+ /* clear the high bits */
+ if (pcie_index_hi != 0) {
+ writel(0, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+
+ spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
+}
+
+u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags, address, data;
+ u32 r;
+
+ address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
+
+ spin_lock_irqsave(&adev->reg.pcie.lock, flags);
+ WREG32(address, reg * 4);
+ (void)RREG32(address);
+ r = RREG32(data);
+ spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
+ return r;
+}
+
+void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags, address, data;
+
+ address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
+
+ spin_lock_irqsave(&adev->reg.pcie.lock, flags);
+ WREG32(address, reg * 4);
+ (void)RREG32(address);
+ WREG32(data, v);
+ (void)RREG32(data);
+ spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
+}
+
+int amdgpu_device_wait_on_rreg(struct amdgpu_device *adev, uint32_t inst,
+ uint32_t reg_addr, char reg_name[],
+ uint32_t expected_value, uint32_t mask)
+{
+ int ret = 0;
+ uint32_t old_ = 0;
+ uint32_t tmp_ = RREG32(reg_addr);
+ uint32_t loop = adev->usec_timeout;
+
+ while ((tmp_ & (mask)) != (expected_value)) {
+ if (old_ != tmp_) {
+ loop = adev->usec_timeout;
+ old_ = tmp_;
+ } else {
+ udelay(1);
+ }
+ tmp_ = RREG32(reg_addr);
+ loop--;
+ if (!loop) {
+ dev_warn(
+ adev->dev,
+ "Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn",
+ inst, reg_name, (uint32_t)expected_value,
+ (uint32_t)(tmp_ & (mask)));
+ ret = -ETIMEDOUT;
+ break;
+ }
+ }
+ return ret;
+}
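
Editor's note: the indirect accessors in this new file all share one shape — program an INDEX register, read it back to flush the posted write, then touch DATA, all under adev->reg.pcie.lock. A self-contained model of that sequence, with arrays standing in for MMIO (no kernel APIs involved):

#include <stdint.h>
#include <stdio.h>

static uint32_t mmio[2];           /* [0] = INDEX, [1] = DATA window */
static uint32_t backing[1024];     /* the "indirect" register file */

static void fake_writel(uint32_t v, int off) { mmio[off] = v; }

static uint32_t fake_readl(int off)
{
        if (off == 1)              /* DATA forwards to backing[INDEX] */
                return backing[mmio[0]];
        return mmio[off];
}

static uint32_t indirect_rreg(uint32_t reg_addr)
{
        /* The driver holds adev->reg.pcie.lock around this sequence so
         * the INDEX/DATA pair cannot be interleaved by another CPU. */
        fake_writel(reg_addr, 0);  /* program INDEX */
        (void)fake_readl(0);       /* read back to order the posted write */
        return fake_readl(1);      /* fetch DATA */
}

int main(void)
{
        backing[42] = 0xdeadbeef;
        printf("reg 42 = 0x%08x\n", (unsigned int)indirect_rreg(42));
        return 0;
}
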
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reg_access.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reg_access.h
new file mode 100644
index 000000000000..4d88e5cd19fc
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reg_access.h
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __AMDGPU_REG_ACCESS_H__
+#define __AMDGPU_REG_ACCESS_H__
+
+#include <linux/types.h>
+#include <linux/spinlock.h>
+
+#include "amdgpu_ip.h"
+
+struct amdgpu_device;
+
+typedef uint32_t (*amdgpu_rreg_t)(struct amdgpu_device *, uint32_t);
+typedef void (*amdgpu_wreg_t)(struct amdgpu_device *, uint32_t, uint32_t);
+typedef uint32_t (*amdgpu_rreg_ext_t)(struct amdgpu_device *, uint64_t);
+typedef void (*amdgpu_wreg_ext_t)(struct amdgpu_device *, uint64_t, uint32_t);
+typedef uint64_t (*amdgpu_rreg64_t)(struct amdgpu_device *, uint32_t);
+typedef void (*amdgpu_wreg64_t)(struct amdgpu_device *, uint32_t, uint64_t);
+typedef uint64_t (*amdgpu_rreg64_ext_t)(struct amdgpu_device *, uint64_t);
+typedef void (*amdgpu_wreg64_ext_t)(struct amdgpu_device *, uint64_t, uint64_t);
+
+typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device *, uint32_t,
+ uint32_t);
+typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device *, uint32_t, uint32_t,
+ uint32_t);
+typedef uint64_t (*amdgpu_reg_get_smn_base64_t)(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ int die_inst);
+
+struct amdgpu_reg_ind {
+ spinlock_t lock;
+ amdgpu_rreg_t rreg;
+ amdgpu_wreg_t wreg;
+};
+
+struct amdgpu_reg_ind_blk {
+ spinlock_t lock;
+ amdgpu_block_rreg_t rreg;
+ amdgpu_block_wreg_t wreg;
+};
+
+struct amdgpu_reg_pcie_ind {
+ spinlock_t lock;
+ amdgpu_rreg_t rreg;
+ amdgpu_wreg_t wreg;
+ amdgpu_rreg_ext_t rreg_ext;
+ amdgpu_wreg_ext_t wreg_ext;
+ amdgpu_rreg64_t rreg64;
+ amdgpu_wreg64_t wreg64;
+ amdgpu_rreg64_ext_t rreg64_ext;
+ amdgpu_wreg64_ext_t wreg64_ext;
+ amdgpu_rreg_t port_rreg;
+ amdgpu_wreg_t port_wreg;
+};
+
+struct amdgpu_reg_smn_ext {
+ amdgpu_reg_get_smn_base64_t get_smn_base;
+};
+
+struct amdgpu_reg_access {
+ struct amdgpu_reg_ind smc;
+ struct amdgpu_reg_ind uvd_ctx;
+ struct amdgpu_reg_ind didt;
+ struct amdgpu_reg_ind gc_cac;
+ struct amdgpu_reg_ind se_cac;
+ struct amdgpu_reg_ind_blk audio_endpt;
+ struct amdgpu_reg_pcie_ind pcie;
+ struct amdgpu_reg_smn_ext smn;
+};
+
+void amdgpu_reg_access_init(struct amdgpu_device *adev);
+uint32_t amdgpu_reg_smc_rd32(struct amdgpu_device *adev, uint32_t reg);
+void amdgpu_reg_smc_wr32(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
+uint32_t amdgpu_reg_uvd_ctx_rd32(struct amdgpu_device *adev, uint32_t reg);
+void amdgpu_reg_uvd_ctx_wr32(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
+uint32_t amdgpu_reg_didt_rd32(struct amdgpu_device *adev, uint32_t reg);
+void amdgpu_reg_didt_wr32(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
+uint32_t amdgpu_reg_gc_cac_rd32(struct amdgpu_device *adev, uint32_t reg);
+void amdgpu_reg_gc_cac_wr32(struct amdgpu_device *adev, uint32_t reg,
+ uint32_t v);
+uint32_t amdgpu_reg_se_cac_rd32(struct amdgpu_device *adev, uint32_t reg);
+void amdgpu_reg_se_cac_wr32(struct amdgpu_device *adev, uint32_t reg,
+ uint32_t v);
+uint32_t amdgpu_reg_audio_endpt_rd32(struct amdgpu_device *adev, uint32_t block,
+ uint32_t reg);
+void amdgpu_reg_audio_endpt_wr32(struct amdgpu_device *adev, uint32_t block,
+ uint32_t reg, uint32_t v);
+uint32_t amdgpu_reg_pcie_rd32(struct amdgpu_device *adev, uint32_t reg);
+void amdgpu_reg_pcie_wr32(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
+uint32_t amdgpu_reg_pcie_ext_rd32(struct amdgpu_device *adev, uint64_t reg);
+void amdgpu_reg_pcie_ext_wr32(struct amdgpu_device *adev, uint64_t reg,
+ uint32_t v);
+uint64_t amdgpu_reg_pcie_rd64(struct amdgpu_device *adev, uint32_t reg);
+void amdgpu_reg_pcie_wr64(struct amdgpu_device *adev, uint32_t reg, uint64_t v);
+uint64_t amdgpu_reg_pcie_ext_rd64(struct amdgpu_device *adev, uint64_t reg);
+void amdgpu_reg_pcie_ext_wr64(struct amdgpu_device *adev, uint64_t reg,
+ uint64_t v);
+uint32_t amdgpu_reg_pciep_rd32(struct amdgpu_device *adev, uint32_t reg);
+void amdgpu_reg_pciep_wr32(struct amdgpu_device *adev, uint32_t reg,
+ uint32_t v);
+uint64_t amdgpu_reg_get_smn_base64(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ int die_inst);
+uint64_t amdgpu_reg_smn_v1_0_get_base(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ int die_inst);
+
+uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg,
+ uint32_t acc_flags);
+uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev, uint32_t reg,
+ uint32_t acc_flags, uint32_t xcc_id);
+void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
+ uint32_t acc_flags);
+void amdgpu_device_xcc_wreg(struct amdgpu_device *adev, uint32_t reg,
+ uint32_t v, uint32_t acc_flags, uint32_t xcc_id);
+void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg,
+ uint32_t v, uint32_t xcc_id);
+void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset,
+ uint8_t value);
+uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset);
+
+u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev, u32 reg_addr);
+u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev, u64 reg_addr);
+u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev, u32 reg_addr);
+u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev, u64 reg_addr);
+void amdgpu_device_indirect_wreg(struct amdgpu_device *adev, u32 reg_addr,
+ u32 reg_data);
+void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev, u64 reg_addr,
+ u32 reg_data);
+void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev, u32 reg_addr,
+ u64 reg_data);
+void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev, u64 reg_addr,
+ u64 reg_data);
+
+u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev, u32 reg);
+void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev, u32 reg, u32 v);
+
+int amdgpu_device_wait_on_rreg(struct amdgpu_device *adev, uint32_t inst,
+ uint32_t reg_addr, char reg_name[],
+ uint32_t expected_value, uint32_t mask);
+
+#endif /* __AMDGPU_REG_ACCESS_H__ */
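
Editor's note: a sketch of how a backend could populate the new callback table during early init; soc15_pcie_rreg()/soc15_pcie_wreg() are hypothetical stand-ins for whatever the SoC code actually provides.

static void example_wire_reg_callbacks(struct amdgpu_device *adev)
{
        amdgpu_reg_access_init(adev);   /* init locks, NULL all callbacks */

        /* Hook up only what the hardware supports; the amdgpu_reg_*
         * wrappers warn once and bail out for anything left NULL. */
        adev->reg.pcie.rreg = soc15_pcie_rreg;  /* hypothetical */
        adev->reg.pcie.wreg = soc15_pcie_wreg;  /* hypothetical */
}
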
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index be2e56ce1355..8908d9e08a30 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -55,7 +55,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
uint64_t start, uint64_t size,
struct amdgpu_res_cursor *cur)
{
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
struct list_head *head, *next;
struct drm_mm_node *node;
@@ -71,7 +71,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
head = &to_amdgpu_vram_mgr_resource(res)->blocks;
block = list_first_entry_or_null(head,
- struct drm_buddy_block,
+ struct gpu_buddy_block,
link);
if (!block)
goto fallback;
@@ -81,7 +81,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
next = block->link.next;
if (next != head)
- block = list_entry(next, struct drm_buddy_block, link);
+ block = list_entry(next, struct gpu_buddy_block, link);
}
cur->start = amdgpu_vram_mgr_block_start(block) + start;
@@ -125,7 +125,7 @@ fallback:
*/
static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
{
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
struct drm_mm_node *node;
struct list_head *next;
@@ -146,7 +146,7 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
block = cur->node;
next = block->link.next;
- block = list_entry(next, struct drm_buddy_block, link);
+ block = list_entry(next, struct gpu_buddy_block, link);
cur->node = block;
cur->start = amdgpu_vram_mgr_block_start(block);
@@ -175,7 +175,7 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
*/
static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
{
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
switch (cur->mem_type) {
case TTM_PL_VRAM:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 4638a686a84e..66e8a2f7afcf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -90,10 +90,13 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
/* Make sure we aren't trying to allocate more space
- * than the maximum for one submission
+ * than the maximum for one submission. Skip for reemit
+ * since we may be reemitting several submissions.
*/
- if (WARN_ON_ONCE(ndw > ring->max_dw))
- return -ENOMEM;
+ if (!ring->reemit) {
+ if (WARN_ON_ONCE(ndw > ring->max_dw))
+ return -ENOMEM;
+ }
ring->count_dw = ndw;
ring->wptr_old = ring->wptr;
@@ -105,29 +108,6 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
}
/**
- * amdgpu_ring_alloc_reemit - allocate space on the ring buffer for reemit
- *
- * @ring: amdgpu_ring structure holding ring information
- * @ndw: number of dwords to allocate in the ring buffer
- *
- * Allocate @ndw dwords in the ring buffer (all asics).
- * doesn't check the max_dw limit as we may be reemitting
- * several submissions.
- */
-static void amdgpu_ring_alloc_reemit(struct amdgpu_ring *ring, unsigned int ndw)
-{
- /* Align requested size with padding so unlock_commit can
- * pad safely */
- ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
-
- ring->count_dw = ndw;
- ring->wptr_old = ring->wptr;
-
- if (ring->funcs->begin_use)
- ring->funcs->begin_use(ring);
-}
-
-/**
* amdgpu_ring_insert_nop - insert NOP packets
*
* @ring: amdgpu_ring structure holding ring information
@@ -479,10 +459,10 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence)
return false;
- spin_lock_irqsave(fence->lock, flags);
+ dma_fence_lock_irqsave(fence, flags);
if (!dma_fence_is_signaled_locked(fence))
dma_fence_set_error(fence, -ENODATA);
- spin_unlock_irqrestore(fence->lock, flags);
+ dma_fence_unlock_irqrestore(fence, flags);
while (!dma_fence_is_signaled(fence) &&
ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0)
@@ -875,7 +855,6 @@ void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring,
int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
struct amdgpu_fence *guilty_fence)
{
- unsigned int i;
int r;
/* verify that the ring is functional */
@@ -883,16 +862,9 @@ int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
if (r)
return r;
- /* set an error on all fences from the context */
- if (guilty_fence)
- amdgpu_fence_driver_update_timedout_fence_state(guilty_fence);
- /* Re-emit the non-guilty commands */
- if (ring->ring_backup_entries_to_copy) {
- amdgpu_ring_alloc_reemit(ring, ring->ring_backup_entries_to_copy);
- for (i = 0; i < ring->ring_backup_entries_to_copy; i++)
- amdgpu_ring_write(ring, ring->ring_backup[i]);
- amdgpu_ring_commit(ring);
- }
+ /* set an error on all fences from the context and reemit */
+ amdgpu_ring_set_fence_errors_and_reemit(ring, guilty_fence);
+
return 0;
}
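
Editor's note: the dedicated amdgpu_ring_alloc_reemit() is gone; reset code now sets the new ring->reemit flag (declared in the amdgpu_ring.h hunk below) so the common alloc path skips the max_dw check. The body of amdgpu_ring_set_fence_errors_and_reemit() is not shown in this hunk, but a plausible condensation of the reemit half, built only from the fields visible in this series, would be:

static void example_reemit(struct amdgpu_ring *ring)
{
        unsigned int i;

        if (!ring->ring_backup_entries_to_copy)
                return;

        ring->reemit = true;    /* amdgpu_ring_alloc() skips the max_dw WARN */
        if (amdgpu_ring_alloc(ring, ring->ring_backup_entries_to_copy) == 0) {
                for (i = 0; i < ring->ring_backup_entries_to_copy; i++)
                        amdgpu_ring_write(ring, ring->ring_backup[i]);
                amdgpu_ring_commit(ring);
        }
        ring->reemit = false;
}
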
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index cb0fb1a989d2..ce5af137ee40 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -121,7 +121,6 @@ struct amdgpu_fence_driver {
/* sync_seq is protected by ring emission lock */
uint32_t sync_seq;
atomic_t last_seq;
- u64 signalled_wptr;
bool initialized;
struct amdgpu_irq_src *irq_src;
unsigned irq_type;
@@ -146,23 +145,23 @@ struct amdgpu_fence {
struct amdgpu_ring *ring;
ktime_t start_timestamp;
- /* wptr for the total submission for resets */
- u64 wptr;
+ /* location and size of the IB */
+ u64 ib_wptr;
+ unsigned int ib_dw_size;
+ unsigned int skip_ib_dw_start_offset;
+ unsigned int skip_ib_dw_end_offset;
/* fence context for resets */
u64 context;
- /* has this fence been reemitted */
- unsigned int reemitted;
- /* wptr for the fence for the submission */
- u64 fence_wptr_start;
- u64 fence_wptr_end;
+ /* idx for ring backups */
+ unsigned int backup_idx;
};
extern const struct drm_sched_backend_ops amdgpu_sched_ops;
void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error);
void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);
-void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af);
-void amdgpu_fence_save_wptr(struct amdgpu_fence *af);
+void amdgpu_ring_set_fence_errors_and_reemit(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence);
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
@@ -172,8 +171,8 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev);
void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev);
int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev);
void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev);
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
- unsigned int flags);
+void amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
+ unsigned int flags);
int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
uint32_t timeout);
bool amdgpu_fence_process(struct amdgpu_ring *ring);
@@ -313,6 +312,7 @@ struct amdgpu_ring {
/* backups for resets */
uint32_t *ring_backup;
unsigned int ring_backup_entries_to_copy;
+ bool reemit;
unsigned rptr_offs;
u64 rptr_gpu_addr;
u32 *rptr_cpu_addr;
@@ -522,6 +522,17 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
ring->count_dw -= count_dw;
}
+static inline unsigned int amdgpu_ring_get_dw_distance(struct amdgpu_ring *ring,
+ u64 start_wptr, u64 end_wptr)
+{
+ unsigned int start = start_wptr & ring->buf_mask;
+ unsigned int end = end_wptr & ring->buf_mask;
+
+ if (end < start)
+ end += ring->ring_size >> 2;
+ return end - start;
+}
+
/**
* amdgpu_ring_patch_cond_exec - patch dw count of conditional execute
* @ring: amdgpu_ring structure
@@ -532,18 +543,13 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
static inline void amdgpu_ring_patch_cond_exec(struct amdgpu_ring *ring,
unsigned int offset)
{
- unsigned cur;
-
if (!ring->funcs->init_cond_exec)
return;
WARN_ON(offset > ring->buf_mask);
WARN_ON(ring->ring[offset] != 0);
- cur = (ring->wptr - 1) & ring->buf_mask;
- if (cur < offset)
- cur += ring->ring_size >> 2;
- ring->ring[offset] = cur - offset;
+ ring->ring[offset] = amdgpu_ring_get_dw_distance(ring, offset, ring->wptr - 1);
}
int amdgpu_ring_test_helper(struct amdgpu_ring *ring);
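[Annotation: amdgpu_ring_get_dw_distance() centralizes the modular arithmetic that amdgpu_ring_patch_cond_exec() used to open-code: both wptr values are masked into the ring, and a wrapped end is unwrapped by adding the ring size in dwords. A worked example with illustrative values:]

	/* ring_size = 4096 bytes -> 1024 dwords, buf_mask = 1023
	 *
	 * start_wptr = 2044  ->  start = 2044 & 1023 = 1020
	 * end_wptr   = 2054  ->  end   = 2054 & 1023 =    6
	 * end < start, so end += 1024  ->  1030
	 * distance = 1030 - 1020 = 10 dwords
	 */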
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index 341beec59537..0eecfaa3a94c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -103,10 +103,8 @@ int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
- if (!amdgpu_ctx_priority_is_valid(args->in.priority)) {
- WARN(1, "Invalid context priority %d\n", args->in.priority);
+ if (!amdgpu_ctx_priority_is_valid(args->in.priority))
return -EINVAL;
- }
switch (args->in.op) {
case AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 0ccb31788b20..afaaab6496de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -163,7 +163,8 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
}
static struct dma_fence *
-amdgpu_ttm_job_submit(struct amdgpu_device *adev, struct amdgpu_job *job, u32 num_dw)
+amdgpu_ttm_job_submit(struct amdgpu_device *adev, struct amdgpu_ttm_buffer_entity *entity,
+ struct amdgpu_job *job, u32 num_dw)
{
struct amdgpu_ring *ring;
@@ -171,6 +172,8 @@ amdgpu_ttm_job_submit(struct amdgpu_device *adev, struct amdgpu_job *job, u32 nu
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
+ lockdep_assert_held(&entity->lock);
+
return amdgpu_job_submit(job);
}
@@ -228,9 +231,7 @@ static int amdgpu_ttm_map_buffer(struct amdgpu_ttm_buffer_entity *entity,
*size = min(*size, (uint64_t)num_pages * PAGE_SIZE - offset);
- *addr = adev->gmc.gart_start;
- *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
- AMDGPU_GPU_PAGE_SIZE;
+ *addr = amdgpu_compute_gart_address(&adev->gmc, entity, window);
*addr += offset;
num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
@@ -248,7 +249,7 @@ static int amdgpu_ttm_map_buffer(struct amdgpu_ttm_buffer_entity *entity,
src_addr += job->ibs[0].gpu_addr;
dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
- dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
+ dst_addr += (entity->gart_window_offs[window] >> AMDGPU_GPU_PAGE_SHIFT) * 8;
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
dst_addr, num_bytes, 0);
@@ -269,7 +270,7 @@ static int amdgpu_ttm_map_buffer(struct amdgpu_ttm_buffer_entity *entity,
amdgpu_gart_map_vram_range(adev, pa, 0, num_pages, flags, cpu_addr);
}
- dma_fence_put(amdgpu_ttm_job_submit(adev, job, num_dw));
+ dma_fence_put(amdgpu_ttm_job_submit(adev, entity, job, num_dw));
return 0;
}
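[Annotation: the transfer-window address is no longer computed from a device-global window index; each entity carries the byte offsets of its own windows, precomputed at init time. The two computations side by side, context trimmed:]

	/* before: a global base plus a device-wide window index */
	*addr = adev->gmc.gart_start +
		(u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GPU_PAGE_SIZE;

	/* after: per-entity offsets, resolved by the new inline helper */
	*addr = amdgpu_compute_gart_address(&adev->gmc, entity, window);
		/* == gmc->gart_start + entity->gart_window_offs[window] */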
@@ -313,7 +314,7 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
amdgpu_res_first(src->mem, src->offset, size, &src_mm);
amdgpu_res_first(dst->mem, dst->offset, size, &dst_mm);
- mutex_lock(&adev->mman.gtt_window_lock);
+ mutex_lock(&entity->lock);
while (src_mm.remaining) {
uint64_t from, to, cur_size, tiling_flags;
uint32_t num_type, data_format, max_com, write_compress_disable;
@@ -368,7 +369,7 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
amdgpu_res_next(&dst_mm, cur_size);
}
error:
- mutex_unlock(&adev->mman.gtt_window_lock);
+ mutex_unlock(&entity->lock);
*f = fence;
return r;
}
@@ -386,9 +387,11 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
+ struct amdgpu_ttm_buffer_entity *entity;
struct amdgpu_copy_mem src, dst;
struct dma_fence *fence = NULL;
int r;
+ u32 e;
src.bo = bo;
dst.bo = bo;
@@ -397,8 +400,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
src.offset = 0;
dst.offset = 0;
+ e = atomic_inc_return(&adev->mman.next_move_entity) %
+ adev->mman.num_move_entities;
+ entity = &adev->mman.move_entities[e];
+
r = amdgpu_ttm_copy_mem_to_mem(adev,
- &adev->mman.move_entity,
+ entity,
&src, &dst,
new_mem->size,
amdgpu_bo_encrypted(abo),
@@ -410,9 +417,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
if (old_mem->mem_type == TTM_PL_VRAM &&
(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
struct dma_fence *wipe_fence = NULL;
-
- r = amdgpu_fill_buffer(&adev->mman.move_entity,
- abo, 0, NULL, &wipe_fence,
+ r = amdgpu_fill_buffer(entity, abo, 0, NULL, &wipe_fence,
AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
if (r) {
goto error;
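[Annotation: buffer moves now rotate across a small pool of entities instead of serializing on a single one; the atomic increment plus modulo in the hunk above gives a lock-free round-robin. The pattern in isolation, as a hypothetical standalone helper:]

	/* hypothetical helper showing the selection pattern used above */
	static struct amdgpu_ttm_buffer_entity *
	pick_move_entity(struct amdgpu_mman *mman)
	{
		/* atomic_inc_return is safe against concurrent moves; the
		 * modulo spreads consecutive moves across all entities */
		u32 e = atomic_inc_return(&mman->next_move_entity) %
			mman->num_move_entities;

		return &mman->move_entities[e];
	}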
@@ -1570,7 +1575,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
if (r)
goto out;
- mutex_lock(&adev->mman.gtt_window_lock);
+ mutex_lock(&adev->mman.default_entity.lock);
amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm);
src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) +
src_mm.start;
@@ -1581,8 +1586,8 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
PAGE_SIZE, 0);
- fence = amdgpu_ttm_job_submit(adev, job, num_dw);
- mutex_unlock(&adev->mman.gtt_window_lock);
+ fence = amdgpu_ttm_job_submit(adev, &adev->mman.default_entity, job, num_dw);
+ mutex_unlock(&adev->mman.default_entity.lock);
if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
r = -ETIMEDOUT;
@@ -1898,7 +1903,7 @@ static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev)
}
/**
- * amdgpu_ttm_mmio_remap_bo_init - Allocate the singleton MMIO_REMAP BO
+ * amdgpu_ttm_alloc_mmio_remap_bo - Allocate the singleton MMIO_REMAP BO
* @adev: amdgpu device
*
* Allocates a global BO with backing AMDGPU_PL_MMIO_REMAP when the
@@ -2003,6 +2008,50 @@ static void amdgpu_ttm_free_mmio_remap_bo(struct amdgpu_device *adev)
adev->rmmio_remap.bo = NULL;
}
+static int amdgpu_ttm_buffer_entity_init(struct amdgpu_gtt_mgr *mgr,
+ struct amdgpu_ttm_buffer_entity *entity,
+ enum drm_sched_priority prio,
+ struct drm_gpu_scheduler **scheds,
+ int num_schedulers,
+ u32 num_gart_windows)
+{
+ int i, r, num_pages;
+
+ r = drm_sched_entity_init(&entity->base, prio, scheds, num_schedulers, NULL);
+ if (r)
+ return r;
+
+ mutex_init(&entity->lock);
+
+ if (ARRAY_SIZE(entity->gart_window_offs) < num_gart_windows)
+ return -EINVAL;
+ if (num_gart_windows == 0)
+ return 0;
+
+ num_pages = num_gart_windows * AMDGPU_GTT_MAX_TRANSFER_SIZE;
+ r = amdgpu_gtt_mgr_alloc_entries(mgr, &entity->gart_node, num_pages,
+ DRM_MM_INSERT_BEST);
+ if (r) {
+ drm_sched_entity_destroy(&entity->base);
+ return r;
+ }
+
+ for (i = 0; i < num_gart_windows; i++) {
+ entity->gart_window_offs[i] =
+ amdgpu_gtt_node_to_byte_offset(&entity->gart_node) +
+ i * AMDGPU_GTT_MAX_TRANSFER_SIZE * PAGE_SIZE;
+ }
+
+ return 0;
+}
+
+static void amdgpu_ttm_buffer_entity_fini(struct amdgpu_gtt_mgr *mgr,
+ struct amdgpu_ttm_buffer_entity *entity)
+{
+ amdgpu_gtt_mgr_free_entries(mgr, &entity->gart_node);
+ drm_sched_entity_destroy(&entity->base);
+}
+
/*
* amdgpu_ttm_init - Init the memory management (ttm) as well as various
* gtt/vram related fields.
@@ -2017,8 +2066,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
uint64_t gtt_size;
int r;
- mutex_init(&adev->mman.gtt_window_lock);
-
dma_set_max_seg_size(adev->dev, UINT_MAX);
/* No others user of address space so set it to 0 */
r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
@@ -2292,8 +2339,9 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
{
struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
+ u32 num_clear_entities, num_move_entities;
uint64_t size;
- int r;
+ int r, i, j;
if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu)
@@ -2303,46 +2351,83 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
struct amdgpu_ring *ring;
struct drm_gpu_scheduler *sched;
+ if (!adev->mman.buffer_funcs_ring || !adev->mman.buffer_funcs_ring->sched.ready) {
+ dev_warn(adev->dev, "Not enabling DMA transfers for in-kernel use\n");
+ return;
+ }
+
+ num_clear_entities = 1;
+ num_move_entities = 1;
ring = adev->mman.buffer_funcs_ring;
sched = &ring->sched;
- r = drm_sched_entity_init(&adev->mman.default_entity.base,
- DRM_SCHED_PRIORITY_KERNEL, &sched,
- 1, NULL);
- if (r) {
+ r = amdgpu_ttm_buffer_entity_init(&adev->mman.gtt_mgr,
+ &adev->mman.default_entity,
+ DRM_SCHED_PRIORITY_KERNEL,
+ &sched, 1, 0);
+ if (r < 0) {
dev_err(adev->dev,
- "Failed setting up TTM BO move entity (%d)\n",
- r);
+ "Failed setting up TTM entity (%d)\n", r);
return;
}
- r = drm_sched_entity_init(&adev->mman.clear_entity.base,
- DRM_SCHED_PRIORITY_NORMAL, &sched,
- 1, NULL);
- if (r) {
- dev_err(adev->dev,
- "Failed setting up TTM BO clear entity (%d)\n",
- r);
- goto error_free_entity;
+ adev->mman.clear_entities = kcalloc(num_clear_entities,
+ sizeof(struct amdgpu_ttm_buffer_entity),
+ GFP_KERNEL);
+ atomic_set(&adev->mman.next_clear_entity, 0);
+ if (!adev->mman.clear_entities)
+ goto error_free_default_entity;
+
+ adev->mman.num_clear_entities = num_clear_entities;
+
+ for (i = 0; i < num_clear_entities; i++) {
+ r = amdgpu_ttm_buffer_entity_init(
+ &adev->mman.gtt_mgr, &adev->mman.clear_entities[i],
+ DRM_SCHED_PRIORITY_NORMAL, &sched, 1, 1);
+
+ if (r < 0) {
+ for (j = 0; j < i; j++)
+ amdgpu_ttm_buffer_entity_fini(
+ &adev->mman.gtt_mgr, &adev->mman.clear_entities[j]);
+ kfree(adev->mman.clear_entities);
+ adev->mman.num_clear_entities = 0;
+ adev->mman.clear_entities = NULL;
+ goto error_free_default_entity;
+ }
}
- r = drm_sched_entity_init(&adev->mman.move_entity.base,
- DRM_SCHED_PRIORITY_NORMAL, &sched,
- 1, NULL);
- if (r) {
- dev_err(adev->dev,
- "Failed setting up TTM BO move entity (%d)\n",
- r);
- drm_sched_entity_destroy(&adev->mman.clear_entity.base);
- goto error_free_entity;
+ adev->mman.num_move_entities = num_move_entities;
+ atomic_set(&adev->mman.next_move_entity, 0);
+ for (i = 0; i < num_move_entities; i++) {
+ r = amdgpu_ttm_buffer_entity_init(
+ &adev->mman.gtt_mgr,
+ &adev->mman.move_entities[i],
+ DRM_SCHED_PRIORITY_NORMAL, &sched, 1, 2);
+
+ if (r < 0) {
+ for (j = 0; j < i; j++)
+ amdgpu_ttm_buffer_entity_fini(
+ &adev->mman.gtt_mgr, &adev->mman.move_entities[j]);
+ adev->mman.num_move_entities = 0;
+ goto error_free_clear_entities;
+ }
}
} else {
- drm_sched_entity_destroy(&adev->mman.default_entity.base);
- drm_sched_entity_destroy(&adev->mman.clear_entity.base);
- drm_sched_entity_destroy(&adev->mman.move_entity.base);
+ amdgpu_ttm_buffer_entity_fini(&adev->mman.gtt_mgr,
+ &adev->mman.default_entity);
+ for (i = 0; i < adev->mman.num_clear_entities; i++)
+ amdgpu_ttm_buffer_entity_fini(&adev->mman.gtt_mgr,
+ &adev->mman.clear_entities[i]);
+ for (i = 0; i < adev->mman.num_move_entities; i++)
+ amdgpu_ttm_buffer_entity_fini(&adev->mman.gtt_mgr,
+ &adev->mman.move_entities[i]);
/* Drop all the old fences since re-creating the scheduler entities
* will allocate new contexts.
*/
ttm_resource_manager_cleanup(man);
+ kfree(adev->mman.clear_entities);
+ adev->mman.clear_entities = NULL;
+ adev->mman.num_clear_entities = 0;
+ adev->mman.num_move_entities = 0;
}
/* this just adjusts TTM size idea, which sets lpfn to the correct value */
@@ -2355,8 +2440,16 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
return;
-error_free_entity:
- drm_sched_entity_destroy(&adev->mman.default_entity.base);
+error_free_clear_entities:
+ for (i = 0; i < adev->mman.num_clear_entities; i++)
+ amdgpu_ttm_buffer_entity_fini(&adev->mman.gtt_mgr,
+ &adev->mman.clear_entities[i]);
+ kfree(adev->mman.clear_entities);
+ adev->mman.clear_entities = NULL;
+ adev->mman.num_clear_entities = 0;
+error_free_default_entity:
+ amdgpu_ttm_buffer_entity_fini(&adev->mman.gtt_mgr,
+ &adev->mman.default_entity);
}
static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
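[Annotation: note the last argument to amdgpu_ttm_buffer_entity_init() in the setup hunk above: the default entity gets no GART windows, clear entities one, and move entities two — presumably one window per side of a copy. Schematically:]

	/* GART window budget per entity class (from the init calls above) */
	..._entity_init(..., &mman->default_entity,    ..., 0); /* no windows  */
	..._entity_init(..., &mman->clear_entities[i], ..., 1); /* dst only    */
	..._entity_init(..., &mman->move_entities[i],  ..., 2); /* src and dst */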
@@ -2430,7 +2523,7 @@ int amdgpu_copy_buffer(struct amdgpu_device *adev,
byte_count -= cur_size_in_bytes;
}
- *fence = amdgpu_ttm_job_submit(adev, job, num_dw);
+ *fence = amdgpu_ttm_job_submit(adev, entity, job, num_dw);
return 0;
@@ -2473,7 +2566,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_device *adev,
byte_count -= cur_size;
}
- *fence = amdgpu_ttm_job_submit(adev, job, num_dw);
+ *fence = amdgpu_ttm_job_submit(adev, entity, job, num_dw);
return 0;
}
@@ -2493,6 +2586,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
struct dma_fence **fence)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct amdgpu_ttm_buffer_entity *entity;
struct amdgpu_res_cursor cursor;
u64 addr;
int r = 0;
@@ -2502,12 +2596,12 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
if (!fence)
return -EINVAL;
-
+ entity = &adev->mman.clear_entities[0];
*fence = dma_fence_get_stub();
amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
- mutex_lock(&adev->mman.gtt_window_lock);
+ mutex_lock(&entity->lock);
while (cursor.remaining) {
struct dma_fence *next = NULL;
u64 size;
@@ -2520,13 +2614,12 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
/* Never clear more than 256MiB at once to avoid timeouts */
size = min(cursor.size, 256ULL << 20);
- r = amdgpu_ttm_map_buffer(&adev->mman.clear_entity,
- &bo->tbo, bo->tbo.resource, &cursor,
- 1, false, &size, &addr);
+ r = amdgpu_ttm_map_buffer(entity, &bo->tbo, bo->tbo.resource, &cursor,
+ 0, false, &size, &addr);
if (r)
goto err;
- r = amdgpu_ttm_fill_mem(adev, &adev->mman.clear_entity, 0, addr, size, resv,
+ r = amdgpu_ttm_fill_mem(adev, entity, 0, addr, size, resv,
&next, true,
AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
if (r)
@@ -2538,7 +2631,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
amdgpu_res_next(&cursor, size);
}
err:
- mutex_unlock(&adev->mman.gtt_window_lock);
+ mutex_unlock(&entity->lock);
return r;
}
@@ -2555,15 +2648,12 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
struct amdgpu_res_cursor dst;
int r;
- if (!adev->mman.buffer_funcs_enabled) {
- dev_err(adev->dev,
- "Trying to clear memory with ring turned off.\n");
+ if (!entity)
return -EINVAL;
- }
amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &dst);
- mutex_lock(&adev->mman.gtt_window_lock);
+ mutex_lock(&entity->lock);
while (dst.remaining) {
struct dma_fence *next;
uint64_t cur_size, to;
@@ -2572,7 +2662,7 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
cur_size = min(dst.size, 256ULL << 20);
r = amdgpu_ttm_map_buffer(entity, &bo->tbo, bo->tbo.resource, &dst,
- 1, false, &cur_size, &to);
+ 0, false, &cur_size, &to);
if (r)
goto error;
@@ -2588,13 +2678,27 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
amdgpu_res_next(&dst, cur_size);
}
error:
- mutex_unlock(&adev->mman.gtt_window_lock);
+ mutex_unlock(&entity->lock);
if (f)
*f = dma_fence_get(fence);
dma_fence_put(fence);
return r;
}
+struct amdgpu_ttm_buffer_entity *
+amdgpu_ttm_next_clear_entity(struct amdgpu_device *adev)
+{
+ struct amdgpu_mman *mman = &adev->mman;
+ u32 i;
+
+ if (mman->num_clear_entities == 0)
+ return NULL;
+
+ i = atomic_inc_return(&mman->next_clear_entity) %
+ mman->num_clear_entities;
+ return &mman->clear_entities[i];
+}
+
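[Annotation: amdgpu_ttm_clear_buffer() above still hardcodes clear_entities[0]; this helper lets future callers rotate through the pool the same way moves do. A hypothetical caller sketch:]

	struct amdgpu_ttm_buffer_entity *entity;

	entity = amdgpu_ttm_next_clear_entity(adev);
	if (!entity)
		return -EINVAL;	/* buffer funcs disabled, no clear entities */

	mutex_lock(&entity->lock);
	/* ... map a window and submit fill jobs against this entity ... */
	mutex_unlock(&entity->lock);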
/**
* amdgpu_ttm_evict_resources - evict memory buffers
* @adev: amdgpu device object
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 143201ecea3f..3b1973611446 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -29,6 +29,7 @@
#include <drm/ttm/ttm_placement.h>
#include "amdgpu_vram_mgr.h"
#include "amdgpu_hmm.h"
+#include "amdgpu_gmc.h"
#define AMDGPU_PL_GDS (TTM_PL_PRIV + 0)
#define AMDGPU_PL_GWS (TTM_PL_PRIV + 1)
@@ -38,8 +39,7 @@
#define AMDGPU_PL_MMIO_REMAP (TTM_PL_PRIV + 5)
#define __AMDGPU_PL_NUM (TTM_PL_PRIV + 6)
-#define AMDGPU_GTT_MAX_TRANSFER_SIZE 512
-#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
+#define AMDGPU_GTT_MAX_TRANSFER_SIZE 1024
extern const struct attribute_group amdgpu_vram_mgr_attr_group;
extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
@@ -54,6 +54,9 @@ struct amdgpu_gtt_mgr {
struct amdgpu_ttm_buffer_entity {
struct drm_sched_entity base;
+ struct mutex lock;
+ struct drm_mm_node gart_node;
+ u64 gart_window_offs[2];
};
struct amdgpu_mman {
@@ -67,11 +70,14 @@ struct amdgpu_mman {
struct amdgpu_ring *buffer_funcs_ring;
bool buffer_funcs_enabled;
- struct mutex gtt_window_lock;
-
+ /* @default_entity: for workarounds, has no gart windows */
struct amdgpu_ttm_buffer_entity default_entity;
- struct amdgpu_ttm_buffer_entity clear_entity;
- struct amdgpu_ttm_buffer_entity move_entity;
+ struct amdgpu_ttm_buffer_entity *clear_entities;
+ atomic_t next_clear_entity;
+ u32 num_clear_entities;
+ struct amdgpu_ttm_buffer_entity move_entities[TTM_NUM_MOVE_FENCES];
+ atomic_t next_move_entity;
+ u32 num_move_entities;
struct amdgpu_vram_mgr vram_mgr;
struct amdgpu_gtt_mgr gtt_mgr;
@@ -189,6 +195,7 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
struct dma_resv *resv,
struct dma_fence **f,
u64 k_job_id);
+struct amdgpu_ttm_buffer_entity *amdgpu_ttm_next_clear_entity(struct amdgpu_device *adev);
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
@@ -205,6 +212,27 @@ static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
}
#endif
+/**
+ * amdgpu_compute_gart_address() - Returns GART address of an entity's window
+ * @gmc: The &struct amdgpu_gmc instance to use
+ * @entity: The &struct amdgpu_ttm_buffer_entity owning the GART window
+ * @index: The window to use (must be 0 or 1)
+ */
+static inline u64 amdgpu_compute_gart_address(struct amdgpu_gmc *gmc,
+ struct amdgpu_ttm_buffer_entity *entity,
+ int index)
+{
+ return gmc->gart_start + entity->gart_window_offs[index];
+}
+
+/**
+ * amdgpu_gtt_node_to_byte_offset() - Returns the byte offset of a GTT node
+ * @gtt_node: The &struct drm_mm_node to convert
+ */
+static inline u64 amdgpu_gtt_node_to_byte_offset(const struct drm_mm_node *gtt_node)
+{
+ return gtt_node->start * (u64)PAGE_SIZE;
+}
+
void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct amdgpu_hmm_range *range);
int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
uint64_t *user_addr);
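[Annotation: with AMDGPU_GTT_MAX_TRANSFER_SIZE doubled and the separate window-count define dropped, each transfer window now covers, assuming the usual 4 KiB page size:]

	/* 1024 pages * 4096 bytes = 4 MiB per window (was 512 pages = 2 MiB);
	 * the number of windows is now a per-entity property rather than the
	 * old global AMDGPU_GTT_NUM_TRANSFER_WINDOWS */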
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 1ab61e7b35db..6d9e96fabd58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -291,22 +291,22 @@ void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr)
break;
case 5:
/* rlc_hdr v2_5 */
- DRM_INFO("rlc_iram_ucode_size_bytes: %u\n",
+ DRM_DEBUG("rlc_iram_ucode_size_bytes: %u\n",
le32_to_cpu(rlc_hdr_v2_5->v2_2.rlc_iram_ucode_size_bytes));
- DRM_INFO("rlc_iram_ucode_offset_bytes: %u\n",
+ DRM_DEBUG("rlc_iram_ucode_offset_bytes: %u\n",
le32_to_cpu(rlc_hdr_v2_5->v2_2.rlc_iram_ucode_offset_bytes));
- DRM_INFO("rlc_dram_ucode_size_bytes: %u\n",
+ DRM_DEBUG("rlc_dram_ucode_size_bytes: %u\n",
le32_to_cpu(rlc_hdr_v2_5->v2_2.rlc_dram_ucode_size_bytes));
- DRM_INFO("rlc_dram_ucode_offset_bytes: %u\n",
+ DRM_DEBUG("rlc_dram_ucode_offset_bytes: %u\n",
le32_to_cpu(rlc_hdr_v2_5->v2_2.rlc_dram_ucode_offset_bytes));
/* rlc_hdr v2_5 */
- DRM_INFO("rlc_1_iram_ucode_size_bytes: %u\n",
+ DRM_DEBUG("rlc_1_iram_ucode_size_bytes: %u\n",
le32_to_cpu(rlc_hdr_v2_5->rlc_1_iram_ucode_size_bytes));
- DRM_INFO("rlc_1_iram_ucode_offset_bytes: %u\n",
+ DRM_DEBUG("rlc_1_iram_ucode_offset_bytes: %u\n",
le32_to_cpu(rlc_hdr_v2_5->rlc_1_iram_ucode_offset_bytes));
- DRM_INFO("rlc_1_dram_ucode_size_bytes: %u\n",
+ DRM_DEBUG("rlc_1_dram_ucode_size_bytes: %u\n",
le32_to_cpu(rlc_hdr_v2_5->rlc_1_dram_ucode_size_bytes));
- DRM_INFO("rlc_1_dram_ucode_offset_bytes: %u\n",
+ DRM_DEBUG("rlc_1_dram_ucode_offset_bytes: %u\n",
le32_to_cpu(rlc_hdr_v2_5->rlc_1_dram_ucode_offset_bytes));
break;
default:
@@ -1150,7 +1150,7 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev)
if ((adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) &&
(adev->firmware.load_type != AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)) {
amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE,
- (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ?
+ (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf || adev->gmc.xgmi.connected_to_cpu) ?
AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
&adev->firmware.fw_buf,
&adev->firmware.fw_buf_mc,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 4df0f9d5ad11..0238c2798de4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -398,6 +398,17 @@ int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
return 0;
}
+int amdgpu_umc_uniras_process_ecc_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct ras_ih_info ih_info = {0};
+
+ ih_info.block = RAS_BLOCK_ID__UMC;
+ amdgpu_ras_mgr_dispatch_interrupt(adev, &ih_info);
+ return 0;
+}
+
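[Annotation: the uniras variant bypasses the legacy per-block ECC processing and hands the interrupt straight to the RAS manager. How it plugs into an IRQ source is a guess, but presumably along these lines:]

	/* hypothetical wiring; the actual UMC IP code registers this */
	static const struct amdgpu_irq_src_funcs umc_uniras_ecc_irq_funcs = {
		.process = amdgpu_umc_uniras_process_ecc_irq,
	};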
int amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
uint64_t err_addr,
uint64_t retired_page,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index 28dff750c47e..8494a55ebf76 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -161,6 +161,9 @@ int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev,
int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry);
+int amdgpu_umc_uniras_process_ecc_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
int amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
uint64_t err_addr,
uint64_t retired_page,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 0a1b93259887..366728ed03e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -156,7 +156,7 @@ static void amdgpu_userq_hang_detect_work(struct work_struct *work)
struct dma_fence *fence;
struct amdgpu_userq_mgr *uq_mgr;
- if (!queue || !queue->userq_mgr)
+ if (!queue->userq_mgr)
return;
uq_mgr = queue->userq_mgr;
@@ -472,17 +472,16 @@ void
amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_eviction_fence_mgr *evf_mgr)
{
- struct amdgpu_eviction_fence *ev_fence;
+ struct dma_fence *ev_fence;
retry:
/* Flush any pending resume work to create ev_fence */
flush_delayed_work(&uq_mgr->resume_work);
mutex_lock(&uq_mgr->userq_mutex);
- spin_lock(&evf_mgr->ev_fence_lock);
- ev_fence = evf_mgr->ev_fence;
- spin_unlock(&evf_mgr->ev_fence_lock);
- if (!ev_fence || dma_fence_is_signaled(&ev_fence->base)) {
+ ev_fence = amdgpu_evf_mgr_get_fence(evf_mgr);
+ if (dma_fence_is_signaled(ev_fence)) {
+ dma_fence_put(ev_fence);
mutex_unlock(&uq_mgr->userq_mutex);
/*
* Looks like there was no pending resume work,
@@ -491,6 +490,7 @@ retry:
schedule_delayed_work(&uq_mgr->resume_work, 0);
goto retry;
}
+ dma_fence_put(ev_fence);
}
int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr,
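[Annotation: amdgpu_evf_mgr_get_fence() replaces the open-coded lock/fetch removed above and returns a referenced dma_fence, so every caller now pairs it with dma_fence_put(). A reconstruction of the presumed behaviour, inferred from the old code plus the fact that callers no longer NULL-check — an already-signalled stub would preserve the old !ev_fence semantics:]

	/* reconstruction, not the actual helper added by this series */
	static inline struct dma_fence *
	amdgpu_evf_mgr_get_fence(struct amdgpu_eviction_fence_mgr *evf_mgr)
	{
		struct dma_fence *fence;

		spin_lock(&evf_mgr->ev_fence_lock);
		fence = evf_mgr->ev_fence ?
			dma_fence_get(&evf_mgr->ev_fence->base) :
			dma_fence_get_stub();	/* stub is born signalled */
		spin_unlock(&evf_mgr->ev_fence_lock);

		return fence;
	}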
@@ -630,13 +630,14 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_que
int r = 0;
cancel_delayed_work_sync(&uq_mgr->resume_work);
+
+ /* Cancel any pending hang detection work and cleanup */
+ cancel_delayed_work_sync(&queue->hang_detect_work);
+
mutex_lock(&uq_mgr->userq_mutex);
+ queue->hang_detect_fence = NULL;
amdgpu_userq_wait_for_last_fence(queue);
- /* Cancel any pending hang detection work and cleanup */
- if (queue->hang_detect_fence) {
- cancel_delayed_work_sync(&queue->hang_detect_work);
- queue->hang_detect_fence = NULL;
- }
+
r = amdgpu_bo_reserve(queue->db_obj.obj, true);
if (!r) {
amdgpu_bo_unpin(queue->db_obj.obj);
@@ -716,46 +717,6 @@ static int amdgpu_userq_priority_permit(struct drm_file *filp,
return -EACCES;
}
-#if defined(CONFIG_DEBUG_FS)
-static int amdgpu_mqd_info_read(struct seq_file *m, void *unused)
-{
- struct amdgpu_usermode_queue *queue = m->private;
- struct amdgpu_bo *bo;
- int r;
-
- if (!queue || !queue->mqd.obj)
- return -EINVAL;
-
- bo = amdgpu_bo_ref(queue->mqd.obj);
- r = amdgpu_bo_reserve(bo, true);
- if (r) {
- amdgpu_bo_unref(&bo);
- return -EINVAL;
- }
-
- seq_printf(m, "queue_type: %d\n", queue->queue_type);
- seq_printf(m, "mqd_gpu_address: 0x%llx\n", amdgpu_bo_gpu_offset(queue->mqd.obj));
-
- amdgpu_bo_unreserve(bo);
- amdgpu_bo_unref(&bo);
-
- return 0;
-}
-
-static int amdgpu_mqd_info_open(struct inode *inode, struct file *file)
-{
- return single_open(file, amdgpu_mqd_info_read, inode->i_private);
-}
-
-static const struct file_operations amdgpu_mqd_info_fops = {
- .owner = THIS_MODULE,
- .open = amdgpu_mqd_info_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-#endif
-
static int
amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
{
@@ -765,7 +726,6 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
const struct amdgpu_userq_funcs *uq_funcs;
struct amdgpu_usermode_queue *queue;
struct amdgpu_db_info db_info;
- char *queue_name;
bool skip_map_queue;
u32 qid;
uint64_t index;
@@ -826,17 +786,15 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
amdgpu_userq_input_va_validate(adev, queue, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) ||
amdgpu_userq_input_va_validate(adev, queue, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) {
r = -EINVAL;
- kfree(queue);
- goto unlock;
+ goto free_queue;
}
/* Convert relative doorbell offset into absolute doorbell index */
index = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp);
if (index == (uint64_t)-EINVAL) {
drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n");
- kfree(queue);
r = -EINVAL;
- goto unlock;
+ goto free_queue;
}
queue->doorbell_index = index;
@@ -844,41 +802,14 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
r = amdgpu_userq_fence_driver_alloc(adev, queue);
if (r) {
drm_file_err(uq_mgr->file, "Failed to alloc fence driver\n");
- goto unlock;
+ goto free_queue;
}
r = uq_funcs->mqd_create(queue, &args->in);
if (r) {
drm_file_err(uq_mgr->file, "Failed to create Queue\n");
- amdgpu_userq_fence_driver_free(queue);
- kfree(queue);
- goto unlock;
- }
-
- /* drop this refcount during queue destroy */
- kref_init(&queue->refcount);
-
- /* Wait for mode-1 reset to complete */
- down_read(&adev->reset_domain->sem);
- r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, GFP_KERNEL));
- if (r) {
- kfree(queue);
- up_read(&adev->reset_domain->sem);
- goto unlock;
- }
-
- r = xa_alloc(&uq_mgr->userq_xa, &qid, queue,
- XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT), GFP_KERNEL);
- if (r) {
- drm_file_err(uq_mgr->file, "Failed to allocate a queue id\n");
- amdgpu_userq_fence_driver_free(queue);
- uq_funcs->mqd_destroy(queue);
- kfree(queue);
- r = -ENOMEM;
- up_read(&adev->reset_domain->sem);
- goto unlock;
+ goto clean_fence_driver;
}
- up_read(&adev->reset_domain->sem);
/* don't map the queue if scheduling is halted */
if (adev->userq_halt_for_enforce_isolation &&
@@ -891,31 +822,52 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
r = amdgpu_userq_map_helper(queue);
if (r) {
drm_file_err(uq_mgr->file, "Failed to map Queue\n");
- xa_erase(&uq_mgr->userq_xa, qid);
- amdgpu_userq_fence_driver_free(queue);
- uq_funcs->mqd_destroy(queue);
- kfree(queue);
- goto unlock;
+ down_read(&adev->reset_domain->sem);
+ goto clean_mqd;
}
}
- queue_name = kasprintf(GFP_KERNEL, "queue-%d", qid);
- if (!queue_name) {
+ /* drop this refcount during queue destroy */
+ kref_init(&queue->refcount);
+
+ /* Wait for mode-1 reset to complete */
+ down_read(&adev->reset_domain->sem);
+
+ r = xa_alloc(&uq_mgr->userq_xa, &qid, queue,
+ XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT), GFP_KERNEL);
+ if (r) {
+ if (!skip_map_queue)
+ amdgpu_userq_unmap_helper(queue);
+
r = -ENOMEM;
- goto unlock;
+ goto clean_mqd;
}
-#if defined(CONFIG_DEBUG_FS)
- /* Queue dentry per client to hold MQD information */
- queue->debugfs_queue = debugfs_create_dir(queue_name, filp->debugfs_client);
- debugfs_create_file("mqd_info", 0444, queue->debugfs_queue, queue, &amdgpu_mqd_info_fops);
-#endif
+ r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, GFP_KERNEL));
+ if (r) {
+ xa_erase(&uq_mgr->userq_xa, qid);
+ if (!skip_map_queue)
+ amdgpu_userq_unmap_helper(queue);
+
+ goto clean_mqd;
+ }
+ up_read(&adev->reset_domain->sem);
+
+ amdgpu_debugfs_userq_init(filp, queue, qid);
amdgpu_userq_init_hang_detect_work(queue);
- kfree(queue_name);
args->out.queue_id = qid;
atomic_inc(&uq_mgr->userq_count[queue->queue_type]);
+ mutex_unlock(&uq_mgr->userq_mutex);
+ return 0;
+clean_mqd:
+ uq_funcs->mqd_destroy(queue);
+ up_read(&adev->reset_domain->sem);
+clean_fence_driver:
+ amdgpu_userq_fence_driver_free(queue);
+free_queue:
+ kfree(queue);
unlock:
mutex_unlock(&uq_mgr->userq_mutex);
@@ -1052,17 +1004,14 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
unsigned long queue_id;
int ret = 0, r;
+ mutex_lock(&uq_mgr->userq_mutex);
/* Resume all the queues for this process */
xa_for_each(&uq_mgr->userq_xa, queue_id, queue) {
- queue = amdgpu_userq_get(uq_mgr, queue_id);
- if (!queue)
- continue;
if (!amdgpu_userq_buffer_vas_mapped(queue)) {
drm_file_err(uq_mgr->file,
"trying restore queue without va mapping\n");
queue->state = AMDGPU_USERQ_STATE_INVALID_VA;
- amdgpu_userq_put(queue);
continue;
}
@@ -1070,8 +1019,8 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
if (r)
ret = r;
- amdgpu_userq_put(queue);
}
+ mutex_unlock(&uq_mgr->userq_mutex);
if (ret)
drm_file_err(uq_mgr->file, "Failed to map all the queues\n");
@@ -1252,7 +1201,7 @@ retry_lock:
dma_fence_wait(bo_va->last_pt_update, false);
dma_fence_wait(vm->last_update, false);
- ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec);
+ ret = amdgpu_evf_mgr_rearm(&fpriv->evf_mgr, &exec);
if (ret)
drm_file_err(uq_mgr->file, "Failed to replace eviction fence\n");
@@ -1272,26 +1221,25 @@ static void amdgpu_userq_restore_worker(struct work_struct *work)
{
struct amdgpu_userq_mgr *uq_mgr = work_to_uq_mgr(work, resume_work.work);
struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
+ struct dma_fence *ev_fence;
int ret;
- flush_delayed_work(&fpriv->evf_mgr.suspend_work);
-
- mutex_lock(&uq_mgr->userq_mutex);
+ ev_fence = amdgpu_evf_mgr_get_fence(&fpriv->evf_mgr);
+ if (!dma_fence_is_signaled(ev_fence))
+ goto put_fence;
ret = amdgpu_userq_vm_validate(uq_mgr);
if (ret) {
drm_file_err(uq_mgr->file, "Failed to validate BOs to restore\n");
- goto unlock;
+ goto put_fence;
}
ret = amdgpu_userq_restore_all(uq_mgr);
- if (ret) {
+ if (ret)
drm_file_err(uq_mgr->file, "Failed to restore all queues\n");
- goto unlock;
- }
-unlock:
- mutex_unlock(&uq_mgr->userq_mutex);
+put_fence:
+ dma_fence_put(ev_fence);
}
static int
@@ -1304,13 +1252,9 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
amdgpu_userq_detect_and_reset_queues(uq_mgr);
/* Try to unmap all the queues in this process ctx */
xa_for_each(&uq_mgr->userq_xa, queue_id, queue) {
- queue = amdgpu_userq_get(uq_mgr, queue_id);
- if (!queue)
- continue;
r = amdgpu_userq_preempt_helper(queue);
if (r)
ret = r;
- amdgpu_userq_put(queue);
}
if (ret)
@@ -1343,35 +1287,26 @@ amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
int ret;
xa_for_each(&uq_mgr->userq_xa, queue_id, queue) {
- queue = amdgpu_userq_get(uq_mgr, queue_id);
- if (!queue)
- continue;
-
struct dma_fence *f = queue->last_fence;
- if (!f || dma_fence_is_signaled(f)) {
- amdgpu_userq_put(queue);
+ if (!f || dma_fence_is_signaled(f))
continue;
- }
+
ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
if (ret <= 0) {
drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n",
f->context, f->seqno);
- amdgpu_userq_put(queue);
+
return -ETIMEDOUT;
}
- amdgpu_userq_put(queue);
}
return 0;
}
void
-amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
- struct amdgpu_eviction_fence *ev_fence)
+amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr)
{
- struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
- struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr;
struct amdgpu_device *adev = uq_mgr->adev;
int ret;
@@ -1384,16 +1319,6 @@ amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
if (ret)
dev_err(adev->dev, "Failed to evict userqueue\n");
- /* Signal current eviction fence */
- amdgpu_eviction_fence_signal(evf_mgr, ev_fence);
-
- if (evf_mgr->fd_closing) {
- cancel_delayed_work_sync(&uq_mgr->resume_work);
- return;
- }
-
- /* Schedule a resume work */
- schedule_delayed_work(&uq_mgr->resume_work, 0);
}
int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,
@@ -1408,6 +1333,11 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *f
return 0;
}
+void amdgpu_userq_mgr_cancel_resume(struct amdgpu_userq_mgr *userq_mgr)
+{
+ cancel_delayed_work_sync(&userq_mgr->resume_work);
+}
+
void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
{
struct amdgpu_usermode_queue *queue;
@@ -1531,17 +1461,19 @@ int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
if (!adev->userq_halt_for_enforce_isolation)
dev_warn(adev->dev, "userq scheduling already started!\n");
+
adev->userq_halt_for_enforce_isolation = false;
+
xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
uqm = queue->userq_mgr;
mutex_lock(&uqm->userq_mutex);
- if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
- (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
- (queue->xcp_id == idx)) {
+ if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
+ (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
+ (queue->xcp_id == idx)) {
r = amdgpu_userq_restore_helper(queue);
if (r)
ret = r;
- }
+ }
mutex_unlock(&uqm->userq_mutex);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index 736c1d38297c..a4d44abf24fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -82,6 +82,8 @@ struct amdgpu_usermode_queue {
struct amdgpu_userq_funcs {
int (*mqd_create)(struct amdgpu_usermode_queue *queue,
struct drm_amdgpu_userq_in *args);
+ int (*mqd_update)(struct amdgpu_usermode_queue *queue,
+ struct drm_amdgpu_userq_in *args);
void (*mqd_destroy)(struct amdgpu_usermode_queue *uq);
int (*unmap)(struct amdgpu_usermode_queue *queue);
int (*map)(struct amdgpu_usermode_queue *queue);
@@ -121,6 +123,7 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp
int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,
struct amdgpu_device *adev);
+void amdgpu_userq_mgr_cancel_resume(struct amdgpu_userq_mgr *userq_mgr);
void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr);
int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr,
@@ -130,8 +133,7 @@ int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr,
void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_userq_obj *userq_obj);
-void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
- struct amdgpu_eviction_fence *ev_fence);
+void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr);
void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr,
struct amdgpu_eviction_fence_mgr *evf_mgr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index 5239b06b9ab0..fe6d83e859a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -461,33 +461,31 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
struct amdgpu_device *adev = drm_to_adev(dev);
+ struct drm_amdgpu_userq_signal *args = data;
+ const unsigned int num_write_bo_handles = args->num_bo_write_handles;
+ const unsigned int num_read_bo_handles = args->num_bo_read_handles;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
- struct drm_amdgpu_userq_signal *args = data;
- struct drm_gem_object **gobj_write = NULL;
- struct drm_gem_object **gobj_read = NULL;
- struct amdgpu_usermode_queue *queue = NULL;
+ struct drm_gem_object **gobj_write, **gobj_read;
+ u32 *syncobj_handles, num_syncobj_handles;
struct amdgpu_userq_fence *userq_fence;
+ struct amdgpu_usermode_queue *queue = NULL;
struct drm_syncobj **syncobj = NULL;
- u32 *bo_handles_write, num_write_bo_handles;
- u32 *syncobj_handles, num_syncobj_handles;
- u32 *bo_handles_read, num_read_bo_handles;
- int r, i, entry, rentry, wentry;
struct dma_fence *fence;
struct drm_exec exec;
+ int r, i, entry;
u64 wptr;
if (!amdgpu_userq_enabled(dev))
return -ENOTSUPP;
- if (args->num_syncobj_handles > AMDGPU_USERQ_MAX_HANDLES ||
- args->num_bo_write_handles > AMDGPU_USERQ_MAX_HANDLES ||
+ if (args->num_bo_write_handles > AMDGPU_USERQ_MAX_HANDLES ||
args->num_bo_read_handles > AMDGPU_USERQ_MAX_HANDLES)
return -EINVAL;
num_syncobj_handles = args->num_syncobj_handles;
- syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles),
- size_mul(sizeof(u32), num_syncobj_handles));
+ syncobj_handles = memdup_array_user(u64_to_user_ptr(args->syncobj_handles),
+ num_syncobj_handles, sizeof(u32));
if (IS_ERR(syncobj_handles))
return PTR_ERR(syncobj_handles);
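[Annotation: memdup_array_user() folds the overflow-checked multiply into the copy helper itself, making the explicit size_mul() redundant. The equivalence, roughly:]

	/* before: caller guards the n * sizeof(u32) multiplication */
	buf = memdup_user(uptr, size_mul(sizeof(u32), n));

	/* after: the helper performs the same checked multiplication */
	buf = memdup_array_user(uptr, n, sizeof(u32));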
@@ -506,51 +504,19 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
}
}
- num_read_bo_handles = args->num_bo_read_handles;
- bo_handles_read = memdup_user(u64_to_user_ptr(args->bo_read_handles),
- sizeof(u32) * num_read_bo_handles);
- if (IS_ERR(bo_handles_read)) {
- r = PTR_ERR(bo_handles_read);
+ r = drm_gem_objects_lookup(filp,
+ u64_to_user_ptr(args->bo_read_handles),
+ num_read_bo_handles,
+ &gobj_read);
+ if (r)
goto free_syncobj;
- }
-
- /* Array of pointers to the GEM read objects */
- gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
- if (!gobj_read) {
- r = -ENOMEM;
- goto free_bo_handles_read;
- }
- for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
- gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
- if (!gobj_read[rentry]) {
- r = -ENOENT;
- goto put_gobj_read;
- }
- }
-
- num_write_bo_handles = args->num_bo_write_handles;
- bo_handles_write = memdup_user(u64_to_user_ptr(args->bo_write_handles),
- sizeof(u32) * num_write_bo_handles);
- if (IS_ERR(bo_handles_write)) {
- r = PTR_ERR(bo_handles_write);
+ r = drm_gem_objects_lookup(filp,
+ u64_to_user_ptr(args->bo_write_handles),
+ num_write_bo_handles,
+ &gobj_write);
+ if (r)
goto put_gobj_read;
- }
-
- /* Array of pointers to the GEM write objects */
- gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
- if (!gobj_write) {
- r = -ENOMEM;
- goto free_bo_handles_write;
- }
-
- for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
- gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
- if (!gobj_write[wentry]) {
- r = -ENOENT;
- goto put_gobj_write;
- }
- }
/* Retrieve the user queue */
queue = amdgpu_userq_get(userq_mgr, args->queue_id);
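[Annotation: drm_gem_objects_lookup() collapses the memdup_user() of the handle array and the per-handle drm_gem_object_lookup() loop into one call; it allocates the object array itself, hence the kvfree() in the new cleanup paths. The resulting caller shape, schematically (the helper's exact failure-path contract is glossed over here):]

	struct drm_gem_object **gobj;
	int i, r;

	r = drm_gem_objects_lookup(filp, u64_to_user_ptr(handles_ptr),
				   count, &gobj);
	if (r)
		return r;

	/* ... use the referenced objects ... */

	for (i = 0; i < count; i++)
		drm_gem_object_put(gobj[i]);
	kvfree(gobj);		/* the lookup helper allocated this array */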
@@ -629,17 +595,13 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
exec_fini:
drm_exec_fini(&exec);
put_gobj_write:
- while (wentry-- > 0)
- drm_gem_object_put(gobj_write[wentry]);
- kfree(gobj_write);
-free_bo_handles_write:
- kfree(bo_handles_write);
+ for (i = 0; i < num_write_bo_handles; i++)
+ drm_gem_object_put(gobj_write[i]);
+ kvfree(gobj_write);
put_gobj_read:
- while (rentry-- > 0)
- drm_gem_object_put(gobj_read[rentry]);
- kfree(gobj_read);
-free_bo_handles_read:
- kfree(bo_handles_read);
+ for (i = 0; i < num_read_bo_handles; i++)
+ drm_gem_object_put(gobj_read[i]);
+ kvfree(gobj_read);
free_syncobj:
while (entry-- > 0)
if (syncobj[entry])
@@ -654,371 +616,405 @@ free_syncobj_handles:
return r;
}
-int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
- struct drm_file *filp)
+/* Count the number of expected fences so userspace can alloc a buffer */
+static int
+amdgpu_userq_wait_count_fences(struct drm_file *filp,
+ struct drm_amdgpu_userq_wait *wait_info,
+ u32 *syncobj_handles, u32 *timeline_points,
+ u32 *timeline_handles,
+ struct drm_gem_object **gobj_write,
+ struct drm_gem_object **gobj_read)
{
- u32 *syncobj_handles, *timeline_points, *timeline_handles, *bo_handles_read, *bo_handles_write;
- u32 num_syncobj, num_read_bo_handles, num_write_bo_handles;
- struct drm_amdgpu_userq_fence_info *fence_info = NULL;
- struct drm_amdgpu_userq_wait *wait_info = data;
- struct amdgpu_fpriv *fpriv = filp->driver_priv;
- struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
- struct amdgpu_usermode_queue *waitq = NULL;
- struct drm_gem_object **gobj_write;
- struct drm_gem_object **gobj_read;
- struct dma_fence **fences = NULL;
- u16 num_points, num_fences = 0;
- int r, i, rentry, wentry, cnt;
+ int num_read_bo_handles, num_write_bo_handles;
+ struct dma_fence_unwrap iter;
+ struct dma_fence *fence, *f;
+ unsigned int num_fences = 0;
struct drm_exec exec;
+ int i, r;
+
+ /*
+ * This needs to be outside of the lock provided by drm_exec for
+ * DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT to work correctly.
+ */
+
+ /* Count timeline fences */
+ for (i = 0; i < wait_info->num_syncobj_timeline_handles; i++) {
+ r = drm_syncobj_find_fence(filp, timeline_handles[i],
+ timeline_points[i],
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+ &fence);
+ if (r)
+ return r;
+
+ dma_fence_unwrap_for_each(f, &iter, fence)
+ num_fences++;
- if (!amdgpu_userq_enabled(dev))
- return -ENOTSUPP;
+ dma_fence_put(fence);
+ }
- if (wait_info->num_syncobj_handles > AMDGPU_USERQ_MAX_HANDLES ||
- wait_info->num_bo_write_handles > AMDGPU_USERQ_MAX_HANDLES ||
- wait_info->num_bo_read_handles > AMDGPU_USERQ_MAX_HANDLES)
- return -EINVAL;
+ /* Count boolean fences */
+ for (i = 0; i < wait_info->num_syncobj_handles; i++) {
+ r = drm_syncobj_find_fence(filp, syncobj_handles[i], 0,
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+ &fence);
+ if (r)
+ return r;
- num_read_bo_handles = wait_info->num_bo_read_handles;
- bo_handles_read = memdup_user(u64_to_user_ptr(wait_info->bo_read_handles),
- size_mul(sizeof(u32), num_read_bo_handles));
- if (IS_ERR(bo_handles_read))
- return PTR_ERR(bo_handles_read);
+ num_fences++;
+ dma_fence_put(fence);
+ }
+ /* Lock all the GEM objects */
+ /* TODO: It is actually not necessary to lock them */
+ num_read_bo_handles = wait_info->num_bo_read_handles;
num_write_bo_handles = wait_info->num_bo_write_handles;
- bo_handles_write = memdup_user(u64_to_user_ptr(wait_info->bo_write_handles),
- size_mul(sizeof(u32), num_write_bo_handles));
- if (IS_ERR(bo_handles_write)) {
- r = PTR_ERR(bo_handles_write);
- goto free_bo_handles_read;
- }
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
+ num_read_bo_handles + num_write_bo_handles);
- num_syncobj = wait_info->num_syncobj_handles;
- syncobj_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_handles),
- size_mul(sizeof(u32), num_syncobj));
- if (IS_ERR(syncobj_handles)) {
- r = PTR_ERR(syncobj_handles);
- goto free_bo_handles_write;
+ drm_exec_until_all_locked(&exec) {
+ r = drm_exec_prepare_array(&exec, gobj_read,
+ num_read_bo_handles, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (r)
+ goto error_unlock;
+
+ r = drm_exec_prepare_array(&exec, gobj_write,
+ num_write_bo_handles, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (r)
+ goto error_unlock;
}
- num_points = wait_info->num_syncobj_timeline_handles;
- timeline_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_handles),
- sizeof(u32) * num_points);
- if (IS_ERR(timeline_handles)) {
- r = PTR_ERR(timeline_handles);
- goto free_syncobj_handles;
+ /* Count read fences */
+ for (i = 0; i < num_read_bo_handles; i++) {
+ struct dma_resv_iter resv_cursor;
+ struct dma_fence *fence;
+
+ dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
+ DMA_RESV_USAGE_READ, fence)
+ num_fences++;
}
- timeline_points = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_points),
- sizeof(u32) * num_points);
- if (IS_ERR(timeline_points)) {
- r = PTR_ERR(timeline_points);
- goto free_timeline_handles;
+ /* Count write fences */
+ for (i = 0; i < num_write_bo_handles; i++) {
+ struct dma_resv_iter resv_cursor;
+ struct dma_fence *fence;
+
+ dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
+ DMA_RESV_USAGE_WRITE, fence)
+ num_fences++;
}
- gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
- if (!gobj_read) {
+ wait_info->num_fences = min(num_fences, USHRT_MAX);
+ r = 0;
+
+error_unlock:
+ /* Unlock all the GEM objects */
+ drm_exec_fini(&exec);
+ return r;
+}
+
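[Annotation: splitting the ioctl into a count pass and a fill pass preserves the two-call userspace contract documented further down: call once with num_fences == 0 to size the buffer, then again to fetch the info. A hypothetical userspace sketch, error handling omitted and ioctl name assumed:]

	struct drm_amdgpu_userq_wait wait = { /* queue id, handle arrays ... */ };

	wait.num_fences = 0;			/* pass 1: ask for the count */
	ioctl(fd, DRM_IOCTL_AMDGPU_USERQ_WAIT, &wait);

	wait.out_fences = (__u64)(uintptr_t)
		calloc(wait.num_fences, sizeof(struct drm_amdgpu_userq_fence_info));
	ioctl(fd, DRM_IOCTL_AMDGPU_USERQ_WAIT, &wait);	/* pass 2: fetch info */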
+static int
+amdgpu_userq_wait_add_fence(struct drm_amdgpu_userq_wait *wait_info,
+ struct dma_fence **fences, unsigned int *num_fences,
+ struct dma_fence *fence)
+{
+ /* Fall back to waiting inline if userspace didn't allocate enough space */
+ if (*num_fences >= wait_info->num_fences)
+ return dma_fence_wait(fence, true);
+
+ fences[(*num_fences)++] = dma_fence_get(fence);
+ return 0;
+}
+
+static int
+amdgpu_userq_wait_return_fence_info(struct drm_file *filp,
+ struct drm_amdgpu_userq_wait *wait_info,
+ u32 *syncobj_handles, u32 *timeline_points,
+ u32 *timeline_handles,
+ struct drm_gem_object **gobj_write,
+ struct drm_gem_object **gobj_read)
+{
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
+ struct drm_amdgpu_userq_fence_info *fence_info;
+ int num_read_bo_handles, num_write_bo_handles;
+ struct amdgpu_usermode_queue *waitq;
+ struct dma_fence **fences, *fence, *f;
+ struct dma_fence_unwrap iter;
+ int num_points, num_syncobj;
+ unsigned int num_fences = 0;
+ struct drm_exec exec;
+ int i, cnt, r;
+
+ fence_info = kmalloc_array(wait_info->num_fences, sizeof(*fence_info),
+ GFP_KERNEL);
+ if (!fence_info)
+ return -ENOMEM;
+
+ fences = kmalloc_array(wait_info->num_fences, sizeof(*fences),
+ GFP_KERNEL);
+ if (!fences) {
r = -ENOMEM;
- goto free_timeline_points;
+ goto free_fence_info;
}
- for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
- gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
- if (!gobj_read[rentry]) {
- r = -ENOENT;
- goto put_gobj_read;
+ /* Retrieve timeline fences */
+ num_points = wait_info->num_syncobj_timeline_handles;
+ for (i = 0; i < num_points; i++) {
+ r = drm_syncobj_find_fence(filp, timeline_handles[i],
+ timeline_points[i],
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+ &fence);
+ if (r)
+ goto free_fences;
+
+ dma_fence_unwrap_for_each(f, &iter, fence) {
+ r = amdgpu_userq_wait_add_fence(wait_info, fences,
+ &num_fences, f);
+ if (r) {
+ dma_fence_put(fence);
+ goto free_fences;
+ }
}
- }
- gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
- if (!gobj_write) {
- r = -ENOMEM;
- goto put_gobj_read;
+ dma_fence_put(fence);
}
- for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
- gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
- if (!gobj_write[wentry]) {
- r = -ENOENT;
- goto put_gobj_write;
- }
+ /* Retrieve boolean fences */
+ num_syncobj = wait_info->num_syncobj_handles;
+ for (i = 0; i < num_syncobj; i++) {
+ struct dma_fence *fence;
+
+ r = drm_syncobj_find_fence(filp, syncobj_handles[i], 0,
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+ &fence);
+ if (r)
+ goto free_fences;
+
+ r = amdgpu_userq_wait_add_fence(wait_info, fences,
+ &num_fences, fence);
+ dma_fence_put(fence);
+ if (r)
+ goto free_fences;
+
}
+ /* Lock all the GEM objects */
+ num_read_bo_handles = wait_info->num_bo_read_handles;
+ num_write_bo_handles = wait_info->num_bo_write_handles;
drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
- (num_read_bo_handles + num_write_bo_handles));
+ num_read_bo_handles + num_write_bo_handles);
- /* Lock all BOs with retry handling */
drm_exec_until_all_locked(&exec) {
- r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
+ r = drm_exec_prepare_array(&exec, gobj_read,
+ num_read_bo_handles, 1);
drm_exec_retry_on_contention(&exec);
- if (r) {
- drm_exec_fini(&exec);
- goto put_gobj_write;
- }
+ if (r)
+ goto error_unlock;
- r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
+ r = drm_exec_prepare_array(&exec, gobj_write,
+ num_write_bo_handles, 1);
drm_exec_retry_on_contention(&exec);
- if (r) {
- drm_exec_fini(&exec);
- goto put_gobj_write;
- }
+ if (r)
+ goto error_unlock;
}
- if (!wait_info->num_fences) {
- if (num_points) {
- struct dma_fence_unwrap iter;
- struct dma_fence *fence;
- struct dma_fence *f;
-
- for (i = 0; i < num_points; i++) {
- r = drm_syncobj_find_fence(filp, timeline_handles[i],
- timeline_points[i],
- DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
- &fence);
- if (r)
- goto exec_fini;
-
- dma_fence_unwrap_for_each(f, &iter, fence)
- num_fences++;
+ /* Retrieve GEM read objects fence */
+ for (i = 0; i < num_read_bo_handles; i++) {
+ struct dma_resv_iter resv_cursor;
+ struct dma_fence *fence;
- dma_fence_put(fence);
- }
+ dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
+ DMA_RESV_USAGE_READ, fence) {
+ r = amdgpu_userq_wait_add_fence(wait_info, fences,
+ &num_fences, fence);
+ if (r)
+ goto error_unlock;
}
+ }
- /* Count syncobj's fence */
- for (i = 0; i < num_syncobj; i++) {
- struct dma_fence *fence;
+ /* Retrieve GEM write objects fence */
+ for (i = 0; i < num_write_bo_handles; i++) {
+ struct dma_resv_iter resv_cursor;
+ struct dma_fence *fence;
- r = drm_syncobj_find_fence(filp, syncobj_handles[i],
- 0,
- DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
- &fence);
+ dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
+ DMA_RESV_USAGE_WRITE, fence) {
+ r = amdgpu_userq_wait_add_fence(wait_info, fences,
+ &num_fences, fence);
if (r)
- goto exec_fini;
-
- num_fences++;
- dma_fence_put(fence);
+ goto error_unlock;
}
+ }
- /* Count GEM objects fence */
- for (i = 0; i < num_read_bo_handles; i++) {
- struct dma_resv_iter resv_cursor;
- struct dma_fence *fence;
+ drm_exec_fini(&exec);
- dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
- DMA_RESV_USAGE_READ, fence)
- num_fences++;
- }
+ /*
+ * Keep only the latest fences to reduce the number of values
+ * given back to userspace.
+ */
+ num_fences = dma_fence_dedup_array(fences, num_fences);
- for (i = 0; i < num_write_bo_handles; i++) {
- struct dma_resv_iter resv_cursor;
- struct dma_fence *fence;
+ waitq = amdgpu_userq_get(userq_mgr, wait_info->waitq_id);
+ if (!waitq) {
+ r = -EINVAL;
+ goto free_fences;
+ }
- dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
- DMA_RESV_USAGE_WRITE, fence)
- num_fences++;
- }
+ for (i = 0, cnt = 0; i < num_fences; i++) {
+ struct amdgpu_userq_fence_driver *fence_drv;
+ struct amdgpu_userq_fence *userq_fence;
+ u32 index;
- /*
- * Passing num_fences = 0 means that userspace doesn't want to
- * retrieve userq_fence_info. If num_fences = 0 we skip filling
- * userq_fence_info and return the actual number of fences on
- * args->num_fences.
- */
- wait_info->num_fences = num_fences;
- } else {
- /* Array of fence info */
- fence_info = kmalloc_array(wait_info->num_fences, sizeof(*fence_info), GFP_KERNEL);
- if (!fence_info) {
- r = -ENOMEM;
- goto exec_fini;
- }
+ userq_fence = to_amdgpu_userq_fence(fences[i]);
+ if (!userq_fence) {
+ /*
+ * Just waiting on other driver fences should
+ * be good for now
+ */
+ r = dma_fence_wait(fences[i], true);
+ if (r)
+ goto put_waitq;
- /* Array of fences */
- fences = kmalloc_array(wait_info->num_fences, sizeof(*fences), GFP_KERNEL);
- if (!fences) {
- r = -ENOMEM;
- goto free_fence_info;
+ continue;
}
- /* Retrieve GEM read objects fence */
- for (i = 0; i < num_read_bo_handles; i++) {
- struct dma_resv_iter resv_cursor;
- struct dma_fence *fence;
-
- dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
- DMA_RESV_USAGE_READ, fence) {
- if (num_fences >= wait_info->num_fences) {
- r = -EINVAL;
- goto free_fences;
- }
-
- fences[num_fences++] = fence;
- dma_fence_get(fence);
- }
- }
+ fence_drv = userq_fence->fence_drv;
+ /*
+ * We need to make sure the user queue releases its references
+ * to the fence drivers at some point before queue destruction.
+ * Otherwise we would accumulate those references until we run
+ * out of space and crash.
+ */
+ r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv,
+ xa_limit_32b, GFP_KERNEL);
+ if (r)
+ goto put_waitq;
- /* Retrieve GEM write objects fence */
- for (i = 0; i < num_write_bo_handles; i++) {
- struct dma_resv_iter resv_cursor;
- struct dma_fence *fence;
+ amdgpu_userq_fence_driver_get(fence_drv);
- dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
- DMA_RESV_USAGE_WRITE, fence) {
- if (num_fences >= wait_info->num_fences) {
- r = -EINVAL;
- goto free_fences;
- }
+ /* Store drm syncobj's gpu va address and value */
+ fence_info[cnt].va = fence_drv->va;
+ fence_info[cnt].value = fences[i]->seqno;
- fences[num_fences++] = fence;
- dma_fence_get(fence);
- }
- }
+ /* Increment the actual userq fence count */
+ cnt++;
+ }
+ wait_info->num_fences = cnt;
- if (num_points) {
- struct dma_fence_unwrap iter;
- struct dma_fence *fence;
- struct dma_fence *f;
-
- for (i = 0; i < num_points; i++) {
- r = drm_syncobj_find_fence(filp, timeline_handles[i],
- timeline_points[i],
- DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
- &fence);
- if (r)
- goto free_fences;
-
- dma_fence_unwrap_for_each(f, &iter, fence) {
- if (num_fences >= wait_info->num_fences) {
- r = -EINVAL;
- dma_fence_put(fence);
- goto free_fences;
- }
-
- dma_fence_get(f);
- fences[num_fences++] = f;
- }
+ /* Copy userq fence info to user space */
+ if (copy_to_user(u64_to_user_ptr(wait_info->out_fences),
+ fence_info, cnt * sizeof(*fence_info)))
+ r = -EFAULT;
+ else
+ r = 0;
- dma_fence_put(fence);
- }
- }
+put_waitq:
+ amdgpu_userq_put(waitq);
- /* Retrieve syncobj's fence */
- for (i = 0; i < num_syncobj; i++) {
- struct dma_fence *fence;
+free_fences:
+ while (num_fences--)
+ dma_fence_put(fences[num_fences]);
+ kfree(fences);
- r = drm_syncobj_find_fence(filp, syncobj_handles[i],
- 0,
- DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
- &fence);
- if (r)
- goto free_fences;
+free_fence_info:
+ kfree(fence_info);
+ return r;
- if (num_fences >= wait_info->num_fences) {
- r = -EINVAL;
- dma_fence_put(fence);
- goto free_fences;
- }
+error_unlock:
+ drm_exec_fini(&exec);
+ goto free_fences;
+}
- fences[num_fences++] = fence;
- }
+int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ int num_points, num_syncobj, num_read_bo_handles, num_write_bo_handles;
+ u32 *syncobj_handles, *timeline_points, *timeline_handles;
+ struct drm_amdgpu_userq_wait *wait_info = data;
+ struct drm_gem_object **gobj_write;
+ struct drm_gem_object **gobj_read;
+ void __user *ptr;
+ int r;
- /*
- * Keep only the latest fences to reduce the number of values
- * given back to userspace.
- */
- num_fences = dma_fence_dedup_array(fences, num_fences);
+ if (!amdgpu_userq_enabled(dev))
+ return -ENOTSUPP;
- waitq = amdgpu_userq_get(userq_mgr, wait_info->waitq_id);
- if (!waitq) {
- r = -EINVAL;
- goto free_fences;
- }
+ if (wait_info->num_bo_write_handles > AMDGPU_USERQ_MAX_HANDLES ||
+ wait_info->num_bo_read_handles > AMDGPU_USERQ_MAX_HANDLES)
+ return -EINVAL;
- for (i = 0, cnt = 0; i < num_fences; i++) {
- struct amdgpu_userq_fence_driver *fence_drv;
- struct amdgpu_userq_fence *userq_fence;
- u32 index;
-
- userq_fence = to_amdgpu_userq_fence(fences[i]);
- if (!userq_fence) {
- /*
- * Just waiting on other driver fences should
- * be good for now
- */
- r = dma_fence_wait(fences[i], true);
- if (r) {
- dma_fence_put(fences[i]);
- goto free_fences;
- }
-
- dma_fence_put(fences[i]);
- continue;
- }
+ num_syncobj = wait_info->num_syncobj_handles;
+ ptr = u64_to_user_ptr(wait_info->syncobj_handles);
+ syncobj_handles = memdup_array_user(ptr, num_syncobj, sizeof(u32));
+ if (IS_ERR(syncobj_handles))
+ return PTR_ERR(syncobj_handles);
- fence_drv = userq_fence->fence_drv;
- /*
- * We need to make sure the user queue release their reference
- * to the fence drivers at some point before queue destruction.
- * Otherwise, we would gather those references until we don't
- * have any more space left and crash.
- */
- r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv,
- xa_limit_32b, GFP_KERNEL);
- if (r)
- goto free_fences;
+ num_points = wait_info->num_syncobj_timeline_handles;
+ ptr = u64_to_user_ptr(wait_info->syncobj_timeline_handles);
+ timeline_handles = memdup_array_user(ptr, num_points, sizeof(u32));
+ if (IS_ERR(timeline_handles)) {
+ r = PTR_ERR(timeline_handles);
+ goto free_syncobj_handles;
+ }
- amdgpu_userq_fence_driver_get(fence_drv);
+ ptr = u64_to_user_ptr(wait_info->syncobj_timeline_points);
+ timeline_points = memdup_array_user(ptr, num_points, sizeof(u32));
+ if (IS_ERR(timeline_points)) {
+ r = PTR_ERR(timeline_points);
+ goto free_timeline_handles;
+ }
- /* Store drm syncobj's gpu va address and value */
- fence_info[cnt].va = fence_drv->va;
- fence_info[cnt].value = fences[i]->seqno;
+ num_read_bo_handles = wait_info->num_bo_read_handles;
+ ptr = u64_to_user_ptr(wait_info->bo_read_handles);
+ r = drm_gem_objects_lookup(filp, ptr, num_read_bo_handles, &gobj_read);
+ if (r)
+ goto free_timeline_points;
- dma_fence_put(fences[i]);
- /* Increment the actual userq fence count */
- cnt++;
- }
+ num_write_bo_handles = wait_info->num_bo_write_handles;
+ ptr = u64_to_user_ptr(wait_info->bo_write_handles);
+ r = drm_gem_objects_lookup(filp, ptr, num_write_bo_handles,
+ &gobj_write);
+ if (r)
+ goto put_gobj_read;
- wait_info->num_fences = cnt;
- /* Copy userq fence info to user space */
- if (copy_to_user(u64_to_user_ptr(wait_info->out_fences),
- fence_info, wait_info->num_fences * sizeof(*fence_info))) {
- r = -EFAULT;
- goto free_fences;
- }
+ /*
+ * Passing num_fences = 0 means that userspace doesn't want to
+ * retrieve userq_fence_info. If num_fences = 0 we skip filling
+ * userq_fence_info and return the actual number of fences in
+ * wait_info->num_fences.
+ */
+ if (!wait_info->num_fences) {
+ r = amdgpu_userq_wait_count_fences(filp, wait_info,
+ syncobj_handles,
+ timeline_points,
+ timeline_handles,
+ gobj_write,
+ gobj_read);
+ } else {
+ r = amdgpu_userq_wait_return_fence_info(filp, wait_info,
+ syncobj_handles,
+ timeline_points,
+ timeline_handles,
+ gobj_write,
+ gobj_read);
}
-free_fences:
- if (fences) {
- while (num_fences-- > 0)
- dma_fence_put(fences[num_fences]);
- kfree(fences);
- }
-free_fence_info:
- kfree(fence_info);
-exec_fini:
- drm_exec_fini(&exec);
-put_gobj_write:
- while (wentry-- > 0)
- drm_gem_object_put(gobj_write[wentry]);
- kfree(gobj_write);
+ while (num_write_bo_handles--)
+ drm_gem_object_put(gobj_write[num_write_bo_handles]);
+ kvfree(gobj_write);
+
put_gobj_read:
- while (rentry-- > 0)
- drm_gem_object_put(gobj_read[rentry]);
- kfree(gobj_read);
+ while (num_read_bo_handles--)
+ drm_gem_object_put(gobj_read[num_read_bo_handles]);
+ kvfree(gobj_read);
+
free_timeline_points:
kfree(timeline_points);
free_timeline_handles:
kfree(timeline_handles);
free_syncobj_handles:
kfree(syncobj_handles);
-free_bo_handles_write:
- kfree(bo_handles_write);
-free_bo_handles_read:
- kfree(bo_handles_read);
-
- if (waitq)
- amdgpu_userq_put(waitq);
-
return r;
}
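A note on the split above: the ioctl now follows the common two-call query pattern. A minimal userspace sketch, assuming the UAPI names DRM_IOCTL_AMDGPU_USERQ_WAIT and struct drm_amdgpu_userq_fence_info for the (va, value) pairs written by amdgpu_userq_wait_return_fence_info() — neither is shown in this hunk:

	struct drm_amdgpu_userq_wait wait = { /* syncobj/BO handles filled in */ };
	struct drm_amdgpu_userq_fence_info *info;

	wait.num_fences = 0;		/* first call only reports the count */
	if (ioctl(fd, DRM_IOCTL_AMDGPU_USERQ_WAIT, &wait))
		return -1;

	info = calloc(wait.num_fences, sizeof(*info));
	wait.out_fences = (__u64)(uintptr_t)info;
	/* second call fills one (va, value) pair per fence */
	if (ioctl(fd, DRM_IOCTL_AMDGPU_USERQ_WAIT, &wait))
		return -1;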
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index a7d8f1ce6ac2..eb4a15db2ef2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -451,24 +451,6 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
}
/**
- * amdgpu_vce_required_gart_pages() - gets number of GART pages required by VCE
- *
- * @adev: amdgpu_device pointer
- *
- * Returns how many GART pages we need before GTT for the VCE IP block.
- * For VCE1, see vce_v1_0_ensure_vcpu_bo_32bit_addr for details.
- * For VCE2+, this is not needed so return zero.
- */
-u32 amdgpu_vce_required_gart_pages(struct amdgpu_device *adev)
-{
- /* VCE IP block not added yet, so can't use amdgpu_ip_version */
- if (adev->family == AMDGPU_FAMILY_SI)
- return 512;
-
- return 0;
-}
-
-/**
* amdgpu_vce_get_create_msg - generate a VCE create msg
*
* @ring: ring we should submit the msg to
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index 1c3464ce5037..778c714c8385 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -52,6 +52,7 @@ struct amdgpu_vce {
uint32_t srbm_soft_reset;
unsigned num_rings;
uint32_t keyselect;
+ struct drm_mm_node gart_node;
};
int amdgpu_vce_early_init(struct amdgpu_device *adev);
@@ -61,7 +62,6 @@ int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring)
int amdgpu_vce_suspend(struct amdgpu_device *adev);
int amdgpu_vce_resume(struct amdgpu_device *adev);
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
-u32 amdgpu_vce_required_gart_pages(struct amdgpu_device *adev);
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
struct amdgpu_ib *ib);
int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 75ae9b429420..03d95dca93d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -63,6 +63,7 @@
#define FIRMWARE_VCN4_0_6_1 "amdgpu/vcn_4_0_6_1.bin"
#define FIRMWARE_VCN5_0_0 "amdgpu/vcn_5_0_0.bin"
#define FIRMWARE_VCN5_0_1 "amdgpu/vcn_5_0_1.bin"
+#define FIRMWARE_VCN5_0_2 "amdgpu/vcn_5_0_2.bin"
#define FIRMWARE_VCN5_3_0 "amdgpu/vcn_5_3_0.bin"
MODULE_FIRMWARE(FIRMWARE_RAVEN);
@@ -91,6 +92,7 @@ MODULE_FIRMWARE(FIRMWARE_VCN4_0_6);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_6_1);
MODULE_FIRMWARE(FIRMWARE_VCN5_0_0);
MODULE_FIRMWARE(FIRMWARE_VCN5_0_1);
+MODULE_FIRMWARE(FIRMWARE_VCN5_0_2);
MODULE_FIRMWARE(FIRMWARE_VCN5_3_0);
static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
@@ -1095,7 +1097,8 @@ int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
if ((amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(4, 0, 3)) &&
- (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(5, 0, 1))) {
+ (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(5, 0, 1)) &&
+ (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(5, 0, 2))) {
r = amdgpu_vcn_enc_ring_test_ib(ring, timeout);
if (r)
goto error;
@@ -1132,7 +1135,8 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev, int i)
return;
if ((amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 3) ||
- amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(5, 0, 1))
+ amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(5, 0, 1) ||
+ amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(5, 0, 2))
&& (i > 0))
return;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 275745aa5829..dba7ea16a10d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -605,10 +605,10 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev)
#ifdef MODULE
if (THIS_MODULE->version != NULL)
- strcpy(vf2pf_info->driver_version, THIS_MODULE->version);
+ strscpy(vf2pf_info->driver_version, THIS_MODULE->version);
else
#endif
- strcpy(vf2pf_info->driver_version, "N/A");
+ strscpy(vf2pf_info->driver_version, "N/A");
vf2pf_info->pf2vf_version_required = 0; // no requirement, guest understands all
vf2pf_info->driver_cert = 0;
@@ -950,11 +950,6 @@ int amdgpu_virt_init_critical_region(struct amdgpu_device *adev)
if (adev->virt.req_init_data_ver != GPU_CRIT_REGION_V2)
return 0;
- if (init_hdr_offset < 0) {
- dev_err(adev->dev, "Invalid init header offset\n");
- return -EINVAL;
- }
-
vram_size = RREG32(mmRCC_CONFIG_MEMSIZE);
if (!vram_size || vram_size == U32_MAX)
return -EINVAL;
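The strcpy() -> strscpy() conversion earlier in this file uses the two-argument form of strscpy(), which derives the bound from the destination array at compile time, always NUL-terminates, and truncates instead of overflowing. A minimal sketch of the idiom:

	char driver_version[64];

	/* equivalent to strscpy(driver_version, "N/A", sizeof(driver_version)) */
	strscpy(driver_version, "N/A");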
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 886fbce0bfd1..9da0c6e9b869 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -162,6 +162,7 @@ enum AMDGIM_FEATURE_FLAG {
AMDGIM_FEATURE_RAS_TELEMETRY = (1 << 10),
AMDGIM_FEATURE_RAS_CPER = (1 << 11),
AMDGIM_FEATURE_XGMI_TA_EXT_PEER_LINK = (1 << 12),
+ AMDGIM_FEATURE_XGMI_CONNECTED_TO_CPU = (1 << 13),
};
enum AMDGIM_REG_ACCESS_FLAG {
@@ -412,6 +413,9 @@ struct amdgpu_video_codec_info;
#define amdgpu_sriov_xgmi_ta_ext_peer_link_en(adev) \
((adev)->virt.gim_feature & AMDGIM_FEATURE_XGMI_TA_EXT_PEER_LINK)
+#define amdgpu_sriov_xgmi_connected_to_cpu(adev) \
+((adev)->virt.gim_feature & AMDGIM_FEATURE_XGMI_CONNECTED_TO_CPU)
+
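The new macro mirrors the xgmi_connected_to_cpu feature bit the host reports in amd_sriov_msg_feature_flags (see the amdgv_sriovmsg.h hunk below). A hedged sketch of a VF-side call site — the guard itself is an assumption, only the macro is defined here:

	/* hypothetical: adopt the host's view of the XGMI topology */
	if (amdgpu_sriov_vf(adev) && amdgpu_sriov_xgmi_connected_to_cpu(adev))
		adev->gmc.xgmi.connected_to_cpu = true;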
static inline bool is_virtual_machine(void)
{
#if defined(CONFIG_X86)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
index 5cef8cd14148..e54295b56282 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
@@ -328,11 +328,9 @@ static int amdgpu_vkms_prepare_fb(struct drm_plane *plane,
return r;
}
- r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1);
- if (r) {
- dev_err(adev->dev, "allocating fence slot failed (%d)\n", r);
+ r = dma_resv_reserve_fences(rbo->tbo.base.resv, TTM_NUM_MOVE_FENCES);
+ if (r)
goto error_unlock;
- }
if (plane->type != DRM_PLANE_TYPE_CURSOR)
domain = amdgpu_display_supported_domains(adev, rbo->flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a677e38a493b..73abac6be5b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -139,6 +139,20 @@ static void amdgpu_vm_assert_locked(struct amdgpu_vm *vm)
}
/**
+ * amdgpu_vm_is_bo_always_valid - check if the BO is VM always valid
+ *
+ * @vm: VM to test against.
+ * @bo: BO to be tested.
+ *
+ * Returns true if the BO shares the dma_resv object with the root PD and is
+ * always guaranteed to be valid inside the VM.
+ */
+bool amdgpu_vm_is_bo_always_valid(struct amdgpu_vm *vm, struct amdgpu_bo *bo)
+{
+ return bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv;
+}
+
+/**
* amdgpu_vm_bo_evicted - vm_bo is evicted
*
* @vm_bo: vm_bo which is evicted
@@ -764,12 +778,9 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
* @need_pipe_sync: is pipe sync needed
*
* Emit a VM flush when it is necessary.
- *
- * Returns:
- * 0 on success, errno otherwise.
*/
-int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
- bool need_pipe_sync)
+void amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
+ bool need_pipe_sync)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
@@ -783,8 +794,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
bool cleaner_shader_needed = false;
bool pasid_mapping_needed = false;
struct dma_fence *fence = NULL;
- unsigned int patch;
- int r;
+ unsigned int patch = 0;
if (amdgpu_vmid_had_gpu_reset(adev, id)) {
gds_switch_needed = true;
@@ -812,9 +822,20 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
!cleaner_shader_needed)
- return 0;
+ return;
amdgpu_ring_ib_begin(ring);
+
+ /* There is intentionally no matching insert_end here for the vm flush.
+ * The IB portion of the submission has both. Having multiple
+ * insert_start sequences is OK, but you can only have one insert_end
+ * per submission because of the way the VCN FW works. For JPEG you
+ * can have as many insert_start and insert_end sequences as you like,
+ * as long as the rest of the packets come between start and end
+ * sequences.
+ */
+ if (ring->funcs->insert_start)
+ ring->funcs->insert_start(ring);
+
if (ring->funcs->init_cond_exec)
patch = amdgpu_ring_init_cond_exec(ring,
ring->cond_exe_gpu_addr);
@@ -845,9 +866,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
}
if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) {
- r = amdgpu_fence_emit(ring, job->hw_vm_fence, 0);
- if (r)
- return r;
+ amdgpu_fence_emit(ring, job->hw_vm_fence, 0);
fence = &job->hw_vm_fence->base;
/* get a ref for the job */
dma_fence_get(fence);
@@ -892,7 +911,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
}
amdgpu_ring_ib_end(ring);
- return 0;
}
/**
@@ -2790,8 +2808,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
dma_fence_put(vm->last_unlocked);
dma_fence_wait(vm->last_tlb_flush, false);
/* Make sure that all fence callbacks have completed */
- spin_lock_irqsave(vm->last_tlb_flush->lock, flags);
- spin_unlock_irqrestore(vm->last_tlb_flush->lock, flags);
+ dma_fence_lock_irqsave(vm->last_tlb_flush, flags);
+ dma_fence_unlock_irqrestore(vm->last_tlb_flush, flags);
dma_fence_put(vm->last_tlb_flush);
list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
@@ -3194,20 +3212,6 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
}
-/**
- * amdgpu_vm_is_bo_always_valid - check if the BO is VM always valid
- *
- * @vm: VM to test against.
- * @bo: BO to be tested.
- *
- * Returns true if the BO shares the dma_resv object with the root PD and is
- * always guaranteed to be valid inside the VM.
- */
-bool amdgpu_vm_is_bo_always_valid(struct amdgpu_vm *vm, struct amdgpu_bo *bo)
-{
- return bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv;
-}
-
void amdgpu_vm_print_task_info(struct amdgpu_device *adev,
struct amdgpu_task_info *task_info)
{
@@ -3216,3 +3220,20 @@ void amdgpu_vm_print_task_info(struct amdgpu_device *adev,
task_info->process_name, task_info->tgid,
task_info->task.comm, task_info->task.pid);
}
+
+void amdgpu_sdma_set_vm_pte_scheds(struct amdgpu_device *adev,
+ const struct amdgpu_vm_pte_funcs *vm_pte_funcs)
+{
+ struct drm_gpu_scheduler *sched;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (adev->sdma.has_page_queue)
+ sched = &adev->sdma.instance[i].page.sched;
+ else
+ sched = &adev->sdma.instance[i].ring.sched;
+ adev->vm_manager.vm_pte_scheds[i] = sched;
+ }
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
+ adev->vm_manager.vm_pte_funcs = vm_pte_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index d5b7061556ba..3b32f41c3655 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -515,7 +515,7 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct ww_acquire_ctx *ticket,
int (*callback)(void *p, struct amdgpu_bo *bo),
void *param);
-int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
+void amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
struct amdgpu_vm *vm, bool immediate);
int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
@@ -641,7 +641,7 @@ static inline uint64_t amdgpu_vm_tlb_seq(struct amdgpu_vm *vm)
* sure that the dma_fence structure isn't freed up.
*/
rcu_read_lock();
- lock = vm->last_tlb_flush->lock;
+ lock = dma_fence_spinlock(vm->last_tlb_flush);
rcu_read_unlock();
spin_lock_irqsave(lock, flags);
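Both hunks above route direct vm->last_tlb_flush->lock accesses through the new dma_fence_spinlock()/dma_fence_lock_irqsave() accessors. The callback-quiescing idiom from amdgpu_vm_fini(), as a standalone sketch:

	unsigned long flags;

	/* take and immediately drop the fence lock: once we get it, any
	 * concurrent dma_fence callback that held it has finished running */
	dma_fence_lock_irqsave(fence, flags);
	dma_fence_unlock_irqrestore(fence, flags);
	dma_fence_put(fence);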
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 6c9b3e21e15c..2a241a5b12c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -25,6 +25,7 @@
#include <linux/dma-mapping.h>
#include <drm/ttm/ttm_range_manager.h>
#include <drm/drm_drv.h>
+#include <drm/drm_buddy.h>
#include "amdgpu.h"
#include "amdgpu_vm.h"
@@ -52,15 +53,15 @@ to_amdgpu_device(struct amdgpu_vram_mgr *mgr)
return container_of(mgr, struct amdgpu_device, mman.vram_mgr);
}
-static inline struct drm_buddy_block *
+static inline struct gpu_buddy_block *
amdgpu_vram_mgr_first_block(struct list_head *list)
{
- return list_first_entry_or_null(list, struct drm_buddy_block, link);
+ return list_first_entry_or_null(list, struct gpu_buddy_block, link);
}
static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head)
{
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
u64 start, size;
block = amdgpu_vram_mgr_first_block(head);
@@ -71,7 +72,7 @@ static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head)
start = amdgpu_vram_mgr_block_start(block);
size = amdgpu_vram_mgr_block_size(block);
- block = list_entry(block->link.next, struct drm_buddy_block, link);
+ block = list_entry(block->link.next, struct gpu_buddy_block, link);
if (start + size != amdgpu_vram_mgr_block_start(block))
return false;
}
@@ -81,7 +82,7 @@ static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head)
static inline u64 amdgpu_vram_mgr_blocks_size(struct list_head *head)
{
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
u64 size = 0;
list_for_each_entry(block, head, link)
@@ -254,7 +255,7 @@ const struct attribute_group amdgpu_vram_mgr_attr_group = {
* Calculate how many bytes of the DRM BUDDY block are inside visible VRAM
*/
static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
- struct drm_buddy_block *block)
+ struct gpu_buddy_block *block)
{
u64 start = amdgpu_vram_mgr_block_start(block);
u64 end = start + amdgpu_vram_mgr_block_size(block);
@@ -279,7 +280,7 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_resource *res = bo->tbo.resource;
struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res);
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
u64 usage = 0;
if (amdgpu_gmc_vram_full_visible(&adev->gmc))
@@ -299,15 +300,15 @@ static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man)
{
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
- struct drm_buddy *mm = &mgr->mm;
+ struct gpu_buddy *mm = &mgr->mm;
struct amdgpu_vram_reservation *rsv, *temp;
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
uint64_t vis_usage;
list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks) {
- if (drm_buddy_alloc_blocks(mm, rsv->start, rsv->start + rsv->size,
+ if (gpu_buddy_alloc_blocks(mm, rsv->start, rsv->start + rsv->size,
rsv->size, mm->chunk_size, &rsv->allocated,
- DRM_BUDDY_RANGE_ALLOCATION))
+ GPU_BUDDY_RANGE_ALLOCATION))
continue;
block = amdgpu_vram_mgr_first_block(&rsv->allocated);
@@ -403,7 +404,7 @@ int amdgpu_vram_mgr_query_address_block_info(struct amdgpu_vram_mgr *mgr,
uint64_t address, struct amdgpu_vram_block_info *info)
{
struct amdgpu_vram_mgr_resource *vres;
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
u64 start, size;
int ret = -ENOENT;
@@ -450,8 +451,8 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
struct amdgpu_vram_mgr_resource *vres;
u64 size, remaining_size, lpfn, fpfn;
unsigned int adjust_dcc_size = 0;
- struct drm_buddy *mm = &mgr->mm;
- struct drm_buddy_block *block;
+ struct gpu_buddy *mm = &mgr->mm;
+ struct gpu_buddy_block *block;
unsigned long pages_per_block;
int r;
@@ -493,17 +494,17 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
INIT_LIST_HEAD(&vres->blocks);
if (place->flags & TTM_PL_FLAG_TOPDOWN)
- vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
+ vres->flags |= GPU_BUDDY_TOPDOWN_ALLOCATION;
if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
- vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+ vres->flags |= GPU_BUDDY_CONTIGUOUS_ALLOCATION;
if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
- vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
+ vres->flags |= GPU_BUDDY_CLEAR_ALLOCATION;
if (fpfn || lpfn != mgr->mm.size)
/* Allocate blocks in desired range */
- vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
+ vres->flags |= GPU_BUDDY_RANGE_ALLOCATION;
if (bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC &&
adev->gmc.gmc_funcs->get_dcc_alignment)
@@ -516,7 +517,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
dcc_size = roundup_pow_of_two(vres->base.size + adjust_dcc_size);
remaining_size = (u64)dcc_size;
- vres->flags |= DRM_BUDDY_TRIM_DISABLE;
+ vres->flags |= GPU_BUDDY_TRIM_DISABLE;
}
mutex_lock(&mgr->lock);
@@ -536,7 +537,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
BUG_ON(min_block_size < mm->chunk_size);
- r = drm_buddy_alloc_blocks(mm, fpfn,
+ r = gpu_buddy_alloc_blocks(mm, fpfn,
lpfn,
size,
min_block_size,
@@ -545,7 +546,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
if (unlikely(r == -ENOSPC) && pages_per_block == ~0ul &&
!(place->flags & TTM_PL_FLAG_CONTIGUOUS)) {
- vres->flags &= ~DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+ vres->flags &= ~GPU_BUDDY_CONTIGUOUS_ALLOCATION;
pages_per_block = max_t(u32, 2UL << (20UL - PAGE_SHIFT),
tbo->page_alignment);
@@ -566,7 +567,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
list_add_tail(&vres->vres_node, &mgr->allocated_vres_list);
if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) {
- struct drm_buddy_block *dcc_block;
+ struct gpu_buddy_block *dcc_block;
unsigned long dcc_start;
u64 trim_start;
@@ -576,7 +577,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
roundup((unsigned long)amdgpu_vram_mgr_block_start(dcc_block),
adjust_dcc_size);
trim_start = (u64)dcc_start;
- drm_buddy_block_trim(mm, &trim_start,
+ gpu_buddy_block_trim(mm, &trim_start,
(u64)vres->base.size,
&vres->blocks);
}
@@ -614,7 +615,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
return 0;
error_free_blocks:
- drm_buddy_free_list(mm, &vres->blocks, 0);
+ gpu_buddy_free_list(mm, &vres->blocks, 0);
mutex_unlock(&mgr->lock);
error_fini:
ttm_resource_fini(man, &vres->base);
@@ -637,8 +638,8 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res);
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
- struct drm_buddy *mm = &mgr->mm;
- struct drm_buddy_block *block;
+ struct gpu_buddy *mm = &mgr->mm;
+ struct gpu_buddy_block *block;
uint64_t vis_usage = 0;
mutex_lock(&mgr->lock);
@@ -649,7 +650,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
list_for_each_entry(block, &vres->blocks, link)
vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
- drm_buddy_free_list(mm, &vres->blocks, vres->flags);
+ gpu_buddy_free_list(mm, &vres->blocks, vres->flags);
amdgpu_vram_mgr_do_reserve(man);
mutex_unlock(&mgr->lock);
@@ -688,7 +689,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
if (!*sgt)
return -ENOMEM;
- /* Determine the number of DRM_BUDDY blocks to export */
+ /* Determine the number of GPU_BUDDY blocks to export */
amdgpu_res_first(res, offset, length, &cursor);
while (cursor.remaining) {
num_entries++;
@@ -704,10 +705,10 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
sg->length = 0;
/*
- * Walk down DRM_BUDDY blocks to populate scatterlist nodes
- * @note: Use iterator api to get first the DRM_BUDDY block
+ * Walk down GPU_BUDDY blocks to populate scatterlist nodes
+ * @note: Use the iterator API to first get the GPU_BUDDY block
* and the number of bytes from it. Access the following
- * DRM_BUDDY block(s) if more buffer needs to exported
+ * GPU_BUDDY block(s) if more of the buffer needs to be exported
*/
amdgpu_res_first(res, offset, length, &cursor);
for_each_sgtable_sg((*sgt), sg, i) {
@@ -792,10 +793,10 @@ uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr *mgr)
void amdgpu_vram_mgr_clear_reset_blocks(struct amdgpu_device *adev)
{
struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
- struct drm_buddy *mm = &mgr->mm;
+ struct gpu_buddy *mm = &mgr->mm;
mutex_lock(&mgr->lock);
- drm_buddy_reset_clear(mm, false);
+ gpu_buddy_reset_clear(mm, false);
mutex_unlock(&mgr->lock);
}
@@ -815,7 +816,7 @@ static bool amdgpu_vram_mgr_intersects(struct ttm_resource_manager *man,
size_t size)
{
struct amdgpu_vram_mgr_resource *mgr = to_amdgpu_vram_mgr_resource(res);
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
/* Check each drm buddy block individually */
list_for_each_entry(block, &mgr->blocks, link) {
@@ -848,7 +849,7 @@ static bool amdgpu_vram_mgr_compatible(struct ttm_resource_manager *man,
size_t size)
{
struct amdgpu_vram_mgr_resource *mgr = to_amdgpu_vram_mgr_resource(res);
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
/* Check each drm buddy block individually */
list_for_each_entry(block, &mgr->blocks, link) {
@@ -877,7 +878,7 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
struct drm_printer *printer)
{
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
- struct drm_buddy *mm = &mgr->mm;
+ struct gpu_buddy *mm = &mgr->mm;
struct amdgpu_vram_reservation *rsv;
drm_printf(printer, " vis usage:%llu\n",
@@ -930,7 +931,7 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
mgr->default_page_size = PAGE_SIZE;
man->func = &amdgpu_vram_mgr_func;
- err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
+ err = gpu_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
if (err)
return err;
@@ -965,11 +966,11 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {
- drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
+ gpu_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)
- drm_buddy_fini(&mgr->mm);
+ gpu_buddy_fini(&mgr->mm);
mutex_unlock(&mgr->lock);
ttm_resource_manager_cleanup(man);
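For reference, the gpu_buddy entry points this manager now uses, with the argument order taken from the call sites above (a sketch of the flow, not the full API):

	struct gpu_buddy mm;
	LIST_HEAD(blocks);
	int err;

	err = gpu_buddy_init(&mm, mem_size, PAGE_SIZE);
	...
	/* alloc_size bytes from [start, end) in >= min_block_size chunks */
	err = gpu_buddy_alloc_blocks(&mm, start, end, alloc_size,
				     min_block_size, &blocks,
				     GPU_BUDDY_RANGE_ALLOCATION);
	...
	gpu_buddy_free_list(&mm, &blocks, 0);
	gpu_buddy_fini(&mm);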
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
index 5f5fd9a911c2..429a21a2e9b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
@@ -24,11 +24,11 @@
#ifndef __AMDGPU_VRAM_MGR_H__
#define __AMDGPU_VRAM_MGR_H__
-#include <drm/drm_buddy.h>
+#include <linux/gpu_buddy.h>
struct amdgpu_vram_mgr {
struct ttm_resource_manager manager;
- struct drm_buddy mm;
+ struct gpu_buddy mm;
/* protects access to buffer objects */
struct mutex lock;
struct list_head reservations_pending;
@@ -57,19 +57,19 @@ struct amdgpu_vram_mgr_resource {
struct amdgpu_vres_task task;
};
-static inline u64 amdgpu_vram_mgr_block_start(struct drm_buddy_block *block)
+static inline u64 amdgpu_vram_mgr_block_start(struct gpu_buddy_block *block)
{
- return drm_buddy_block_offset(block);
+ return gpu_buddy_block_offset(block);
}
-static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
+static inline u64 amdgpu_vram_mgr_block_size(struct gpu_buddy_block *block)
{
- return (u64)PAGE_SIZE << drm_buddy_block_order(block);
+ return (u64)PAGE_SIZE << gpu_buddy_block_order(block);
}
-static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block)
+static inline bool amdgpu_vram_mgr_is_cleared(struct gpu_buddy_block *block)
{
- return drm_buddy_block_is_clear(block);
+ return gpu_buddy_block_is_clear(block);
}
static inline struct amdgpu_vram_mgr_resource *
@@ -82,8 +82,8 @@ static inline void amdgpu_vram_mgr_set_cleared(struct ttm_resource *res)
{
struct amdgpu_vram_mgr_resource *ares = to_amdgpu_vram_mgr_resource(res);
- WARN_ON(ares->flags & DRM_BUDDY_CLEARED);
- ares->flags |= DRM_BUDDY_CLEARED;
+ WARN_ON(ares->flags & GPU_BUDDY_CLEARED);
+ ares->flags |= GPU_BUDDY_CLEARED;
}
int amdgpu_vram_mgr_query_address_block_info(struct amdgpu_vram_mgr *mgr,
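A quick worked example for the helpers above: buddy blocks are power-of-two multiples of the chunk size, so with 4 KiB pages a block with gpu_buddy_block_order() == 4 has amdgpu_vram_mgr_block_size() == (u64)4096 << 4 == 64 KiB.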
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 11e56df1d91b..e63d05c477a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -338,7 +338,7 @@ static u32 xgmi_v6_4_get_link_status(struct amdgpu_device *adev, int global_link
if (!(adev->aid_mask & BIT(i)))
return U32_MAX;
- addr += adev->asic_funcs->encode_ext_smn_addressing(i);
+ addr += amdgpu_reg_get_smn_base64(adev, XGMI_HWIP, i);
return RREG32_PCIE_EXT(addr);
}
@@ -347,6 +347,9 @@ int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev, int global_link_num)
{
u32 xgmi_state_reg_val;
+ if (amdgpu_sriov_vf(adev))
+ return AMDGPU_XGMI_LINK_NA;
+
if (adev->gmc.xgmi.num_physical_nodes <= 1)
return -EINVAL;
@@ -469,12 +472,12 @@ static ssize_t amdgpu_xgmi_show_num_hops(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
- int i;
+ int i, offset = 0;
for (i = 0; i < top->num_nodes; i++)
- sprintf(buf + 3 * i, "%02x ", top->nodes[i].num_hops);
+ offset += sysfs_emit_at(buf, offset, "%02x ", top->nodes[i].num_hops);
- return sysfs_emit(buf, "%s\n", buf);
+ return offset + sysfs_emit_at(buf, offset, "\n");
}
static ssize_t amdgpu_xgmi_show_num_links(struct device *dev,
@@ -484,12 +487,12 @@ static ssize_t amdgpu_xgmi_show_num_links(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
- int i;
+ int i, offset = 0;
for (i = 0; i < top->num_nodes; i++)
- sprintf(buf + 3 * i, "%02x ", top->nodes[i].num_links);
+ offset += sysfs_emit_at(buf, offset, "%02x ", top->nodes[i].num_links);
- return sysfs_emit(buf, "%s\n", buf);
+ return offset + sysfs_emit_at(buf, offset, "\n");
}
static ssize_t amdgpu_xgmi_show_connected_port_num(struct device *dev,
@@ -1290,7 +1293,10 @@ static void amdgpu_xgmi_legacy_reset_ras_error_count(struct amdgpu_device *adev)
static void __xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst, u64 mca_base)
{
- WREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS, 0ULL);
+ uint64_t smn_base =
+ amdgpu_reg_get_smn_base64(adev, XGMI_HWIP, xgmi_inst);
+
+ WREG64_MCA(smn_base, mca_base, ACA_REG_IDX_STATUS, 0ULL);
}
static void xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst)
@@ -1500,6 +1506,7 @@ static void __xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, struct a
u64 mca_base, struct ras_err_data *err_data)
{
int xgmi_inst = mcm_info->die_id;
+ uint64_t smn_base;
u64 status = 0;
status = RREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS);
@@ -1516,8 +1523,8 @@ static void __xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, struct a
default:
break;
}
-
- WREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS, 0ULL);
+ smn_base = amdgpu_reg_get_smn_base64(adev, XGMI_HWIP, xgmi_inst);
+ WREG64_MCA(smn_base, mca_base, ACA_REG_IDX_STATUS, 0ULL);
}
static void xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, int xgmi_inst, struct ras_err_data *err_data)
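The two sysfs fixes above replace an sprintf() loop that rendered into buf and then passed buf to sysfs_emit() as its own source argument — unbounded, and undefined behavior for overlapping buffers. sysfs_emit_at() is bounded to PAGE_SIZE and composes cleanly; a generic sketch (example_show, num_vals and vals are placeholders):

	static ssize_t example_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
	{
		int i, offset = 0;

		for (i = 0; i < num_vals; i++)
			offset += sysfs_emit_at(buf, offset, "%02x ", vals[i]);
		/* terminating newline; the sum is the total length emitted */
		return offset + sysfs_emit_at(buf, offset, "\n");
	}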
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index cffb2f805de2..847cfd1fd004 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -161,7 +161,8 @@ union amd_sriov_msg_feature_flags {
uint32_t ras_telemetry : 1;
uint32_t ras_cper : 1;
uint32_t xgmi_ta_ext_peer_link : 1;
- uint32_t reserved : 19;
+ uint32_t xgmi_connected_to_cpu : 1;
+ uint32_t reserved : 18;
} flags;
uint32_t all;
};
@@ -470,14 +471,23 @@ struct amd_sriov_ras_chk_criti {
uint32_t hit;
};
+union amd_sriov_ras_host_push {
+ struct amd_sriov_ras_telemetry_error_count error_count;
+ struct amd_sriov_ras_cper_dump cper_dump;
+ struct amd_sriov_ras_chk_criti chk_criti;
+};
+
+#define AMD_SRIOV_UNIRAS_BLOCKS_BUF_SIZE 4096
+#define AMD_SRIOV_UNIRAS_CMD_MAX_SIZE (4096 * 13)
+struct amd_sriov_uniras_shared_mem {
+ uint8_t blocks_ecc_buf[AMD_SRIOV_UNIRAS_BLOCKS_BUF_SIZE];
+ uint8_t cmd_buf[AMD_SRIOV_UNIRAS_CMD_MAX_SIZE];
+};
+
struct amdsriov_ras_telemetry {
struct amd_sriov_ras_telemetry_header header;
-
- union {
- struct amd_sriov_ras_telemetry_error_count error_count;
- struct amd_sriov_ras_cper_dump cper_dump;
- struct amd_sriov_ras_chk_criti chk_criti;
- } body;
+ union amd_sriov_ras_host_push body;
+ struct amd_sriov_uniras_shared_mem uniras_shared_mem;
};
/* version data stored in MAILBOX_MSGBUF_RCV_DW1 for future expansion */
@@ -510,6 +520,10 @@ _Static_assert(AMD_SRIOV_MSG_RESERVE_UCODE % 4 == 0,
_Static_assert(AMD_SRIOV_MSG_RESERVE_UCODE > AMD_SRIOV_UCODE_ID__MAX,
"AMD_SRIOV_MSG_RESERVE_UCODE must be bigger than AMD_SRIOV_UCODE_ID__MAX");
+_Static_assert(
+ sizeof(struct amdsriov_ras_telemetry) <= AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10,
+"amdsriov_ras_telemetry must be " stringification(AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1) " KB");
+
#undef _stringification
#undef stringification
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
index d9842aa25283..72ea37dbfea8 100644
--- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
@@ -58,25 +58,6 @@ void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev)
adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT << 1;
}
-/* Fixed pattern for smn addressing on different AIDs:
- * bit[34]: indicate cross AID access
- * bit[33:32]: indicate target AID id
- * AID id range is 0 ~ 3 as maximum AID number is 4.
- */
-u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id)
-{
- u64 ext_offset;
-
- /* local routing and bit[34:32] will be zeros */
- if (ext_id == 0)
- return 0;
-
- /* Initiated from host, accessing to all non-zero aids are cross traffic */
- ext_offset = ((u64)(ext_id & 0x3) << 32) | (1ULL << 34);
-
- return ext_offset;
-}
-
static enum amdgpu_gfx_partition
__aqua_vanjaram_calc_xcp_mode(struct amdgpu_xcp_mgr *xcp_mgr)
{
@@ -590,7 +571,7 @@ static void aqua_read_smn_ext(struct amdgpu_device *adev,
uint64_t smn_addr, int i)
{
regdata->addr =
- smn_addr + adev->asic_funcs->encode_ext_smn_addressing(i);
+ smn_addr + amdgpu_reg_get_smn_base64(adev, XGMI_HWIP, i);
regdata->value = RREG32_PCIE_EXT(regdata->addr);
}
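The retired aqua_vanjaram_encode_ext_smn_addressing() helper is now reached through amdgpu_reg_get_smn_base64(). For reference, the encoding it implemented, worked through for AID 2:

	/* bit 34 flags cross-AID traffic, bits 33:32 carry the AID id:
	 *   ((u64)(2 & 0x3) << 32) | (1ULL << 34) == 0x600000000
	 * AID 0 is local routing, so its offset is simply 0 */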
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c
index e4ce3029d3fb..6e37961f6be5 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.c
+++ b/drivers/gpu/drm/amd/amdgpu/atom.c
@@ -1462,8 +1462,6 @@ static void atom_get_vbios_pn(struct atom_context *ctx)
ctx->vbios_pn[count] = 0;
}
-
- drm_info(ctx->card->dev, "ATOM BIOS: %s\n", ctx->vbios_pn);
}
static void atom_get_vbios_version(struct atom_context *ctx)
@@ -1520,6 +1518,30 @@ static void atom_get_vbios_build(struct atom_context *ctx)
strscpy(ctx->build_num, str, len);
}
+static inline void atom_print_vbios_info(struct atom_context *ctx)
+{
+ char vbios_info[256];
+ int off = 0;
+
+ if (ctx->vbios_pn[0])
+ off += scnprintf(vbios_info + off, sizeof(vbios_info) - off,
+ "%s", ctx->vbios_pn);
+ if (ctx->build_num[0])
+ off += scnprintf(vbios_info + off, sizeof(vbios_info) - off,
+ "%sbuild: %s", off ? ", " : "",
+ ctx->build_num);
+ if (ctx->vbios_ver_str[0])
+ off += scnprintf(vbios_info + off, sizeof(vbios_info) - off,
+ "%sver: %s", off ? ", " : "",
+ ctx->vbios_ver_str);
+ if (ctx->date[0])
+ off += scnprintf(vbios_info + off, sizeof(vbios_info) - off,
+ "%s%.10s", off ? ", " : "",
+ ctx->date);
+ if (off)
+ drm_info(ctx->card->dev, "ATOM BIOS: %s\n", vbios_info);
+}
+
struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios)
{
int base;
@@ -1582,6 +1604,8 @@ struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios)
atom_get_vbios_version(ctx);
atom_get_vbios_build(ctx);
+ atom_print_vbios_info(ctx);
+
return ctx;
}
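One note on atom_print_vbios_info() above: scnprintf() returns the number of characters actually stored (excluding the NUL), so each off += step stays within bounds; once off reaches sizeof(vbios_info) the remaining size argument is 0 and subsequent calls store nothing, which makes the chained-append idiom safe even under truncation.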
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index c081784a19c4..29954c7d61b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -154,11 +154,11 @@ static u32 cik_pcie_rreg(struct amdgpu_device *adev, u32 reg)
unsigned long flags;
u32 r;
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.pcie.lock, flags);
WREG32(mmPCIE_INDEX, reg);
(void)RREG32(mmPCIE_INDEX);
r = RREG32(mmPCIE_DATA);
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
return r;
}
@@ -166,12 +166,12 @@ static void cik_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
unsigned long flags;
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.pcie.lock, flags);
WREG32(mmPCIE_INDEX, reg);
(void)RREG32(mmPCIE_INDEX);
WREG32(mmPCIE_DATA, v);
(void)RREG32(mmPCIE_DATA);
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
}
static u32 cik_smc_rreg(struct amdgpu_device *adev, u32 reg)
@@ -179,10 +179,10 @@ static u32 cik_smc_rreg(struct amdgpu_device *adev, u32 reg)
unsigned long flags;
u32 r;
- spin_lock_irqsave(&adev->smc_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.smc.lock, flags);
WREG32(mmSMC_IND_INDEX_0, (reg));
r = RREG32(mmSMC_IND_DATA_0);
- spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.smc.lock, flags);
return r;
}
@@ -190,10 +190,10 @@ static void cik_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
unsigned long flags;
- spin_lock_irqsave(&adev->smc_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.smc.lock, flags);
WREG32(mmSMC_IND_INDEX_0, (reg));
WREG32(mmSMC_IND_DATA_0, (v));
- spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.smc.lock, flags);
}
static u32 cik_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg)
@@ -201,10 +201,10 @@ static u32 cik_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg)
unsigned long flags;
u32 r;
- spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.uvd_ctx.lock, flags);
WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff));
r = RREG32(mmUVD_CTX_DATA);
- spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.uvd_ctx.lock, flags);
return r;
}
@@ -212,10 +212,10 @@ static void cik_uvd_ctx_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
unsigned long flags;
- spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.uvd_ctx.lock, flags);
WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff));
WREG32(mmUVD_CTX_DATA, (v));
- spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.uvd_ctx.lock, flags);
}
static u32 cik_didt_rreg(struct amdgpu_device *adev, u32 reg)
@@ -223,10 +223,10 @@ static u32 cik_didt_rreg(struct amdgpu_device *adev, u32 reg)
unsigned long flags;
u32 r;
- spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.didt.lock, flags);
WREG32(mmDIDT_IND_INDEX, (reg));
r = RREG32(mmDIDT_IND_DATA);
- spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.didt.lock, flags);
return r;
}
@@ -234,10 +234,10 @@ static void cik_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
unsigned long flags;
- spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.didt.lock, flags);
WREG32(mmDIDT_IND_INDEX, (reg));
WREG32(mmDIDT_IND_DATA, (v));
- spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.didt.lock, flags);
}
static const u32 bonaire_golden_spm_registers[] =
@@ -1027,7 +1027,7 @@ static bool cik_read_bios_from_rom(struct amdgpu_device *adev,
dw_ptr = (u32 *)bios;
length_dw = ALIGN(length_bytes, 4) / 4;
/* take the smc lock since we are using the smc index */
- spin_lock_irqsave(&adev->smc_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.smc.lock, flags);
/* set rom index to 0 */
WREG32(mmSMC_IND_INDEX_0, ixROM_INDEX);
WREG32(mmSMC_IND_DATA_0, 0);
@@ -1035,7 +1035,7 @@ static bool cik_read_bios_from_rom(struct amdgpu_device *adev,
WREG32(mmSMC_IND_INDEX_0, ixROM_DATA);
for (i = 0; i < length_dw; i++)
dw_ptr[i] = RREG32(mmSMC_IND_DATA_0);
- spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.smc.lock, flags);
return true;
}
@@ -1984,14 +1984,14 @@ static int cik_common_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
- adev->smc_rreg = &cik_smc_rreg;
- adev->smc_wreg = &cik_smc_wreg;
- adev->pcie_rreg = &cik_pcie_rreg;
- adev->pcie_wreg = &cik_pcie_wreg;
- adev->uvd_ctx_rreg = &cik_uvd_ctx_rreg;
- adev->uvd_ctx_wreg = &cik_uvd_ctx_wreg;
- adev->didt_rreg = &cik_didt_rreg;
- adev->didt_wreg = &cik_didt_wreg;
+ adev->reg.smc.rreg = cik_smc_rreg;
+ adev->reg.smc.wreg = cik_smc_wreg;
+ adev->reg.pcie.rreg = &cik_pcie_rreg;
+ adev->reg.pcie.wreg = &cik_pcie_wreg;
+ adev->reg.uvd_ctx.rreg = &cik_uvd_ctx_rreg;
+ adev->reg.uvd_ctx.wreg = &cik_uvd_ctx_wreg;
+ adev->reg.didt.rreg = &cik_didt_rreg;
+ adev->reg.didt.wreg = &cik_didt_wreg;
adev->asic_funcs = &cik_asic_funcs;
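All of the cik accessor conversions above keep the same index/data sequence, just keyed by the per-block locks now grouped under adev->reg. A generic sketch of the pattern (mmEXAMPLE_INDEX/mmEXAMPLE_DATA are placeholders):

	static u32 example_indexed_rreg(struct amdgpu_device *adev, u32 reg)
	{
		unsigned long flags;
		u32 r;

		spin_lock_irqsave(&adev->reg.pcie.lock, flags);
		WREG32(mmEXAMPLE_INDEX, reg);
		(void)RREG32(mmEXAMPLE_INDEX);	/* read back to post the index write */
		r = RREG32(mmEXAMPLE_DATA);
		spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
		return r;
	}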
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 9e8715b4739d..22780c09177d 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -53,7 +53,6 @@ static const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev);
static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev);
static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev);
-static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev);
static int cik_sdma_soft_reset(struct amdgpu_ip_block *ip_block);
u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev);
@@ -919,6 +918,14 @@ static void cik_enable_sdma_mgls(struct amdgpu_device *adev,
}
}
+static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
+ .copy_pte_num_dw = 7,
+ .copy_pte = cik_sdma_vm_copy_pte,
+
+ .write_pte = cik_sdma_vm_write_pte,
+ .set_pte_pde = cik_sdma_vm_set_pte_pde,
+};
+
static int cik_sdma_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
@@ -933,7 +940,7 @@ static int cik_sdma_early_init(struct amdgpu_ip_block *ip_block)
cik_sdma_set_ring_funcs(adev);
cik_sdma_set_irq_funcs(adev);
cik_sdma_set_buffer_funcs(adev);
- cik_sdma_set_vm_pte_funcs(adev);
+ amdgpu_sdma_set_vm_pte_scheds(adev, &cik_sdma_vm_pte_funcs);
return 0;
}
@@ -1337,26 +1344,6 @@ static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev)
adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}
-static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
- .copy_pte_num_dw = 7,
- .copy_pte = cik_sdma_vm_copy_pte,
-
- .write_pte = cik_sdma_vm_write_pte,
- .set_pte_pde = cik_sdma_vm_set_pte_pde,
-};
-
-static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev)
-{
- unsigned i;
-
- adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- adev->vm_manager.vm_pte_scheds[i] =
- &adev->sdma.instance[i].ring.sched;
- }
- adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
-}
-
const struct amdgpu_ip_block_version cik_sdma_ip_block =
{
.type = AMD_IP_BLOCK_TYPE_SDMA,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index a7ffe10eea1b..c8f465158e71 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -175,10 +175,10 @@ static u32 dce_v10_0_audio_endpt_rreg(struct amdgpu_device *adev,
unsigned long flags;
u32 r;
- spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.audio_endpt.lock, flags);
WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
r = RREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset);
- spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.audio_endpt.lock, flags);
return r;
}
@@ -188,10 +188,10 @@ static void dce_v10_0_audio_endpt_wreg(struct amdgpu_device *adev,
{
unsigned long flags;
- spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.audio_endpt.lock, flags);
WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
WREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset, v);
- spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.audio_endpt.lock, flags);
}
static u32 dce_v10_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
@@ -1298,7 +1298,7 @@ static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder
return;
}
- sad_count = drm_edid_to_speaker_allocation(amdgpu_connector->edid, &sadb);
+ sad_count = drm_edid_to_speaker_allocation(drm_edid_raw(amdgpu_connector->edid), &sadb);
if (sad_count < 0) {
DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
sad_count = 0;
@@ -1368,7 +1368,7 @@ static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder)
return;
}
- sad_count = drm_edid_to_sad(amdgpu_connector->edid, &sads);
+ sad_count = drm_edid_to_sad(drm_edid_raw(amdgpu_connector->edid), &sads);
if (sad_count < 0)
DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
if (sad_count <= 0)
@@ -2750,8 +2750,8 @@ static int dce_v10_0_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
- adev->audio_endpt_rreg = &dce_v10_0_audio_endpt_rreg;
- adev->audio_endpt_wreg = &dce_v10_0_audio_endpt_wreg;
+ adev->reg.audio_endpt.rreg = &dce_v10_0_audio_endpt_rreg;
+ adev->reg.audio_endpt.wreg = &dce_v10_0_audio_endpt_wreg;
dce_v10_0_set_display_funcs(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index a72e20db5363..58d0da5c2a74 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -138,10 +138,10 @@ static u32 dce_v6_0_audio_endpt_rreg(struct amdgpu_device *adev,
unsigned long flags;
u32 r;
- spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.audio_endpt.lock, flags);
WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
r = RREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset);
- spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.audio_endpt.lock, flags);
return r;
}
@@ -151,11 +151,11 @@ static void dce_v6_0_audio_endpt_wreg(struct amdgpu_device *adev,
{
unsigned long flags;
- spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.audio_endpt.lock, flags);
WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset,
reg | AZALIA_F0_CODEC_ENDPOINT_INDEX__AZALIA_ENDPOINT_REG_WRITE_EN_MASK);
WREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset, v);
- spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.audio_endpt.lock, flags);
}
static u32 dce_v6_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
@@ -1265,7 +1265,7 @@ static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
return;
}
- sad_count = drm_edid_to_speaker_allocation(amdgpu_connector->edid, &sadb);
+ sad_count = drm_edid_to_speaker_allocation(drm_edid_raw(amdgpu_connector->edid), &sadb);
if (sad_count < 0) {
DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
sad_count = 0;
@@ -1346,7 +1346,7 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
return;
}
- sad_count = drm_edid_to_sad(amdgpu_connector->edid, &sads);
+ sad_count = drm_edid_to_sad(drm_edid_raw(amdgpu_connector->edid), &sads);
if (sad_count < 0)
DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
if (sad_count <= 0)
@@ -2697,8 +2697,8 @@ static int dce_v6_0_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
- adev->audio_endpt_rreg = &dce_v6_0_audio_endpt_rreg;
- adev->audio_endpt_wreg = &dce_v6_0_audio_endpt_wreg;
+ adev->reg.audio_endpt.rreg = &dce_v6_0_audio_endpt_rreg;
+ adev->reg.audio_endpt.wreg = &dce_v6_0_audio_endpt_wreg;
dce_v6_0_set_display_funcs(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 4221c7b7c506..6d19f6d94d25 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -126,10 +126,10 @@ static u32 dce_v8_0_audio_endpt_rreg(struct amdgpu_device *adev,
unsigned long flags;
u32 r;
- spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.audio_endpt.lock, flags);
WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
r = RREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset);
- spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.audio_endpt.lock, flags);
return r;
}
@@ -139,10 +139,10 @@ static void dce_v8_0_audio_endpt_wreg(struct amdgpu_device *adev,
{
unsigned long flags;
- spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.audio_endpt.lock, flags);
WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
WREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset, v);
- spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.audio_endpt.lock, flags);
}
static u32 dce_v8_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
@@ -1271,7 +1271,7 @@ static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
return;
}
- sad_count = drm_edid_to_speaker_allocation(amdgpu_connector->edid, &sadb);
+ sad_count = drm_edid_to_speaker_allocation(drm_edid_raw(amdgpu_connector->edid), &sadb);
if (sad_count < 0) {
DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
sad_count = 0;
@@ -1339,7 +1339,7 @@ static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder)
return;
}
- sad_count = drm_edid_to_sad(amdgpu_connector->edid, &sads);
+ sad_count = drm_edid_to_sad(drm_edid_raw(amdgpu_connector->edid), &sads);
if (sad_count < 0)
DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
if (sad_count <= 0)
@@ -2655,8 +2655,8 @@ static int dce_v8_0_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
- adev->audio_endpt_rreg = &dce_v8_0_audio_endpt_rreg;
- adev->audio_endpt_wreg = &dce_v8_0_audio_endpt_wreg;
+ adev->reg.audio_endpt.rreg = &dce_v8_0_audio_endpt_rreg;
+ adev->reg.audio_endpt.wreg = &dce_v8_0_audio_endpt_wreg;
dce_v8_0_set_display_funcs(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
index 621aeca53880..7e7e6c389895 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -51,7 +51,7 @@ static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev,
address = adev->nbio.funcs->get_pcie_index_offset(adev);
data = adev->nbio.funcs->get_pcie_data_offset(adev);
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.pcie.lock, flags);
WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
WREG32(data, ficaa_val);
@@ -61,7 +61,7 @@ static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev,
WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3);
ficadh_val = RREG32(data);
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
return (((ficadh_val & 0xFFFFFFFFFFFFFFFF) << 32) | ficadl_val);
}
@@ -74,7 +74,7 @@ static void df_v3_6_set_fica(struct amdgpu_device *adev, uint32_t ficaa_val,
address = adev->nbio.funcs->get_pcie_index_offset(adev);
data = adev->nbio.funcs->get_pcie_data_offset(adev);
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.pcie.lock, flags);
WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
WREG32(data, ficaa_val);
@@ -84,7 +84,7 @@ static void df_v3_6_set_fica(struct amdgpu_device *adev, uint32_t ficaa_val,
WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3);
WREG32(data, ficadh_val);
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
}
/*
@@ -102,12 +102,12 @@ static void df_v3_6_perfmon_rreg(struct amdgpu_device *adev,
address = adev->nbio.funcs->get_pcie_index_offset(adev);
data = adev->nbio.funcs->get_pcie_data_offset(adev);
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.pcie.lock, flags);
WREG32(address, lo_addr);
*lo_val = RREG32(data);
WREG32(address, hi_addr);
*hi_val = RREG32(data);
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
}
/*
@@ -124,12 +124,12 @@ static void df_v3_6_perfmon_wreg(struct amdgpu_device *adev, uint32_t lo_addr,
address = adev->nbio.funcs->get_pcie_index_offset(adev);
data = adev->nbio.funcs->get_pcie_data_offset(adev);
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.pcie.lock, flags);
WREG32(address, lo_addr);
WREG32(data, lo_val);
WREG32(address, hi_addr);
WREG32(data, hi_val);
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
}
/* same as perfmon_wreg but return status on write value check */
@@ -143,7 +143,7 @@ static int df_v3_6_perfmon_arm_with_status(struct amdgpu_device *adev,
address = adev->nbio.funcs->get_pcie_index_offset(adev);
data = adev->nbio.funcs->get_pcie_data_offset(adev);
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.pcie.lock, flags);
WREG32(address, lo_addr);
WREG32(data, lo_val);
WREG32(address, hi_addr);
@@ -153,7 +153,7 @@ static int df_v3_6_perfmon_arm_with_status(struct amdgpu_device *adev,
lo_val_rb = RREG32(data);
WREG32(address, hi_addr);
hi_val_rb = RREG32(data);
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
if (!(lo_val == lo_val_rb && hi_val == hi_val_rb))
return -EBUSY;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 1893ceeeb26c..8b60299b73ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -6752,7 +6752,7 @@ static void gfx_v10_0_gfx_mqd_set_priority(struct amdgpu_device *adev,
/* set up default queue priority level
* 0x0 = low priority, 0x1 = high priority
*/
- if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH)
+ if (prop->hqd_queue_priority == AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM)
priority = 1;
tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 427975b5a1d9..ae39b9e1f7d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -1722,6 +1722,20 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
}
}
break;
+ case IP_VERSION(11, 5, 4):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 4 &&
+ adev->gfx.pfp_fw_version >= 7 &&
+ adev->gfx.mec_fw_version >= 5) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
default:
adev->gfx.enable_cleaner_shader = false;
break;
@@ -4088,7 +4102,7 @@ static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev,
/* set up default queue priority level
* 0x0 = low priority, 0x1 = high priority
*/
- if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH)
+ if (prop->hqd_queue_priority == AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM)
priority = 1;
tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT;
@@ -4238,6 +4252,37 @@ static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
return gfx_v11_0_cp_gfx_start(adev);
}
+static void gfx_v11_0_compute_mqd_set_cu_mask(struct amdgpu_device *adev,
+ struct v11_compute_mqd *mqd,
+ struct amdgpu_mqd_prop *prop)
+{
+ uint32_t se_mask[8] = {0};
+ uint32_t wa_mask;
+ bool has_wa_flag = prop->cu_flags & (AMDGPU_UPDATE_FLAG_DBG_WA_ENABLE |
+ AMDGPU_UPDATE_FLAG_DBG_WA_DISABLE);
+
+ if (!has_wa_flag && (!prop->cu_mask || !prop->cu_mask_count))
+ return;
+
+ if (has_wa_flag) {
+ wa_mask = (prop->cu_flags & AMDGPU_UPDATE_FLAG_DBG_WA_ENABLE) ?
+ 0xffff : 0xffffffff;
+ mqd->compute_static_thread_mgmt_se0 = wa_mask;
+ mqd->compute_static_thread_mgmt_se1 = wa_mask;
+ mqd->compute_static_thread_mgmt_se2 = wa_mask;
+ mqd->compute_static_thread_mgmt_se3 = wa_mask;
+ return;
+ }
+
+ amdgpu_gfx_mqd_symmetrically_map_cu_mask(adev, prop->cu_mask,
+ prop->cu_mask_count, se_mask);
+
+ mqd->compute_static_thread_mgmt_se0 = se_mask[0];
+ mqd->compute_static_thread_mgmt_se1 = se_mask[1];
+ mqd->compute_static_thread_mgmt_se2 = se_mask[2];
+ mqd->compute_static_thread_mgmt_se3 = se_mask[3];
+}
+
static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
struct amdgpu_mqd_prop *prop)
{
@@ -4372,6 +4417,8 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
 /* set UQ fence address */
mqd->fence_address_lo = lower_32_bits(prop->fence_address);
mqd->fence_address_hi = upper_32_bits(prop->fence_address);
+ /* set CU mask */
+ gfx_v11_0_compute_mqd_set_cu_mask(adev, mqd, prop);
return 0;
}
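
Both gfx v11 and v12 gain the same helper: when a debug workaround flag is set it paints all four per-SE static thread-management words with a fixed mask (0xffff for the enable flag, 0xffffffff for disable); otherwise it hands the user's CU mask to amdgpu_gfx_mqd_symmetrically_map_cu_mask() to be spread across the shader engines. A minimal sketch of what such a distribution could look like; the round-robin mapping below is an illustration under stated assumptions, not the driver's actual algorithm, which also accounts for harvested CUs and SH banks:

#include <stdint.h>

/* Hypothetical, simplified fan-out: bit i of the flat user CU mask
 * lands on CU (i / num_se) of shader engine (i % num_se).
 * Assumes num_se <= 4 and at most 32 CUs in the flat mask. */
static void map_cu_mask(uint32_t cu_mask, unsigned int cu_mask_count,
                        unsigned int num_se, uint32_t se_mask[4])
{
    unsigned int i;

    for (i = 0; i < cu_mask_count && i < 32; i++)
        if (cu_mask & (1u << i))
            se_mask[i % num_se] |= 1u << (i / num_se);
}
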
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index 79ea1af363a5..a418ae609c36 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -3109,6 +3109,37 @@ static int gfx_v12_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
return gfx_v12_0_cp_gfx_start(adev);
}
+static void gfx_v12_0_compute_mqd_set_cu_mask(struct amdgpu_device *adev,
+ struct v12_compute_mqd *mqd,
+ struct amdgpu_mqd_prop *prop)
+{
+ uint32_t se_mask[8] = {0};
+ uint32_t wa_mask;
+ bool has_wa_flag = prop->cu_flags & (AMDGPU_UPDATE_FLAG_DBG_WA_ENABLE |
+ AMDGPU_UPDATE_FLAG_DBG_WA_DISABLE);
+
+ if (!has_wa_flag && (!prop->cu_mask || !prop->cu_mask_count))
+ return;
+
+ if (has_wa_flag) {
+ wa_mask = (prop->cu_flags & AMDGPU_UPDATE_FLAG_DBG_WA_ENABLE) ?
+ 0xffff : 0xffffffff;
+ mqd->compute_static_thread_mgmt_se0 = wa_mask;
+ mqd->compute_static_thread_mgmt_se1 = wa_mask;
+ mqd->compute_static_thread_mgmt_se2 = wa_mask;
+ mqd->compute_static_thread_mgmt_se3 = wa_mask;
+ return;
+ }
+
+ amdgpu_gfx_mqd_symmetrically_map_cu_mask(adev, prop->cu_mask,
+ prop->cu_mask_count, se_mask);
+
+ mqd->compute_static_thread_mgmt_se0 = se_mask[0];
+ mqd->compute_static_thread_mgmt_se1 = se_mask[1];
+ mqd->compute_static_thread_mgmt_se2 = se_mask[2];
+ mqd->compute_static_thread_mgmt_se3 = se_mask[3];
+}
+
static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
struct amdgpu_mqd_prop *prop)
{
@@ -3242,6 +3273,8 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
 /* set UQ fence address */
mqd->fence_address_lo = lower_32_bits(prop->fence_address);
mqd->fence_address_hi = upper_32_bits(prop->fence_address);
+ /* set CU mask */
+ gfx_v12_0_compute_mqd_set_cu_mask(adev, mqd, prop);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
index eb9725ae1607..db49582a211f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
@@ -45,6 +45,7 @@
#include "v12_structs.h"
#include "gfx_v12_1.h"
#include "mes_v12_1.h"
+#include "amdgpu_ras_mgr.h"
#define GFX12_MEC_HPD_SIZE 2048
#define NUM_SIMD_PER_CU_GFX12_1 4
@@ -136,7 +137,6 @@ static void gfx_v12_1_kiq_map_queues(struct amdgpu_ring *kiq_ring,
PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
PACKET3_MAP_QUEUES_ME((me)) |
PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
- PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
@@ -245,8 +245,7 @@ static void gfx_v12_1_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
/* memory (1) or register (0) */
(WAIT_REG_MEM_MEM_SPACE(mem_space) |
WAIT_REG_MEM_OPERATION(opt) | /* wait */
- WAIT_REG_MEM_FUNCTION(3) | /* equal */
- WAIT_REG_MEM_ENGINE(eng_sel)));
+ WAIT_REG_MEM_FUNCTION(3))); /* equal */
if (mem_space)
BUG_ON(addr0 & 0x3); /* Dword align */
@@ -1155,11 +1154,13 @@ static int gfx_v12_1_sw_init(struct amdgpu_ip_block *ip_block)
break;
}
- /* recalculate compute rings to use based on hardware configuration */
- num_compute_rings = (adev->gfx.mec.num_pipe_per_mec *
- adev->gfx.mec.num_queue_per_pipe) / 2;
- adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings,
- num_compute_rings);
+ if (adev->gfx.num_compute_rings) {
+ /* recalculate compute rings to use based on hardware configuration */
+ num_compute_rings = (adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe) / 2;
+ adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings,
+ num_compute_rings);
+ }
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
@@ -1184,6 +1185,13 @@ static int gfx_v12_1_sw_init(struct amdgpu_ip_block *ip_block)
if (r)
return r;
+ /* RLC POISON Error */
+ r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_RLC,
+ GFX_12_1_0__SRCID__RLC_POISON_INTERRUPT,
+ &adev->gfx.rlc_poison_irq);
+ if (r)
+ return r;
+
adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
r = gfx_v12_1_rlc_init(adev);
@@ -1405,7 +1413,7 @@ static void gfx_v12_1_xcc_init_compute_vmid(struct amdgpu_device *adev,
/*
* Configure apertures:
* LDS: 0x20000000'00000000 - 0x20000001'00000000 (4GB)
- * Scratch: 0x10000000'00000000 - 0x10000001'00000000 (4GB)
+ * Scratch: 0x10000000'00000000 - 0x11ffffff'ffffffff (128PB 57-bit)
*/
sh_mem_bases = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
(adev->gmc.private_aperture_start >> 58));
@@ -2629,24 +2637,6 @@ static void gfx_v12_1_xcc_disable_gpa_mode(struct amdgpu_device *adev,
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPG_PSP_DEBUG, data);
}
-static void gfx_v12_1_xcc_setup_tcp_thrashing_ctrl(struct amdgpu_device *adev,
- int xcc_id)
-{
- uint32_t val;
-
- /* Set the TCP UTCL0 register to enable atomics */
- val = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
- regTCP_UTCL0_THRASHING_CTRL);
- val = REG_SET_FIELD(val, TCP_UTCL0_THRASHING_CTRL, THRASHING_EN, 0x2);
- val = REG_SET_FIELD(val, TCP_UTCL0_THRASHING_CTRL,
- RETRY_FRAGMENT_THRESHOLD_UP_EN, 0x1);
- val = REG_SET_FIELD(val, TCP_UTCL0_THRASHING_CTRL,
- RETRY_FRAGMENT_THRESHOLD_DOWN_EN, 0x1);
-
- WREG32_SOC15(GC, GET_INST(GC, xcc_id),
- regTCP_UTCL0_THRASHING_CTRL, val);
-}
-
static void gfx_v12_1_xcc_enable_atomics(struct amdgpu_device *adev,
int xcc_id)
{
@@ -2695,7 +2685,6 @@ static void gfx_v12_1_init_golden_registers(struct amdgpu_device *adev)
for (i = 0; i < NUM_XCC(adev->gfx.xcc_mask); i++) {
gfx_v12_1_xcc_disable_burst(adev, i);
gfx_v12_1_xcc_enable_atomics(adev, i);
- gfx_v12_1_xcc_setup_tcp_thrashing_ctrl(adev, i);
gfx_v12_1_xcc_disable_early_write_ack(adev, i);
gfx_v12_1_xcc_disable_tcp_spill_cache(adev, i);
}
@@ -2794,6 +2783,33 @@ static void gfx_v12_1_xcc_fini(struct amdgpu_device *adev,
gfx_v12_1_xcc_enable_gui_idle_interrupt(adev, false, xcc_id);
}
+static int gfx_v12_1_set_userq_eop_interrupts(struct amdgpu_device *adev,
+ bool enable)
+{
+ unsigned int irq_type;
+ int m, p, r;
+
+ if (adev->gfx.disable_kq) {
+ for (m = 0; m < adev->gfx.mec.num_mec; ++m) {
+ for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) {
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + (m * adev->gfx.mec.num_pipe_per_mec)
+ + p;
+ if (enable)
+ r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
+ irq_type);
+ else
+ r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
+ irq_type);
+ if (r)
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
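
gfx_v12_1_set_userq_eop_interrupts() flattens each (mec, pipe) pair into a single interrupt-type index relative to the MEC1/PIPE0 EOP entry. The linearization in isolation (the enum base value below is illustrative):

#include <stdio.h>

int main(void)
{
    const int base = 0; /* stands in for AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP */
    const int num_mec = 2, pipes_per_mec = 4;
    int m, p;

    for (m = 0; m < num_mec; m++)
        for (p = 0; p < pipes_per_mec; p++)
            printf("mec %d pipe %d -> irq_type %d\n",
                   m, p, base + m * pipes_per_mec + p);
    return 0; /* prints the flat indices 0..7 */
}
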
static int gfx_v12_1_hw_fini(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
@@ -2801,6 +2817,7 @@ static int gfx_v12_1_hw_fini(struct amdgpu_ip_block *ip_block)
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+ gfx_v12_1_set_userq_eop_interrupts(adev, false);
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
for (i = 0; i < num_xcc; i++) {
@@ -2868,10 +2885,26 @@ static int gfx_v12_1_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
+
+ switch (amdgpu_user_queue) {
+ case -1:
+ default:
+ adev->gfx.disable_kq = true;
+ adev->gfx.disable_uq = true;
+ break;
+ case 0:
+ adev->gfx.disable_kq = false;
+ adev->gfx.disable_uq = true;
+ break;
+ }
+
adev->gfx.funcs = &gfx_v12_1_gfx_funcs;
- adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
- AMDGPU_MAX_COMPUTE_RINGS);
+ if (adev->gfx.disable_kq)
+ adev->gfx.num_compute_rings = 0;
+ else
+ adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
+ AMDGPU_MAX_COMPUTE_RINGS);
gfx_v12_1_set_kiq_pm4_funcs(adev);
gfx_v12_1_set_ring_funcs(adev);
@@ -2898,6 +2931,10 @@ static int gfx_v12_1_late_init(struct amdgpu_ip_block *ip_block)
if (r)
return r;
+ r = gfx_v12_1_set_userq_eop_interrupts(adev, true);
+ if (r)
+ return r;
+
return 0;
}
@@ -3381,11 +3418,10 @@ static void gfx_v12_1_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
static void gfx_v12_1_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
- int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
uint32_t seq = ring->fence_drv.sync_seq;
uint64_t addr = ring->fence_drv.gpu_addr;
- gfx_v12_1_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
+ gfx_v12_1_wait_reg_mem(ring, 0, 1, 0, lower_32_bits(addr),
upper_32_bits(addr), seq, 0xffffffff, 4);
}
@@ -3424,8 +3460,7 @@ static void gfx_v12_1_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
/* write fence seq to the "addr" */
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
- WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
+ amdgpu_ring_write(ring, (WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
amdgpu_ring_write(ring, lower_32_bits(addr));
amdgpu_ring_write(ring, upper_32_bits(addr));
amdgpu_ring_write(ring, lower_32_bits(seq));
@@ -3433,8 +3468,7 @@ static void gfx_v12_1_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
if (flags & AMDGPU_FENCE_FLAG_INT) {
/* set register to trigger INT */
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
- WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
+ amdgpu_ring_write(ring, (WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regCPC_INT_STATUS));
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
@@ -3493,9 +3527,7 @@ static void gfx_v12_1_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
uint32_t reg0, uint32_t reg1,
uint32_t ref, uint32_t mask)
{
- int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
-
- gfx_v12_1_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+ gfx_v12_1_wait_reg_mem(ring, 0, 0, 1, reg0, reg1,
ref, mask, 0x20);
}
@@ -3630,12 +3662,6 @@ static int gfx_v12_1_eop_irq(struct amdgpu_device *adev,
return -EINVAL;
switch (me_id) {
- case 0:
- if (pipe_id == 0)
- amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
- else
- amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
- break;
case 1:
case 2:
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
@@ -3652,6 +3678,9 @@ static int gfx_v12_1_eop_irq(struct amdgpu_device *adev,
amdgpu_fence_process(ring);
}
break;
+ default:
+ dev_dbg(adev->dev, "Unexpected me %d in eop_irq\n", me_id);
+ break;
}
}
@@ -3719,29 +3748,23 @@ static void gfx_v12_1_handle_priv_fault(struct amdgpu_device *adev,
if (xcc_id == -EINVAL)
return;
- switch (me_id) {
- case 0:
- for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
- /* we only enabled 1 gfx queue per pipe for now */
- if (ring->me == me_id && ring->pipe == pipe_id)
- drm_sched_fault(&ring->sched);
- }
- break;
- case 1:
- case 2:
- for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring
+ if (!adev->gfx.disable_kq) {
+ switch (me_id) {
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring
[i +
xcc_id * adev->gfx.num_compute_rings];
- if (ring->me == me_id && ring->pipe == pipe_id &&
- ring->queue == queue_id)
- drm_sched_fault(&ring->sched);
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ default:
+ dev_dbg(adev->dev, "Unexpected me %d in priv_fault\n", me_id);
+ break;
}
- break;
- default:
- BUG();
- break;
}
}
@@ -3763,6 +3786,35 @@ static int gfx_v12_1_priv_inst_irq(struct amdgpu_device *adev,
return 0;
}
+static int gfx_v12_1_rlc_poison_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t rlc_fed_status = 0;
+ uint32_t ras_blk = RAS_BLOCK_ID__GFX;
+ struct ras_ih_info ih_info = {0};
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++)
+ rlc_fed_status |= RREG32(SOC15_REG_OFFSET(GC,
+ GET_INST(GC, i), regRLC_RLCS_FED_STATUS));
+
+ if (!rlc_fed_status)
+ return 0;
+
+ if (REG_GET_FIELD(rlc_fed_status, RLC_RLCS_FED_STATUS, SDMA0_FED_ERR) ||
+ REG_GET_FIELD(rlc_fed_status, RLC_RLCS_FED_STATUS, SDMA1_FED_ERR))
+ ras_blk = RAS_BLOCK_ID__SDMA;
+
+ dev_warn(adev->dev, "RLC %d FED IRQ\n", ras_blk);
+
+ ih_info.block = ras_blk;
+ ih_info.reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ amdgpu_ras_mgr_dispatch_interrupt(adev, &ih_info);
+ return 0;
+}
+
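
The poison handler ORs the RLC FED status across every XCC instance, reclassifies the event as an SDMA RAS error if either SDMA FED bit is set, and dispatches a mode-2 reset request through amdgpu_ras_mgr_dispatch_interrupt(). The aggregate-and-route step as a standalone sketch; the bit positions are invented for illustration and do not reflect the real RLC_RLCS_FED_STATUS layout:

#include <stdint.h>

#define FED_SDMA0 (1u << 4) /* illustrative bit positions only */
#define FED_SDMA1 (1u << 5)

enum ras_block { RAS_GFX, RAS_SDMA };

static enum ras_block classify_fed(const uint32_t *status, int num_xcc)
{
    uint32_t agg = 0;
    int i;

    for (i = 0; i < num_xcc; i++)
        agg |= status[i]; /* any instance reporting an error counts */

    return (agg & (FED_SDMA0 | FED_SDMA1)) ? RAS_SDMA : RAS_GFX;
}
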
static void gfx_v12_1_emit_mem_sync(struct amdgpu_ring *ring)
{
const unsigned int gcr_cntl =
@@ -3887,6 +3939,10 @@ static const struct amdgpu_irq_src_funcs gfx_v12_1_priv_inst_irq_funcs = {
.process = gfx_v12_1_priv_inst_irq,
};
+static const struct amdgpu_irq_src_funcs gfx_v12_1_rlc_poison_irq_funcs = {
+ .process = gfx_v12_1_rlc_poison_irq,
+};
+
static void gfx_v12_1_set_irq_funcs(struct amdgpu_device *adev)
{
adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
@@ -3897,6 +3953,9 @@ static void gfx_v12_1_set_irq_funcs(struct amdgpu_device *adev)
adev->gfx.priv_inst_irq.num_types = 1;
adev->gfx.priv_inst_irq.funcs = &gfx_v12_1_priv_inst_irq_funcs;
+
+ adev->gfx.rlc_poison_irq.num_types = 1;
+ adev->gfx.rlc_poison_irq.funcs = &gfx_v12_1_rlc_poison_irq_funcs;
}
static void gfx_v12_1_set_imu_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1_pkt.h b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1_pkt.h
index 9a5c82c8db53..21a07530c64d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1_pkt.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1_pkt.h
@@ -53,16 +53,10 @@
/* Packet 3 types */
#define PACKET3_NOP 0x10
-#define PACKET3_SET_BASE 0x11
-#define PACKET3_BASE_INDEX(x) ((x) << 0)
-#define CE_PARTITION_BASE 3
#define PACKET3_CLEAR_STATE 0x12
#define PACKET3_INDEX_BUFFER_SIZE 0x13
#define PACKET3_DISPATCH_DIRECT 0x15
#define PACKET3_DISPATCH_INDIRECT 0x16
-#define PACKET3_INDIRECT_BUFFER_END 0x17
-#define PACKET3_INDIRECT_BUFFER_CNST_END 0x19
-#define PACKET3_ATOMIC_GDS 0x1D
#define PACKET3_ATOMIC_MEM 0x1E
#define PACKET3_OCCLUSION_QUERY 0x1F
#define PACKET3_SET_PREDICATION 0x20
@@ -74,47 +68,42 @@
#define PACKET3_INDEX_BASE 0x26
#define PACKET3_DRAW_INDEX_2 0x27
#define PACKET3_CONTEXT_CONTROL 0x28
-#define PACKET3_INDEX_TYPE 0x2A
#define PACKET3_DRAW_INDIRECT_MULTI 0x2C
#define PACKET3_DRAW_INDEX_AUTO 0x2D
#define PACKET3_NUM_INSTANCES 0x2F
#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30
-#define PACKET3_INDIRECT_BUFFER_PRIV 0x32
-#define PACKET3_INDIRECT_BUFFER_CNST 0x33
-#define PACKET3_COND_INDIRECT_BUFFER_CNST 0x33
-#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34
#define PACKET3_DRAW_INDEX_OFFSET_2 0x35
-#define PACKET3_DRAW_PREAMBLE 0x36
#define PACKET3_WRITE_DATA 0x37
-#define WRITE_DATA_DST_SEL(x) ((x) << 8)
+#define WRITE_DATA_DST_SEL(x) (((x) & 0xf) << 8)
/* 0 - register
- * 1 - memory (sync - via GRBM)
- * 2 - gl2
- * 3 - gds
+ * 1 - reserved
+ * 2 - tc_l2
+ * 3 - reserved
* 4 - reserved
- * 5 - memory (async - direct)
+ * 5 - memory (same as tc_l2)
+ * 6 - memory_mapped_adc_persistent_state
*/
-#define WR_ONE_ADDR (1 << 16)
+#define WRITE_DATA_SCOPE(x) (((x) & 0x3) << 12)
+#define WRITE_DATA_MODE(x) (((x) & 0x3) << 14)
+ /* 0 - local xcd
+ * 1 - remote/local aid
+ * 2 - remote xcd
+ * 3 - remote mid
+ */
+#define WRITE_DATA_ADDR_INCR (1 << 16)
+#define WRITE_DATA_MID_DIE_ID(x) (((x) & 0x3) << 18)
#define WR_CONFIRM (1 << 20)
-#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25)
- /* 0 - LRU
- * 1 - Stream
- */
-#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
- /* 0 - me
- * 1 - pfp
- * 2 - ce
+#define WRITE_DATA_XCD_DIE_ID(x) (((x) & 0xf) << 21)
+#define WRITE_DATA_TEMPORAL(x) (((x) & 0x3) << 25)
+ /* 0 - rt
+ * 1 - nt
+ * 2 - ht
+ * 3 - lu
*/
+#define WRITE_DATA_COOP_DISABLE (1 << 27)
#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
-#define PACKET3_MEM_SEMAPHORE 0x39
-# define PACKET3_SEM_USE_MAILBOX (0x1 << 16)
-# define PACKET3_SEM_SEL_SIGNAL_TYPE (0x1 << 20) /* 0 = increment, 1 = write 1 */
-# define PACKET3_SEM_SEL_SIGNAL (0x6 << 29)
-# define PACKET3_SEM_SEL_WAIT (0x7 << 29)
-#define PACKET3_DRAW_INDEX_MULTI_INST 0x3A
-#define PACKET3_COPY_DW 0x3B
#define PACKET3_WAIT_REG_MEM 0x3C
-#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0)
+#define WAIT_REG_MEM_FUNCTION(x) (((x) & 0x7) << 0)
/* 0 - always
* 1 - <
* 2 - <=
@@ -123,33 +112,66 @@
* 5 - >=
* 6 - >
*/
-#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)
+#define WAIT_REG_MEM_MEM_SPACE(x) (((x) & 0x3) << 4)
/* 0 - reg
* 1 - mem
*/
-#define WAIT_REG_MEM_OPERATION(x) ((x) << 6)
+#define WAIT_REG_MEM_OPERATION(x) (((x) & 0x3) << 6)
/* 0 - wait_reg_mem
* 1 - wr_wait_wr_reg
*/
-#define WAIT_REG_MEM_ENGINE(x) ((x) << 8)
- /* 0 - me
- * 1 - pfp
+#define WAIT_REG_MEM_MODE(x) (((x) & 0x3) << 10)
+ /* 0 - local xcd
+ * 1 - remote/local aid
+ * 2 - remote xcd
+ * 3 - remote mid
+ */
+#define WAIT_REG_MEM_MID_DIE_ID(x) (((x) & 0x3) << 12)
+#define WAIT_REG_MEM_XCD_DIE_ID(x) (((x) & 0xf) << 14)
+#define WAIT_REG_MEM_MES_INTR_PIPE(x) (((x) & 0x3) << 22)
+#define WAIT_REG_MEM_MES_ACTION(x) (((x) & 0x1) << 24)
+#define WAIT_REG_MEM_TEMPORAL(x) (((x) & 0x3) << 25)
+ /* 0 - rt
+ * 1 - nt
+ * 2 - ht
+ * 3 - lu
*/
#define PACKET3_INDIRECT_BUFFER 0x3F
#define INDIRECT_BUFFER_VALID (1 << 23)
-#define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28)
- /* 0 - LRU
- * 1 - Stream
- * 2 - Bypass
+#define INDIRECT_BUFFER_TEMPORAL(x) (((x) & 0x3) << 28)
+ /* 0 - rt
+ * 1 - nt
+ * 2 - ht
+ * 3 - lu
*/
-#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21)
-#define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30)
#define PACKET3_COND_INDIRECT_BUFFER 0x3F
#define PACKET3_COPY_DATA 0x40
-#define PACKET3_CP_DMA 0x41
+#define COPY_DATA_SRC_SEL(x) (((x) & 0xf) << 0)
+#define COPY_DATA_DST_SEL(x) (((x) & 0xf) << 8)
+#define COPY_DATA_SRC_SCOPE(x) (((x) & 0x3) << 4)
+#define COPY_DATA_DST_SCOPE(x) (((x) & 0x3) << 27)
+#define COPY_DATA_MODE(x) (((x) & 0x3) << 6)
+ /* 0 - local xcd
+ * 1 - remote/local aid
+ * 2 - remote xcd
+ * 3 - remote mid
+ */
+#define COPY_DATA_SRC_TEMPORAL(x) (((x) & 0x3) << 13)
+#define COPY_DATA_DST_TEMPORAL(x) (((x) & 0x3) << 25)
+ /* 0 - rt
+ * 1 - nt
+ * 2 - ht
+ * 3 - lu
+ */
+#define COPY_DATA_COUNT_SEL (1 << 16)
+#define COPY_DATA_SRC_DST_REMOTE_MODE(x) (((x) & 0x1) << 16)
+ /* 0 - src remote
+ * 1 - dst remote
+ */
+#define COPY_DATA_MID_DIE_ID(x) (((x) & 0x3) << 18)
+#define COPY_DATA_XCD_DIE_ID(x) (((x) & 0xf) << 21)
+#define COPY_DATA_PQ_EXE_STATUS (1 << 27)
#define PACKET3_PFP_SYNC_ME 0x42
-#define PACKET3_SURFACE_SYNC 0x43
-#define PACKET3_ME_INITIALIZE 0x44
#define PACKET3_COND_WRITE 0x45
#define PACKET3_EVENT_WRITE 0x46
#define EVENT_TYPE(x) ((x) << 0)
@@ -160,8 +182,6 @@
* 3 - SAMPLE_STREAMOUTSTAT*
* 4 - *S_PARTIAL_FLUSH
*/
-#define PACKET3_EVENT_WRITE_EOP 0x47
-#define PACKET3_EVENT_WRITE_EOS 0x48
#define PACKET3_RELEASE_MEM 0x49
#define PACKET3_RELEASE_MEM_EVENT_TYPE(x) ((x) << 0)
#define PACKET3_RELEASE_MEM_EVENT_INDEX(x) ((x) << 8)
@@ -180,27 +200,30 @@
* 2 - temporal__release_mem__ht
* 3 - temporal__release_mem__lu
*/
-#define PACKET3_RELEASE_MEM_EXECUTE (1 << 28)
+#define PACKET3_RELEASE_MEM_PQ_EXE_STATUS (1 << 28)
+#define PACKET3_RELEASE_MEM_GCR_GLK_INV (1 << 30)
-#define PACKET3_RELEASE_MEM_DATA_SEL(x) ((x) << 29)
- /* 0 - discard
- * 1 - send low 32bit data
- * 2 - send 64bit data
- * 3 - send 64bit GPU counter value
- * 4 - send 64bit sys counter value
+#define PACKET3_RELEASE_MEM_DST_SEL(x) ((x) << 16)
+ /* 0 - memory controller
+ * 1 - TC/L2
+ * 2 - register
*/
+#define PACKET3_RELEASE_MEM_MES_INTR_PIPE(x) ((x) << 20)
+#define PACKET3_RELEASE_MEM_MES_ACTION_ID(x) ((x) << 22)
#define PACKET3_RELEASE_MEM_INT_SEL(x) ((x) << 24)
/* 0 - none
* 1 - interrupt only (DATA_SEL = 0)
* 2 - interrupt when data write is confirmed
*/
-#define PACKET3_RELEASE_MEM_DST_SEL(x) ((x) << 16)
- /* 0 - MC
- * 1 - TC/L2
+#define PACKET3_RELEASE_MEM_ADD_DOORBELL_OFFSET(x) (1 << 28)
+#define PACKET3_RELEASE_MEM_DATA_SEL(x) ((x) << 29)
+ /* 0 - discard
+ * 1 - send low 32bit data
+ * 2 - send 64bit data
+ * 3 - send 64bit GPU counter value
+ * 4 - send 64bit sys counter value
*/
-
-
#define PACKET3_PREAMBLE_CNTL 0x4A
# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28)
# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28)
@@ -218,26 +241,29 @@
/* 0 - ME
* 1 - PFP
*/
-# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
- /* 0 - LRU
- * 1 - Stream
+# define PACKET3_DMA_DATA_SRC_TEMPORAL(x) ((x) << 13)
+ /* 0 - rt
+ * 1 - nt
+ * 2 - ht
+ * 3 - lu
*/
-# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20)
+# define PACKET3_DMA_DATA_SRC_SCOPE(x) ((x) << 15)
+# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20)
/* 0 - DST_ADDR using DAS
* 1 - GDS
* 3 - DST_ADDR using L2
*/
-# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
+# define PACKET3_DMA_DATA_DST_TEMPORAL(x) ((x) << 25)
 /* 0 - rt
 * 1 - nt
 * 2 - ht
 * 3 - lu
 */
-# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29)
+# define PACKET3_DMA_DATA_DST_SCOPE(x) ((x) << 27)
+# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29)
/* 0 - SRC_ADDR using SAS
* 1 - GDS
* 2 - DATA
* 3 - SRC_ADDR using L2
*/
-# define PACKET3_DMA_DATA_CP_SYNC (1 << 31)
/* COMMAND */
# define PACKET3_DMA_DATA_CMD_SAS (1 << 26)
/* 0 - memory
@@ -247,13 +273,11 @@
/* 0 - memory
* 1 - register
*/
-# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28)
-# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
-# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)
+# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28)
+# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
+# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)
+# define PACKET3_DMA_DATA_CMD_DIS_WC (1 << 31)
#define PACKET3_CONTEXT_REG_RMW 0x51
-#define PACKET3_GFX_CNTX_UPDATE 0x52
-#define PACKET3_BLK_CNTX_UPDATE 0x53
-#define PACKET3_INCR_UPDT_STATE 0x55
#define PACKET3_ACQUIRE_MEM 0x58
/* 1. HEADER
* 2. COHER_CNTL [30:0]
@@ -307,10 +331,7 @@
* 2: REVERSE
*/
#define PACKET3_ACQUIRE_MEM_GCR_RANGE_IS_PA (1 << 18)
-#define PACKET3_REWIND 0x59
-#define PACKET3_INTERRUPT 0x5A
#define PACKET3_GEN_PDEPTE 0x5B
-#define PACKET3_INDIRECT_BUFFER_PASID 0x5C
#define PACKET3_PRIME_UTCL2 0x5D
#define PACKET3_LOAD_UCONFIG_REG 0x5E
#define PACKET3_LOAD_SH_REG 0x5F
@@ -324,12 +345,6 @@
#define PACKET3_SET_CONTEXT_REG 0x69
#define PACKET3_SET_CONTEXT_REG_START 0x0000a000
#define PACKET3_SET_CONTEXT_REG_END 0x0000a400
-#define PACKET3_SET_CONTEXT_REG_INDEX 0x6A
-#define PACKET3_SET_VGPR_REG_DI_MULTI 0x71
-#define PACKET3_SET_SH_REG_DI 0x72
-#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73
-#define PACKET3_SET_SH_REG_DI_MULTI 0x74
-#define PACKET3_GFX_PIPE_LOCK 0x75
#define PACKET3_SET_SH_REG 0x76
#define PACKET3_SET_SH_REG_START 0x00002c00
#define PACKET3_SET_SH_REG_END 0x00003000
@@ -339,47 +354,19 @@
#define PACKET3_SET_UCONFIG_REG_START 0x0000c000
#define PACKET3_SET_UCONFIG_REG_END 0x0000c400
#define PACKET3_SET_UCONFIG_REG_INDEX 0x7A
-#define PACKET3_FORWARD_HEADER 0x7C
-#define PACKET3_SCRATCH_RAM_WRITE 0x7D
-#define PACKET3_SCRATCH_RAM_READ 0x7E
-#define PACKET3_LOAD_CONST_RAM 0x80
-#define PACKET3_WRITE_CONST_RAM 0x81
-#define PACKET3_DUMP_CONST_RAM 0x83
-#define PACKET3_INCREMENT_CE_COUNTER 0x84
-#define PACKET3_INCREMENT_DE_COUNTER 0x85
-#define PACKET3_WAIT_ON_CE_COUNTER 0x86
-#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88
-#define PACKET3_SWITCH_BUFFER 0x8B
#define PACKET3_DISPATCH_DRAW_PREAMBLE 0x8C
-#define PACKET3_DISPATCH_DRAW_PREAMBLE_ACE 0x8C
#define PACKET3_DISPATCH_DRAW 0x8D
-#define PACKET3_DISPATCH_DRAW_ACE 0x8D
-#define PACKET3_GET_LOD_STATS 0x8E
-#define PACKET3_DRAW_MULTI_PREAMBLE 0x8F
-#define PACKET3_FRAME_CONTROL 0x90
-# define FRAME_TMZ (1 << 0)
-# define FRAME_CMD(x) ((x) << 28)
- /*
- * x=0: tmz_begin
- * x=1: tmz_end
- */
#define PACKET3_INDEX_ATTRIBUTES_INDIRECT 0x91
#define PACKET3_WAIT_REG_MEM64 0x93
-#define PACKET3_COND_PREEMPT 0x94
#define PACKET3_HDP_FLUSH 0x95
-#define PACKET3_COPY_DATA_RB 0x96
#define PACKET3_INVALIDATE_TLBS 0x98
#define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0)
#define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4)
#define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5)
#define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29)
-#define PACKET3_AQL_PACKET 0x99
#define PACKET3_DMA_DATA_FILL_MULTI 0x9A
#define PACKET3_SET_SH_REG_INDEX 0x9B
-#define PACKET3_DRAW_INDIRECT_COUNT_MULTI 0x9C
-#define PACKET3_DRAW_INDEX_INDIRECT_COUNT_MULTI 0x9D
-#define PACKET3_DUMP_CONST_RAM_OFFSET 0x9E
#define PACKET3_LOAD_CONTEXT_REG_INDEX 0x9F
#define PACKET3_SET_RESOURCES 0xA0
/* 1. header
@@ -394,7 +381,6 @@
# define PACKET3_SET_RESOURCES_VMID_MASK(x) ((x) << 0)
# define PACKET3_SET_RESOURCES_UNMAP_LATENTY(x) ((x) << 16)
# define PACKET3_SET_RESOURCES_QUEUE_TYPE(x) ((x) << 29)
-#define PACKET3_MAP_PROCESS 0xA1
#define PACKET3_MAP_QUEUES 0xA2
/* 1. header
* 2. CONTROL
@@ -411,11 +397,10 @@
# define PACKET3_MAP_QUEUES_PIPE(x) ((x) << 16)
# define PACKET3_MAP_QUEUES_ME(x) ((x) << 18)
# define PACKET3_MAP_QUEUES_QUEUE_TYPE(x) ((x) << 21)
-# define PACKET3_MAP_QUEUES_ALLOC_FORMAT(x) ((x) << 24)
+# define PACKET3_MAP_QUEUES_QUEUE_GROUP(x) ((x) << 24)
# define PACKET3_MAP_QUEUES_ENGINE_SEL(x) ((x) << 26)
# define PACKET3_MAP_QUEUES_NUM_QUEUES(x) ((x) << 29)
/* CONTROL2 */
-# define PACKET3_MAP_QUEUES_CHECK_DISABLE(x) ((x) << 1)
# define PACKET3_MAP_QUEUES_DOORBELL_OFFSET(x) ((x) << 2)
#define PACKET3_UNMAP_QUEUES 0xA3
/* 1. header
@@ -464,12 +449,6 @@
# define PACKET3_QUERY_STATUS_PASID(x) ((x) << 0)
/* CONTROL2b */
# define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2)
-# define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25)
-#define PACKET3_RUN_LIST 0xA5
-#define PACKET3_MAP_PROCESS_VM 0xA6
-/* GFX11 */
-#define PACKET3_SET_Q_PREEMPTION_MODE 0xF0
-# define PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(x) ((x) << 0)
-# define PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM (1 << 0)
+# define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 28)
#endif
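
A recurring change throughout this header: field macros now mask their argument before shifting (compare WAIT_REG_MEM_FUNCTION above), so an oversized value is clamped instead of spilling into the adjacent field. A two-macro demonstration of the difference:

#include <stdio.h>

#define FUNC_UNMASKED(x) ((x) << 0)         /* old style */
#define FUNC_MASKED(x)   (((x) & 0x7) << 0) /* new style: 3-bit field */
#define MEM_SPACE(x)     (((x) & 0x3) << 4) /* neighbouring field */

int main(void)
{
    unsigned int bad = 0x11; /* caller bug: wider than the 3-bit field */

    /* unmasked, the stray bit 4 flips MEM_SPACE from 2 to 3 */
    printf("0x%x vs 0x%x\n",
           FUNC_UNMASKED(bad) | MEM_SPACE(2), /* 0x31 */
           FUNC_MASKED(bad) | MEM_SPACE(2));  /* 0x21 */
    return 0;
}
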
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 7e9d753f4a80..95be105671ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2355,7 +2355,7 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
ring = &adev->gfx.sw_gfx_ring[i];
ring->ring_obj = NULL;
- sprintf(ring->name, amdgpu_sw_ring_name(i));
+ strscpy(ring->name, amdgpu_sw_ring_name(i), sizeof(ring->name));
ring->use_doorbell = true;
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
ring->is_sw_ring = true;
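
The one-line gfx_v9_0 fix closes two problems at once: sprintf() was being handed a runtime string as its format argument, so a '%' in a ring name would be parsed as a conversion, and nothing bounded the copy against the size of ring->name. A userspace sketch of the hazard and the equivalent bounded fix, with snprintf standing in for the kernel's strscpy():

#include <stdio.h>

int main(void)
{
    char name[16];
    const char *ring = "sw%sring"; /* hostile: contains a conversion */

    /* broken: the name is the *format* string
     * sprintf(name, ring);   -> undefined behaviour */

    /* fixed: bounded copy, the name is treated as data */
    snprintf(name, sizeof(name), "%s", ring);
    printf("%s\n", name); /* prints "sw%sring" verbatim */
    return 0;
}
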
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.c
index 4aa004ee2c4d..3544eb42dca6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.c
@@ -146,71 +146,15 @@ static void gfxhub_v12_1_xcc_init_system_aperture_regs(struct amdgpu_device *ade
uint32_t tmp;
int i;
- for_each_inst(i, xcc_mask) {
- /* Program the AGP BAR */
- WREG32_SOC15_RLC(GC, GET_INST(GC, i),
- regGCMC_VM_AGP_BASE_LO32, 0);
- WREG32_SOC15_RLC(GC, GET_INST(GC, i),
- regGCMC_VM_AGP_BASE_HI32, 0);
- WREG32_SOC15_RLC(GC, GET_INST(GC, i),
- regGCMC_VM_AGP_BOT_LO32,
- lower_32_bits(adev->gmc.agp_start >> 24));
- WREG32_SOC15_RLC(GC, GET_INST(GC, i),
- regGCMC_VM_AGP_BOT_HI32,
- upper_32_bits(adev->gmc.agp_start >> 24));
- WREG32_SOC15_RLC(GC, GET_INST(GC, i),
- regGCMC_VM_AGP_TOP_LO32,
- lower_32_bits(adev->gmc.agp_end >> 24));
- WREG32_SOC15_RLC(GC, GET_INST(GC, i),
- regGCMC_VM_AGP_TOP_HI32,
- upper_32_bits(adev->gmc.agp_end >> 24));
+ /* TODO: revisit whether the SRIOV guest access to these registers
+ * is blocked by security policy or not */
+ if (amdgpu_sriov_vf(adev))
+ return;
- if (!amdgpu_sriov_vf(adev)) {
- /* Program the system aperture low logical page number. */
- WREG32_SOC15(GC, GET_INST(GC, i),
- regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR_LO32,
- lower_32_bits(min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18));
- WREG32_SOC15(GC, GET_INST(GC, i),
- regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR_HI32,
- upper_32_bits(min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18));
- WREG32_SOC15(GC, GET_INST(GC, i),
- regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32,
- lower_32_bits(max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18));
- WREG32_SOC15(GC, GET_INST(GC, i),
- regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32,
- upper_32_bits(max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18));
-
- /* Set default page address. */
- value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
- WREG32_SOC15(GC, GET_INST(GC, i),
- regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
- (u32)(value >> 12));
- WREG32_SOC15(GC, GET_INST(GC, i),
- regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
- (u32)(value >> 44));
-
- /* Program "protection fault". */
- WREG32_SOC15(GC, GET_INST(GC, i),
- regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
- (u32)(adev->dummy_page_addr >> 12));
- WREG32_SOC15(GC, GET_INST(GC, i),
- regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
- (u32)((u64)adev->dummy_page_addr >> 44));
-
- tmp = RREG32_SOC15(GC, GET_INST(GC, i),
- regGCVM_L2_PROTECTION_FAULT_CNTL2);
- tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL2,
- ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
- tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL2,
- ENABLE_RETRY_FAULT_INTERRUPT, 0x1);
- WREG32_SOC15(GC, GET_INST(GC, i),
- regGCVM_L2_PROTECTION_FAULT_CNTL2, tmp);
- }
-
- /* In the case squeezing vram into GART aperture, we don't use
- * FB aperture and AGP aperture. Disable them.
- */
+ for_each_inst(i, xcc_mask) {
if (adev->gmc.pdb0_bo) {
+ /* Disable AGP and system aperture
+ * when the vmid0 page table is enabled */
WREG32_SOC15(GC, GET_INST(GC, i),
regGCMC_VM_FB_LOCATION_TOP_LO32, 0);
WREG32_SOC15(GC, GET_INST(GC, i),
@@ -225,7 +169,8 @@ static void gfxhub_v12_1_xcc_init_system_aperture_regs(struct amdgpu_device *ade
WREG32_SOC15(GC, GET_INST(GC, i),
regGCMC_VM_AGP_TOP_HI32, 0);
WREG32_SOC15(GC, GET_INST(GC, i),
- regGCMC_VM_AGP_BOT_LO32, 0xFFFFFFFF);
+ regGCMC_VM_AGP_BOT_LO32,
+ 0xFFFFFFFF);
WREG32_SOC15(GC, GET_INST(GC, i),
regGCMC_VM_AGP_BOT_HI32, 1);
WREG32_SOC15(GC, GET_INST(GC, i),
@@ -238,7 +183,69 @@ static void gfxhub_v12_1_xcc_init_system_aperture_regs(struct amdgpu_device *ade
regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32, 0);
WREG32_SOC15(GC, GET_INST(GC, i),
regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32, 0);
+ } else {
+ /* Program the AGP BAR */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i),
+ regGCMC_VM_AGP_BASE_LO32, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i),
+ regGCMC_VM_AGP_BASE_HI32, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i),
+ regGCMC_VM_AGP_BOT_LO32,
+ lower_32_bits(adev->gmc.agp_start >> 24));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i),
+ regGCMC_VM_AGP_BOT_HI32,
+ upper_32_bits(adev->gmc.agp_start >> 24));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i),
+ regGCMC_VM_AGP_TOP_LO32,
+ lower_32_bits(adev->gmc.agp_end >> 24));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i),
+ regGCMC_VM_AGP_TOP_HI32,
+ upper_32_bits(adev->gmc.agp_end >> 24));
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR_LO32,
+ lower_32_bits(min(adev->gmc.fb_start,
+ adev->gmc.agp_start) >> 18));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR_HI32,
+ upper_32_bits(min(adev->gmc.fb_start,
+ adev->gmc.agp_start) >> 18));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32,
+ lower_32_bits(max(adev->gmc.fb_end,
+ adev->gmc.agp_end) >> 18));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32,
+ upper_32_bits(max(adev->gmc.fb_end,
+ adev->gmc.agp_end) >> 18));
}
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i),
+ regGCVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL2,
+ ENABLE_RETRY_FAULT_INTERRUPT, 0x1);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCVM_L2_PROTECTION_FAULT_CNTL2, tmp);
}
}
@@ -325,10 +332,18 @@ static void gfxhub_v12_1_xcc_init_cache_regs(struct amdgpu_device *adev,
WREG32_SOC15_RLC(GC, GET_INST(GC, i), regGCVM_L2_CNTL3, tmp);
tmp = regGCVM_L2_CNTL4_DEFAULT;
- tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4,
- VMC_TAP_PDE_REQUEST_PHYSICAL, 1);
- tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4,
- VMC_TAP_PTE_REQUEST_PHYSICAL, 1);
+ if (adev->gmc.xgmi.connected_to_cpu) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4,
+ VMC_TAP_PDE_REQUEST_PHYSICAL, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4,
+ VMC_TAP_PTE_REQUEST_PHYSICAL, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4,
+ VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4,
+ VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ }
+
WREG32_SOC15_RLC(GC, GET_INST(GC, i), regGCVM_L2_CNTL4, tmp);
tmp = regGCVM_L2_CNTL5_DEFAULT;
@@ -807,47 +822,6 @@ static void gfxhub_v12_1_init(struct amdgpu_device *adev)
gfxhub_v12_1_xcc_init(adev, xcc_mask);
}
-static int gfxhub_v12_1_get_xgmi_info(struct amdgpu_device *adev)
-{
- u32 max_num_physical_nodes;
- u32 max_physical_node_id;
- u32 xgmi_lfb_cntl;
- u32 max_region;
- u64 seg_size;
-
- xgmi_lfb_cntl = RREG32_SOC15(GC, GET_INST(GC, 0),
- regGCMC_VM_XGMI_LFB_CNTL);
- seg_size = REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, 0),
- regGCMC_VM_XGMI_LFB_SIZE),
- GCMC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24;
- max_region = REG_GET_FIELD(xgmi_lfb_cntl,
- GCMC_VM_XGMI_LFB_CNTL,
- PF_MAX_REGION);
-
- max_num_physical_nodes = 8;
- max_physical_node_id = 7;
-
- /* PF_MAX_REGION=0 means xgmi is disabled */
- if (max_region || adev->gmc.xgmi.connected_to_cpu) {
- adev->gmc.xgmi.num_physical_nodes = max_region + 1;
-
- if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes)
- return -EINVAL;
-
- adev->gmc.xgmi.physical_node_id =
- REG_GET_FIELD(xgmi_lfb_cntl,
- GCMC_VM_XGMI_LFB_CNTL,
- PF_LFB_REGION);
-
- if (adev->gmc.xgmi.physical_node_id > max_physical_node_id)
- return -EINVAL;
-
- adev->gmc.xgmi.node_segment_size = seg_size;
- }
-
- return 0;
-}
-
const struct amdgpu_gfxhub_funcs gfxhub_v12_1_funcs = {
.get_fb_location = gfxhub_v12_1_get_fb_location,
.get_mc_fb_offset = gfxhub_v12_1_get_mc_fb_offset,
@@ -856,7 +830,6 @@ const struct amdgpu_gfxhub_funcs gfxhub_v12_1_funcs = {
.gart_disable = gfxhub_v12_1_gart_disable,
.set_fault_enable_default = gfxhub_v12_1_set_fault_enable_default,
.init = gfxhub_v12_1_init,
- .get_xgmi_info = gfxhub_v12_1_get_xgmi_info,
};
static int gfxhub_v12_1_xcp_resume(void *handle, uint32_t inst_mask)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 2568eeaae945..fd691b2a6e21 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -767,7 +767,7 @@ static int gmc_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
adev->gmc.vram_type = AMDGPU_VRAM_TYPE_GDDR6;
adev->gmc.vram_width = 1 * 128; /* numchan * chansize */
} else {
- r = amdgpu_atomfirmware_get_vram_info(adev,
+ r = amdgpu_gmc_get_vram_info(adev,
&vram_width, &vram_type, &vram_vendor);
adev->gmc.vram_width = vram_width;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 6349e239a367..e6db87b94eb1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -751,7 +751,7 @@ static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
spin_lock_init(&adev->gmc.invalidate_lock);
- r = amdgpu_atomfirmware_get_vram_info(adev,
+ r = amdgpu_gmc_get_vram_info(adev,
&vram_width, &vram_type, &vram_vendor);
adev->gmc.vram_width = vram_width;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
index b9671fc39e2a..6e184ea069ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -636,6 +636,11 @@ static int gmc_v12_0_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
+ if (adev->smuio.funcs &&
+ adev->smuio.funcs->is_host_gpu_xgmi_supported)
+ adev->gmc.xgmi.connected_to_cpu =
+ adev->smuio.funcs->is_host_gpu_xgmi_supported(adev);
+
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(12, 1, 0):
gmc_v12_1_set_gmc_funcs(adev);
@@ -654,9 +659,15 @@ static int gmc_v12_0_early_init(struct amdgpu_ip_block *ip_block)
adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
adev->gmc.shared_aperture_end =
adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
+
adev->gmc.private_aperture_start = 0x1000000000000000ULL;
- adev->gmc.private_aperture_end =
- adev->gmc.private_aperture_start + (4ULL << 30) - 1;
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 1, 0))
+ adev->gmc.private_aperture_end =
+ adev->gmc.private_aperture_start + (1ULL << 57) - 1;
+ else
+ adev->gmc.private_aperture_end =
+ adev->gmc.private_aperture_start + (4ULL << 30) - 1;
+
adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF;
return 0;
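
The private (scratch) aperture for GC 12.1 grows from the traditional 4 GB window to a full 57-bit range, matching the "0x10000000'00000000 - 0x11ffffff'ffffffff (128PB 57-bit)" comment in the gfx_v12_1 hunk above. The end addresses follow directly from the arithmetic:

#include <stdio.h>

int main(void)
{
    unsigned long long start = 0x1000000000000000ULL;

    /* legacy 4 GB window */
    printf("4GB : 0x%llx\n", start + (4ULL << 30) - 1);
    /* GC >= 12.1: 2^57 bytes = 128 PiB */
    printf("57b : 0x%llx\n", start + (1ULL << 57) - 1);
    return 0; /* 0x10000000ffffffff and 0x11ffffffffffffff */
}
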
@@ -685,17 +696,23 @@ static void gmc_v12_0_vram_gtt_location(struct amdgpu_device *adev,
base = adev->mmhub.funcs->get_fb_location(adev);
- amdgpu_gmc_set_agp_default(adev, mc);
- amdgpu_gmc_vram_location(adev, &adev->gmc, base);
- amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_LOW);
- if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1))
- amdgpu_gmc_agp_location(adev, mc);
-
+ if (amdgpu_gmc_is_pdb0_enabled(adev)) {
+ amdgpu_gmc_sysvm_location(adev, mc);
+ } else {
+ amdgpu_gmc_set_agp_default(adev, mc);
+ amdgpu_gmc_vram_location(adev, &adev->gmc, base);
+ amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_LOW);
+ if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1))
+ amdgpu_gmc_agp_location(adev, mc);
+ }
/* base offset of vram pages */
if (amdgpu_sriov_vf(adev))
adev->vm_manager.vram_base_offset = 0;
else
adev->vm_manager.vram_base_offset = adev->mmhub.funcs->get_mc_fb_offset(adev);
+
+ adev->vm_manager.vram_base_offset +=
+ adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
}
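
On multi-node XGMI configurations each device's local VRAM sits at physical_node_id * node_segment_size within the shared address map, so the offset is added unconditionally; on single-node parts both terms are zero. The placement arithmetic, with an assumed segment size:

#include <stdio.h>

int main(void)
{
    unsigned long long seg = 64ULL << 30; /* assumed 64 GiB per node */
    int node;

    for (node = 0; node < 4; node++)
        printf("node %d: vram_base_offset += 0x%llx\n",
               node, node * seg);
    return 0;
}
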
/**
@@ -711,12 +728,17 @@ static int gmc_v12_0_mc_init(struct amdgpu_device *adev)
{
int r;
- /* size in MB on si */
- adev->gmc.mc_vram_size =
- adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+ if (adev->gmc.xgmi.connected_to_cpu)
+ adev->gmc.mc_vram_size =
+ adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes;
+ else
+ adev->gmc.mc_vram_size =
+ adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+
adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
- if (!(adev->flags & AMD_IS_APU)) {
+ if (!(adev->flags & AMD_IS_APU) &&
+ !adev->gmc.xgmi.connected_to_cpu) {
r = amdgpu_device_resize_fb_bar(adev);
if (r)
return r;
@@ -726,8 +748,12 @@ static int gmc_v12_0_mc_init(struct amdgpu_device *adev)
adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
#ifdef CONFIG_X86_64
- if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) {
- adev->gmc.aper_base = adev->mmhub.funcs->get_mc_fb_offset(adev);
+ if (((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) ||
+ (adev->gmc.xgmi.connected_to_cpu)) {
+ adev->gmc.aper_base =
+ adev->mmhub.funcs->get_mc_fb_offset(adev) +
+ adev->gmc.xgmi.physical_node_id *
+ adev->gmc.xgmi.node_segment_size;
adev->gmc.aper_size = adev->gmc.real_vram_size;
}
#endif
@@ -756,6 +782,14 @@ static int gmc_v12_0_gart_init(struct amdgpu_device *adev)
return 0;
}
+ if (amdgpu_gmc_is_pdb0_enabled(adev)) {
+ adev->gmc.vmid0_page_table_depth = 1;
+ adev->gmc.vmid0_page_table_block_size = 12;
+ } else {
+ adev->gmc.vmid0_page_table_depth = 0;
+ adev->gmc.vmid0_page_table_block_size = 0;
+ }
+
/* Initialize common gart structure */
r = amdgpu_gart_init(adev);
if (r)
@@ -766,7 +800,14 @@ static int gmc_v12_0_gart_init(struct amdgpu_device *adev)
AMDGPU_PTE_EXECUTABLE |
AMDGPU_PTE_IS_PTE;
- return amdgpu_gart_table_vram_alloc(adev);
+ r = amdgpu_gart_table_vram_alloc(adev);
+ if (r)
+ return r;
+
+ if (amdgpu_gmc_is_pdb0_enabled(adev))
+ r = amdgpu_gmc_pdb0_alloc(adev);
+
+ return r;
}
static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
@@ -784,7 +825,7 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 1, 0)) {
gmc_v12_1_init_vram_info(adev);
} else {
- r = amdgpu_atomfirmware_get_vram_info(adev,
+ r = amdgpu_gmc_get_vram_info(adev,
&vram_width, &vram_type, &vram_vendor);
adev->gmc.vram_width = vram_width;
adev->gmc.vram_type = vram_type;
@@ -852,11 +893,15 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
if (r)
return r;
- if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 1, 0)) &&
- !amdgpu_sriov_vf(adev)) {
+ if (!amdgpu_sriov_vf(adev)) {
/* interrupt sent to DF. */
- r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_DF, 0,
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0))
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_DF, 0,
+ &adev->gmc.ecc_irq);
+ else
+ r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_DF, 0,
&adev->gmc.ecc_irq);
+
if (r)
return r;
}
@@ -938,6 +983,7 @@ static int gmc_v12_0_sw_fini(struct amdgpu_ip_block *ip_block)
amdgpu_vm_manager_fini(adev);
gmc_v12_0_gart_fini(adev);
amdgpu_gem_force_release(adev);
+ amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0);
amdgpu_bo_fini(adev);
return 0;
@@ -957,6 +1003,9 @@ static int gmc_v12_0_gart_enable(struct amdgpu_device *adev)
int r;
bool value;
+ if (adev->gmc.xgmi.connected_to_cpu)
+ amdgpu_gmc_init_pdb0(adev);
+
if (adev->gart.bo == NULL) {
dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL;
@@ -978,6 +1027,7 @@ static int gmc_v12_0_gart_enable(struct amdgpu_device *adev)
drm_info(adev_to_drm(adev), "PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20),
+ (adev->gmc.pdb0_bo) ? (unsigned long long)amdgpu_bo_gpu_offset(adev->gmc.pdb0_bo) :
(unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c
index ef6e550ce7c3..7ea7b9c30bca 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c
@@ -121,7 +121,7 @@ static int gmc_v12_1_process_interrupt(struct amdgpu_device *adev,
if (entry->src_id == UTCL2_1_0__SRCID__RETRY) {
retry_fault = true;
- write_fault = !!(entry->src_data[1] & 0x200000);
+ write_fault = !!(entry->src_data[1] & AMDGPU_GMC121_FAULT_SOURCE_DATA_WRITE);
}
if (entry->client_id == SOC_V1_0_IH_CLIENTID_VMC) {
@@ -345,9 +345,7 @@ static void gmc_v12_1_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
return;
}
- mutex_lock(&adev->mman.gtt_window_lock);
gmc_v12_1_flush_vm_hub(adev, vmid, vmhub, 0);
- mutex_unlock(&adev->mman.gtt_window_lock);
return;
}
@@ -526,20 +524,27 @@ static void gmc_v12_1_get_coherence_flags(struct amdgpu_device *adev,
bool ext_coherent = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENT;
uint32_t gc_ip_version = amdgpu_ip_version(adev, GC_HWIP, 0);
bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
- unsigned int mtype, mtype_local;
+ unsigned int mtype, mtype_local, mtype_remote;
bool snoop = false;
bool is_local = false;
+ bool is_aid_a1;
switch (gc_ip_version) {
case IP_VERSION(12, 1, 0):
- mtype_local = MTYPE_RW;
- if (amdgpu_mtype_local == 1) {
+ is_aid_a1 = (adev->rev_id & 0x10);
+
+ mtype_local = is_aid_a1 ? MTYPE_RW : MTYPE_NC;
+ mtype_remote = is_aid_a1 ? MTYPE_NC : MTYPE_UC;
+ if (amdgpu_mtype_local == 0) {
+ DRM_INFO_ONCE("Using MTYPE_RW for local memory\n");
+ mtype_local = MTYPE_RW;
+ } else if (amdgpu_mtype_local == 1) {
DRM_INFO_ONCE("Using MTYPE_NC for local memory\n");
mtype_local = MTYPE_NC;
} else if (amdgpu_mtype_local == 2) {
- DRM_INFO_ONCE("MTYPE_CC not supported, using MTYPE_RW instead for local memory\n");
+ DRM_INFO_ONCE("MTYPE_CC not supported, using %s for local memory\n", is_aid_a1 ? "MTYPE_RW" : "MTYPE_NC");
} else {
- DRM_INFO_ONCE("Using MTYPE_RW for local memory\n");
+ DRM_INFO_ONCE("Using %s for local memory\n", is_aid_a1 ? "MTYPE_RW" : "MTYPE_NC");
}
is_local = (is_vram && adev == bo_adev);
@@ -549,10 +554,7 @@ static void gmc_v12_1_get_coherence_flags(struct amdgpu_device *adev,
} else if (ext_coherent) {
mtype = is_local ? mtype_local : MTYPE_UC;
} else {
- if (is_local)
- mtype = mtype_local;
- else
- mtype = MTYPE_NC;
+ mtype = is_local ? mtype_local : mtype_remote;
}
break;
default:
@@ -623,10 +625,17 @@ static const struct amdgpu_irq_src_funcs gmc_v12_1_irq_funcs = {
.process = gmc_v12_1_process_interrupt,
};
+static const struct amdgpu_irq_src_funcs gmc_v12_1_ecc_funcs = {
+ .process = amdgpu_umc_uniras_process_ecc_irq,
+};
+
void gmc_v12_1_set_irq_funcs(struct amdgpu_device *adev)
{
adev->gmc.vm_fault.num_types = 1;
adev->gmc.vm_fault.funcs = &gmc_v12_1_irq_funcs;
+
+ adev->gmc.ecc_irq.num_types = 1;
+ adev->gmc.ecc_irq.funcs = &gmc_v12_1_ecc_funcs;
}
void gmc_v12_1_init_vram_info(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 8eba99aa0f8f..d865059e884a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -660,42 +660,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
gfxhub_client_ids[cid],
cid);
} else {
- switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
- case IP_VERSION(9, 0, 0):
- mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_vega10) ?
- mmhub_client_ids_vega10[cid][rw] : NULL;
- break;
- case IP_VERSION(9, 3, 0):
- mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_vega12) ?
- mmhub_client_ids_vega12[cid][rw] : NULL;
- break;
- case IP_VERSION(9, 4, 0):
- mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_vega20) ?
- mmhub_client_ids_vega20[cid][rw] : NULL;
- break;
- case IP_VERSION(9, 4, 1):
- mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_arcturus) ?
- mmhub_client_ids_arcturus[cid][rw] : NULL;
- break;
- case IP_VERSION(9, 1, 0):
- case IP_VERSION(9, 2, 0):
- mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_raven) ?
- mmhub_client_ids_raven[cid][rw] : NULL;
- break;
- case IP_VERSION(1, 5, 0):
- case IP_VERSION(2, 4, 0):
- mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_renoir) ?
- mmhub_client_ids_renoir[cid][rw] : NULL;
- break;
- case IP_VERSION(1, 8, 0):
- case IP_VERSION(9, 4, 2):
- mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_aldebaran) ?
- mmhub_client_ids_aldebaran[cid][rw] : NULL;
- break;
- default:
- mmhub_cid = NULL;
- break;
- }
+ mmhub_cid = amdgpu_mmhub_client_name(&adev->mmhub, cid, rw);
dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
mmhub_cid ? mmhub_cid : "unknown", cid);
}
@@ -1435,6 +1400,52 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
}
}
+static void gmc_v9_0_init_mmhub_client_info(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+ case IP_VERSION(9, 0, 0):
+ amdgpu_mmhub_init_client_info(&adev->mmhub,
+ mmhub_client_ids_vega10,
+ ARRAY_SIZE(mmhub_client_ids_vega10));
+ break;
+ case IP_VERSION(9, 3, 0):
+ amdgpu_mmhub_init_client_info(&adev->mmhub,
+ mmhub_client_ids_vega12,
+ ARRAY_SIZE(mmhub_client_ids_vega12));
+ break;
+ case IP_VERSION(9, 4, 0):
+ amdgpu_mmhub_init_client_info(&adev->mmhub,
+ mmhub_client_ids_vega20,
+ ARRAY_SIZE(mmhub_client_ids_vega20));
+ break;
+ case IP_VERSION(9, 4, 1):
+ amdgpu_mmhub_init_client_info(&adev->mmhub,
+ mmhub_client_ids_arcturus,
+ ARRAY_SIZE(mmhub_client_ids_arcturus));
+ break;
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 0):
+ amdgpu_mmhub_init_client_info(&adev->mmhub,
+ mmhub_client_ids_raven,
+ ARRAY_SIZE(mmhub_client_ids_raven));
+ break;
+ case IP_VERSION(1, 5, 0):
+ case IP_VERSION(2, 4, 0):
+ amdgpu_mmhub_init_client_info(&adev->mmhub,
+ mmhub_client_ids_renoir,
+ ARRAY_SIZE(mmhub_client_ids_renoir));
+ break;
+ case IP_VERSION(1, 8, 0):
+ case IP_VERSION(9, 4, 2):
+ amdgpu_mmhub_init_client_info(&adev->mmhub,
+ mmhub_client_ids_aldebaran,
+ ARRAY_SIZE(mmhub_client_ids_aldebaran));
+ break;
+ default:
+ break;
+ }
+}
+
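
The long per-ASIC switch in the fault path is replaced by registering the client-ID name table once during mmhub setup and resolving names through a generic bounds-checked lookup (amdgpu_mmhub_client_name() above). A sketch of the shape such helpers might take; the struct layout and names below are assumptions, not the driver's actual definitions:

#include <stddef.h>

struct mmhub_info {
    const char *(*client_ids)[2]; /* [cid][rw] name table */
    unsigned int num_clients;
};

static void mmhub_init_client_info(struct mmhub_info *m,
                                   const char *(*table)[2], unsigned int n)
{
    m->client_ids = table;
    m->num_clients = n;
}

static const char *mmhub_client_name(const struct mmhub_info *m,
                                     unsigned int cid, unsigned int rw)
{
    if (!m->client_ids || cid >= m->num_clients || rw > 1)
        return NULL; /* caller falls back to printing "unknown" */
    return m->client_ids[cid][rw];
}
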
static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
{
switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
@@ -1452,6 +1463,8 @@ static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
adev->mmhub.funcs = &mmhub_v1_0_funcs;
break;
}
+
+ gmc_v9_0_init_mmhub_client_info(adev);
}
static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev)
@@ -1810,24 +1823,37 @@ static void gmc_v9_0_save_registers(struct amdgpu_device *adev)
adev->gmc.sdpif_register = RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0);
}
-static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev)
+static void gmc_v9_0_init_vram_info(struct amdgpu_device *adev)
{
static const u32 regBIF_BIOS_SCRATCH_4 = 0x50;
+ int dev_var = adev->pdev->device & 0xF;
u32 vram_info;
- adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
- adev->gmc.vram_width = 128 * 64;
-
- if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
- adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM3E;
-
- if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) &&
- adev->rev_id == 0x3)
- adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM3E;
-
- if (!(adev->flags & AMD_IS_APU) && !amdgpu_sriov_vf(adev)) {
- vram_info = RREG32(regBIF_BIOS_SCRATCH_4);
- adev->gmc.vram_vendor = vram_info & 0xF;
+ if (adev->gmc.is_app_apu) {
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
+ adev->gmc.vram_width = 128 * 64;
+ } else if (adev->flags & AMD_IS_APU) {
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_DDR4;
+ adev->gmc.vram_width = 64 * 64;
+ } else if (amdgpu_is_multi_aid(adev)) {
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
+ adev->gmc.vram_width = 128 * 64;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM3E;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) &&
+ adev->rev_id == 0x3)
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM3E;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) &&
+ (dev_var == 0x5))
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM3E;
+
+ if (!(adev->flags & AMD_IS_APU) && !amdgpu_sriov_vf(adev)) {
+ vram_info = RREG32(regBIF_BIOS_SCRATCH_4);
+ adev->gmc.vram_vendor = vram_info & 0xF;
+ }
}
}
@@ -1843,19 +1869,11 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
spin_lock_init(&adev->gmc.invalidate_lock);
- if (amdgpu_is_multi_aid(adev)) {
- gmc_v9_4_3_init_vram_info(adev);
- } else if (!adev->bios) {
- if (adev->flags & AMD_IS_APU) {
- adev->gmc.vram_type = AMDGPU_VRAM_TYPE_DDR4;
- adev->gmc.vram_width = 64 * 64;
- } else {
- adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
- adev->gmc.vram_width = 128 * 64;
- }
+ if (!adev->bios) {
+ gmc_v9_0_init_vram_info(adev);
} else {
- r = amdgpu_atomfirmware_get_vram_info(adev,
- &vram_width, &vram_type, &vram_vendor);
+ r = amdgpu_gmc_get_vram_info(adev,
+ &vram_width, &vram_type, &vram_vendor);
if (amdgpu_sriov_vf(adev))
/* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN,
* and DF related registers is not readable, seems hardcord is the
@@ -1883,6 +1901,7 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
adev->gmc.vram_type = vram_type;
adev->gmc.vram_vendor = vram_vendor;
}
+
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 2, 2):
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c
index 451828bf583e..1fbe904f4223 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c
@@ -289,6 +289,13 @@ static uint32_t ih_v7_0_setup_retry_doorbell(u32 doorbell_index)
return val;
}
+#define regIH_RING1_CLIENT_CFG_INDEX_V7_1 0x122
+#define regIH_RING1_CLIENT_CFG_INDEX_V7_1_BASE_IDX 0
+#define regIH_RING1_CLIENT_CFG_DATA_V7_1 0x123
+#define regIH_RING1_CLIENT_CFG_DATA_V7_1_BASE_IDX 0
+#define regIH_CHICKEN_V7_1 0x129
+#define regIH_CHICKEN_V7_1_BASE_IDX 0
+
/**
* ih_v7_0_irq_init - init and enable the interrupt ring
*
@@ -307,6 +314,7 @@ static int ih_v7_0_irq_init(struct amdgpu_device *adev)
u32 tmp;
int ret;
int i;
+ u32 reg_addr;
/* disable irqs */
ret = ih_v7_0_toggle_interrupts(adev, false);
@@ -318,10 +326,15 @@ static int ih_v7_0_irq_init(struct amdgpu_device *adev)
if (unlikely((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
(adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO))) {
if (ih[0]->use_bus_addr) {
- ih_chicken = RREG32_SOC15(OSSSYS, 0, regIH_CHICKEN);
+ if (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(7, 1, 0))
+ reg_addr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_CHICKEN_V7_1);
+ else
+ reg_addr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_CHICKEN);
+ ih_chicken = RREG32(reg_addr);
+ /* The register field definitions are identical in IH v7_0 and v7_1 */
ih_chicken = REG_SET_FIELD(ih_chicken,
IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1);
- WREG32_SOC15(OSSSYS, 0, regIH_CHICKEN, ih_chicken);
+ WREG32(reg_addr, ih_chicken);
}
}
@@ -358,17 +371,26 @@ static int ih_v7_0_irq_init(struct amdgpu_device *adev)
/* Redirect the interrupts to IH RB1 for dGPU */
if (adev->irq.ih1.ring_size) {
- tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX);
+ if (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(7, 1, 0))
+ reg_addr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX_V7_1);
+ else
+ reg_addr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX);
+ tmp = RREG32(reg_addr);
+ /* The register field definitions are identical in IH v7_0 and v7_1 */
tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_INDEX, INDEX, 0);
- WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX, tmp);
+ WREG32(reg_addr, tmp);
- tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA);
+ if (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(7, 1, 0))
+ reg_addr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA_V7_1);
+ else
+ reg_addr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA);
+ tmp = RREG32(reg_addr);
+ /* The register field definitions are identical in IH v7_0 and v7_1 */
tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, CLIENT_ID, 0xa);
tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, SOURCE_ID, 0x0);
tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA,
SOURCE_ID_MATCH_ENABLE, 0x1);
-
- WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA, tmp);
+ WREG32(reg_addr, tmp);
}
pci_set_master(adev->pdev);
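
Because IH v7.1 moved these registers to new offsets while keeping the field layouts identical, the code computes the offset once per revision and then shares the plain RREG32()/WREG32() read-modify-write sequence. The pattern in isolation; the offsets and bit position below are placeholders, not the real register map:

#include <stdint.h>

#define REG_CHICKEN_V7_0 0x019d /* illustrative offsets only */
#define REG_CHICKEN_V7_1 0x0129

static uint32_t mmio[0x1000];

static uint32_t rreg32(uint32_t off)             { return mmio[off]; }
static void     wreg32(uint32_t off, uint32_t v) { mmio[off] = v; }

static void enable_gpa(int is_v7_1)
{
    /* pick the per-revision offset once ... */
    uint32_t reg = is_v7_1 ? REG_CHICKEN_V7_1 : REG_CHICKEN_V7_0;
    uint32_t tmp = rreg32(reg);

    /* ... then share the field update, since the layout is identical */
    tmp |= 1u << 0; /* MC_SPACE_GPA_ENABLE (placeholder bit) */
    wreg32(reg, tmp);
}
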
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.c
new file mode 100644
index 000000000000..285c459379c4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.c
@@ -0,0 +1,840 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v4_0_3.h"
+#include "jpeg_v5_0_2.h"
+#include "mmsch_v5_0.h"
+
+#include "vcn/vcn_5_0_0_offset.h"
+#include "vcn/vcn_5_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h"
+
+static void jpeg_v5_0_2_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v5_0_2_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v5_0_2_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
+static void jpeg_v5_0_2_dec_ring_set_wptr(struct amdgpu_ring *ring);
+
+static int amdgpu_ih_srcid_jpeg[] = {
+ VCN_5_0__SRCID__JPEG_DECODE,
+ VCN_5_0__SRCID__JPEG1_DECODE,
+ VCN_5_0__SRCID__JPEG2_DECODE,
+ VCN_5_0__SRCID__JPEG3_DECODE,
+ VCN_5_0__SRCID__JPEG4_DECODE,
+ VCN_5_0__SRCID__JPEG5_DECODE,
+ VCN_5_0__SRCID__JPEG6_DECODE,
+ VCN_5_0__SRCID__JPEG7_DECODE,
+ VCN_5_0__SRCID__JPEG8_DECODE,
+ VCN_5_0__SRCID__JPEG9_DECODE,
+};
+
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_5_0_2[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC8_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC8_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC8_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC9_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC9_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC9_UVD_JRBC_STATUS),
+};
+
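+/*
+ * jpeg_v5_0_2_core_reg_offset - per-pipe offset of the JRBC core registers
+ *
+ * The first cores keep the v4_0_3 register layout, while the later cores
+ * appear to sit in a second aperture with a different base, hence the two
+ * linear formulas (e.g. pipe 1 yields 0x40 - 0xc80, pipe 9 yields
+ * 0x240 - 0x440 relative to the JRBC0 registers).
+ */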
+static int jpeg_v5_0_2_core_reg_offset(u32 pipe)
+{
+ if (pipe <= AMDGPU_MAX_JPEG_RINGS_4_0_3)
+ return ((0x40 * pipe) - 0xc80);
+ else
+ return ((0x40 * pipe) - 0x440);
+}
+
+/**
+ * jpeg_v5_0_2_early_init - set function pointers
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Set ring and irq function pointers
+ */
+static int jpeg_v5_0_2_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (!adev->jpeg.num_jpeg_inst || adev->jpeg.num_jpeg_inst > AMDGPU_MAX_JPEG_INSTANCES)
+ return -ENOENT;
+
+ adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS;
+ jpeg_v5_0_2_set_dec_ring_funcs(adev);
+ jpeg_v5_0_2_set_irq_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_2_sw_init - sw init for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Load firmware and sw initialization
+ */
+static int jpeg_v5_0_2_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int i, j, r, jpeg_inst;
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ /* JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ amdgpu_ih_srcid_jpeg[j], &adev->jpeg.inst->irq);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_jpeg_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_inst = GET_INST(JPEG, i);
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ ring->use_doorbell = false;
+ ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id);
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 1 + j + 11 * jpeg_inst;
+ sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ adev->jpeg.internal.jpeg_pitch[j] =
+ regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET;
+ adev->jpeg.inst[i].external.jpeg_pitch[j] =
+ SOC15_REG_OFFSET1(JPEG, jpeg_inst, regUVD_JRBC_SCRATCH0,
+ (j ? jpeg_v5_0_2_core_reg_offset(j) : 0));
+ }
+ }
+
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_5_0_2, ARRAY_SIZE(jpeg_reg_list_5_0_2));
+ if (r)
+ return r;
+
+ adev->jpeg.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]);
+ adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v5_0_2_sw_fini - sw fini for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * JPEG suspend and free up sw allocation
+ */
+static int jpeg_v5_0_2_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_jpeg_suspend(adev);
+ if (r)
+ return r;
+
+ amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
+ r = amdgpu_jpeg_sw_fini(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v5_0_2_hw_init - start and test JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Initialize the hardware and test the JPEG decode rings
+ */
+static int jpeg_v5_0_2_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int i, j, r, jpeg_inst, tmp;
+
+ if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100)
+ adev->jpeg.caps |= AMDGPU_JPEG_CAPS(RRMT_ENABLED);
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_inst = GET_INST(JPEG, i);
+ ring = adev->jpeg.inst[i].ring_dec;
+
+		/* Remove JPEG tile anti-hang mechanism */
+ tmp = RREG32_SOC15(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS);
+ tmp &= (~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+ WREG32_SOC15(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS, tmp);
+
+ if (ring->use_doorbell)
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 11 * jpeg_inst,
+ adev->jpeg.inst[i].aid_id);
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ if (ring->use_doorbell)
+ WREG32_SOC15_OFFSET(VCN, GET_INST(VCN, i), regVCN_JPEG_DB_CTRL,
+ ring->pipe,
+ ring->doorbell_index <<
+ VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+ VCN_JPEG_DB_CTRL__EN_MASK);
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_2_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Stop the JPEG block, mark ring as not ready any more
+ */
+static int jpeg_v5_0_2_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret = 0;
+
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+ if (adev->jpeg.cur_state != AMD_PG_STATE_GATE)
+ ret = jpeg_v5_0_2_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
+
+ return ret;
+}
+
+/**
+ * jpeg_v5_0_2_suspend - suspend JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * HW fini and suspend JPEG block
+ */
+static int jpeg_v5_0_2_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = jpeg_v5_0_2_hw_fini(ip_block);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_suspend(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v5_0_2_resume - resume JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Resume firmware and hw init JPEG block
+ */
+static int jpeg_v5_0_2_resume(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ r = jpeg_v5_0_2_hw_init(ip_block);
+
+ return r;
+}
+
+static void jpeg_v5_0_2_init_inst(struct amdgpu_device *adev, int i)
+{
+ int jpeg_inst = GET_INST(JPEG, i);
+
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ /* keep the JPEG in static PG mode */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+}
+
+static void jpeg_v5_0_2_deinit_inst(struct amdgpu_device *adev, int i)
+{
+ int jpeg_inst = GET_INST(JPEG, i);
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+}
+
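+/*
+ * jpeg_v5_0_2_init_jrbc - initialize a JPEG ring buffer controller
+ *
+ * Enable the per-pipe JRBC system interrupt, then program the ring
+ * buffer VMID, base address, size and read/write pointers.
+ */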
+static void jpeg_v5_0_2_init_jrbc(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 reg, data, mask;
+ int jpeg_inst = GET_INST(JPEG, ring->me);
+ int reg_offset = ring->pipe ? jpeg_v5_0_2_core_reg_offset(ring->pipe) : 0;
+
+ /* enable System Interrupt for JRBC */
+ reg = SOC15_REG_OFFSET(JPEG, jpeg_inst, regJPEG_SYS_INT_EN);
+ if (ring->pipe < AMDGPU_MAX_JPEG_RINGS_4_0_3) {
+ data = JPEG_SYS_INT_EN__DJRBC0_MASK << ring->pipe;
+ mask = ~(JPEG_SYS_INT_EN__DJRBC0_MASK << ring->pipe);
+ WREG32_P(reg, data, mask);
+ } else {
+		data = JPEG_SYS_INT_EN__DJRBC0_MASK << (ring->pipe + 12);
+		mask = ~(JPEG_SYS_INT_EN__DJRBC0_MASK << (ring->pipe + 12));
+ WREG32_P(reg, data, mask);
+ }
+
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_LMI_JRBC_RB_VMID,
+ reg_offset, 0);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC_RB_CNTL,
+ reg_offset,
+ (0x00000001L | 0x00000002L));
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ reg_offset, lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ reg_offset, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC_RB_RPTR,
+ reg_offset, 0);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC_RB_WPTR,
+ reg_offset, 0);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC_RB_CNTL,
+ reg_offset, 0x00000002L);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC_RB_SIZE,
+ reg_offset, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15_OFFSET(JPEG, jpeg_inst, regUVD_JRBC_RB_WPTR,
+ reg_offset);
+}
+
+/**
+ * jpeg_v5_0_2_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ */
+static int jpeg_v5_0_2_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int i, j;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_v5_0_2_init_inst(adev, i);
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ jpeg_v5_0_2_init_jrbc(ring);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_2_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the JPEG block
+ */
+static int jpeg_v5_0_2_stop(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i)
+ jpeg_v5_0_2_deinit_inst(adev, i);
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_2_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t jpeg_v5_0_2_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC_RB_RPTR,
+ ring->pipe ? jpeg_v5_0_2_core_reg_offset(ring->pipe) : 0);
+}
+
+/**
+ * jpeg_v5_0_2_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t jpeg_v5_0_2_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+
+ return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC_RB_WPTR,
+ ring->pipe ? jpeg_v5_0_2_core_reg_offset(ring->pipe) : 0);
+}
+
+/**
+ * jpeg_v5_0_2_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void jpeg_v5_0_2_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me),
+ regUVD_JRBC_RB_WPTR,
+ (ring->pipe ? jpeg_v5_0_2_core_reg_offset(ring->pipe) : 0),
+ lower_32_bits(ring->wptr));
+ }
+}
+
+static bool jpeg_v5_0_2_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+	bool ret = true;
+ int i, j;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ int reg_offset = (j ? jpeg_v5_0_2_core_reg_offset(j) : 0);
+
+ ret &= ((RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, i),
+ regUVD_JRBC_STATUS, reg_offset) &
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+ }
+ }
+
+ return ret;
+}
+
+static int jpeg_v5_0_2_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret = 0;
+ int i, j;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ int reg_offset = (j ? jpeg_v5_0_2_core_reg_offset(j) : 0);
+
+			ret = SOC15_WAIT_ON_RREG_OFFSET(JPEG, GET_INST(JPEG, i),
+							regUVD_JRBC_STATUS, reg_offset,
+							UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+							UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+			if (ret)
+				return ret;
+ }
+ }
+ return ret;
+}
+
+static int jpeg_v5_0_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = state == AMD_CG_STATE_GATE;
+	int i;
+
+ if (!enable)
+ return 0;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (!jpeg_v5_0_2_is_idle(ip_block))
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+static int jpeg_v5_0_2_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret;
+
+ if (state == adev->jpeg.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = jpeg_v5_0_2_stop(adev);
+ else
+ ret = jpeg_v5_0_2_start(adev);
+
+ if (!ret)
+ adev->jpeg.cur_state = state;
+
+ return ret;
+}
+
+static int jpeg_v5_0_2_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int jpeg_v5_0_2_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u32 i, inst;
+
+ i = node_id_to_phys_map[entry->node_id];
+ DRM_DEV_DEBUG(adev->dev, "IH: JPEG TRAP\n");
+
+ for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst)
+ if (adev->jpeg.inst[inst].aid_id == i)
+ break;
+
+ if (inst >= adev->jpeg.num_jpeg_inst) {
+ dev_WARN_ONCE(adev->dev, 1,
+ "Interrupt received for unknown JPEG instance %d",
+ entry->node_id);
+ return 0;
+ }
+
+ switch (entry->src_id) {
+ case VCN_5_0__SRCID__JPEG_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[0]);
+ break;
+ case VCN_5_0__SRCID__JPEG1_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[1]);
+ break;
+ case VCN_5_0__SRCID__JPEG2_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[2]);
+ break;
+ case VCN_5_0__SRCID__JPEG3_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[3]);
+ break;
+ case VCN_5_0__SRCID__JPEG4_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[4]);
+ break;
+ case VCN_5_0__SRCID__JPEG5_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[5]);
+ break;
+ case VCN_5_0__SRCID__JPEG6_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[6]);
+ break;
+ case VCN_5_0__SRCID__JPEG7_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[7]);
+ break;
+ case VCN_5_0__SRCID__JPEG8_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[8]);
+ break;
+ case VCN_5_0__SRCID__JPEG9_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[9]);
+ break;
+ default:
+ DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
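+/*
+ * Stall the JMI client for this core and wait until it reports clean,
+ * drop outstanding LMI requests, pulse the per-core bit in
+ * JPEG_CORE_RST_CTRL, then release the stall and drop again.
+ */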
+static void jpeg_v5_0_2_core_stall_reset(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int jpeg_inst = GET_INST(JPEG, ring->me);
+ int reg_offset = ring->pipe ? jpeg_v5_0_2_core_reg_offset(ring->pipe) : 0;
+
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_JMI_CLIENT_STALL,
+ reg_offset, 0x1F);
+ SOC15_WAIT_ON_RREG_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS,
+ reg_offset, 0x1F, 0x1F);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_JPEG_LMI_DROP,
+ reg_offset, 0x1F);
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 1 << ring->pipe);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_JMI_CLIENT_STALL,
+ reg_offset, 0x00);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_JPEG_LMI_DROP,
+ reg_offset, 0x00);
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 0x00);
+}
+
+static int jpeg_v5_0_2_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+ jpeg_v5_0_2_core_stall_reset(ring);
+ jpeg_v5_0_2_init_jrbc(ring);
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static const struct amd_ip_funcs jpeg_v5_0_2_ip_funcs = {
+ .name = "jpeg_v5_0_2",
+ .early_init = jpeg_v5_0_2_early_init,
+ .late_init = NULL,
+ .sw_init = jpeg_v5_0_2_sw_init,
+ .sw_fini = jpeg_v5_0_2_sw_fini,
+ .hw_init = jpeg_v5_0_2_hw_init,
+ .hw_fini = jpeg_v5_0_2_hw_fini,
+ .suspend = jpeg_v5_0_2_suspend,
+ .resume = jpeg_v5_0_2_resume,
+ .is_idle = jpeg_v5_0_2_is_idle,
+ .wait_for_idle = jpeg_v5_0_2_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = jpeg_v5_0_2_set_clockgating_state,
+ .set_powergating_state = jpeg_v5_0_2_set_powergating_state,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
+};
+
+static const struct amdgpu_ring_funcs jpeg_v5_0_2_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .get_rptr = jpeg_v5_0_2_dec_ring_get_rptr,
+ .get_wptr = jpeg_v5_0_2_dec_ring_get_wptr,
+ .set_wptr = jpeg_v5_0_2_dec_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v5_0_2_dec_ring_emit_vm_flush */
+ 22 + 22 + /* jpeg_v5_0_2_dec_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* jpeg_v5_0_2_dec_ring_emit_ib */
+ .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib,
+ .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush,
+ .emit_hdp_flush = jpeg_v4_0_3_ring_emit_hdp_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v4_0_3_dec_ring_nop,
+ .insert_start = jpeg_v4_0_3_dec_ring_insert_start,
+ .insert_end = jpeg_v4_0_3_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v5_0_2_ring_reset,
+};
+
+static void jpeg_v5_0_2_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ int i, j, jpeg_inst;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ adev->jpeg.inst[i].ring_dec[j].funcs = &jpeg_v5_0_2_dec_ring_vm_funcs;
+ adev->jpeg.inst[i].ring_dec[j].me = i;
+ adev->jpeg.inst[i].ring_dec[j].pipe = j;
+ }
+ jpeg_inst = GET_INST(JPEG, i);
+ adev->jpeg.inst[i].aid_id =
+ jpeg_inst / adev->jpeg.num_inst_per_aid;
+ }
+}
+
+static const struct amdgpu_irq_src_funcs jpeg_v5_0_2_irq_funcs = {
+ .set = jpeg_v5_0_2_set_interrupt_state,
+ .process = jpeg_v5_0_2_process_interrupt,
+};
+
+static void jpeg_v5_0_2_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i)
+ adev->jpeg.inst->irq.num_types += adev->jpeg.num_jpeg_rings;
+
+ adev->jpeg.inst->irq.funcs = &jpeg_v5_0_2_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version jpeg_v5_0_2_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 5,
+ .minor = 0,
+ .rev = 2,
+ .funcs = &jpeg_v5_0_2_ip_funcs,
+};
+
+#if 0
+static int jpeg_v5_0_2_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ struct aca_bank_info info;
+ u64 misc0;
+ int ret;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
+
+ misc0 = bank->regs[ACA_REG_IDX_MISC0];
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ bank->aca_err_type = ACA_ERROR_TYPE_UE;
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
+ 1ULL);
+ break;
+ case ACA_SMU_TYPE_CE:
+ bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
+ ACA_REG__MISC0__ERRCNT(misc0));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+/* error codes, from the SMU driver interface header file */
+static int jpeg_v5_0_2_err_codes[] = {
+ 16, 17, 18, 19, 20, 21, 22, 23, /* JPEG[0-9][S|D] */
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 48, 49, 50, 51,
+};
+
+static bool jpeg_v5_0_2_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ u32 instlo;
+
+ instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+ instlo &= GENMASK(31, 1);
+
+ if (instlo != mmSMNAID_AID0_MCA_SMU)
+ return false;
+
+ if (aca_bank_check_error_codes(handle->adev, bank,
+ jpeg_v5_0_2_err_codes,
+ ARRAY_SIZE(jpeg_v5_0_2_err_codes)))
+ return false;
+
+ return true;
+}
+
+static const struct aca_bank_ops jpeg_v5_0_2_aca_bank_ops = {
+ .aca_bank_parser = jpeg_v5_0_2_aca_bank_parser,
+ .aca_bank_is_valid = jpeg_v5_0_2_aca_bank_is_valid,
+};
+
+static const struct aca_info jpeg_v5_0_2_aca_info = {
+ .hwip = ACA_HWIP_TYPE_SMU,
+ .mask = ACA_ERROR_UE_MASK,
+ .bank_ops = &jpeg_v5_0_2_aca_bank_ops,
+};
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.h
new file mode 100644
index 000000000000..c201a2c42a0d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2025-2026 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V5_0_2_H__
+#define __JPEG_V5_0_2_H__
+
+extern const struct amdgpu_ip_block_version jpeg_v5_0_2_ip_block;
+
+#define regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET 0x4094
+#define regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET 0x1bffe
+
+#define regUVD_JRBC0_UVD_JRBC_RB_WPTR 0x0640
+#define regUVD_JRBC0_UVD_JRBC_RB_WPTR_BASE_IDX 1
+#define regUVD_JRBC0_UVD_JRBC_STATUS 0x0649
+#define regUVD_JRBC0_UVD_JRBC_STATUS_BASE_IDX 1
+#define regUVD_JRBC0_UVD_JRBC_RB_RPTR 0x064a
+#define regUVD_JRBC0_UVD_JRBC_RB_RPTR_BASE_IDX 1
+#define regUVD_JRBC1_UVD_JRBC_RB_WPTR 0x0000
+#define regUVD_JRBC1_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC1_UVD_JRBC_STATUS 0x0009
+#define regUVD_JRBC1_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC1_UVD_JRBC_RB_RPTR 0x000a
+#define regUVD_JRBC1_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC2_UVD_JRBC_RB_WPTR 0x0040
+#define regUVD_JRBC2_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC2_UVD_JRBC_STATUS 0x0049
+#define regUVD_JRBC2_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC2_UVD_JRBC_RB_RPTR 0x004a
+#define regUVD_JRBC2_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC3_UVD_JRBC_RB_WPTR 0x0080
+#define regUVD_JRBC3_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC3_UVD_JRBC_STATUS 0x0089
+#define regUVD_JRBC3_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC3_UVD_JRBC_RB_RPTR 0x008a
+#define regUVD_JRBC3_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC4_UVD_JRBC_RB_WPTR 0x00c0
+#define regUVD_JRBC4_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC4_UVD_JRBC_STATUS 0x00c9
+#define regUVD_JRBC4_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC4_UVD_JRBC_RB_RPTR 0x00ca
+#define regUVD_JRBC4_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC5_UVD_JRBC_RB_WPTR 0x0100
+#define regUVD_JRBC5_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC5_UVD_JRBC_STATUS 0x0109
+#define regUVD_JRBC5_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC5_UVD_JRBC_RB_RPTR 0x010a
+#define regUVD_JRBC5_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC6_UVD_JRBC_RB_WPTR 0x0140
+#define regUVD_JRBC6_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC6_UVD_JRBC_STATUS 0x0149
+#define regUVD_JRBC6_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC6_UVD_JRBC_RB_RPTR 0x014a
+#define regUVD_JRBC6_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC7_UVD_JRBC_RB_WPTR 0x0180
+#define regUVD_JRBC7_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC7_UVD_JRBC_STATUS 0x0189
+#define regUVD_JRBC7_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC7_UVD_JRBC_RB_RPTR 0x018a
+#define regUVD_JRBC7_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC8_UVD_JRBC_RB_WPTR 0x01c0
+#define regUVD_JRBC8_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC8_UVD_JRBC_STATUS 0x01c9
+#define regUVD_JRBC8_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC8_UVD_JRBC_RB_RPTR 0x01ca
+#define regUVD_JRBC8_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC9_UVD_JRBC_RB_WPTR 0x0440
+#define regUVD_JRBC9_UVD_JRBC_RB_WPTR_BASE_IDX 1
+#define regUVD_JRBC9_UVD_JRBC_STATUS 0x0449
+#define regUVD_JRBC9_UVD_JRBC_STATUS_BASE_IDX 1
+#define regUVD_JRBC9_UVD_JRBC_RB_RPTR 0x044a
+#define regUVD_JRBC9_UVD_JRBC_RB_RPTR_BASE_IDX 1
+#define regUVD_JMI0_JPEG_LMI_DROP 0x0663
+#define regUVD_JMI0_JPEG_LMI_DROP_BASE_IDX 1
+#define regUVD_JMI0_UVD_JMI_CLIENT_STALL 0x067a
+#define regUVD_JMI0_UVD_JMI_CLIENT_STALL_BASE_IDX 1
+#define regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS 0x067b
+#define regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS_BASE_IDX 1
+#define regJPEG_CORE_RST_CTRL 0x072e
+#define regJPEG_CORE_RST_CTRL_BASE_IDX 1
+
+#define regVCN_RRMT_CNTL 0x0940
+#define regVCN_RRMT_CNTL_BASE_IDX 1
+
+enum amdgpu_jpeg_v5_0_2_sub_block {
+ AMDGPU_JPEG_V5_0_2_JPEG0 = 0,
+ AMDGPU_JPEG_V5_0_2_JPEG1,
+
+ AMDGPU_JPEG_V5_0_2_MAX_SUB_BLOCK,
+};
+
+#endif /* __JPEG_V5_0_2_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/lsdma_v7_1.c b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_1.c
new file mode 100644
index 000000000000..d93a0e65ce7d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_1.c
@@ -0,0 +1,99 @@
+/*
+ * Copyright 2026 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+#include "amdgpu.h"
+#include "lsdma_v7_1.h"
+#include "amdgpu_lsdma.h"
+
+#include "lsdma/lsdma_7_1_0_offset.h"
+#include "lsdma/lsdma_7_1_0_sh_mask.h"
+
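+/* Poll until the PIO engine is idle and its command FIFO has drained. */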
+static int lsdma_v7_1_wait_pio_status(struct amdgpu_device *adev)
+{
+ return amdgpu_lsdma_wait_for(adev, SOC15_REG_OFFSET(LSDMA, 0, regLSDMA_PIO_STATUS),
+ LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK,
+ LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK);
+}
+
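+/*
+ * PIO copy: latch the source and destination addresses, then kick the
+ * engine by writing the transfer count to LSDMA_PIO_COMMAND with
+ * CONSTANT_FILL cleared, and wait for the engine to go idle.
+ */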
+static int lsdma_v7_1_copy_mem(struct amdgpu_device *adev,
+ uint64_t src_addr,
+ uint64_t dst_addr,
+ uint64_t size)
+{
+ int ret;
+ uint32_t tmp;
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_LO, lower_32_bits(src_addr));
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_HI, upper_32_bits(src_addr));
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_LO, lower_32_bits(dst_addr));
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_HI, upper_32_bits(dst_addr));
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONTROL, 0x0);
+
+ tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, COUNT, size);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, RAW_WAIT, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, CONSTANT_FILL, 0);
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, tmp);
+
+ ret = lsdma_v7_1_wait_pio_status(adev);
+ if (ret)
+ dev_err(adev->dev, "LSDMA PIO failed to copy memory!\n");
+
+ return ret;
+}
+
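+/*
+ * PIO constant fill: same flow as the copy path, but with the fill
+ * pattern latched in LSDMA_PIO_CONSTFILL_DATA and CONSTANT_FILL set.
+ */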
+static int lsdma_v7_1_fill_mem(struct amdgpu_device *adev,
+ uint64_t dst_addr,
+ uint32_t data,
+ uint64_t size)
+{
+ int ret;
+ uint32_t tmp;
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONSTFILL_DATA, data);
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_LO, lower_32_bits(dst_addr));
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_HI, upper_32_bits(dst_addr));
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONTROL, 0x0);
+
+ tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, COUNT, size);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, RAW_WAIT, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, CONSTANT_FILL, 1);
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, tmp);
+
+ ret = lsdma_v7_1_wait_pio_status(adev);
+ if (ret)
+ dev_err(adev->dev, "LSDMA PIO failed to fill memory!\n");
+
+ return ret;
+}
+
+const struct amdgpu_lsdma_funcs lsdma_v7_1_funcs = {
+ .copy_mem = lsdma_v7_1_copy_mem,
+ .fill_mem = lsdma_v7_1_fill_mem,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/lsdma_v7_1.h b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_1.h
new file mode 100644
index 000000000000..3d1ab605849a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_1.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2026 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __LSDMA_V7_1_H__
+#define __LSDMA_V7_1_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_lsdma_funcs lsdma_v7_1_funcs;
+
+#endif /* __LSDMA_V7_1_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c
index 7b8c670d0a9e..0e9089544769 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c
@@ -31,6 +31,8 @@
#include "gc/gc_11_0_0_default.h"
#include "v12_structs.h"
#include "mes_v12_api_def.h"
+#include "gfx_v12_1_pkt.h"
+#include "sdma_v7_1_0_pkt_open.h"
MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes.bin");
MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes1.bin");
@@ -41,6 +43,7 @@ static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id);
static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block);
static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id);
static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id);
+static int mes_v12_1_self_test(struct amdgpu_device *adev, int xcc_id);
#define MES_EOP_SIZE 2048
@@ -491,16 +494,12 @@ static int mes_v12_1_query_sched_status(struct amdgpu_mes *mes,
}
static uint32_t mes_v12_1_get_xcc_from_reg(uint32_t reg_offset)
{
- /* Check xcc reg offset range */
- uint32_t xcc = (reg_offset & XCC_MID_MASK) ? 4 : 0;
- /* Each XCC has two register ranges.
- * These are represented in reg_offset[17:16]
- */
- return ((reg_offset >> 16) & 0x3) + xcc;
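+	/* reg_offset[18:16] now encodes the XCC instance directly */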
+ return ((reg_offset >> 16) & 0x7);
}
static void mes_v12_1_get_rrmt(uint32_t reg, uint32_t xcc_id,
- struct RRMT_OPTION *rrmt_opt)
+ struct RRMT_OPTION *rrmt_opt,
+ uint32_t *out_reg)
{
uint32_t normalized_reg = soc_v1_0_normalize_xcc_reg_offset(reg);
@@ -509,8 +508,12 @@ static void mes_v12_1_get_rrmt(uint32_t reg, uint32_t xcc_id,
rrmt_opt->mode = (xcc_id == rrmt_opt->xcd_die_id) ?
MES_RRMT_MODE_LOCAL_XCD : MES_RRMT_MODE_REMOTE_XCD;
} else {
- rrmt_opt->mode = MES_RRMT_MODE_LOCAL_REMOTE_AID;
+ rrmt_opt->mode = MES_RRMT_MODE_REMOTE_MID;
+ if (soc_v1_0_mid1_reg_range(reg))
+ rrmt_opt->mid_die_id = 1;
}
+
+ *out_reg = soc_v1_0_normalize_reg_offset(reg);
}
static int mes_v12_1_misc_op(struct amdgpu_mes *mes,
@@ -534,65 +537,44 @@ static int mes_v12_1_misc_op(struct amdgpu_mes *mes,
switch (input->op) {
case MES_MISC_OP_READ_REG:
misc_pkt.opcode = MESAPI_MISC__READ_REG;
- misc_pkt.read_reg.reg_offset = input->read_reg.reg_offset;
misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr;
mes_v12_1_get_rrmt(input->read_reg.reg_offset,
GET_INST(GC, input->xcc_id),
- &misc_pkt.read_reg.rrmt_opt);
- if (misc_pkt.read_reg.rrmt_opt.mode != MES_RRMT_MODE_REMOTE_MID) {
- misc_pkt.read_reg.reg_offset =
- soc_v1_0_normalize_xcc_reg_offset(misc_pkt.read_reg.reg_offset);
- }
+ &misc_pkt.read_reg.rrmt_opt,
+ &misc_pkt.read_reg.reg_offset);
break;
case MES_MISC_OP_WRITE_REG:
misc_pkt.opcode = MESAPI_MISC__WRITE_REG;
- misc_pkt.write_reg.reg_offset = input->write_reg.reg_offset;
misc_pkt.write_reg.reg_value = input->write_reg.reg_value;
mes_v12_1_get_rrmt(input->write_reg.reg_offset,
GET_INST(GC, input->xcc_id),
- &misc_pkt.write_reg.rrmt_opt);
- if (misc_pkt.write_reg.rrmt_opt.mode != MES_RRMT_MODE_REMOTE_MID) {
- misc_pkt.write_reg.reg_offset =
- soc_v1_0_normalize_xcc_reg_offset(misc_pkt.write_reg.reg_offset);
- }
+ &misc_pkt.write_reg.rrmt_opt,
+ &misc_pkt.write_reg.reg_offset);
break;
case MES_MISC_OP_WRM_REG_WAIT:
misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM;
misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
- misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
misc_pkt.wait_reg_mem.reg_offset2 = 0;
mes_v12_1_get_rrmt(input->wrm_reg.reg0,
GET_INST(GC, input->xcc_id),
- &misc_pkt.wait_reg_mem.rrmt_opt1);
- if (misc_pkt.wait_reg_mem.rrmt_opt1.mode != MES_RRMT_MODE_REMOTE_MID) {
- misc_pkt.wait_reg_mem.reg_offset1 =
- soc_v1_0_normalize_xcc_reg_offset(misc_pkt.wait_reg_mem.reg_offset1);
- }
+ &misc_pkt.wait_reg_mem.rrmt_opt1,
+ &misc_pkt.wait_reg_mem.reg_offset1);
break;
case MES_MISC_OP_WRM_REG_WR_WAIT:
misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG;
misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
- misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
- misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1;
mes_v12_1_get_rrmt(input->wrm_reg.reg0,
GET_INST(GC, input->xcc_id),
- &misc_pkt.wait_reg_mem.rrmt_opt1);
+ &misc_pkt.wait_reg_mem.rrmt_opt1,
+ &misc_pkt.wait_reg_mem.reg_offset1);
mes_v12_1_get_rrmt(input->wrm_reg.reg1,
GET_INST(GC, input->xcc_id),
- &misc_pkt.wait_reg_mem.rrmt_opt2);
-
- if (misc_pkt.wait_reg_mem.rrmt_opt1.mode != MES_RRMT_MODE_REMOTE_MID) {
- misc_pkt.wait_reg_mem.reg_offset1 =
- soc_v1_0_normalize_xcc_reg_offset(misc_pkt.wait_reg_mem.reg_offset1);
- }
- if (misc_pkt.wait_reg_mem.rrmt_opt2.mode != MES_RRMT_MODE_REMOTE_MID) {
- misc_pkt.wait_reg_mem.reg_offset2 =
- soc_v1_0_normalize_xcc_reg_offset(misc_pkt.wait_reg_mem.reg_offset2);
- }
+ &misc_pkt.wait_reg_mem.rrmt_opt2,
+ &misc_pkt.wait_reg_mem.reg_offset2);
break;
case MES_MISC_OP_SET_SHADER_DEBUGGER:
pipe = AMDGPU_MES_SCHED_PIPE;
@@ -1611,7 +1593,6 @@ static int mes_v12_1_sw_fini(struct amdgpu_ip_block *ip_block)
amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[inst],
&adev->mes.eop_gpu_addr[inst],
NULL);
- amdgpu_ucode_release(&adev->mes.fw[inst]);
if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) {
amdgpu_bo_free_kernel(&adev->mes.ring[inst].mqd_obj,
@@ -1622,6 +1603,9 @@ static int mes_v12_1_sw_fini(struct amdgpu_ip_block *ip_block)
}
}
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++)
+ amdgpu_ucode_release(&adev->mes.fw[pipe]);
+
for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
if (!adev->enable_uni_mes) {
amdgpu_bo_free_kernel(&adev->gfx.kiq[xcc_id].ring.mqd_obj,
@@ -1947,10 +1931,31 @@ static int mes_v12_1_early_init(struct amdgpu_ip_block *ip_block)
return 0;
}
+static int mes_v12_1_late_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+	/* TODO: remove this once the issue is fixed. */
+ if (adev->mes.enable_coop_mode)
+ return 0;
+
+ for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
+ /* for COOP mode, only test master xcc. */
+ if (adev->mes.enable_coop_mode &&
+ adev->mes.master_xcc_ids[xcc_id] != xcc_id)
+ continue;
+
+ mes_v12_1_self_test(adev, xcc_id);
+ }
+
+ return 0;
+}
+
static const struct amd_ip_funcs mes_v12_1_ip_funcs = {
.name = "mes_v12_1",
.early_init = mes_v12_1_early_init,
- .late_init = NULL,
+ .late_init = mes_v12_1_late_init,
.sw_init = mes_v12_1_sw_init,
.sw_fini = mes_v12_1_sw_fini,
.hw_init = mes_v12_1_hw_init,
@@ -1966,3 +1971,312 @@ const struct amdgpu_ip_block_version mes_v12_1_ip_block = {
.rev = 0,
.funcs = &mes_v12_1_ip_funcs,
};
+
+static int mes_v12_1_alloc_test_buf(struct amdgpu_device *adev,
+ struct amdgpu_bo **bo, uint64_t *addr,
+ void **ptr, int size)
+{
+	int r;
+
+	r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+				    bo, addr, ptr);
+	if (r) {
+		dev_err(adev->dev, "failed to allocate test buffer bo\n");
+		return r;
+	}
+	memset(*ptr, 0, size);
+	return 0;
+}
+
+static int mes_v12_1_map_test_bo(struct amdgpu_device *adev,
+ struct amdgpu_bo *bo, struct amdgpu_vm *vm,
+ struct amdgpu_bo_va **bo_va, u64 va, int size)
+{
+ struct amdgpu_sync sync;
+ int r;
+
+ r = amdgpu_map_static_csa(adev, vm, bo, bo_va, va, size);
+ if (r)
+ return r;
+
+ amdgpu_sync_create(&sync);
+
+ r = amdgpu_vm_bo_update(adev, *bo_va, false);
+ if (r) {
+ dev_err(adev->dev, "failed to do vm_bo_update on meta data\n");
+ goto error;
+ }
+ amdgpu_sync_fence(&sync, (*bo_va)->last_pt_update, GFP_KERNEL);
+
+ r = amdgpu_vm_update_pdes(adev, vm, false);
+ if (r) {
+ dev_err(adev->dev, "failed to update pdes on meta data\n");
+ goto error;
+ }
+ amdgpu_sync_fence(&sync, vm->last_update, GFP_KERNEL);
+ amdgpu_sync_wait(&sync, false);
+
+error:
+ amdgpu_sync_free(&sync);
+	return r;
+}
+
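+/*
+ * Submit a minimal packet stream on the test queue and poll for the
+ * 0xDEADBEEF pattern to land: a SET_UCONFIG_REG write of SCRATCH_REG0
+ * for compute queues, an SDMA linear write of the fence buffer for
+ * SDMA queues.
+ */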
+static int mes_v12_1_test_ring(struct amdgpu_device *adev, int xcc_id,
+ u32 *queue_ptr, u64 fence_gpu_addr,
+ void *fence_cpu_ptr, void *wptr_cpu_addr,
+ u64 doorbell_idx, int queue_type)
+{
+ volatile uint32_t *cpu_ptr = fence_cpu_ptr;
+ int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ int sdma_ring_align = 0x10, compute_ring_align = 0x100;
+	uint32_t tmp = 0, xcc_offset;
+	int r = 0, i, j, wptr = 0;
+
+ if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+ if (!adev->mes.enable_coop_mode) {
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regSCRATCH_REG0, 0xCAFEDEAD);
+ } else {
+ for (i = 0; i < num_xcc; i++) {
+ if (adev->mes.master_xcc_ids[i] == xcc_id)
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regSCRATCH_REG0, 0xCAFEDEAD);
+ }
+ }
+
+ xcc_offset = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
+ queue_ptr[wptr++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
+ queue_ptr[wptr++] = xcc_offset - PACKET3_SET_UCONFIG_REG_START;
+ queue_ptr[wptr++] = 0xDEADBEEF;
+
+ for (i = wptr; i < compute_ring_align; i++)
+ queue_ptr[wptr++] = PACKET3(PACKET3_NOP, 0x3FFF);
+
+ } else if (queue_type == AMDGPU_RING_TYPE_SDMA) {
+ *cpu_ptr = 0xCAFEDEAD;
+
+ queue_ptr[wptr++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ queue_ptr[wptr++] = lower_32_bits(fence_gpu_addr);
+ queue_ptr[wptr++] = upper_32_bits(fence_gpu_addr);
+ queue_ptr[wptr++] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
+ queue_ptr[wptr++] = 0xDEADBEEF;
+
+ for (i = wptr; i < sdma_ring_align; i++)
+ queue_ptr[wptr++] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+
+ wptr <<= 2;
+ }
+
+ atomic64_set((atomic64_t *)wptr_cpu_addr, wptr);
+ WDOORBELL64(doorbell_idx, wptr);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (queue_type == AMDGPU_RING_TYPE_SDMA) {
+ tmp = le32_to_cpu(*cpu_ptr);
+ } else {
+ if (!adev->mes.enable_coop_mode) {
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regSCRATCH_REG0);
+ } else {
+				/* use a separate index so the timeout counter i is preserved */
+				for (j = 0; j < num_xcc; j++) {
+					if (xcc_id != adev->mes.master_xcc_ids[j])
+						continue;
+
+					tmp = RREG32_SOC15(GC, GET_INST(GC, j),
+							regSCRATCH_REG0);
+					if (tmp != 0xDEADBEEF)
+						break;
+				}
+ }
+ }
+
+ if (tmp == 0xDEADBEEF)
+ break;
+
+ if (amdgpu_emu_mode == 1)
+ msleep(1);
+ else
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "xcc%d: mes self test (%s) failed\n", xcc_id,
+ queue_type == AMDGPU_RING_TYPE_SDMA ? "sdma" : "compute");
+
+ while (halt_if_hws_hang)
+ schedule();
+
+ r = -ETIMEDOUT;
+ } else {
+ dev_info(adev->dev, "xcc%d: mes self test (%s) pass\n", xcc_id,
+ queue_type == AMDGPU_RING_TYPE_SDMA ? "sdma" : "compute");
+ }
+
+ return r;
+}
+
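+/*
+ * Layout of the two-page test context mapped at USER_CTX_VA: page 0
+ * holds the ring buffer, page 1 the EOP buffer, and the last three u64
+ * slots of the second page hold the fence, rptr and wptr respectively.
+ */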
+#define USER_CTX_SIZE (PAGE_SIZE * 2)
+#define USER_CTX_VA AMDGPU_VA_RESERVED_BOTTOM
+#define RING_OFFSET(addr) ((addr))
+#define EOP_OFFSET(addr) ((addr) + PAGE_SIZE)
+#define WPTR_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64))
+#define RPTR_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64) * 2)
+#define FENCE_OFFSET(addr) ((addr) + USER_CTX_SIZE - sizeof(u64) * 3)
+
+static int mes_v12_1_test_queue(struct amdgpu_device *adev, int xcc_id,
+ int pasid, struct amdgpu_vm *vm, u64 meta_gpu_addr,
+ u64 queue_gpu_addr, void *ctx_ptr, int queue_type)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ struct amdgpu_mqd *mqd_mgr = &adev->mqds[queue_type];
+ struct amdgpu_mqd_prop mqd_prop = {0};
+ struct mes_add_queue_input add_queue = {0};
+ struct mes_remove_queue_input remove_queue = {0};
+ struct amdgpu_bo *mqd_bo = NULL;
+ int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ int i, r, off, mqd_size, mqd_count = 1;
+ void *mqd_ptr = NULL;
+ u64 mqd_gpu_addr, doorbell_idx;
+
+	/* one extra page of padding for the MES firmware */
+ mqd_size = mqd_mgr->mqd_size + PAGE_SIZE;
+
+	if (queue_type == AMDGPU_RING_TYPE_SDMA) {
+		doorbell_idx = adev->mes.db_start_dw_offset +
+			       adev->doorbell_index.sdma_engine[0];
+	} else {
+		doorbell_idx = adev->mes.db_start_dw_offset +
+			       adev->doorbell_index.userqueue_start;
+	}
+
+ if (adev->mes.enable_coop_mode &&
+ queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+ for (i = 0, mqd_count = 0; i < num_xcc; i++) {
+ if (adev->mes.master_xcc_ids[i] == xcc_id)
+ mqd_count++;
+ }
+ }
+
+ r = mes_v12_1_alloc_test_buf(adev, &mqd_bo, &mqd_gpu_addr,
+ &mqd_ptr, mqd_size * mqd_count);
+ if (r < 0)
+ return r;
+
+ mqd_prop.mqd_gpu_addr = mqd_gpu_addr;
+ mqd_prop.hqd_base_gpu_addr = RING_OFFSET(USER_CTX_VA);
+ mqd_prop.eop_gpu_addr = EOP_OFFSET(USER_CTX_VA);
+ mqd_prop.wptr_gpu_addr = WPTR_OFFSET(USER_CTX_VA);
+ mqd_prop.rptr_gpu_addr = RPTR_OFFSET(USER_CTX_VA);
+ mqd_prop.doorbell_index = doorbell_idx;
+ mqd_prop.queue_size = PAGE_SIZE;
+ mqd_prop.mqd_stride_size = mqd_size;
+ mqd_prop.use_doorbell = true;
+ mqd_prop.hqd_active = false;
+
+ mqd_mgr->init_mqd(adev, mqd_ptr, &mqd_prop);
+ if (mqd_count > 1) {
+ for (i = 1; i < mqd_count; i++) {
+ off = mqd_size * i;
+ mqd_prop.mqd_gpu_addr = mqd_gpu_addr + off;
+ mqd_mgr->init_mqd(adev, (char *)mqd_ptr + off,
+ &mqd_prop);
+ }
+ }
+
+ add_queue.xcc_id = xcc_id;
+ add_queue.process_id = pasid;
+ add_queue.page_table_base_addr = adev->vm_manager.vram_base_offset +
+ amdgpu_bo_gpu_offset(vm->root.bo) - adev->gmc.vram_start;
+ add_queue.process_va_start = 0;
+ add_queue.process_va_end = adev->vm_manager.max_pfn - 1;
+ add_queue.process_context_addr = meta_gpu_addr;
+ add_queue.gang_context_addr = meta_gpu_addr + AMDGPU_MES_PROC_CTX_SIZE;
+ add_queue.doorbell_offset = doorbell_idx;
+ add_queue.mqd_addr = mqd_gpu_addr;
+ add_queue.wptr_addr = mqd_prop.wptr_gpu_addr;
+ add_queue.wptr_mc_addr = WPTR_OFFSET(queue_gpu_addr);
+ add_queue.queue_type = queue_type;
+ add_queue.vm_cntx_cntl = hub->vm_cntx_cntl;
+
+ r = mes_v12_1_add_hw_queue(&adev->mes, &add_queue);
+ if (r)
+ goto error;
+
+	r = mes_v12_1_test_ring(adev, xcc_id, (u32 *)RING_OFFSET((char *)ctx_ptr),
+			FENCE_OFFSET(USER_CTX_VA),
+			FENCE_OFFSET((char *)ctx_ptr),
+			WPTR_OFFSET((char *)ctx_ptr),
+			doorbell_idx, queue_type);
+
+	remove_queue.xcc_id = xcc_id;
+	remove_queue.doorbell_offset = doorbell_idx;
+	remove_queue.gang_context_addr = add_queue.gang_context_addr;
+	if (mes_v12_1_remove_hw_queue(&adev->mes, &remove_queue) && !r)
+		r = -EIO;
+
+error:
+ amdgpu_bo_free_kernel(&mqd_bo, &mqd_gpu_addr, &mqd_ptr);
+ return r;
+}
+
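+/*
+ * Bring up a throwaway VM, map a small user context at a fixed VA and
+ * exercise one MES-scheduled queue of each enabled type against it.
+ */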
+static int mes_v12_1_self_test(struct amdgpu_device *adev, int xcc_id)
+{
+ int queue_types[] = { AMDGPU_RING_TYPE_COMPUTE,
+ /* AMDGPU_RING_TYPE_SDMA */ };
+ struct amdgpu_bo_va *bo_va = NULL;
+ struct amdgpu_vm *vm = NULL;
+ struct amdgpu_bo *meta_bo = NULL, *ctx_bo = NULL;
+ void *meta_ptr = NULL, *ctx_ptr = NULL;
+ u64 meta_gpu_addr, ctx_gpu_addr;
+ int size, i, r, pasid;
+
+ pasid = amdgpu_pasid_alloc(16);
+ if (pasid < 0)
+ pasid = 0;
+
+ size = AMDGPU_MES_PROC_CTX_SIZE + AMDGPU_MES_GANG_CTX_SIZE;
+ r = mes_v12_1_alloc_test_buf(adev, &meta_bo, &meta_gpu_addr,
+ &meta_ptr, size);
+ if (r < 0)
+ goto err2;
+
+ r = mes_v12_1_alloc_test_buf(adev, &ctx_bo, &ctx_gpu_addr,
+ &ctx_ptr, USER_CTX_SIZE);
+ if (r < 0)
+ goto err2;
+
+ vm = kzalloc(sizeof(*vm), GFP_KERNEL);
+ if (!vm) {
+ r = -ENOMEM;
+ goto err2;
+ }
+
+ r = amdgpu_vm_init(adev, vm, -1, pasid);
+ if (r)
+ goto err1;
+
+ r = mes_v12_1_map_test_bo(adev, ctx_bo, vm, &bo_va,
+ USER_CTX_VA, USER_CTX_SIZE);
+ if (r)
+ goto err0;
+
+ for (i = 0; i < ARRAY_SIZE(queue_types); i++) {
+ memset(ctx_ptr, 0, USER_CTX_SIZE);
+
+ r = mes_v12_1_test_queue(adev, xcc_id, pasid, vm, meta_gpu_addr,
+ ctx_gpu_addr, ctx_ptr, queue_types[i]);
+ if (r)
+ break;
+ }
+
+ amdgpu_unmap_static_csa(adev, vm, ctx_bo, bo_va, USER_CTX_VA);
+err0:
+ amdgpu_vm_fini(adev, vm);
+err1:
+ kfree(vm);
+err2:
+ amdgpu_bo_free_kernel(&meta_bo, &meta_gpu_addr, &meta_ptr);
+ amdgpu_bo_free_kernel(&ctx_bo, &ctx_gpu_addr, &ctx_ptr);
+ amdgpu_pasid_free(pasid);
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
index 534cb4c544dc..42a09a277ec3 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
@@ -141,7 +141,7 @@ mmhub_v2_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
uint32_t status)
{
uint32_t cid, rw;
- const char *mmhub_cid = NULL;
+ const char *mmhub_cid;
cid = REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, CID);
@@ -151,25 +151,7 @@ mmhub_v2_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
dev_err(adev->dev,
"MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
- switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
- case IP_VERSION(2, 0, 0):
- case IP_VERSION(2, 0, 2):
- mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_navi1x) ?
- mmhub_client_ids_navi1x[cid][rw] : NULL;
- break;
- case IP_VERSION(2, 1, 0):
- case IP_VERSION(2, 1, 1):
- mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_sienna_cichlid) ?
- mmhub_client_ids_sienna_cichlid[cid][rw] : NULL;
- break;
- case IP_VERSION(2, 1, 2):
- mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_beige_goby) ?
- mmhub_client_ids_beige_goby[cid][rw] : NULL;
- break;
- default:
- mmhub_cid = NULL;
- break;
- }
+ mmhub_cid = amdgpu_mmhub_client_name(&adev->mmhub, cid, rw);
dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
mmhub_cid ? mmhub_cid : "unknown", cid);
dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
@@ -521,6 +503,31 @@ static const struct amdgpu_vmhub_funcs mmhub_v2_0_vmhub_funcs = {
.get_invalidate_req = mmhub_v2_0_get_invalidate_req,
};
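+/* Register this ASIC's UTCL2 client-ID name table with the common mmhub helper. */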
+static void mmhub_v2_0_init_client_info(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+ case IP_VERSION(2, 0, 0):
+ case IP_VERSION(2, 0, 2):
+ amdgpu_mmhub_init_client_info(&adev->mmhub,
+ mmhub_client_ids_navi1x,
+ ARRAY_SIZE(mmhub_client_ids_navi1x));
+ break;
+ case IP_VERSION(2, 1, 0):
+ case IP_VERSION(2, 1, 1):
+ amdgpu_mmhub_init_client_info(&adev->mmhub,
+ mmhub_client_ids_sienna_cichlid,
+ ARRAY_SIZE(mmhub_client_ids_sienna_cichlid));
+ break;
+ case IP_VERSION(2, 1, 2):
+ amdgpu_mmhub_init_client_info(&adev->mmhub,
+ mmhub_client_ids_beige_goby,
+ ARRAY_SIZE(mmhub_client_ids_beige_goby));
+ break;
+ default:
+ break;
+ }
+}
+
static void mmhub_v2_0_init(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
@@ -561,6 +568,8 @@ static void mmhub_v2_0_init(struct amdgpu_device *adev)
MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
hub->vmhub_funcs = &mmhub_v2_0_vmhub_funcs;
+
+ mmhub_v2_0_init_client_info(adev);
}
static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
index ceb2f6b46de5..31c479d76c42 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
@@ -80,7 +80,7 @@ mmhub_v2_3_print_l2_protection_fault_status(struct amdgpu_device *adev,
uint32_t status)
{
uint32_t cid, rw;
- const char *mmhub_cid = NULL;
+ const char *mmhub_cid;
cid = REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, CID);
@@ -90,17 +90,7 @@ mmhub_v2_3_print_l2_protection_fault_status(struct amdgpu_device *adev,
dev_err(adev->dev,
"MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
- switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
- case IP_VERSION(2, 3, 0):
- case IP_VERSION(2, 4, 0):
- case IP_VERSION(2, 4, 1):
- mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_vangogh) ?
- mmhub_client_ids_vangogh[cid][rw] : NULL;
- break;
- default:
- mmhub_cid = NULL;
- break;
- }
+ mmhub_cid = amdgpu_mmhub_client_name(&adev->mmhub, cid, rw);
dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
mmhub_cid ? mmhub_cid : "unknown", cid);
dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
@@ -487,6 +477,10 @@ static void mmhub_v2_3_init(struct amdgpu_device *adev)
MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
hub->vmhub_funcs = &mmhub_v2_3_vmhub_funcs;
+
+ amdgpu_mmhub_init_client_info(&adev->mmhub,
+ mmhub_client_ids_vangogh,
+ ARRAY_SIZE(mmhub_client_ids_vangogh));
}
static void
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c
index ab966e69a342..3d82cfa0f1b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c
@@ -97,7 +97,7 @@ mmhub_v3_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
uint32_t status)
{
uint32_t cid, rw;
- const char *mmhub_cid = NULL;
+ const char *mmhub_cid;
cid = REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, CID);
@@ -107,16 +107,7 @@ mmhub_v3_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
dev_err(adev->dev,
"MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
- switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
- case IP_VERSION(3, 0, 0):
- case IP_VERSION(3, 0, 1):
- mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_0_0) ?
- mmhub_client_ids_v3_0_0[cid][rw] : NULL;
- break;
- default:
- mmhub_cid = NULL;
- break;
- }
+ mmhub_cid = amdgpu_mmhub_client_name(&adev->mmhub, cid, rw);
dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
mmhub_cid ? mmhub_cid : "unknown", cid);
dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
@@ -521,6 +512,10 @@ static void mmhub_v3_0_init(struct amdgpu_device *adev)
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXTS_DISABLE);
hub->vmhub_funcs = &mmhub_v3_0_vmhub_funcs;
+
+ amdgpu_mmhub_init_client_info(&adev->mmhub,
+ mmhub_client_ids_v3_0_0,
+ ARRAY_SIZE(mmhub_client_ids_v3_0_0));
}
static u64 mmhub_v3_0_get_fb_location(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
index 14a742d3a99d..a1b0b7b39a42 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
@@ -104,7 +104,7 @@ mmhub_v3_0_1_print_l2_protection_fault_status(struct amdgpu_device *adev,
uint32_t status)
{
uint32_t cid, rw;
- const char *mmhub_cid = NULL;
+ const char *mmhub_cid;
cid = REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, CID);
@@ -114,17 +114,7 @@ mmhub_v3_0_1_print_l2_protection_fault_status(struct amdgpu_device *adev,
dev_err(adev->dev,
"MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
-
- switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
- case IP_VERSION(3, 0, 1):
- mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_0_1) ?
- mmhub_client_ids_v3_0_1[cid][rw] : NULL;
- break;
- default:
- mmhub_cid = NULL;
- break;
- }
-
+ mmhub_cid = amdgpu_mmhub_client_name(&adev->mmhub, cid, rw);
dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
mmhub_cid ? mmhub_cid : "unknown", cid);
dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
@@ -504,6 +494,10 @@ static void mmhub_v3_0_1_init(struct amdgpu_device *adev)
MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
hub->vmhub_funcs = &mmhub_v3_0_1_vmhub_funcs;
+
+ amdgpu_mmhub_init_client_info(&adev->mmhub,
+ mmhub_client_ids_v3_0_1,
+ ARRAY_SIZE(mmhub_client_ids_v3_0_1));
}
static u64 mmhub_v3_0_1_get_fb_location(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c
index e1f07f2a1852..34e8dbd47c0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c
@@ -97,7 +97,7 @@ mmhub_v3_0_2_print_l2_protection_fault_status(struct amdgpu_device *adev,
uint32_t status)
{
uint32_t cid, rw;
- const char *mmhub_cid = NULL;
+ const char *mmhub_cid;
cid = REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, CID);
@@ -107,9 +107,7 @@ mmhub_v3_0_2_print_l2_protection_fault_status(struct amdgpu_device *adev,
dev_err(adev->dev,
"MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
-
- mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_0_2) ?
- mmhub_client_ids_v3_0_2[cid][rw] : NULL;
+ mmhub_cid = amdgpu_mmhub_client_name(&adev->mmhub, cid, rw);
dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
mmhub_cid ? mmhub_cid : "unknown", cid);
dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
@@ -510,6 +508,10 @@ static void mmhub_v3_0_2_init(struct amdgpu_device *adev)
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_BANK_SELECT_RESERVED_CID2);
hub->vmhub_funcs = &mmhub_v3_0_2_vmhub_funcs;
+
+ amdgpu_mmhub_init_client_info(&adev->mmhub,
+ mmhub_client_ids_v3_0_2,
+ ARRAY_SIZE(mmhub_client_ids_v3_0_2));
}
static u64 mmhub_v3_0_2_get_fb_location(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c
index daf1f8ad4cca..cfce7e1297d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c
@@ -217,7 +217,7 @@ mmhub_v3_3_print_l2_protection_fault_status(struct amdgpu_device *adev,
uint32_t status)
{
uint32_t cid, rw;
- const char *mmhub_cid = NULL;
+ const char *mmhub_cid;
cid = REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS, CID);
@@ -227,29 +227,10 @@ mmhub_v3_3_print_l2_protection_fault_status(struct amdgpu_device *adev,
dev_err(adev->dev,
"MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
-
- switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
- case IP_VERSION(3, 3, 0):
- case IP_VERSION(3, 3, 2):
- mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_3) ?
- mmhub_client_ids_v3_3[cid][rw] :
- cid == 0x140 ? "UMSCH" : NULL;
- break;
- case IP_VERSION(3, 3, 1):
- mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_3_1) ?
- mmhub_client_ids_v3_3_1[cid][rw] :
- cid == 0x140 ? "UMSCH" : NULL;
- break;
- case IP_VERSION(3, 4, 0):
- mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_4) ?
- mmhub_client_ids_v3_4[cid][rw] :
- cid == 0x140 ? "UMSCH" : NULL;
- break;
- default:
- mmhub_cid = NULL;
- break;
- }
-
+ if (cid == 0x140)
+ mmhub_cid = "UMSCH";
+ else
+ mmhub_cid = amdgpu_mmhub_client_name(&adev->mmhub, cid, rw);
dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
mmhub_cid ? mmhub_cid : "unknown", cid);
dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
@@ -640,6 +621,30 @@ static const struct amdgpu_vmhub_funcs mmhub_v3_3_vmhub_funcs = {
.get_invalidate_req = mmhub_v3_3_get_invalidate_req,
};
+static void mmhub_v3_3_init_client_info(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+ case IP_VERSION(3, 3, 0):
+ case IP_VERSION(3, 3, 2):
+ amdgpu_mmhub_init_client_info(&adev->mmhub,
+ mmhub_client_ids_v3_3,
+ ARRAY_SIZE(mmhub_client_ids_v3_3));
+ break;
+ case IP_VERSION(3, 3, 1):
+ amdgpu_mmhub_init_client_info(&adev->mmhub,
+ mmhub_client_ids_v3_3_1,
+ ARRAY_SIZE(mmhub_client_ids_v3_3_1));
+ break;
+ case IP_VERSION(3, 4, 0):
+ amdgpu_mmhub_init_client_info(&adev->mmhub,
+ mmhub_client_ids_v3_4,
+ ARRAY_SIZE(mmhub_client_ids_v3_4));
+ break;
+ default:
+ break;
+ }
+}
+
static void mmhub_v3_3_init(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
@@ -680,6 +685,8 @@ static void mmhub_v3_3_init(struct amdgpu_device *adev)
MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
hub->vmhub_funcs = &mmhub_v3_3_vmhub_funcs;
+
+ mmhub_v3_3_init_client_info(adev);
}
static u64 mmhub_v3_3_get_fb_location(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c
index 88bfe321f83a..bef75c4c48d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c
@@ -90,7 +90,7 @@ mmhub_v4_1_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
uint32_t status)
{
uint32_t cid, rw;
- const char *mmhub_cid = NULL;
+ const char *mmhub_cid;
cid = REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS_LO32, CID);
@@ -100,15 +100,7 @@ mmhub_v4_1_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
dev_err(adev->dev,
"MMVM_L2_PROTECTION_FAULT_STATUS_LO32:0x%08X\n",
status);
- switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
- case IP_VERSION(4, 1, 0):
- mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v4_1_0) ?
- mmhub_client_ids_v4_1_0[cid][rw] : NULL;
- break;
- default:
- mmhub_cid = NULL;
- break;
- }
+ mmhub_cid = amdgpu_mmhub_client_name(&adev->mmhub, cid, rw);
dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
mmhub_cid ? mmhub_cid : "unknown", cid);
dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
@@ -515,6 +507,10 @@ static void mmhub_v4_1_0_init(struct amdgpu_device *adev)
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXTS_DISABLE);
hub->vmhub_funcs = &mmhub_v4_1_0_vmhub_funcs;
+
+ amdgpu_mmhub_init_client_info(&adev->mmhub,
+ mmhub_client_ids_v4_1_0,
+ ARRAY_SIZE(mmhub_client_ids_v4_1_0));
}
static u64 mmhub_v4_1_0_get_fb_location(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c
index 2532ca80f735..29f7ed466858 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c
@@ -72,6 +72,45 @@ static const char *mmhub_client_ids_v4_2_0[][2] = {
[23][1] = "VCN1",
};
+static int mmhub_v4_2_0_get_xgmi_info(struct amdgpu_device *adev)
+{
+ u32 max_num_physical_nodes;
+ u32 max_physical_node_id;
+ u32 xgmi_lfb_cntl;
+ u32 max_region;
+ u64 seg_size;
+
+ /* limit this callback to A+A (GPU coherently connected to the CPU) configurations only */
+ if (!adev->gmc.xgmi.connected_to_cpu)
+ return 0;
+
+ xgmi_lfb_cntl = RREG32_SOC15(MMHUB, GET_INST(MMHUB, 0),
+ regMMMC_VM_XGMI_LFB_CNTL);
+ seg_size = REG_GET_FIELD(
+ RREG32_SOC15(MMHUB, GET_INST(MMHUB, 0), regMMMC_VM_XGMI_LFB_SIZE),
+ MMMC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24;
+ max_region =
+ REG_GET_FIELD(xgmi_lfb_cntl, MMMC_VM_XGMI_LFB_CNTL, PF_MAX_REGION);
+
+ max_num_physical_nodes = 4;
+ max_physical_node_id = 3;
+
+ adev->gmc.xgmi.num_physical_nodes = max_region + 1;
+
+ if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes)
+ return -EINVAL;
+
+ adev->gmc.xgmi.physical_node_id =
+ REG_GET_FIELD(xgmi_lfb_cntl, MMMC_VM_XGMI_LFB_CNTL, PF_LFB_REGION);
+
+ if (adev->gmc.xgmi.physical_node_id > max_physical_node_id)
+ return -EINVAL;
+
+ adev->gmc.xgmi.node_segment_size = seg_size;
+
+ return 0;
+}
+
static u64 mmhub_v4_2_0_get_fb_location(struct amdgpu_device *adev)
{
u64 base;
@@ -131,7 +170,7 @@ static void mmhub_v4_2_0_setup_vm_pt_regs(struct amdgpu_device *adev,
static void mmhub_v4_2_0_mid_init_gart_aperture_regs(struct amdgpu_device *adev,
uint32_t mid_mask)
{
- uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+ uint64_t pt_base;
int i;
if (adev->gmc.pdb0_bo)
@@ -152,10 +191,10 @@ static void mmhub_v4_2_0_mid_init_gart_aperture_regs(struct amdgpu_device *adev,
WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
- (u32)(adev->gmc.fb_end >> 12));
+ (u32)(adev->gmc.gart_end >> 12));
WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
- (u32)(adev->gmc.fb_end >> 44));
+ (u32)(adev->gmc.gart_end >> 44));
} else {
WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
@@ -190,41 +229,74 @@ static void mmhub_v4_2_0_mid_init_system_aperture_regs(struct amdgpu_device *ade
return;
for_each_inst(i, mid_mask) {
- /* Program the AGP BAR */
- WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
- regMMMC_VM_AGP_BASE_LO32, 0);
- WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
- regMMMC_VM_AGP_BASE_HI32, 0);
- WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
- regMMMC_VM_AGP_BOT_LO32,
- lower_32_bits(adev->gmc.agp_start >> 24));
- WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
- regMMMC_VM_AGP_BOT_HI32,
- upper_32_bits(adev->gmc.agp_start >> 24));
- WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
- regMMMC_VM_AGP_TOP_LO32,
- lower_32_bits(adev->gmc.agp_end >> 24));
- WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
- regMMMC_VM_AGP_TOP_HI32,
- upper_32_bits(adev->gmc.agp_end >> 24));
+ if (adev->gmc.pdb0_bo) {
+ /* Disable the AGP and system apertures
+ * when the VMID0 page table is enabled */
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+ regMMMC_VM_FB_LOCATION_TOP_LO32, 0);
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+ regMMMC_VM_FB_LOCATION_TOP_HI32, 0);
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+ regMMMC_VM_FB_LOCATION_BASE_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+ regMMMC_VM_FB_LOCATION_BASE_HI32, 1);
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+ regMMMC_VM_AGP_TOP_LO32, 0);
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+ regMMMC_VM_AGP_TOP_HI32, 0);
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+ regMMMC_VM_AGP_BOT_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+ regMMMC_VM_AGP_BOT_HI32, 1);
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+ regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+ regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_HI32,
+ 0x7F);
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+ regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32, 0);
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+ regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32, 0);
+ } else {
+ /* Program the AGP BAR */
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+ regMMMC_VM_AGP_BASE_LO32, 0);
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+ regMMMC_VM_AGP_BASE_HI32, 0);
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+ regMMMC_VM_AGP_BOT_LO32,
+ lower_32_bits(adev->gmc.agp_start >> 24));
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+ regMMMC_VM_AGP_BOT_HI32,
+ upper_32_bits(adev->gmc.agp_start >> 24));
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+ regMMMC_VM_AGP_TOP_LO32,
+ lower_32_bits(adev->gmc.agp_end >> 24));
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+ regMMMC_VM_AGP_TOP_HI32,
+ upper_32_bits(adev->gmc.agp_end >> 24));
- /* Program the system aperture low logical page number. */
- WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
- regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_LO32,
- lower_32_bits(min(adev->gmc.fb_start,
- adev->gmc.agp_start) >> 18));
- WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
- regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_HI32,
- upper_32_bits(min(adev->gmc.fb_start,
- adev->gmc.agp_start) >> 18));
- WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
- regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32,
- lower_32_bits(max(adev->gmc.fb_end,
- adev->gmc.agp_end) >> 18));
- WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
- regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32,
- upper_32_bits(max(adev->gmc.fb_end,
- adev->gmc.agp_end) >> 18));
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+ regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_LO32,
+ lower_32_bits(min(adev->gmc.fb_start,
+ adev->gmc.agp_start) >> 18));
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+ regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_HI32,
+ upper_32_bits(min(adev->gmc.fb_start,
+ adev->gmc.agp_start) >> 18));
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+ regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32,
+ lower_32_bits(max(adev->gmc.fb_end,
+ adev->gmc.agp_end) >> 18));
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+ regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32,
+ upper_32_bits(max(adev->gmc.fb_end,
+ adev->gmc.agp_end) >> 18));
+ }
/* Set default page address. */
value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
@@ -252,38 +324,6 @@ static void mmhub_v4_2_0_mid_init_system_aperture_regs(struct amdgpu_device *ade
WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
}
-
- /* In the case squeezing vram into GART aperture, we don't use
- * FB aperture and AGP aperture. Disable them.
- */
- if (adev->gmc.pdb0_bo) {
- WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
- regMMMC_VM_FB_LOCATION_TOP_LO32, 0);
- WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
- regMMMC_VM_FB_LOCATION_TOP_HI32, 0);
- WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
- regMMMC_VM_FB_LOCATION_BASE_LO32, 0xFFFFFFFF);
- WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
- regMMMC_VM_FB_LOCATION_BASE_HI32, 1);
- WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
- regMMMC_VM_AGP_TOP_LO32, 0);
- WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
- regMMMC_VM_AGP_TOP_HI32, 0);
- WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
- regMMMC_VM_AGP_BOT_LO32, 0xFFFFFFFF);
- WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
- regMMMC_VM_AGP_BOT_HI32, 1);
- WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
- regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_LO32,
- 0xFFFFFFFF);
- WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
- regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_HI32,
- 0x7F);
- WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
- regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32, 0);
- WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
- regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32, 0);
- }
}
static void mmhub_v4_2_0_mid_init_tlb_regs(struct amdgpu_device *adev,
@@ -676,7 +716,7 @@ mmhub_v4_2_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
uint32_t status)
{
uint32_t cid, rw;
- const char *mmhub_cid = NULL;
+ const char *mmhub_cid;
cid = REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS_LO32, CID);
@@ -686,15 +726,7 @@ mmhub_v4_2_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
dev_err(adev->dev,
"MMVM_L2_PROTECTION_FAULT_STATUS_LO32:0x%08X\n",
status);
- switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
- case IP_VERSION(4, 2, 0):
- mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v4_2_0) ?
- mmhub_client_ids_v4_2_0[cid][rw] : NULL;
- break;
- default:
- mmhub_cid = NULL;
- break;
- }
+ mmhub_cid = amdgpu_mmhub_client_name(&adev->mmhub, cid, rw);
dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
mmhub_cid ? mmhub_cid : "unknown", cid);
dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
@@ -785,6 +817,10 @@ static void mmhub_v4_2_0_init(struct amdgpu_device *adev)
mid_mask = adev->aid_mask;
mmhub_v4_2_0_mid_init(adev, mid_mask);
+
+ amdgpu_mmhub_init_client_info(&adev->mmhub,
+ mmhub_client_ids_v4_2_0,
+ ARRAY_SIZE(mmhub_client_ids_v4_2_0));
}
static void
@@ -884,6 +920,7 @@ const struct amdgpu_mmhub_funcs mmhub_v4_2_0_funcs = {
.set_fault_enable_default = mmhub_v4_2_0_set_fault_enable_default,
.set_clockgating = mmhub_v4_2_0_set_clockgating,
.get_clockgating = mmhub_v4_2_0_get_clockgating,
+ .get_xgmi_info = mmhub_v4_2_0_get_xgmi_info,
};
static int mmhub_v4_2_0_xcp_resume(void *handle, uint32_t inst_mask)
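Besides the client-info conversion, mmhub_v4_2_0 gains a .get_xgmi_info hook in amdgpu_mmhub_funcs. The dispatch site is outside this diff; a hedged sketch of how the common XGMI setup path could route through it:

/* Illustrative dispatch only; the real caller is not shown in this diff
 * and may be structured differently. */
static int amdgpu_get_xgmi_info(struct amdgpu_device *adev)
{
	if (adev->mmhub.funcs && adev->mmhub.funcs->get_xgmi_info)
		return adev->mmhub.funcs->get_xgmi_info(adev);
	return 0;
}

The register math is visible in the new function above: PF_LFB_SIZE counts 16 MiB units (hence the << 24 shift), PF_MAX_REGION + 1 gives the node count, and PF_LFB_REGION identifies the local node, validated against a maximum of 4 nodes and node ID 3.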
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index f17c3839aea1..7ce1a1b95606 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -283,10 +283,10 @@ static u32 nv_didt_rreg(struct amdgpu_device *adev, u32 reg)
address = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_INDEX);
data = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_DATA);
- spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.didt.lock, flags);
WREG32(address, (reg));
r = RREG32(data);
- spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.didt.lock, flags);
return r;
}
@@ -297,10 +297,10 @@ static void nv_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
address = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_INDEX);
data = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_DATA);
- spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.didt.lock, flags);
WREG32(address, (reg));
WREG32(data, (v));
- spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.didt.lock, flags);
}
static u32 nv_get_config_memsize(struct amdgpu_device *adev)
@@ -635,21 +635,15 @@ static int nv_common_early_init(struct amdgpu_ip_block *ip_block)
struct amdgpu_device *adev = ip_block->adev;
adev->nbio.funcs->set_reg_remap(adev);
- adev->smc_rreg = NULL;
- adev->smc_wreg = NULL;
- adev->pcie_rreg = &amdgpu_device_indirect_rreg;
- adev->pcie_wreg = &amdgpu_device_indirect_wreg;
- adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64;
- adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64;
- adev->pciep_rreg = amdgpu_device_pcie_port_rreg;
- adev->pciep_wreg = amdgpu_device_pcie_port_wreg;
-
- /* TODO: will add them during VCN v2 implementation */
- adev->uvd_ctx_rreg = NULL;
- adev->uvd_ctx_wreg = NULL;
-
- adev->didt_rreg = &nv_didt_rreg;
- adev->didt_wreg = &nv_didt_wreg;
+ adev->reg.pcie.rreg = &amdgpu_device_indirect_rreg;
+ adev->reg.pcie.wreg = &amdgpu_device_indirect_wreg;
+ adev->reg.pcie.rreg64 = &amdgpu_device_indirect_rreg64;
+ adev->reg.pcie.wreg64 = &amdgpu_device_indirect_wreg64;
+ adev->reg.pcie.port_rreg = &amdgpu_device_pcie_port_rreg;
+ adev->reg.pcie.port_wreg = &amdgpu_device_pcie_port_wreg;
+
+ adev->reg.didt.rreg = &nv_didt_rreg;
+ adev->reg.didt.wreg = &nv_didt_wreg;
adev->asic_funcs = &nv_asic_funcs;
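nv.c here, and si.c and soc15.c further down, migrate the flat adev->*_rreg/*_wreg callbacks and their *_idx_lock spinlocks into a grouped adev->reg structure. The new definition is not included in this diff; the accesses imply a shape roughly like the following (member set assumed, and the pcie block additionally carries the 64-bit, _ext, and port_* accessors seen in soc15.c):

struct amdgpu_reg_block {
	spinlock_t lock;	/* serializes the INDEX/DATA register pair */
	u32 (*rreg)(struct amdgpu_device *adev, u32 reg);
	void (*wreg)(struct amdgpu_device *adev, u32 reg, u32 v);
};

struct amdgpu_reg {
	struct amdgpu_reg_block smc;
	struct amdgpu_reg_block pcie;
	struct amdgpu_reg_block didt;
	struct amdgpu_reg_block uvd_ctx;
	struct amdgpu_reg_block gc_cac;
	struct amdgpu_reg_block se_cac;
};

One consequence visible in the hunks: NULL assignments for unimplemented accessors (smc, uvd_ctx, didt) simply disappear, since a zero-initialized structure covers them.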
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index d1e1a4369521..a0c84f81c0c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -166,7 +166,7 @@ static void psp_v13_0_bootloader_print_status(struct psp_context *psp,
bl_status_reg =
(SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_92)
<< 2) +
- adev->asic_funcs->encode_ext_smn_addressing(i);
+ amdgpu_reg_get_smn_base64(adev, MP0_HWIP, i);
at += snprintf(bl_status_msg + at,
PSP13_BL_STATUS_SIZE - at,
" status(%02i): 0x%08x", i,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 92ce580647cd..0090ace49024 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -51,7 +51,6 @@
static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev);
-static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev);
MODULE_FIRMWARE("amdgpu/topaz_sdma.bin");
@@ -809,6 +808,14 @@ static void sdma_v2_4_ring_emit_wreg(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, val);
}
+static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
+ .copy_pte_num_dw = 7,
+ .copy_pte = sdma_v2_4_vm_copy_pte,
+
+ .write_pte = sdma_v2_4_vm_write_pte,
+ .set_pte_pde = sdma_v2_4_vm_set_pte_pde,
+};
+
static int sdma_v2_4_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
@@ -822,7 +829,7 @@ static int sdma_v2_4_early_init(struct amdgpu_ip_block *ip_block)
sdma_v2_4_set_ring_funcs(adev);
sdma_v2_4_set_buffer_funcs(adev);
- sdma_v2_4_set_vm_pte_funcs(adev);
+ amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v2_4_vm_pte_funcs);
sdma_v2_4_set_irq_funcs(adev);
return 0;
@@ -1232,26 +1239,6 @@ static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev)
adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}
-static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
- .copy_pte_num_dw = 7,
- .copy_pte = sdma_v2_4_vm_copy_pte,
-
- .write_pte = sdma_v2_4_vm_write_pte,
- .set_pte_pde = sdma_v2_4_vm_set_pte_pde,
-};
-
-static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev)
-{
- unsigned i;
-
- adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- adev->vm_manager.vm_pte_scheds[i] =
- &adev->sdma.instance[i].ring.sched;
- }
- adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
-}
-
const struct amdgpu_ip_block_version sdma_v2_4_ip_block = {
.type = AMD_IP_BLOCK_TYPE_SDMA,
.major = 2,
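Every SDMA and si_dma variant in this series replaces its private *_set_vm_pte_funcs() with a call to amdgpu_sdma_set_vm_pte_scheds(). The shared helper is not part of this diff; reconstructing it from the per-IP setters being deleted (including the page-queue branch the v4.x versions needed) gives roughly:

void amdgpu_sdma_set_vm_pte_scheds(struct amdgpu_device *adev,
				   const struct amdgpu_vm_pte_funcs *funcs)
{
	unsigned int i;

	adev->vm_manager.vm_pte_funcs = funcs;
	for (i = 0; i < adev->sdma.num_instances; i++) {
		/* v4.x parts prefer the page queue when present. */
		if (adev->sdma.has_page_queue)
			adev->vm_manager.vm_pte_scheds[i] =
				&adev->sdma.instance[i].page.sched;
		else
			adev->vm_manager.vm_pte_scheds[i] =
				&adev->sdma.instance[i].ring.sched;
	}
	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
}

The v5.x setters also guarded against vm_pte_funcs already being set; whether the shared helper keeps that check is not visible here.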
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 1c076bd1cf73..2526d393162a 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -51,7 +51,6 @@
static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev);
-static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev);
MODULE_FIRMWARE("amdgpu/tonga_sdma.bin");
@@ -1082,6 +1081,14 @@ static void sdma_v3_0_ring_emit_wreg(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, val);
}
+static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
+ .copy_pte_num_dw = 7,
+ .copy_pte = sdma_v3_0_vm_copy_pte,
+
+ .write_pte = sdma_v3_0_vm_write_pte,
+ .set_pte_pde = sdma_v3_0_vm_set_pte_pde,
+};
+
static int sdma_v3_0_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
@@ -1102,7 +1109,7 @@ static int sdma_v3_0_early_init(struct amdgpu_ip_block *ip_block)
sdma_v3_0_set_ring_funcs(adev);
sdma_v3_0_set_buffer_funcs(adev);
- sdma_v3_0_set_vm_pte_funcs(adev);
+ amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v3_0_vm_pte_funcs);
sdma_v3_0_set_irq_funcs(adev);
return 0;
@@ -1674,26 +1681,6 @@ static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev)
adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}
-static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
- .copy_pte_num_dw = 7,
- .copy_pte = sdma_v3_0_vm_copy_pte,
-
- .write_pte = sdma_v3_0_vm_write_pte,
- .set_pte_pde = sdma_v3_0_vm_set_pte_pde,
-};
-
-static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev)
-{
- unsigned i;
-
- adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- adev->vm_manager.vm_pte_scheds[i] =
- &adev->sdma.instance[i].ring.sched;
- }
- adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
-}
-
const struct amdgpu_ip_block_version sdma_v3_0_ip_block =
{
.type = AMD_IP_BLOCK_TYPE_SDMA,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index f38004e6064e..44f0f23e1148 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -129,7 +129,6 @@ static const struct amdgpu_hwip_reg_entry sdma_reg_list_4_0[] = {
static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev);
-static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev);
static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev);
@@ -1751,6 +1750,14 @@ static bool sdma_v4_0_fw_support_paging_queue(struct amdgpu_device *adev)
}
}
+static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = {
+ .copy_pte_num_dw = 7,
+ .copy_pte = sdma_v4_0_vm_copy_pte,
+
+ .write_pte = sdma_v4_0_vm_write_pte,
+ .set_pte_pde = sdma_v4_0_vm_set_pte_pde,
+};
+
static int sdma_v4_0_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
@@ -1769,7 +1776,7 @@ static int sdma_v4_0_early_init(struct amdgpu_ip_block *ip_block)
sdma_v4_0_set_ring_funcs(adev);
sdma_v4_0_set_buffer_funcs(adev);
- sdma_v4_0_set_vm_pte_funcs(adev);
+ amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v4_0_vm_pte_funcs);
sdma_v4_0_set_irq_funcs(adev);
sdma_v4_0_set_ras_funcs(adev);
@@ -2597,48 +2604,37 @@ static void sdma_v4_0_emit_fill_buffer(struct amdgpu_ib *ib,
}
static const struct amdgpu_buffer_funcs sdma_v4_0_buffer_funcs = {
- .copy_max_bytes = 0x400000,
+ .copy_max_bytes = 1 << 22,
.copy_num_dw = 7,
.emit_copy_buffer = sdma_v4_0_emit_copy_buffer,
- .fill_max_bytes = 0x400000,
+ .fill_max_bytes = 1 << 22,
+ .fill_num_dw = 5,
+ .emit_fill_buffer = sdma_v4_0_emit_fill_buffer,
+};
+
+static const struct amdgpu_buffer_funcs sdma_v4_4_buffer_funcs = {
+ .copy_max_bytes = 1 << 30,
+ .copy_num_dw = 7,
+ .emit_copy_buffer = sdma_v4_0_emit_copy_buffer,
+
+ .fill_max_bytes = 1 << 30,
.fill_num_dw = 5,
.emit_fill_buffer = sdma_v4_0_emit_fill_buffer,
};
static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev)
{
- adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs;
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >= IP_VERSION(4, 4, 0))
+ adev->mman.buffer_funcs = &sdma_v4_4_buffer_funcs;
+ else
+ adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs;
if (adev->sdma.has_page_queue)
adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page;
else
adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}
-static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = {
- .copy_pte_num_dw = 7,
- .copy_pte = sdma_v4_0_vm_copy_pte,
-
- .write_pte = sdma_v4_0_vm_write_pte,
- .set_pte_pde = sdma_v4_0_vm_set_pte_pde,
-};
-
-static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev)
-{
- struct drm_gpu_scheduler *sched;
- unsigned i;
-
- adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- if (adev->sdma.has_page_queue)
- sched = &adev->sdma.instance[i].page.sched;
- else
- sched = &adev->sdma.instance[i].ring.sched;
- adev->vm_manager.vm_pte_scheds[i] = sched;
- }
- adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
-}
-
static void sdma_v4_0_get_ras_error_count(uint32_t value,
uint32_t instance,
uint32_t *sec_count)
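A second change in this file splits the buffer funcs by IP version and rewrites the size limits as shifts. For reference (purely illustrative, not part of the patch):

#include <linux/build_bug.h>

static_assert(0x400000 == (1 << 22));	/* 4 MiB: limit kept for SDMA 4.0 */
static_assert((1 << 30) == 0x40000000);	/* 1 GiB: new limit for SDMA 4.4+ */

The same 1 << 30 ceiling is applied to sdma_v4_4_2, v5.2, v6.0, v7.0, and v7.1 in the hunks that follow.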
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index a1443990d5c6..78bdfed0a7fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -104,7 +104,6 @@ static const struct amdgpu_hwip_reg_entry sdma_reg_list_4_4_2[] = {
static void sdma_v4_4_2_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v4_4_2_set_buffer_funcs(struct amdgpu_device *adev);
-static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev);
static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev);
static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev);
@@ -1347,6 +1346,14 @@ static const struct amdgpu_sdma_funcs sdma_v4_4_2_sdma_funcs = {
.soft_reset_kernel_queue = &sdma_v4_4_2_soft_reset_engine,
};
+static const struct amdgpu_vm_pte_funcs sdma_v4_4_2_vm_pte_funcs = {
+ .copy_pte_num_dw = 7,
+ .copy_pte = sdma_v4_4_2_vm_copy_pte,
+
+ .write_pte = sdma_v4_4_2_vm_write_pte,
+ .set_pte_pde = sdma_v4_4_2_vm_set_pte_pde,
+};
+
static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
@@ -1362,7 +1369,7 @@ static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block)
sdma_v4_4_2_set_ring_funcs(adev);
sdma_v4_4_2_set_buffer_funcs(adev);
- sdma_v4_4_2_set_vm_pte_funcs(adev);
+ amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v4_4_2_vm_pte_funcs);
sdma_v4_4_2_set_irq_funcs(adev);
sdma_v4_4_2_set_ras_funcs(adev);
return 0;
@@ -2298,11 +2305,11 @@ static void sdma_v4_4_2_emit_fill_buffer(struct amdgpu_ib *ib,
}
static const struct amdgpu_buffer_funcs sdma_v4_4_2_buffer_funcs = {
- .copy_max_bytes = 0x400000,
+ .copy_max_bytes = 1 << 30,
.copy_num_dw = 7,
.emit_copy_buffer = sdma_v4_4_2_emit_copy_buffer,
- .fill_max_bytes = 0x400000,
+ .fill_max_bytes = 1 << 30,
.fill_num_dw = 5,
.emit_fill_buffer = sdma_v4_4_2_emit_fill_buffer,
};
@@ -2316,30 +2323,6 @@ static void sdma_v4_4_2_set_buffer_funcs(struct amdgpu_device *adev)
adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}
-static const struct amdgpu_vm_pte_funcs sdma_v4_4_2_vm_pte_funcs = {
- .copy_pte_num_dw = 7,
- .copy_pte = sdma_v4_4_2_vm_copy_pte,
-
- .write_pte = sdma_v4_4_2_vm_write_pte,
- .set_pte_pde = sdma_v4_4_2_vm_set_pte_pde,
-};
-
-static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev)
-{
- struct drm_gpu_scheduler *sched;
- unsigned i;
-
- adev->vm_manager.vm_pte_funcs = &sdma_v4_4_2_vm_pte_funcs;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- if (adev->sdma.has_page_queue)
- sched = &adev->sdma.instance[i].page.sched;
- else
- sched = &adev->sdma.instance[i].ring.sched;
- adev->vm_manager.vm_pte_scheds[i] = sched;
- }
- adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
-}
-
/**
* sdma_v4_4_2_update_reset_mask - update reset mask for SDMA
* @adev: Pointer to the AMDGPU device structure
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index e3a035c9fece..52f4e9e099cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -110,7 +110,6 @@ static const struct amdgpu_hwip_reg_entry sdma_reg_list_5_0[] = {
static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev);
-static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev);
static int sdma_v5_0_stop_queue(struct amdgpu_ring *ring);
static int sdma_v5_0_restore_queue(struct amdgpu_ring *ring);
@@ -1357,6 +1356,13 @@ static const struct amdgpu_sdma_funcs sdma_v5_0_sdma_funcs = {
.soft_reset_kernel_queue = &sdma_v5_0_soft_reset_engine,
};
+static const struct amdgpu_vm_pte_funcs sdma_v5_0_vm_pte_funcs = {
+ .copy_pte_num_dw = 7,
+ .copy_pte = sdma_v5_0_vm_copy_pte,
+ .write_pte = sdma_v5_0_vm_write_pte,
+ .set_pte_pde = sdma_v5_0_vm_set_pte_pde,
+};
+
static int sdma_v5_0_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
@@ -1368,7 +1374,7 @@ static int sdma_v5_0_early_init(struct amdgpu_ip_block *ip_block)
sdma_v5_0_set_ring_funcs(adev);
sdma_v5_0_set_buffer_funcs(adev);
- sdma_v5_0_set_vm_pte_funcs(adev);
+ amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v5_0_vm_pte_funcs);
sdma_v5_0_set_irq_funcs(adev);
sdma_v5_0_set_mqd_funcs(adev);
@@ -2052,27 +2058,6 @@ static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev)
}
}
-static const struct amdgpu_vm_pte_funcs sdma_v5_0_vm_pte_funcs = {
- .copy_pte_num_dw = 7,
- .copy_pte = sdma_v5_0_vm_copy_pte,
- .write_pte = sdma_v5_0_vm_write_pte,
- .set_pte_pde = sdma_v5_0_vm_set_pte_pde,
-};
-
-static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev)
-{
- unsigned i;
-
- if (adev->vm_manager.vm_pte_funcs == NULL) {
- adev->vm_manager.vm_pte_funcs = &sdma_v5_0_vm_pte_funcs;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- adev->vm_manager.vm_pte_scheds[i] =
- &adev->sdma.instance[i].ring.sched;
- }
- adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
- }
-}
-
const struct amdgpu_ip_block_version sdma_v5_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_SDMA,
.major = 5,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index feebaa8cd9b1..b4fb90cc8f7d 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -111,7 +111,6 @@ static const struct amdgpu_hwip_reg_entry sdma_reg_list_5_2[] = {
static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev);
-static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev);
static int sdma_v5_2_stop_queue(struct amdgpu_ring *ring);
static int sdma_v5_2_restore_queue(struct amdgpu_ring *ring);
@@ -1248,6 +1247,13 @@ static void sdma_v5_2_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
}
+static const struct amdgpu_vm_pte_funcs sdma_v5_2_vm_pte_funcs = {
+ .copy_pte_num_dw = 7,
+ .copy_pte = sdma_v5_2_vm_copy_pte,
+ .write_pte = sdma_v5_2_vm_write_pte,
+ .set_pte_pde = sdma_v5_2_vm_set_pte_pde,
+};
+
static int sdma_v5_2_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
@@ -1259,7 +1265,7 @@ static int sdma_v5_2_early_init(struct amdgpu_ip_block *ip_block)
sdma_v5_2_set_ring_funcs(adev);
sdma_v5_2_set_buffer_funcs(adev);
- sdma_v5_2_set_vm_pte_funcs(adev);
+ amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v5_2_vm_pte_funcs);
sdma_v5_2_set_irq_funcs(adev);
sdma_v5_2_set_mqd_funcs(adev);
@@ -2039,11 +2045,11 @@ static void sdma_v5_2_emit_fill_buffer(struct amdgpu_ib *ib,
}
static const struct amdgpu_buffer_funcs sdma_v5_2_buffer_funcs = {
- .copy_max_bytes = 0x400000,
+ .copy_max_bytes = 1 << 30,
.copy_num_dw = 7,
.emit_copy_buffer = sdma_v5_2_emit_copy_buffer,
- .fill_max_bytes = 0x400000,
+ .fill_max_bytes = 1 << 30, /* HW supports 1 << 30, but PAL uses 1 << 22 */
.fill_num_dw = 5,
.emit_fill_buffer = sdma_v5_2_emit_fill_buffer,
};
@@ -2056,27 +2062,6 @@ static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev)
}
}
-static const struct amdgpu_vm_pte_funcs sdma_v5_2_vm_pte_funcs = {
- .copy_pte_num_dw = 7,
- .copy_pte = sdma_v5_2_vm_copy_pte,
- .write_pte = sdma_v5_2_vm_write_pte,
- .set_pte_pde = sdma_v5_2_vm_set_pte_pde,
-};
-
-static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev)
-{
- unsigned i;
-
- if (adev->vm_manager.vm_pte_funcs == NULL) {
- adev->vm_manager.vm_pte_funcs = &sdma_v5_2_vm_pte_funcs;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- adev->vm_manager.vm_pte_scheds[i] =
- &adev->sdma.instance[i].ring.sched;
- }
- adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
- }
-}
-
const struct amdgpu_ip_block_version sdma_v5_2_ip_block = {
.type = AMD_IP_BLOCK_TYPE_SDMA,
.major = 5,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index b40126f5d3ef..b005672f2f96 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -120,7 +120,6 @@ static const struct amdgpu_hwip_reg_entry sdma_reg_list_6_0[] = {
static void sdma_v6_0_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v6_0_set_buffer_funcs(struct amdgpu_device *adev);
-static void sdma_v6_0_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v6_0_set_irq_funcs(struct amdgpu_device *adev);
static int sdma_v6_0_start(struct amdgpu_device *adev);
@@ -1280,6 +1279,13 @@ static void sdma_v6_0_get_csa_info(struct amdgpu_device *adev,
csa_info->alignment = SDMA6_CSA_ALIGNMENT;
}
+static const struct amdgpu_vm_pte_funcs sdma_v6_0_vm_pte_funcs = {
+ .copy_pte_num_dw = 7,
+ .copy_pte = sdma_v6_0_vm_copy_pte,
+ .write_pte = sdma_v6_0_vm_write_pte,
+ .set_pte_pde = sdma_v6_0_vm_set_pte_pde,
+};
+
static int sdma_v6_0_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
@@ -1308,7 +1314,7 @@ static int sdma_v6_0_early_init(struct amdgpu_ip_block *ip_block)
sdma_v6_0_set_ring_funcs(adev);
sdma_v6_0_set_buffer_funcs(adev);
- sdma_v6_0_set_vm_pte_funcs(adev);
+ amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v6_0_vm_pte_funcs);
sdma_v6_0_set_irq_funcs(adev);
sdma_v6_0_set_mqd_funcs(adev);
sdma_v6_0_set_ras_funcs(adev);
@@ -1878,11 +1884,11 @@ static void sdma_v6_0_emit_fill_buffer(struct amdgpu_ib *ib,
}
static const struct amdgpu_buffer_funcs sdma_v6_0_buffer_funcs = {
- .copy_max_bytes = 0x400000,
+ .copy_max_bytes = 1 << 30,
.copy_num_dw = 7,
.emit_copy_buffer = sdma_v6_0_emit_copy_buffer,
- .fill_max_bytes = 0x400000,
+ .fill_max_bytes = 1 << 30,
.fill_num_dw = 5,
.emit_fill_buffer = sdma_v6_0_emit_fill_buffer,
};
@@ -1893,25 +1899,6 @@ static void sdma_v6_0_set_buffer_funcs(struct amdgpu_device *adev)
adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}
-static const struct amdgpu_vm_pte_funcs sdma_v6_0_vm_pte_funcs = {
- .copy_pte_num_dw = 7,
- .copy_pte = sdma_v6_0_vm_copy_pte,
- .write_pte = sdma_v6_0_vm_write_pte,
- .set_pte_pde = sdma_v6_0_vm_set_pte_pde,
-};
-
-static void sdma_v6_0_set_vm_pte_funcs(struct amdgpu_device *adev)
-{
- unsigned i;
-
- adev->vm_manager.vm_pte_funcs = &sdma_v6_0_vm_pte_funcs;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- adev->vm_manager.vm_pte_scheds[i] =
- &adev->sdma.instance[i].ring.sched;
- }
- adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
-}
-
const struct amdgpu_ip_block_version sdma_v6_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_SDMA,
.major = 6,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
index 8d16ef257bcb..5679a94d0815 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
@@ -119,7 +119,6 @@ static const struct amdgpu_hwip_reg_entry sdma_reg_list_7_0[] = {
static void sdma_v7_0_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v7_0_set_buffer_funcs(struct amdgpu_device *adev);
-static void sdma_v7_0_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v7_0_set_irq_funcs(struct amdgpu_device *adev);
static int sdma_v7_0_start(struct amdgpu_device *adev);
@@ -1264,6 +1263,13 @@ static void sdma_v7_0_get_csa_info(struct amdgpu_device *adev,
csa_info->alignment = SDMA7_CSA_ALIGNMENT;
}
+static const struct amdgpu_vm_pte_funcs sdma_v7_0_vm_pte_funcs = {
+ .copy_pte_num_dw = 8,
+ .copy_pte = sdma_v7_0_vm_copy_pte,
+ .write_pte = sdma_v7_0_vm_write_pte,
+ .set_pte_pde = sdma_v7_0_vm_set_pte_pde,
+};
+
static int sdma_v7_0_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
@@ -1294,7 +1300,7 @@ static int sdma_v7_0_early_init(struct amdgpu_ip_block *ip_block)
sdma_v7_0_set_ring_funcs(adev);
sdma_v7_0_set_buffer_funcs(adev);
- sdma_v7_0_set_vm_pte_funcs(adev);
+ amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v7_0_vm_pte_funcs);
sdma_v7_0_set_irq_funcs(adev);
sdma_v7_0_set_mqd_funcs(adev);
adev->sdma.get_csa_info = &sdma_v7_0_get_csa_info;
@@ -1829,10 +1835,10 @@ static void sdma_v7_0_emit_fill_buffer(struct amdgpu_ib *ib,
}
static const struct amdgpu_buffer_funcs sdma_v7_0_buffer_funcs = {
- .copy_max_bytes = 0x400000,
+ .copy_max_bytes = 1 << 30,
.copy_num_dw = 8,
.emit_copy_buffer = sdma_v7_0_emit_copy_buffer,
- .fill_max_bytes = 0x400000,
+ .fill_max_bytes = 1 << 30,
.fill_num_dw = 5,
.emit_fill_buffer = sdma_v7_0_emit_fill_buffer,
};
@@ -1843,25 +1849,6 @@ static void sdma_v7_0_set_buffer_funcs(struct amdgpu_device *adev)
adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}
-static const struct amdgpu_vm_pte_funcs sdma_v7_0_vm_pte_funcs = {
- .copy_pte_num_dw = 8,
- .copy_pte = sdma_v7_0_vm_copy_pte,
- .write_pte = sdma_v7_0_vm_write_pte,
- .set_pte_pde = sdma_v7_0_vm_set_pte_pde,
-};
-
-static void sdma_v7_0_set_vm_pte_funcs(struct amdgpu_device *adev)
-{
- unsigned i;
-
- adev->vm_manager.vm_pte_funcs = &sdma_v7_0_vm_pte_funcs;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- adev->vm_manager.vm_pte_scheds[i] =
- &adev->sdma.instance[i].ring.sched;
- }
- adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
-}
-
const struct amdgpu_ip_block_version sdma_v7_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_SDMA,
.major = 7,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c
index 0824cba48f2e..f20e0fc3fc74 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c
@@ -110,7 +110,6 @@ static const struct amdgpu_hwip_reg_entry sdma_reg_list_7_1[] = {
static void sdma_v7_1_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v7_1_set_buffer_funcs(struct amdgpu_device *adev);
-static void sdma_v7_1_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v7_1_set_irq_funcs(struct amdgpu_device *adev);
static int sdma_v7_1_inst_start(struct amdgpu_device *adev,
uint32_t inst_mask);
@@ -1130,12 +1129,21 @@ static void sdma_v7_1_vm_set_pte_pde(struct amdgpu_ib *ib,
/* for physically contiguous pages (vram) */
u32 header = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_PTEPDE);
- if (amdgpu_mtype_local)
- header |= SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(0x3);
- else
- header |= (SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(0x2) |
- SDMA_PKT_PTEPDE_COPY_HEADER_SNOOP(0x1) |
- SDMA_PKT_PTEPDE_COPY_HEADER_SCOPE(0x3));
+ /* TODO:
+ * When VM_L2_CNTL5.WALKER_FETCH_PDE_MTYPE_ENABLE is enabled, change the MTYPE
+ * below to RW for AID A1 and UC for AID A0. NC needs an additional GCR flush
+ * and does not need to be supported. Also honour the amdgpu_mtype_local
+ * override. RW would additionally require setting the SCOPE bits in the header:
+ *
+ * header |= (SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(0x2:RW) |
+ * SDMA_PKT_PTEPDE_COPY_HEADER_SNOOP(0x1) |
+ * SDMA_PKT_PTEPDE_COPY_HEADER_SCOPE(0x3:SYS_SCOPE));
+ */
+
+ /* VM_L2_CNTL5.WALKER_FETCH_PDE_MTYPE_ENABLE is 0, which defaults to UC, so
+ * use MTYPE_UC (0x3). For reference: MTYPE_RW=0x2, MTYPE_NC=0x0.
+ */
+ header |= SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(0x3) | SDMA_PKT_PTEPDE_COPY_HEADER_SNOOP(0x1);
ib->ptr[ib->length_dw++] = header;
ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
@@ -1248,6 +1256,13 @@ static void sdma_v7_1_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
}
+static const struct amdgpu_vm_pte_funcs sdma_v7_1_vm_pte_funcs = {
+ .copy_pte_num_dw = 8,
+ .copy_pte = sdma_v7_1_vm_copy_pte,
+ .write_pte = sdma_v7_1_vm_write_pte,
+ .set_pte_pde = sdma_v7_1_vm_set_pte_pde,
+};
+
static int sdma_v7_1_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
@@ -1261,7 +1276,7 @@ static int sdma_v7_1_early_init(struct amdgpu_ip_block *ip_block)
sdma_v7_1_set_ring_funcs(adev);
sdma_v7_1_set_buffer_funcs(adev);
- sdma_v7_1_set_vm_pte_funcs(adev);
+ amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v7_1_vm_pte_funcs);
sdma_v7_1_set_irq_funcs(adev);
sdma_v7_1_set_mqd_funcs(adev);
@@ -1739,10 +1754,10 @@ static void sdma_v7_1_emit_fill_buffer(struct amdgpu_ib *ib,
}
static const struct amdgpu_buffer_funcs sdma_v7_1_buffer_funcs = {
- .copy_max_bytes = 0x400000,
+ .copy_max_bytes = 1 << 30,
.copy_num_dw = 8,
.emit_copy_buffer = sdma_v7_1_emit_copy_buffer,
- .fill_max_bytes = 0x400000,
+ .fill_max_bytes = 1 << 30,
.fill_num_dw = 5,
.emit_fill_buffer = sdma_v7_1_emit_fill_buffer,
};
@@ -1753,25 +1768,6 @@ static void sdma_v7_1_set_buffer_funcs(struct amdgpu_device *adev)
adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}
-static const struct amdgpu_vm_pte_funcs sdma_v7_1_vm_pte_funcs = {
- .copy_pte_num_dw = 8,
- .copy_pte = sdma_v7_1_vm_copy_pte,
- .write_pte = sdma_v7_1_vm_write_pte,
- .set_pte_pde = sdma_v7_1_vm_set_pte_pde,
-};
-
-static void sdma_v7_1_set_vm_pte_funcs(struct amdgpu_device *adev)
-{
- unsigned i;
-
- adev->vm_manager.vm_pte_funcs = &sdma_v7_1_vm_pte_funcs;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- adev->vm_manager.vm_pte_scheds[i] =
- &adev->sdma.instance[i].ring.sched;
- }
- adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
-}
-
const struct amdgpu_ip_block_version sdma_v7_1_ip_block = {
.type = AMD_IP_BLOCK_TYPE_SDMA,
.major = 7,
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index 509d43b238f3..c26cb3e8bff6 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -1027,11 +1027,11 @@ static u32 si_pcie_rreg(struct amdgpu_device *adev, u32 reg)
unsigned long flags;
u32 r;
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.pcie.lock, flags);
WREG32(AMDGPU_PCIE_INDEX, reg);
(void)RREG32(AMDGPU_PCIE_INDEX);
r = RREG32(AMDGPU_PCIE_DATA);
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
return r;
}
@@ -1039,12 +1039,12 @@ static void si_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
unsigned long flags;
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.pcie.lock, flags);
WREG32(AMDGPU_PCIE_INDEX, reg);
(void)RREG32(AMDGPU_PCIE_INDEX);
WREG32(AMDGPU_PCIE_DATA, v);
(void)RREG32(AMDGPU_PCIE_DATA);
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
}
static u32 si_pciep_rreg(struct amdgpu_device *adev, u32 reg)
@@ -1052,11 +1052,11 @@ static u32 si_pciep_rreg(struct amdgpu_device *adev, u32 reg)
unsigned long flags;
u32 r;
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.pcie.lock, flags);
WREG32(PCIE_PORT_INDEX, ((reg) & 0xff));
(void)RREG32(PCIE_PORT_INDEX);
r = RREG32(PCIE_PORT_DATA);
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
return r;
}
@@ -1064,12 +1064,12 @@ static void si_pciep_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
unsigned long flags;
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.pcie.lock, flags);
WREG32(PCIE_PORT_INDEX, ((reg) & 0xff));
(void)RREG32(PCIE_PORT_INDEX);
WREG32(PCIE_PORT_DATA, (v));
(void)RREG32(PCIE_PORT_DATA);
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
}
static u32 si_smc_rreg(struct amdgpu_device *adev, u32 reg)
@@ -1077,10 +1077,10 @@ static u32 si_smc_rreg(struct amdgpu_device *adev, u32 reg)
unsigned long flags;
u32 r;
- spin_lock_irqsave(&adev->smc_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.smc.lock, flags);
WREG32(mmSMC_IND_INDEX_0, (reg));
r = RREG32(mmSMC_IND_DATA_0);
- spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.smc.lock, flags);
return r;
}
@@ -1088,10 +1088,10 @@ static void si_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
unsigned long flags;
- spin_lock_irqsave(&adev->smc_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.smc.lock, flags);
WREG32(mmSMC_IND_INDEX_0, (reg));
WREG32(mmSMC_IND_DATA_0, (v));
- spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.smc.lock, flags);
}
static u32 si_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg)
@@ -1099,10 +1099,10 @@ static u32 si_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg)
unsigned long flags;
u32 r;
- spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.uvd_ctx.lock, flags);
WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff));
r = RREG32(mmUVD_CTX_DATA);
- spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.uvd_ctx.lock, flags);
return r;
}
@@ -1110,10 +1110,10 @@ static void si_uvd_ctx_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
unsigned long flags;
- spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.uvd_ctx.lock, flags);
WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff));
WREG32(mmUVD_CTX_DATA, (v));
- spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.uvd_ctx.lock, flags);
}
static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = {
@@ -2037,16 +2037,14 @@ static int si_common_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
- adev->smc_rreg = &si_smc_rreg;
- adev->smc_wreg = &si_smc_wreg;
- adev->pcie_rreg = &si_pcie_rreg;
- adev->pcie_wreg = &si_pcie_wreg;
- adev->pciep_rreg = &si_pciep_rreg;
- adev->pciep_wreg = &si_pciep_wreg;
- adev->uvd_ctx_rreg = si_uvd_ctx_rreg;
- adev->uvd_ctx_wreg = si_uvd_ctx_wreg;
- adev->didt_rreg = NULL;
- adev->didt_wreg = NULL;
+ adev->reg.smc.rreg = si_smc_rreg;
+ adev->reg.smc.wreg = si_smc_wreg;
+ adev->reg.pcie.rreg = &si_pcie_rreg;
+ adev->reg.pcie.wreg = &si_pcie_wreg;
+ adev->reg.pcie.port_rreg = &si_pciep_rreg;
+ adev->reg.pcie.port_wreg = &si_pciep_wreg;
+ adev->reg.uvd_ctx.rreg = &si_uvd_ctx_rreg;
+ adev->reg.uvd_ctx.wreg = &si_uvd_ctx_wreg;
adev->asic_funcs = &si_asic_funcs;
@@ -2382,10 +2380,10 @@ static inline u32 si_pif_phy0_rreg(struct amdgpu_device *adev, u32 reg)
unsigned long flags;
u32 r;
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.pcie.lock, flags);
WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff));
r = RREG32(EVERGREEN_PIF_PHY0_DATA);
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
return r;
}
@@ -2393,10 +2391,10 @@ static inline void si_pif_phy0_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
unsigned long flags;
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.pcie.lock, flags);
WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff));
WREG32(EVERGREEN_PIF_PHY0_DATA, (v));
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
}
static inline u32 si_pif_phy1_rreg(struct amdgpu_device *adev, u32 reg)
@@ -2404,10 +2402,10 @@ static inline u32 si_pif_phy1_rreg(struct amdgpu_device *adev, u32 reg)
unsigned long flags;
u32 r;
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.pcie.lock, flags);
WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff));
r = RREG32(EVERGREEN_PIF_PHY1_DATA);
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
return r;
}
@@ -2415,10 +2413,10 @@ static inline void si_pif_phy1_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
unsigned long flags;
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.pcie.lock, flags);
WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff));
WREG32(EVERGREEN_PIF_PHY1_DATA, (v));
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
}
static void si_program_aspm(struct amdgpu_device *adev)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index 74fcaa340d9b..3e58feb2d5e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -37,7 +37,6 @@ const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
static void si_dma_set_ring_funcs(struct amdgpu_device *adev);
static void si_dma_set_buffer_funcs(struct amdgpu_device *adev);
-static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev);
static void si_dma_set_irq_funcs(struct amdgpu_device *adev);
/**
@@ -473,6 +472,14 @@ static void si_dma_ring_emit_wreg(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, val);
}
+static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = {
+ .copy_pte_num_dw = 5,
+ .copy_pte = si_dma_vm_copy_pte,
+
+ .write_pte = si_dma_vm_write_pte,
+ .set_pte_pde = si_dma_vm_set_pte_pde,
+};
+
static int si_dma_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
@@ -481,7 +488,7 @@ static int si_dma_early_init(struct amdgpu_ip_block *ip_block)
si_dma_set_ring_funcs(adev);
si_dma_set_buffer_funcs(adev);
- si_dma_set_vm_pte_funcs(adev);
+ amdgpu_sdma_set_vm_pte_scheds(adev, &si_dma_vm_pte_funcs);
si_dma_set_irq_funcs(adev);
return 0;
@@ -830,26 +837,6 @@ static void si_dma_set_buffer_funcs(struct amdgpu_device *adev)
adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}
-static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = {
- .copy_pte_num_dw = 5,
- .copy_pte = si_dma_vm_copy_pte,
-
- .write_pte = si_dma_vm_write_pte,
- .set_pte_pde = si_dma_vm_set_pte_pde,
-};
-
-static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev)
-{
- unsigned i;
-
- adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- adev->vm_manager.vm_pte_scheds[i] =
- &adev->sdma.instance[i].ring.sched;
- }
- adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
-}
-
const struct amdgpu_ip_block_version si_dma_ip_block =
{
.type = AMD_IP_BLOCK_TYPE_SDMA,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 4e037a6978f0..b456e4541d9a 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -245,10 +245,10 @@ static u32 soc15_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg)
address = SOC15_REG_OFFSET(UVD, 0, mmUVD_CTX_INDEX);
data = SOC15_REG_OFFSET(UVD, 0, mmUVD_CTX_DATA);
- spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.uvd_ctx.lock, flags);
WREG32(address, ((reg) & 0x1ff));
r = RREG32(data);
- spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.uvd_ctx.lock, flags);
return r;
}
@@ -259,10 +259,10 @@ static void soc15_uvd_ctx_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
address = SOC15_REG_OFFSET(UVD, 0, mmUVD_CTX_INDEX);
data = SOC15_REG_OFFSET(UVD, 0, mmUVD_CTX_DATA);
- spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.uvd_ctx.lock, flags);
WREG32(address, ((reg) & 0x1ff));
WREG32(data, (v));
- spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.uvd_ctx.lock, flags);
}
static u32 soc15_didt_rreg(struct amdgpu_device *adev, u32 reg)
@@ -273,10 +273,10 @@ static u32 soc15_didt_rreg(struct amdgpu_device *adev, u32 reg)
address = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_INDEX);
data = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_DATA);
- spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.didt.lock, flags);
WREG32(address, (reg));
r = RREG32(data);
- spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.didt.lock, flags);
return r;
}
@@ -287,10 +287,10 @@ static void soc15_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
address = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_INDEX);
data = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_DATA);
- spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.didt.lock, flags);
WREG32(address, (reg));
WREG32(data, (v));
- spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.didt.lock, flags);
}
static u32 soc15_gc_cac_rreg(struct amdgpu_device *adev, u32 reg)
@@ -298,10 +298,10 @@ static u32 soc15_gc_cac_rreg(struct amdgpu_device *adev, u32 reg)
unsigned long flags;
u32 r;
- spin_lock_irqsave(&adev->gc_cac_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.gc_cac.lock, flags);
WREG32_SOC15(GC, 0, mmGC_CAC_IND_INDEX, (reg));
r = RREG32_SOC15(GC, 0, mmGC_CAC_IND_DATA);
- spin_unlock_irqrestore(&adev->gc_cac_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.gc_cac.lock, flags);
return r;
}
@@ -309,10 +309,10 @@ static void soc15_gc_cac_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
unsigned long flags;
- spin_lock_irqsave(&adev->gc_cac_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.gc_cac.lock, flags);
WREG32_SOC15(GC, 0, mmGC_CAC_IND_INDEX, (reg));
WREG32_SOC15(GC, 0, mmGC_CAC_IND_DATA, (v));
- spin_unlock_irqrestore(&adev->gc_cac_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.gc_cac.lock, flags);
}
static u32 soc15_se_cac_rreg(struct amdgpu_device *adev, u32 reg)
@@ -320,10 +320,10 @@ static u32 soc15_se_cac_rreg(struct amdgpu_device *adev, u32 reg)
unsigned long flags;
u32 r;
- spin_lock_irqsave(&adev->se_cac_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.se_cac.lock, flags);
WREG32_SOC15(GC, 0, mmSE_CAC_IND_INDEX, (reg));
r = RREG32_SOC15(GC, 0, mmSE_CAC_IND_DATA);
- spin_unlock_irqrestore(&adev->se_cac_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.se_cac.lock, flags);
return r;
}
@@ -331,10 +331,10 @@ static void soc15_se_cac_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
unsigned long flags;
- spin_lock_irqsave(&adev->se_cac_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.se_cac.lock, flags);
WREG32_SOC15(GC, 0, mmSE_CAC_IND_INDEX, (reg));
WREG32_SOC15(GC, 0, mmSE_CAC_IND_DATA, (v));
- spin_unlock_irqrestore(&adev->se_cac_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.se_cac.lock, flags);
}
static u32 soc15_get_config_memsize(struct amdgpu_device *adev)
@@ -952,7 +952,6 @@ static const struct amdgpu_asic_funcs aqua_vanjaram_asic_funcs =
.get_pcie_replay_count = &amdgpu_nbio_get_pcie_replay_count,
.supports_baco = &soc15_supports_baco,
.query_video_codecs = &soc15_query_video_codecs,
- .encode_ext_smn_addressing = &aqua_vanjaram_encode_ext_smn_addressing,
.get_reg_state = &aqua_vanjaram_get_reg_state,
};
@@ -961,24 +960,22 @@ static int soc15_common_early_init(struct amdgpu_ip_block *ip_block)
struct amdgpu_device *adev = ip_block->adev;
adev->nbio.funcs->set_reg_remap(adev);
- adev->smc_rreg = NULL;
- adev->smc_wreg = NULL;
- adev->pcie_rreg = &amdgpu_device_indirect_rreg;
- adev->pcie_wreg = &amdgpu_device_indirect_wreg;
- adev->pcie_rreg_ext = &amdgpu_device_indirect_rreg_ext;
- adev->pcie_wreg_ext = &amdgpu_device_indirect_wreg_ext;
- adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64;
- adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64;
- adev->pcie_rreg64_ext = &amdgpu_device_indirect_rreg64_ext;
- adev->pcie_wreg64_ext = &amdgpu_device_indirect_wreg64_ext;
- adev->uvd_ctx_rreg = &soc15_uvd_ctx_rreg;
- adev->uvd_ctx_wreg = &soc15_uvd_ctx_wreg;
- adev->didt_rreg = &soc15_didt_rreg;
- adev->didt_wreg = &soc15_didt_wreg;
- adev->gc_cac_rreg = &soc15_gc_cac_rreg;
- adev->gc_cac_wreg = &soc15_gc_cac_wreg;
- adev->se_cac_rreg = &soc15_se_cac_rreg;
- adev->se_cac_wreg = &soc15_se_cac_wreg;
+ adev->reg.pcie.rreg = &amdgpu_device_indirect_rreg;
+ adev->reg.pcie.wreg = &amdgpu_device_indirect_wreg;
+ adev->reg.pcie.rreg_ext = &amdgpu_device_indirect_rreg_ext;
+ adev->reg.pcie.wreg_ext = &amdgpu_device_indirect_wreg_ext;
+ adev->reg.pcie.rreg64 = &amdgpu_device_indirect_rreg64;
+ adev->reg.pcie.wreg64 = &amdgpu_device_indirect_wreg64;
+ adev->reg.pcie.rreg64_ext = &amdgpu_device_indirect_rreg64_ext;
+ adev->reg.pcie.wreg64_ext = &amdgpu_device_indirect_wreg64_ext;
+ adev->reg.uvd_ctx.rreg = &soc15_uvd_ctx_rreg;
+ adev->reg.uvd_ctx.wreg = &soc15_uvd_ctx_wreg;
+ adev->reg.didt.rreg = &soc15_didt_rreg;
+ adev->reg.didt.wreg = &soc15_didt_wreg;
+ adev->reg.gc_cac.rreg = &soc15_gc_cac_rreg;
+ adev->reg.gc_cac.wreg = &soc15_gc_cac_wreg;
+ adev->reg.se_cac.rreg = &soc15_se_cac_rreg;
+ adev->reg.se_cac.wreg = &soc15_se_cac_wreg;
adev->rev_id = amdgpu_device_get_rev_id(adev);
adev->external_rev_id = 0xFF;
@@ -1200,6 +1197,7 @@ static int soc15_common_early_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(9, 4, 4):
case IP_VERSION(9, 5, 0):
adev->asic_funcs = &aqua_vanjaram_asic_funcs;
+ adev->reg.smn.get_smn_base = &amdgpu_reg_smn_v1_0_get_base;
adev->cg_flags =
AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_CGCG |
AMD_CG_SUPPORT_GFX_CGLS | AMD_CG_SUPPORT_SDMA_MGCG |
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index c8ac11a9cdef..46a6477b677b 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -118,7 +118,6 @@ int vega10_reg_base_init(struct amdgpu_device *adev);
int vega20_reg_base_init(struct amdgpu_device *adev);
int arct_reg_base_init(struct amdgpu_device *adev);
int aldebaran_reg_base_init(struct amdgpu_device *adev);
-u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id);
int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev);
ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev,
enum amdgpu_reg_state reg_state, void *buf,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 242b24f73c17..a7b5a95ebebb 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -195,19 +195,22 @@
__RREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, AMDGPU_REGS_RLC, ip##_HWIP, inst)
/* inst is equal to ext for some IPs */
-#define RREG32_SOC15_EXT(ip, inst, reg, ext) \
- RREG32_PCIE_EXT((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) * 4 \
- + adev->asic_funcs->encode_ext_smn_addressing(ext)) \
-
-#define WREG32_SOC15_EXT(ip, inst, reg, ext, value) \
- WREG32_PCIE_EXT((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) * 4 \
- + adev->asic_funcs->encode_ext_smn_addressing(ext), \
- value) \
-
-#define RREG64_MCA(ext, mca_base, idx) \
- RREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) + mca_base + (idx * 8))
-
-#define WREG64_MCA(ext, mca_base, idx, val) \
- WREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) + mca_base + (idx * 8), val)
+#define RREG32_SOC15_EXT(ip, inst, reg, ext) \
+ RREG32_PCIE_EXT((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + \
+ reg) * 4 + \
+ amdgpu_reg_get_smn_base64(adev, ip##_HWIP, inst))
+
+#define WREG32_SOC15_EXT(ip, inst, reg, ext, value) \
+ WREG32_PCIE_EXT( \
+ (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) * \
+ 4 + \
+ amdgpu_reg_get_smn_base64(adev, ip##_HWIP, inst), \
+ value)
+
+#define RREG64_MCA(smn_base, mca_base, idx) \
+ RREG64_PCIE_EXT(smn_base + mca_base + (idx * 8))
+
+#define WREG64_MCA(smn_base, mca_base, idx, val) \
+ WREG64_PCIE_EXT(smn_base + mca_base + (idx * 8), val)
#endif
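
With encode_ext_smn_addressing() removed from amdgpu_asic_funcs, the SOC15_EXT macros above fetch the SMN base from a versioned helper and the MCA macros take a caller-supplied smn_base. A hedged model of the resulting address arithmetic follows; only the 8-byte stride comes from the macros themselves, the numeric bases are illustrative:

	#include <stdint.h>
	#include <stdio.h>

	/* Model of the math behind RREG64_MCA/WREG64_MCA: the caller now
	 * supplies the SMN base once instead of re-encoding the ext id on
	 * every access. All values below are illustrative. */
	static uint64_t mca_reg_addr(uint64_t smn_base, uint64_t mca_base,
				     uint32_t idx)
	{
		/* 64-bit registers, hence the 8-byte stride per index */
		return smn_base + mca_base + (uint64_t)idx * 8;
	}

	int main(void)
	{
		uint64_t smn_base = 0x100000000ULL; /* hypothetical per-AID SMN aperture */
		uint64_t mca_base = 0x50000;        /* hypothetical MCA bank base */

		for (uint32_t idx = 0; idx < 4; idx++)
			printf("MCA[%u] @ 0x%llx\n", idx,
			       (unsigned long long)mca_reg_addr(smn_base, mca_base, idx));
		return 0;
	}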
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index a0ad1f8a76f0..fbd1d97f33ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -229,10 +229,10 @@ static u32 soc21_didt_rreg(struct amdgpu_device *adev, u32 reg)
address = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_INDEX);
data = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_DATA);
- spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.didt.lock, flags);
WREG32(address, (reg));
r = RREG32(data);
- spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.didt.lock, flags);
return r;
}
@@ -243,10 +243,10 @@ static void soc21_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
address = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_INDEX);
data = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_DATA);
- spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.didt.lock, flags);
WREG32(address, (reg));
WREG32(data, (v));
- spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.didt.lock, flags);
}
static u32 soc21_get_config_memsize(struct amdgpu_device *adev)
@@ -589,21 +589,15 @@ static int soc21_common_early_init(struct amdgpu_ip_block *ip_block)
struct amdgpu_device *adev = ip_block->adev;
adev->nbio.funcs->set_reg_remap(adev);
- adev->smc_rreg = NULL;
- adev->smc_wreg = NULL;
- adev->pcie_rreg = &amdgpu_device_indirect_rreg;
- adev->pcie_wreg = &amdgpu_device_indirect_wreg;
- adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64;
- adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64;
- adev->pciep_rreg = amdgpu_device_pcie_port_rreg;
- adev->pciep_wreg = amdgpu_device_pcie_port_wreg;
-
- /* TODO: will add them during VCN v2 implementation */
- adev->uvd_ctx_rreg = NULL;
- adev->uvd_ctx_wreg = NULL;
-
- adev->didt_rreg = &soc21_didt_rreg;
- adev->didt_wreg = &soc21_didt_wreg;
+ adev->reg.pcie.rreg = &amdgpu_device_indirect_rreg;
+ adev->reg.pcie.wreg = &amdgpu_device_indirect_wreg;
+ adev->reg.pcie.rreg64 = &amdgpu_device_indirect_rreg64;
+ adev->reg.pcie.wreg64 = &amdgpu_device_indirect_wreg64;
+ adev->reg.pcie.port_rreg = &amdgpu_device_pcie_port_rreg;
+ adev->reg.pcie.port_wreg = &amdgpu_device_pcie_port_wreg;
+
+ adev->reg.didt.rreg = &soc21_didt_rreg;
+ adev->reg.didt.wreg = &soc21_didt_wreg;
adev->asic_funcs = &soc21_asic_funcs;
diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c
index ecb6c3fcfbd1..d1adf19a51c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc24.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc24.c
@@ -362,18 +362,12 @@ static int soc24_common_early_init(struct amdgpu_ip_block *ip_block)
struct amdgpu_device *adev = ip_block->adev;
adev->nbio.funcs->set_reg_remap(adev);
- adev->smc_rreg = NULL;
- adev->smc_wreg = NULL;
- adev->pcie_rreg = &amdgpu_device_indirect_rreg;
- adev->pcie_wreg = &amdgpu_device_indirect_wreg;
- adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64;
- adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64;
- adev->pciep_rreg = amdgpu_device_pcie_port_rreg;
- adev->pciep_wreg = amdgpu_device_pcie_port_wreg;
- adev->uvd_ctx_rreg = NULL;
- adev->uvd_ctx_wreg = NULL;
- adev->didt_rreg = NULL;
- adev->didt_wreg = NULL;
+ adev->reg.pcie.rreg = &amdgpu_device_indirect_rreg;
+ adev->reg.pcie.wreg = &amdgpu_device_indirect_wreg;
+ adev->reg.pcie.rreg64 = &amdgpu_device_indirect_rreg64;
+ adev->reg.pcie.wreg64 = &amdgpu_device_indirect_wreg64;
+ adev->reg.pcie.port_rreg = &amdgpu_device_pcie_port_rreg;
+ adev->reg.pcie.port_wreg = &amdgpu_device_pcie_port_wreg;
adev->asic_funcs = &soc24_asic_funcs;
@@ -484,7 +478,7 @@ static int soc24_common_hw_init(struct amdgpu_ip_block *ip_block)
if (adev->nbio.funcs->remap_hdp_registers)
adev->nbio.funcs->remap_hdp_registers(adev);
- if (adev->df.funcs->hw_init)
+ if (adev->df.funcs && adev->df.funcs->hw_init)
adev->df.funcs->hw_init(adev);
/* enable the doorbell aperture */
diff --git a/drivers/gpu/drm/amd/amdgpu/soc_v1_0.c b/drivers/gpu/drm/amd/amdgpu/soc_v1_0.c
index 59ab952d5cce..709b1669b07b 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc_v1_0.c
@@ -41,6 +41,44 @@
#define NORMALIZE_XCC_REG_OFFSET(offset) \
(offset & 0xFFFF)
+#define MID1_REG_RANGE_0_LOW 0x40000
+#define MID1_REG_RANGE_0_HIGH 0x80000
+#define NORMALIZE_MID_REG_OFFSET(offset) \
+ (offset & 0x3FFFF)
+
+static const struct amdgpu_video_codecs vcn_5_0_2_video_codecs_encode_vcn0 = {
+ .codec_count = 0,
+ .codec_array = NULL,
+};
+
+static const struct amdgpu_video_codec_info vcn_5_0_2_video_codecs_decode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs vcn_5_0_2_video_codecs_decode_vcn0 = {
+ .codec_count = ARRAY_SIZE(vcn_5_0_2_video_codecs_decode_array_vcn0),
+ .codec_array = vcn_5_0_2_video_codecs_decode_array_vcn0,
+};
+
+static int soc_v1_0_query_video_codecs(struct amdgpu_device *adev, bool encode,
+ const struct amdgpu_video_codecs **codecs)
+{
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
+ case IP_VERSION(5, 0, 2):
+ if (encode)
+ *codecs = &vcn_5_0_2_video_codecs_encode_vcn0;
+ else
+ *codecs = &vcn_5_0_2_video_codecs_decode_vcn0;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
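
soc_v1_0_query_video_codecs() above is a straight table lookup keyed on the UVD IP version. A compact userspace model of that dispatch is sketched below; the struct layout and the 0x502 key are stand-ins, only the encode/decode split and the -EINVAL fallback mirror the function:

	#include <errno.h>
	#include <stddef.h>
	#include <stdio.h>

	struct codecs { int count; };

	static int query_codecs(int ip_version, int encode, const struct codecs **out)
	{
		static const struct codecs enc = { .count = 0 }; /* no encode support */
		static const struct codecs dec = { .count = 5 }; /* AVC/HEVC/JPEG/VP9/AV1 */

		switch (ip_version) {
		case 0x502:                     /* stand-in for IP_VERSION(5, 0, 2) */
			*out = encode ? &enc : &dec;
			return 0;
		default:
			return -EINVAL;
		}
	}

	int main(void)
	{
		const struct codecs *c = NULL;

		if (!query_codecs(0x502, 0, &c))
			printf("decode codec count: %d\n", c->count);
		return 0;
	}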
/* Initialize doorbells for amdgpu, including multimedia;
 * KFD can use all the rest of the 2M doorbell BAR */
static void soc_v1_0_doorbell_index_init(struct amdgpu_device *adev)
@@ -57,7 +95,7 @@ static void soc_v1_0_doorbell_index_init(struct amdgpu_device *adev)
adev->doorbell_index.userqueue_end = AMDGPU_SOC_V1_0_DOORBELL_USERQUEUE_END;
adev->doorbell_index.xcc_doorbell_range = AMDGPU_SOC_V1_0_DOORBELL_XCC_RANGE;
- adev->doorbell_index.sdma_doorbell_range = 20;
+ adev->doorbell_index.sdma_doorbell_range = 14;
for (i = 0; i < adev->sdma.num_instances; i++)
adev->doorbell_index.sdma_engine[i] =
AMDGPU_SOC_V1_0_DOORBELL_sDMA_ENGINE_START +
@@ -214,23 +252,35 @@ static bool soc_v1_0_need_full_reset(struct amdgpu_device *adev)
static bool soc_v1_0_need_reset_on_init(struct amdgpu_device *adev)
{
- u32 sol_reg;
- if (adev->flags & AMD_IS_APU)
- return false;
+ return false;
+}
- /* Check sOS sign of life register to confirm sys driver and sOS
- * are already been loaded.
- */
- sol_reg = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_81);
- if (sol_reg)
- return true;
+static enum amd_reset_method
+soc_v1_0_asic_reset_method(struct amdgpu_device *adev)
+{
+ if ((adev->gmc.xgmi.supported && adev->gmc.xgmi.connected_to_cpu) ||
+ (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(15, 0, 8))) {
+ if (amdgpu_reset_method != -1)
+ dev_warn_once(adev->dev, "Reset override isn't supported, using Mode2 instead.\n");
- return false;
+ return AMD_RESET_METHOD_MODE2;
+ }
+
+ return amdgpu_reset_method;
}
static int soc_v1_0_asic_reset(struct amdgpu_device *adev)
{
+ switch (soc_v1_0_asic_reset_method(adev)) {
+ case AMD_RESET_METHOD_MODE2:
+ dev_info(adev->dev, "MODE2 reset\n");
+ return amdgpu_dpm_mode2_reset(adev);
+ default:
+ dev_info(adev->dev, "Invalid reset method Not supported\n");
+ return -EOPNOTSUPP;
+ }
+
return 0;
}
@@ -244,28 +294,24 @@ static const struct amdgpu_asic_funcs soc_v1_0_asic_funcs = {
.need_reset_on_init = &soc_v1_0_need_reset_on_init,
.encode_ext_smn_addressing = &soc_v1_0_encode_ext_smn_addressing,
.reset = soc_v1_0_asic_reset,
+ .reset_method = &soc_v1_0_asic_reset_method,
+ .query_video_codecs = &soc_v1_0_query_video_codecs,
};
static int soc_v1_0_common_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
- adev->smc_rreg = NULL;
- adev->smc_wreg = NULL;
- adev->pcie_rreg = &amdgpu_device_indirect_rreg;
- adev->pcie_wreg = &amdgpu_device_indirect_wreg;
- adev->pcie_rreg_ext = &amdgpu_device_indirect_rreg_ext;
- adev->pcie_wreg_ext = &amdgpu_device_indirect_wreg_ext;
- adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64;
- adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64;
- adev->pciep_rreg = amdgpu_device_pcie_port_rreg;
- adev->pciep_wreg = amdgpu_device_pcie_port_wreg;
- adev->pcie_rreg64_ext = &amdgpu_device_indirect_rreg64_ext;
- adev->pcie_wreg64_ext = &amdgpu_device_indirect_wreg64_ext;
- adev->uvd_ctx_rreg = NULL;
- adev->uvd_ctx_wreg = NULL;
- adev->didt_rreg = NULL;
- adev->didt_wreg = NULL;
+ adev->reg.pcie.rreg = &amdgpu_device_indirect_rreg;
+ adev->reg.pcie.wreg = &amdgpu_device_indirect_wreg;
+ adev->reg.pcie.rreg_ext = &amdgpu_device_indirect_rreg_ext;
+ adev->reg.pcie.wreg_ext = &amdgpu_device_indirect_wreg_ext;
+ adev->reg.pcie.rreg64 = &amdgpu_device_indirect_rreg64;
+ adev->reg.pcie.wreg64 = &amdgpu_device_indirect_wreg64;
+ adev->reg.pcie.port_rreg = &amdgpu_device_pcie_port_rreg;
+ adev->reg.pcie.port_wreg = &amdgpu_device_pcie_port_wreg;
+ adev->reg.pcie.rreg64_ext = &amdgpu_device_indirect_rreg64_ext;
+ adev->reg.pcie.wreg64_ext = &amdgpu_device_indirect_wreg64_ext;
adev->asic_funcs = &soc_v1_0_asic_funcs;
@@ -274,8 +320,9 @@ static int soc_v1_0_common_early_init(struct amdgpu_ip_block *ip_block)
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(12, 1, 0):
- adev->cg_flags = 0;
- adev->pg_flags = 0;
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG;
adev->external_rev_id = adev->rev_id + 0x50;
break;
default:
@@ -815,7 +862,7 @@ int soc_v1_0_init_soc_config(struct amdgpu_device *adev)
{
int ret, i;
int xcc_inst_per_aid = 4;
- uint16_t xcc_mask;
+ uint16_t xcc_mask, sdma_mask = 0;
xcc_mask = adev->gfx.xcc_mask;
adev->aid_mask = 0;
@@ -825,10 +872,12 @@ int soc_v1_0_init_soc_config(struct amdgpu_device *adev)
}
adev->sdma.num_inst_per_xcc = 2;
- adev->sdma.num_instances =
- NUM_XCC(adev->gfx.xcc_mask) * adev->sdma.num_inst_per_xcc;
- adev->sdma.sdma_mask =
- GENMASK(adev->sdma.num_instances - 1, 0);
+ for_each_inst(i, adev->gfx.xcc_mask)
+ sdma_mask |=
+ GENMASK(adev->sdma.num_inst_per_xcc - 1, 0) <<
+ (i * adev->sdma.num_inst_per_xcc);
+ adev->sdma.sdma_mask = sdma_mask;
+ adev->sdma.num_instances = NUM_XCC(adev->sdma.sdma_mask);
ret = soc_v1_0_xcp_mgr_init(adev);
if (ret)
@@ -860,3 +909,31 @@ uint32_t soc_v1_0_normalize_xcc_reg_offset(uint32_t reg)
else
return reg;
}
+
+bool soc_v1_0_mid1_reg_range(uint32_t reg)
+{
+ uint32_t normalized_reg = soc_v1_0_normalize_xcc_reg_offset(reg);
+
+ if (soc_v1_0_normalize_xcc_reg_range(normalized_reg))
+ return false;
+
+ return (reg >= MID1_REG_RANGE_0_LOW) && (reg < MID1_REG_RANGE_0_HIGH);
+}
+
+uint32_t soc_v1_0_normalize_reg_offset(uint32_t reg)
+{
+ uint32_t normalized_reg = soc_v1_0_normalize_xcc_reg_offset(reg);
+
+ if (soc_v1_0_normalize_xcc_reg_range(normalized_reg))
+ return normalized_reg;
+
+ /* check if the reg offset is inside MID1. */
+ if (soc_v1_0_mid1_reg_range(reg))
+ return NORMALIZE_MID_REG_OFFSET(reg);
+
+ return reg;
+}
+
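
The new helpers classify a register offset by aperture before stripping it: XCC offsets are masked to 16 bits, MID1 offsets (0x40000..0x80000) to 18 bits, and anything else passes through untouched. A standalone sketch of just the MID1 window test and the two masks, with worked values:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define MID1_LOW  0x40000
	#define MID1_HIGH 0x80000

	static bool mid1_range(uint32_t reg)
	{
		return reg >= MID1_LOW && reg < MID1_HIGH;
	}

	int main(void)
	{
		uint32_t reg = 0x42000;

		/* MID1 offsets keep their low 18 bits, XCC offsets their low 16. */
		if (mid1_range(reg))
			printf("MID1 0x%x -> 0x%x\n", reg, reg & 0x3FFFF);  /* 0x2000 */
		printf("XCC  0x%x -> 0x%x\n", 0x12345u, 0x12345u & 0xFFFF); /* 0x2345 */
		return 0;
	}

The XCC classification itself comes from soc_v1_0_normalize_xcc_reg_range(), which is defined outside this excerpt, so it is not modeled here.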
diff --git a/drivers/gpu/drm/amd/amdgpu/soc_v1_0.h b/drivers/gpu/drm/amd/amdgpu/soc_v1_0.h
index 146996101aa0..16c220fcc4e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc_v1_0.h
@@ -31,7 +31,9 @@ void soc_v1_0_grbm_select(struct amdgpu_device *adev,
int xcc_id);
int soc_v1_0_init_soc_config(struct amdgpu_device *adev);
bool soc_v1_0_normalize_xcc_reg_range(uint32_t reg);
+bool soc_v1_0_mid1_reg_range(uint32_t reg);
uint32_t soc_v1_0_normalize_xcc_reg_offset(uint32_t reg);
+uint32_t soc_v1_0_normalize_reg_offset(uint32_t reg);
u64 soc_v1_0_encode_ext_smn_addressing(int ext_id);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index 1f80045775f5..db505ab32fa0 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -183,50 +183,97 @@ static void umc_v12_0_get_retire_flip_bits(struct amdgpu_device *adev)
if (adev->gmc.gmc_funcs->query_mem_partition_mode)
nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
- /* default setting */
- flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_C2_BIT;
- flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C3_BIT;
- flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_C4_BIT;
- flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R13_BIT;
- flip_bits->flip_row_bit = 13;
- flip_bits->bit_num = 4;
- flip_bits->r13_in_pa = UMC_V12_0_PA_R13_BIT;
-
- if (nps == AMDGPU_NPS2_PARTITION_MODE) {
+ if (adev->gmc.num_umc == 16) {
+ /* default setting */
+ flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_C2_BIT;
+ flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C3_BIT;
+ flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_C4_BIT;
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R13_BIT;
+ flip_bits->flip_row_bit = 13;
+ flip_bits->bit_num = 4;
+ flip_bits->r13_in_pa = UMC_V12_0_PA_R13_BIT;
+
+ if (nps == AMDGPU_NPS2_PARTITION_MODE) {
+ flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH5_BIT;
+ flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C2_BIT;
+ flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B1_BIT;
+ flip_bits->r13_in_pa = UMC_V12_0_PA_R12_BIT;
+ } else if (nps == AMDGPU_NPS4_PARTITION_MODE) {
+ flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH4_BIT;
+ flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_CH5_BIT;
+ flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B0_BIT;
+ flip_bits->r13_in_pa = UMC_V12_0_PA_R11_BIT;
+ }
+
+ switch (vram_type) {
+ case AMDGPU_VRAM_TYPE_HBM:
+ /* other nps modes are taken as nps1 */
+ if (nps == AMDGPU_NPS2_PARTITION_MODE)
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT;
+ else if (nps == AMDGPU_NPS4_PARTITION_MODE)
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT;
+
+ break;
+ case AMDGPU_VRAM_TYPE_HBM3E:
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT;
+ flip_bits->flip_row_bit = 12;
+
+ if (nps == AMDGPU_NPS2_PARTITION_MODE)
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT;
+ else if (nps == AMDGPU_NPS4_PARTITION_MODE)
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R10_BIT;
+
+ break;
+ default:
+ dev_warn(adev->dev,
+ "Unknown HBM type, set RAS retire flip bits to the value in NPS1 mode.\n");
+ break;
+ }
+ } else if (adev->gmc.num_umc == 8) {
+ /* default setting */
flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH5_BIT;
flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C2_BIT;
flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B1_BIT;
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT;
+ flip_bits->flip_row_bit = 12;
+ flip_bits->bit_num = 4;
flip_bits->r13_in_pa = UMC_V12_0_PA_R12_BIT;
- } else if (nps == AMDGPU_NPS4_PARTITION_MODE) {
- flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH4_BIT;
- flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_CH5_BIT;
- flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B0_BIT;
- flip_bits->r13_in_pa = UMC_V12_0_PA_R11_BIT;
- }
- switch (vram_type) {
- case AMDGPU_VRAM_TYPE_HBM:
- /* other nps modes are taken as nps1 */
- if (nps == AMDGPU_NPS2_PARTITION_MODE)
+ if (nps == AMDGPU_NPS2_PARTITION_MODE) {
+ flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH4_BIT;
+ flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_CH5_BIT;
+ flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B0_BIT;
+ flip_bits->r13_in_pa = UMC_V12_0_PA_R11_BIT;
+ }
+
+ switch (vram_type) {
+ case AMDGPU_VRAM_TYPE_HBM:
flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT;
- else if (nps == AMDGPU_NPS4_PARTITION_MODE)
- flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT;
- break;
- case AMDGPU_VRAM_TYPE_HBM3E:
- flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT;
- flip_bits->flip_row_bit = 12;
+ /* other nps modes are taken as nps1 */
+ if (nps == AMDGPU_NPS2_PARTITION_MODE)
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT;
- if (nps == AMDGPU_NPS2_PARTITION_MODE)
+ break;
+ case AMDGPU_VRAM_TYPE_HBM3E:
flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT;
- else if (nps == AMDGPU_NPS4_PARTITION_MODE)
- flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R10_BIT;
+ flip_bits->flip_row_bit = 12;
- break;
- default:
+ if (nps == AMDGPU_NPS2_PARTITION_MODE)
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R10_BIT;
+
+ break;
+ default:
+ dev_warn(adev->dev,
+ "Unknown HBM type, set RAS retire flip bits to the value in NPS1 mode.\n");
+ break;
+ }
+ } else {
dev_warn(adev->dev,
- "Unknown HBM type, set RAS retire flip bits to the value in NPS1 mode.\n");
- break;
+ "Unsupported UMC number(%d), failed to set RAS flip bits.\n",
+ adev->gmc.num_umc);
+
+ return;
}
adev->umc.retire_unit = 0x1 << flip_bits->bit_num;
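
The rewritten flip-bits setup ends by computing retire_unit = 1 << bit_num: with four physical-address flip bits, sixteen aliased pages are retired per bad page, one for every subset of toggled bits. A hedged sketch of that expansion follows; the bit positions are placeholders for the UMC_V12_0_PA_* constants defined outside this excerpt, and whether the driver enumerates siblings exactly this way is likewise not shown here, so the sketch only motivates the retire_unit arithmetic:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		/* Placeholder PA bits standing in for UMC_V12_0_PA_C2/C3/C4/R13. */
		const unsigned int flip_bits[4] = { 8, 9, 10, 21 };
		const unsigned int bit_num = 4;
		const unsigned int retire_unit = 1u << bit_num; /* 16 sibling pages */
		uint64_t bad_pa = 0x123456000ULL;

		for (unsigned int combo = 0; combo < retire_unit; combo++) {
			uint64_t pa = bad_pa;

			/* Toggle each selected flip bit to form one sibling address. */
			for (unsigned int b = 0; b < bit_num; b++)
				if (combo & (1u << b))
					pa ^= 1ULL << flip_bits[b];
			printf("retire candidate %2u: 0x%llx\n", combo,
			       (unsigned long long)pa);
		}
		return 0;
	}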
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index 73ce3d211ed6..8a9ba2276275 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -93,6 +93,11 @@ static void uvd_v4_2_ring_set_wptr(struct amdgpu_ring *ring)
static int uvd_v4_2_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
+
+ /* UVD doesn't work without DPM, which is needed to ungate it. */
+ if (!amdgpu_dpm)
+ return -ENOENT;
+
adev->uvd.num_uvd_inst = 1;
uvd_v4_2_set_ring_funcs(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c
index 9ae424618556..5b7b46d242c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c
@@ -47,11 +47,6 @@
#define VCE_V1_0_DATA_SIZE (7808 * (AMDGPU_MAX_VCE_HANDLES + 1))
#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02
-#define VCE_V1_0_GART_PAGE_START \
- (AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS)
-#define VCE_V1_0_GART_ADDR_START \
- (VCE_V1_0_GART_PAGE_START * AMDGPU_GPU_PAGE_SIZE)
-
static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v1_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -535,27 +530,29 @@ static int vce_v1_0_early_init(struct amdgpu_ip_block *ip_block)
*/
static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device *adev)
{
- u64 gpu_addr = amdgpu_bo_gpu_offset(adev->vce.vcpu_bo);
u64 bo_size = amdgpu_bo_size(adev->vce.vcpu_bo);
u64 max_vcpu_bo_addr = 0xffffffff - bo_size;
u64 num_pages = ALIGN(bo_size, AMDGPU_GPU_PAGE_SIZE) / AMDGPU_GPU_PAGE_SIZE;
u64 pa = amdgpu_gmc_vram_pa(adev, adev->vce.vcpu_bo);
u64 flags = AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_VALID;
+ u64 vce_gart_start_offs;
+ int r;
- /*
- * Check if the VCPU BO already has a 32-bit address.
- * Eg. if MC is configured to put VRAM in the low address range.
- */
- if (gpu_addr <= max_vcpu_bo_addr)
- return 0;
+ r = amdgpu_gtt_mgr_alloc_entries(&adev->mman.gtt_mgr,
+ &adev->vce.gart_node, num_pages,
+ DRM_MM_INSERT_LOW);
+ if (r)
+ return r;
+
+ vce_gart_start_offs = amdgpu_gtt_node_to_byte_offset(&adev->vce.gart_node);
/* Check if we can map the VCPU BO in GART to a 32-bit address. */
- if (adev->gmc.gart_start + VCE_V1_0_GART_ADDR_START > max_vcpu_bo_addr)
+ if (adev->gmc.gart_start + vce_gart_start_offs > max_vcpu_bo_addr)
return -EINVAL;
- amdgpu_gart_map_vram_range(adev, pa, VCE_V1_0_GART_PAGE_START,
+ amdgpu_gart_map_vram_range(adev, pa, adev->vce.gart_node.start,
num_pages, flags, adev->gart.ptr);
- adev->vce.gpu_addr = adev->gmc.gart_start + VCE_V1_0_GART_ADDR_START;
+ adev->vce.gpu_addr = adev->gmc.gart_start + vce_gart_start_offs;
if (adev->vce.gpu_addr > max_vcpu_bo_addr)
return -EINVAL;
@@ -610,7 +607,11 @@ static int vce_v1_0_sw_fini(struct amdgpu_ip_block *ip_block)
if (r)
return r;
- return amdgpu_vce_sw_fini(adev);
+ r = amdgpu_vce_sw_fini(adev);
+
+ amdgpu_gtt_mgr_free_entries(&adev->mman.gtt_mgr, &adev->vce.gart_node);
+
+ return r;
}
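
The VCE change swaps the fixed GART window for one allocated from the GTT manager, but the constraint is unchanged: the VCPU BO's GPU address must stay below 4 GiB minus the BO size. A standalone model of the two -EINVAL checks in vce_v1_0_ensure_vcpu_bo_32bit_addr(), using made-up base and offset values:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t bo_size = 512 * 1024;
		uint64_t max_vcpu_bo_addr = 0xffffffffULL - bo_size;
		uint64_t gart_start = 0x80000000ULL;     /* hypothetical GART base */
		uint64_t gart_node_offs = 0x00200000ULL; /* hypothetical low-GTT node */
		uint64_t gpu_addr = gart_start + gart_node_offs;

		/* The firmware can only address the BO through 32-bit pointers. */
		if (gpu_addr > max_vcpu_bo_addr)
			printf("unmappable: 0x%llx above the 32-bit limit\n",
			       (unsigned long long)gpu_addr);
		else
			printf("ok: VCPU BO at 0x%llx\n",
			       (unsigned long long)gpu_addr);
		return 0;
	}

Allocating the node with DRM_MM_INSERT_LOW, as the patch does, biases the GTT manager toward offsets that keep this check satisfiable.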
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index e78526a4e521..ff3013b97abd 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -134,6 +134,21 @@ static int vcn_v4_0_3_early_init(struct amdgpu_ip_block *ip_block)
return 0;
}
+static bool vcn_v4_0_3_is_psp_fw_reset_supported(struct amdgpu_device *adev)
+{
+ uint32_t fw_ver = adev->psp.sos.fw_version;
+ uint32_t pgm = (fw_ver >> 8) & 0xFF;
+
+ /*
+ * FWDEV-159155: PSP SOS FW must be >= 0x0036015f for program 0x01
+ * before enabling VCN per-queue reset.
+ */
+ if (pgm == 1)
+ return fw_ver >= 0x0036015f;
+
+ return true;
+}
+
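
vcn_v4_0_3_is_psp_fw_reset_supported() reads the program id out of byte 1 of the PSP SOS fw_version and gates per-queue reset on a minimum version for program 1. A small decode sketch; only the program-id byte and the 0x0036015f floor come from the function above, the test values are arbitrary:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	static bool psp_fw_reset_supported(uint32_t fw_ver)
	{
		uint32_t pgm = (fw_ver >> 8) & 0xFF; /* program id byte, per the patch */

		if (pgm == 1)
			return fw_ver >= 0x0036015f; /* minimum version for program 1 */
		return true;                         /* other programs: no gate */
	}

	int main(void)
	{
		printf("0x0036015e -> %d\n", psp_fw_reset_supported(0x0036015e)); /* 0 */
		printf("0x00360160 -> %d\n", psp_fw_reset_supported(0x00360160)); /* 1 */
		printf("0x00420233 -> %d\n", psp_fw_reset_supported(0x00420233)); /* 1 */
		return 0;
	}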
static int vcn_v4_0_3_late_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
@@ -141,7 +156,9 @@ static int vcn_v4_0_3_late_init(struct amdgpu_ip_block *ip_block)
adev->vcn.supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
- if (amdgpu_dpm_reset_vcn_is_supported(adev) && !amdgpu_sriov_vf(adev))
+ if (amdgpu_dpm_reset_vcn_is_supported(adev) &&
+ vcn_v4_0_3_is_psp_fw_reset_supported(adev) &&
+ !amdgpu_sriov_vf(adev))
adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.c
new file mode 100644
index 000000000000..c3d3cc023058
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.c
@@ -0,0 +1,1219 @@
+/*
+ * Copyright 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_vcn.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "soc15_hw_ip.h"
+#include "vcn_v2_0.h"
+#include "vcn_v4_0_3.h"
+
+#include "vcn/vcn_5_0_0_offset.h"
+#include "vcn/vcn_5_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h"
+#include "vcn_v5_0_0.h"
+#include "vcn_v5_0_1.h"
+#include "vcn_v5_0_2.h"
+
+#include <drm/drm_drv.h>
+
+static void vcn_v5_0_2_set_unified_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v5_0_2_set_irq_funcs(struct amdgpu_device *adev);
+static int vcn_v5_0_2_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static void vcn_v5_0_2_unified_ring_set_wptr(struct amdgpu_ring *ring);
+
+/**
+ * vcn_v5_0_2_early_init - set function pointers and load microcode
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Set ring and irq function pointers
+ * Load microcode from filesystem
+ */
+static int vcn_v5_0_2_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
+ /* re-use enc ring as unified ring */
+ adev->vcn.inst[i].num_enc_rings = 1;
+
+ vcn_v5_0_2_set_unified_ring_funcs(adev);
+ vcn_v5_0_2_set_irq_funcs(adev);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ adev->vcn.inst[i].set_pg_state = vcn_v5_0_2_set_pg_state;
+
+ r = amdgpu_vcn_early_init(adev, i);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static void vcn_v5_0_2_fw_shared_init(struct amdgpu_device *adev, int inst_idx)
+{
+ struct amdgpu_vcn5_fw_shared *fw_shared;
+
+ fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+
+ if (fw_shared->sq.is_enabled)
+ return;
+ fw_shared->present_flag_0 =
+ cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
+ fw_shared->sq.is_enabled = 1;
+
+ if (amdgpu_vcnfw_log)
+ amdgpu_vcn_fwlog_init(&adev->vcn.inst[inst_idx]);
+}
+
+/**
+ * vcn_v5_0_2_sw_init - sw init for VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Load firmware and perform software initialization
+ */
+static int vcn_v5_0_2_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int i, r, vcn_inst;
+
+ /* VCN UNIFIED TRAP */
+ r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_VCN,
+ VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst->irq);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ vcn_inst = GET_INST(VCN, i);
+
+ r = amdgpu_vcn_sw_init(adev, i);
+ if (r)
+ return r;
+
+ amdgpu_vcn_setup_ucode(adev, i);
+
+ r = amdgpu_vcn_resume(adev, i);
+ if (r)
+ return r;
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ ring->use_doorbell = true;
+
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 32 * vcn_inst;
+
+ ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id);
+ sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id);
+
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, &adev->vcn.inst[i].sched_score);
+ if (r)
+ return r;
+
+ vcn_v5_0_2_fw_shared_init(adev, i);
+ }
+
+ /* TODO: Add queue reset mask when FW fully supports it */
+ adev->vcn.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
+
+ return amdgpu_vcn_sysfs_reset_mask_init(adev);
+}
+
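
sw_init places each unified ring's doorbell at (vcn_ring0_1 << 1) + 32 * vcn_inst, a 32-slot stride per physical VCN instance, with the << 1 converting a 64-bit doorbell slot index into 32-bit units as elsewhere in amdgpu. A worked example of the arithmetic with a hypothetical base value:

	#include <stdio.h>

	int main(void)
	{
		unsigned int vcn_ring0_1 = 0x88; /* hypothetical base doorbell slot */

		for (unsigned int vcn_inst = 0; vcn_inst < 4; vcn_inst++)
			printf("inst %u -> doorbell index %u\n", vcn_inst,
			       (vcn_ring0_1 << 1) + 32 * vcn_inst);
		return 0;
	}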
+/**
+ * vcn_v5_0_2_sw_fini - sw fini for VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Suspend VCN and free up software allocations
+ */
+static int vcn_v5_0_2_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r, idx;
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ struct amdgpu_vcn5_fw_shared *fw_shared;
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = 0;
+ fw_shared->sq.is_enabled = 0;
+ }
+
+ drm_dev_exit(idx);
+ }
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(adev, i);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ amdgpu_vcn_sw_fini(adev, i);
+
+ amdgpu_vcn_sysfs_reset_mask_fini(adev);
+
+ kfree(adev->vcn.ip_dump); /* TODO: check */
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_2_hw_init - start and test VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing
+ */
+static int vcn_v5_0_2_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int i, r, vcn_inst;
+ uint32_t tmp;
+
+ if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x200)
+ adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ vcn_inst = GET_INST(VCN, i);
+ ring = &adev->vcn.inst[i].ring_enc[0];
+
+ /* Remove Video Tiles antihang mechanism */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS);
+ tmp &= (~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS, tmp);
+
+ if (ring->use_doorbell)
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 11 * vcn_inst),
+ adev->vcn.inst[i].aid_id);
+
+ /* Re-init fw_shared, if required */
+ vcn_v5_0_2_fw_shared_init(adev, i);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_2_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Stop the VCN block, mark ring as not ready any more
+ */
+static int vcn_v5_0_2_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
+ cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work);
+ if (vinst->cur_state != AMD_PG_STATE_GATE)
+ vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_2_suspend - suspend VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * HW fini and suspend VCN block
+ */
+static int vcn_v5_0_2_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
+
+ r = vcn_v5_0_2_hw_fini(ip_block);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(ip_block->adev, i);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+/**
+ * vcn_v5_0_2_resume - resume VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Resume firmware and hw init VCN block
+ */
+static int vcn_v5_0_2_resume(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
+ if (amdgpu_in_reset(adev))
+ vinst->cur_state = AMD_PG_STATE_GATE;
+
+ r = amdgpu_vcn_resume(ip_block->adev, i);
+ if (r)
+ return r;
+ }
+
+ r = vcn_v5_0_2_hw_init(ip_block);
+
+ return r;
+}
+
+/**
+ * vcn_v5_0_2_mc_resume - memory controller programming
+ *
+ * @vinst: VCN instance
+ *
+ * Let the VCN memory controller know its offsets
+ */
+static void vcn_v5_0_2_mc_resume(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t offset, size, vcn_inst;
+ const struct common_firmware_header *hdr;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ vcn_inst = GET_INST(VCN, inst);
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, 0);
+ offset = 0;
+ } else {
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr));
+ offset = size;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0,
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE0, size);
+
+ /* cache window 1: stack */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET1, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2: context */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET2, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+
+ /* non-cache window */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_OFFSET0, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_SIZE0,
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)));
+}
+
+/**
+ * vcn_v5_0_2_mc_resume_dpg_mode - memory controller programming for dpg mode
+ *
+ * @vinst: VCN instance
+ * @indirect: indirectly write sram
+ *
+ * Let the VCN memory controller know its offsets with dpg mode
+ */
+static void vcn_v5_0_2_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t offset, size;
+ const struct common_firmware_header *hdr;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (!indirect) {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN +
+ inst_idx].tmr_mc_addr_lo), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN +
+ inst_idx].tmr_mc_addr_hi), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ } else {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ }
+ offset = 0;
+ } else {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ offset = size;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
+ }
+
+ if (!indirect)
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
+ else
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
+
+ /* cache window 1: stack */
+ if (!indirect) {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ } else {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ }
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
+
+ /* cache window 2: context */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
+
+ /* non-cache window */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)), 0, indirect);
+
+ /* VCN global tiling registers */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+}
+
+/**
+ * vcn_v5_0_2_disable_clock_gating - disable VCN clock gating
+ *
+ * @vinst: VCN instance
+ *
+ * Disable clock gating for VCN block
+ */
+static void vcn_v5_0_2_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
+{
+}
+
+/**
+ * vcn_v5_0_2_enable_clock_gating - enable VCN clock gating
+ *
+ * @vinst: VCN instance
+ *
+ * Enable clock gating for VCN block
+ */
+static void vcn_v5_0_2_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
+{
+}
+
+/**
+ * vcn_v5_0_2_pause_dpg_mode - VCN pause with dpg mode
+ *
+ * @vinst: VCN instance
+ * @new_state: pause state
+ *
+ * Pause dpg mode for VCN block
+ */
+static int vcn_v5_0_2_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ uint32_t reg_data = 0;
+ int vcn_inst;
+
+ vcn_inst = GET_INST(VCN, vinst->inst);
+
+ /* pause/unpause if state is changed */
+ if (vinst->pause_state.fw_based != new_state->fw_based) {
+ DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d %s\n",
+ vinst->pause_state.fw_based, new_state->fw_based,
+ new_state->fw_based ? "VCN_DPG_STATE__PAUSE" : "VCN_DPG_STATE__UNPAUSE");
+ reg_data = RREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE) &
+ (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+ if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
+ /* pause DPG */
+ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data);
+
+ /* wait for ACK */
+ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_DPG_PAUSE,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+ } else {
+ /* unpause DPG, no need to wait */
+ reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data);
+ }
+ vinst->pause_state.fw_based = new_state->fw_based;
+ }
+
+ return 0;
+}
+
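
The pause path above is a classic request/ack handshake: set NJ_PAUSE_DPG_REQ, poll until the firmware raises NJ_PAUSE_DPG_ACK, and on unpause simply clear the request without waiting. A compact userspace model of that pattern; the register layout and the firmware behaviour in hw_tick() are stand-ins:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define PAUSE_REQ  (1u << 0)
	#define PAUSE_ACK  (1u << 1)

	static uint32_t dpg_pause_reg; /* stand-in for regUVD_DPG_PAUSE */

	static void hw_tick(void)
	{
		/* Model firmware behaviour: ack tracks the pending request. */
		if (dpg_pause_reg & PAUSE_REQ)
			dpg_pause_reg |= PAUSE_ACK;
		else
			dpg_pause_reg &= ~PAUSE_ACK;
	}

	static void pause_dpg(bool pause)
	{
		if (pause) {
			dpg_pause_reg |= PAUSE_REQ;             /* request pause */
			do {
				hw_tick();
			} while (!(dpg_pause_reg & PAUSE_ACK)); /* wait for ack */
		} else {
			dpg_pause_reg &= ~PAUSE_REQ;            /* unpause, no wait */
		}
	}

	int main(void)
	{
		pause_dpg(true);
		printf("paused, reg=0x%x\n", dpg_pause_reg);
		pause_dpg(false);
		hw_tick();
		printf("unpaused, reg=0x%x\n", dpg_pause_reg);
		return 0;
	}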
+/**
+ * vcn_v5_0_2_start_dpg_mode - VCN start with dpg mode
+ *
+ * @vinst: VCN instance
+ * @indirect: indirectly write sram
+ *
+ * Start VCN block with dpg mode
+ */
+static int vcn_v5_0_2_start_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ struct amdgpu_vcn5_fw_shared *fw_shared =
+ adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ struct amdgpu_ring *ring;
+ struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__PAUSE};
+ int vcn_inst, ret;
+ uint32_t tmp;
+
+ vcn_inst = GET_INST(VCN, inst_idx);
+
+ /* disable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 1,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* enable dynamic power gating mode */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS);
+ tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS, tmp);
+
+ if (indirect) {
+ adev->vcn.inst[inst_idx].dpg_sram_curr_addr =
+ (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
+ /* Use dummy register 0xDEADBEEF to pass the AID selection to PSP FW */
+ WREG32_SOC24_DPG_MODE(inst_idx, 0xDEADBEEF,
+ adev->vcn.inst[inst_idx].aid_id, 0, true);
+ }
+
+ /* enable VCPU clock */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK | UVD_VCPU_CNTL__BLK_RST_MASK;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* disable master interrupt */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MASTINT_EN), 0, 0, indirect);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ UVD_LMI_CTRL__CRC_RESET_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ 0x00100000L);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_CTRL), tmp, 0, indirect);
+
+ vcn_v5_0_2_mc_resume_dpg_mode(vinst, indirect);
+
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* enable LMI MC and UMC channels */
+ tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_CTRL2), tmp, 0, indirect);
+
+ /* enable master interrupt */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
+
+ if (indirect) {
+ ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM);
+ if (ret) {
+ dev_err(adev->dev, "vcn sram load failed %d\n", ret);
+ return ret;
+ }
+ }
+
+ /* resetting ring, fw should not check RB ring */
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+
+ /* Pause dpg */
+ vcn_v5_0_2_pause_dpg_mode(vinst, &state);
+
+ ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, ring->ring_size / sizeof(uint32_t));
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
+
+ ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ /* resetting done, fw can check RB ring */
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+ /* Read DB_CTRL to flush the write DB_CTRL command. */
+ RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL);
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_2_start - VCN start
+ *
+ * @vinst: VCN instance
+ *
+ * Start VCN block
+ */
+static int vcn_v5_0_2_start(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ struct amdgpu_vcn5_fw_shared *fw_shared;
+ struct amdgpu_ring *ring;
+ uint32_t tmp;
+ int j, k, r, vcn_inst;
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return vcn_v5_0_2_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram);
+
+ vcn_inst = GET_INST(VCN, i);
+
+ /* set VCN status busy */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, tmp);
+
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL, tmp |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ vcn_v5_0_2_mc_resume(vinst);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ for (j = 0; j < 10; ++j) {
+ uint32_t status;
+
+ for (k = 0; k < 100; ++k) {
+ status = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(1000);
+ if (amdgpu_emu_mode == 1)
+ msleep(520);
+ }
+
+ if (amdgpu_emu_mode == 1) {
+ r = -1;
+ if (status & 2) {
+ r = 0;
+ break;
+ }
+ } else {
+ r = 0;
+ if (status & 2)
+ break;
+
+ dev_err(adev->dev,
+ "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i);
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ mdelay(10);
+ r = -1;
+ }
+ }
+
+ if (r) {
+ dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i);
+ return r;
+ }
+
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+
+ /* Read DB_CTRL to flush the write DB_CTRL command. */
+ RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL);
+
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, ring->ring_size / 4);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, tmp);
+ ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_2_stop_dpg_mode - VCN stop with dpg mode
+ *
+ * @vinst: VCN instance
+ *
+ * Stop VCN block with dpg mode
+ */
+static void vcn_v5_0_2_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t tmp;
+ int vcn_inst;
+ struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE};
+
+ vcn_inst = GET_INST(VCN, inst_idx);
+
+ /* Unpause dpg */
+ vcn_v5_0_2_pause_dpg_mode(vinst, &state);
+
+ /* Wait for power status to be 1 */
+ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* wait for read ptr to be equal to write ptr */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_RB_RPTR, tmp, 0xFFFFFFFF);
+
+ /* disable dynamic power gating mode */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+}
+
+/**
+ * vcn_v5_0_2_stop - VCN stop
+ *
+ * @vinst: VCN instance
+ *
+ * Stop VCN block
+ */
+static int vcn_v5_0_2_stop(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ struct amdgpu_vcn5_fw_shared *fw_shared;
+ uint32_t tmp;
+ int r = 0, vcn_inst;
+
+ vcn_inst = GET_INST(VCN, i);
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ vcn_v5_0_2_stop_dpg_mode(vinst);
+ return 0;
+ }
+
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_STATUS, UVD_STATUS__IDLE, 0x7);
+ if (r)
+ return r;
+
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ return r;
+
+ /* disable LMI UMC channel */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2, tmp);
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ return r;
+
+ /* block VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+
+ /* apply soft reset */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+
+ /* clear status */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0);
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_2_unified_ring_get_rptr - get unified read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware unified read pointer
+ */
+static uint64_t vcn_v5_0_2_unified_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_RPTR);
+}
+
+/**
+ * vcn_v5_0_2_unified_ring_get_wptr - get unified write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware unified write pointer
+ */
+static uint64_t vcn_v5_0_2_unified_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ if (ring->use_doorbell)
+ return *ring->wptr_cpu_addr;
+ else
+ return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR);
+}
+
+/**
+ * vcn_v5_0_2_unified_ring_set_wptr - set enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the enc write pointer to the hardware
+ */
+static void vcn_v5_0_2_unified_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ if (ring->use_doorbell) {
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR,
+ lower_32_bits(ring->wptr));
+ }
+}
+
+static const struct amdgpu_ring_funcs vcn_v5_0_2_unified_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_ENC,
+ .align_mask = 0x3f,
+ .nop = VCN_ENC_CMD_NO_OP,
+ .get_rptr = vcn_v5_0_2_unified_ring_get_rptr,
+ .get_wptr = vcn_v5_0_2_unified_ring_get_wptr,
+ .set_wptr = vcn_v5_0_2_unified_ring_set_wptr,
+ .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
+ 5 +
+ 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
+ 1, /* vcn_v2_0_enc_ring_insert_end */
+ .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
+ .emit_ib = vcn_v2_0_enc_ring_emit_ib,
+ .emit_fence = vcn_v2_0_enc_ring_emit_fence,
+ .emit_vm_flush = vcn_v4_0_3_enc_ring_emit_vm_flush,
+ .emit_hdp_flush = vcn_v4_0_3_ring_emit_hdp_flush,
+ .test_ring = amdgpu_vcn_enc_ring_test_ring,
+ .test_ib = amdgpu_vcn_unified_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .insert_end = vcn_v2_0_enc_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg,
+ .emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+/**
+ * vcn_v5_0_2_set_unified_ring_funcs - set unified ring functions
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set unified ring functions
+ */
+static void vcn_v5_0_2_set_unified_ring_funcs(struct amdgpu_device *adev)
+{
+ int i, vcn_inst;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v5_0_2_unified_ring_vm_funcs;
+ adev->vcn.inst[i].ring_enc[0].me = i;
+ vcn_inst = GET_INST(VCN, i);
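+ /* derive which AID (die) hosts this physical VCN instance */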
+ adev->vcn.inst[i].aid_id = vcn_inst / adev->vcn.num_inst_per_aid;
+ }
+}
+
+/**
+ * vcn_v5_0_2_is_idle - check if VCN block is idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block structure
+ *
+ * Check whether the VCN block is idle
+ */
+static bool vcn_v5_0_2_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool ret = true;
+ int i;
+
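+ /* idle only if every VCN instance reports UVD_STATUS__IDLE */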
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
+ ret &= (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) == UVD_STATUS__IDLE);
+
+ return ret;
+}
+
+/**
+ * vcn_v5_0_2_wait_for_idle - wait for VCN block idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Wait for VCN block idle
+ */
+static int vcn_v5_0_2_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, ret = 0;
+
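+ /* poll each instance in turn; bail out on the first timeout */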
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ret = SOC15_WAIT_ON_RREG(VCN, GET_INST(VCN, i), regUVD_STATUS, UVD_STATUS__IDLE,
+ UVD_STATUS__IDLE);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
+/**
+ * vcn_v5_0_2_set_clockgating_state - set VCN block clockgating state
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ * @state: clock gating state
+ *
+ * Set VCN block clockgating state
+ */
+static int vcn_v5_0_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = (state == AMD_CG_STATE_GATE);
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
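+ /* clock gating may only be enabled while the instance is idle */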
+ if (enable) {
+ if (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) != UVD_STATUS__IDLE)
+ return -EBUSY;
+ vcn_v5_0_2_enable_clock_gating(vinst);
+ } else {
+ vcn_v5_0_2_disable_clock_gating(vinst);
+ }
+ }
+
+ return 0;
+}
+
+static int vcn_v5_0_2_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state)
+{
+ int ret = 0;
+
+ if (state == vinst->cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = vcn_v5_0_2_stop(vinst);
+ else
+ ret = vcn_v5_0_2_start(vinst);
+
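+ /* record the new state only if the transition succeeded */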
+ if (!ret)
+ vinst->cur_state = state;
+
+ return ret;
+}
+
+/**
+ * vcn_v5_0_2_process_interrupt - process VCN block interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt sources
+ * @entry: interrupt entry from clients and sources
+ *
+ * Process VCN block interrupt
+ */
+static int vcn_v5_0_2_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t i, inst;
+
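+ /* translate the IH node id into a physical AID index */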
+ i = node_id_to_phys_map[entry->node_id];
+
+ DRM_DEV_DEBUG(adev->dev, "IH: VCN TRAP\n");
+
+ for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst)
+ if (adev->vcn.inst[inst].aid_id == i)
+ break;
+
+ if (inst >= adev->vcn.num_vcn_inst) {
+ dev_WARN_ONCE(adev->dev, 1,
+ "Interrupt received for unknown VCN instance %d",
+ entry->node_id);
+ return 0;
+ }
+
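+ /* the unified queue signals fence completion via the ENC general-purpose source */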
+ switch (entry->src_id) {
+ case VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
+ amdgpu_fence_process(&adev->vcn.inst[inst].ring_enc[0]);
+ break;
+ default:
+ DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs vcn_v5_0_2_irq_funcs = {
+ .process = vcn_v5_0_2_process_interrupt,
+};
+
+/**
+ * vcn_v5_0_2_set_irq_funcs - set VCN block interrupt irq functions
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set VCN block interrupt irq functions
+ */
+static void vcn_v5_0_2_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
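+ /* all instances share the first instance's IRQ source; count one type per instance */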
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
+ adev->vcn.inst->irq.num_types++;
+ adev->vcn.inst->irq.funcs = &vcn_v5_0_2_irq_funcs;
+}
+
+static const struct amd_ip_funcs vcn_v5_0_2_ip_funcs = {
+ .name = "vcn_v5_0_2",
+ .early_init = vcn_v5_0_2_early_init,
+ .late_init = NULL,
+ .sw_init = vcn_v5_0_2_sw_init,
+ .sw_fini = vcn_v5_0_2_sw_fini,
+ .hw_init = vcn_v5_0_2_hw_init,
+ .hw_fini = vcn_v5_0_2_hw_fini,
+ .suspend = vcn_v5_0_2_suspend,
+ .resume = vcn_v5_0_2_resume,
+ .is_idle = vcn_v5_0_2_is_idle,
+ .wait_for_idle = vcn_v5_0_2_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = vcn_v5_0_2_set_clockgating_state,
+ .set_powergating_state = vcn_set_powergating_state,
+};
+
+const struct amdgpu_ip_block_version vcn_v5_0_2_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_VCN,
+ .major = 5,
+ .minor = 0,
+ .rev = 2,
+ .funcs = &vcn_v5_0_2_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.h b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.h
new file mode 100644
index 000000000000..461bdda91eb6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCN_v5_0_2_H__
+#define __VCN_v5_0_2_H__
+
+extern const struct amdgpu_ip_block_version vcn_v5_0_2_ip_block;
+
+#endif /* __VCN_v5_0_2_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 6a574b6c8e63..a256320b92f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -299,11 +299,11 @@ static u32 vi_pcie_rreg(struct amdgpu_device *adev, u32 reg)
unsigned long flags;
u32 r;
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.pcie.lock, flags);
WREG32_NO_KIQ(mmPCIE_INDEX, reg);
(void)RREG32_NO_KIQ(mmPCIE_INDEX);
r = RREG32_NO_KIQ(mmPCIE_DATA);
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
return r;
}
@@ -311,12 +311,12 @@ static void vi_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
unsigned long flags;
- spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.pcie.lock, flags);
WREG32_NO_KIQ(mmPCIE_INDEX, reg);
(void)RREG32_NO_KIQ(mmPCIE_INDEX);
WREG32_NO_KIQ(mmPCIE_DATA, v);
(void)RREG32_NO_KIQ(mmPCIE_DATA);
- spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.pcie.lock, flags);
}
static u32 vi_smc_rreg(struct amdgpu_device *adev, u32 reg)
@@ -324,10 +324,10 @@ static u32 vi_smc_rreg(struct amdgpu_device *adev, u32 reg)
unsigned long flags;
u32 r;
- spin_lock_irqsave(&adev->smc_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.smc.lock, flags);
WREG32_NO_KIQ(mmSMC_IND_INDEX_11, (reg));
r = RREG32_NO_KIQ(mmSMC_IND_DATA_11);
- spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.smc.lock, flags);
return r;
}
@@ -335,10 +335,10 @@ static void vi_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
unsigned long flags;
- spin_lock_irqsave(&adev->smc_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.smc.lock, flags);
WREG32_NO_KIQ(mmSMC_IND_INDEX_11, (reg));
WREG32_NO_KIQ(mmSMC_IND_DATA_11, (v));
- spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.smc.lock, flags);
}
/* smu_8_0_d.h */
@@ -350,10 +350,10 @@ static u32 cz_smc_rreg(struct amdgpu_device *adev, u32 reg)
unsigned long flags;
u32 r;
- spin_lock_irqsave(&adev->smc_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.smc.lock, flags);
WREG32(mmMP0PUB_IND_INDEX, (reg));
r = RREG32(mmMP0PUB_IND_DATA);
- spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.smc.lock, flags);
return r;
}
@@ -361,10 +361,10 @@ static void cz_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
unsigned long flags;
- spin_lock_irqsave(&adev->smc_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.smc.lock, flags);
WREG32(mmMP0PUB_IND_INDEX, (reg));
WREG32(mmMP0PUB_IND_DATA, (v));
- spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.smc.lock, flags);
}
static u32 vi_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg)
@@ -372,10 +372,10 @@ static u32 vi_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg)
unsigned long flags;
u32 r;
- spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.uvd_ctx.lock, flags);
WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff));
r = RREG32(mmUVD_CTX_DATA);
- spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.uvd_ctx.lock, flags);
return r;
}
@@ -383,10 +383,10 @@ static void vi_uvd_ctx_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
unsigned long flags;
- spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.uvd_ctx.lock, flags);
WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff));
WREG32(mmUVD_CTX_DATA, (v));
- spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.uvd_ctx.lock, flags);
}
static u32 vi_didt_rreg(struct amdgpu_device *adev, u32 reg)
@@ -394,10 +394,10 @@ static u32 vi_didt_rreg(struct amdgpu_device *adev, u32 reg)
unsigned long flags;
u32 r;
- spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.didt.lock, flags);
WREG32(mmDIDT_IND_INDEX, (reg));
r = RREG32(mmDIDT_IND_DATA);
- spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.didt.lock, flags);
return r;
}
@@ -405,10 +405,10 @@ static void vi_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
unsigned long flags;
- spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.didt.lock, flags);
WREG32(mmDIDT_IND_INDEX, (reg));
WREG32(mmDIDT_IND_DATA, (v));
- spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.didt.lock, flags);
}
static u32 vi_gc_cac_rreg(struct amdgpu_device *adev, u32 reg)
@@ -416,10 +416,10 @@ static u32 vi_gc_cac_rreg(struct amdgpu_device *adev, u32 reg)
unsigned long flags;
u32 r;
- spin_lock_irqsave(&adev->gc_cac_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.gc_cac.lock, flags);
WREG32(mmGC_CAC_IND_INDEX, (reg));
r = RREG32(mmGC_CAC_IND_DATA);
- spin_unlock_irqrestore(&adev->gc_cac_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.gc_cac.lock, flags);
return r;
}
@@ -427,10 +427,10 @@ static void vi_gc_cac_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
unsigned long flags;
- spin_lock_irqsave(&adev->gc_cac_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.gc_cac.lock, flags);
WREG32(mmGC_CAC_IND_INDEX, (reg));
WREG32(mmGC_CAC_IND_DATA, (v));
- spin_unlock_irqrestore(&adev->gc_cac_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.gc_cac.lock, flags);
}
@@ -649,7 +649,7 @@ static bool vi_read_bios_from_rom(struct amdgpu_device *adev,
dw_ptr = (u32 *)bios;
length_dw = ALIGN(length_bytes, 4) / 4;
/* take the smc lock since we are using the smc index */
- spin_lock_irqsave(&adev->smc_idx_lock, flags);
+ spin_lock_irqsave(&adev->reg.smc.lock, flags);
/* set rom index to 0 */
WREG32(mmSMC_IND_INDEX_11, ixROM_INDEX);
WREG32(mmSMC_IND_DATA_11, 0);
@@ -657,7 +657,7 @@ static bool vi_read_bios_from_rom(struct amdgpu_device *adev,
WREG32(mmSMC_IND_INDEX_11, ixROM_DATA);
for (i = 0; i < length_dw; i++)
dw_ptr[i] = RREG32(mmSMC_IND_DATA_11);
- spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
+ spin_unlock_irqrestore(&adev->reg.smc.lock, flags);
return true;
}
@@ -1454,20 +1454,20 @@ static int vi_common_early_init(struct amdgpu_ip_block *ip_block)
struct amdgpu_device *adev = ip_block->adev;
if (adev->flags & AMD_IS_APU) {
- adev->smc_rreg = &cz_smc_rreg;
- adev->smc_wreg = &cz_smc_wreg;
+ adev->reg.smc.rreg = cz_smc_rreg;
+ adev->reg.smc.wreg = cz_smc_wreg;
} else {
- adev->smc_rreg = &vi_smc_rreg;
- adev->smc_wreg = &vi_smc_wreg;
+ adev->reg.smc.rreg = vi_smc_rreg;
+ adev->reg.smc.wreg = vi_smc_wreg;
}
- adev->pcie_rreg = &vi_pcie_rreg;
- adev->pcie_wreg = &vi_pcie_wreg;
- adev->uvd_ctx_rreg = &vi_uvd_ctx_rreg;
- adev->uvd_ctx_wreg = &vi_uvd_ctx_wreg;
- adev->didt_rreg = &vi_didt_rreg;
- adev->didt_wreg = &vi_didt_wreg;
- adev->gc_cac_rreg = &vi_gc_cac_rreg;
- adev->gc_cac_wreg = &vi_gc_cac_wreg;
+ adev->reg.pcie.rreg = vi_pcie_rreg;
+ adev->reg.pcie.wreg = vi_pcie_wreg;
+ adev->reg.uvd_ctx.rreg = vi_uvd_ctx_rreg;
+ adev->reg.uvd_ctx.wreg = vi_uvd_ctx_wreg;
+ adev->reg.didt.rreg = vi_didt_rreg;
+ adev->reg.didt.wreg = vi_didt_wreg;
+ adev->reg.gc_cac.rreg = vi_gc_cac_rreg;
+ adev->reg.gc_cac.wreg = vi_gc_cac_wreg;
adev->asic_funcs = &vi_asic_funcs;