author    | Linus Torvalds <torvalds@linux-foundation.org> | 2023-08-30 13:34:34 -0700
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-08-30 13:34:34 -0700
commit    | 461f35f014466c4e26dca6be0f431f57297df3f2 (patch)
tree      | 0bd2fded69ba0752ca16c304d3e1880d5f1eb30b /drivers/accel/ivpu/ivpu_mmu_context.c
parent    | 53ea7f624fb91074c2f9458832ed74975ee5d64c (diff)
parent    | 3698a75f5a98d0a6599e2878ab25d30a82dd836a (diff)
Merge tag 'drm-next-2023-08-30' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie:
"The drm core grew a new generic gpu virtual address manager, and new
execution locking helpers. These are used by nouveau now to provide
uAPI support for the userspace Vulkan driver. AMD had a bunch of new
IP core support, loads of refactoring around fbdev, but mostly just
the usual amount of stuff across the board.
core:
- fix gfp flags in drmm_kmalloc
gpuva:
- add new generic GPU VA manager (for nouveau initially)
syncobj:
- add new DRM_IOCTL_SYNCOBJ_EVENTFD ioctl
dma-buf:
- acquire resv lock for mmap() in exporters
- support dma-buf self import automatically
- docs fixes
backlight:
- fix fbdev interactions
atomic:
- improve logging
prime:
- remove struct gem_prime_mmap plus driver updates
gem:
- drm_exec: add locking over multiple GEM objects (see the sketch after the commit list below)
- fix lockdep checking
fbdev:
- make fbdev userspace interfaces optional
- use linux device instead of fbdev device
- use deferred i/o helper macros in various drivers
- Make FB core selectable without drivers
- Remove obsolete flags FBINFO_DEFAULT and FBINFO_FLAG_DEFAULT
- Add helper macros and Kconfig tokens for DMA-allocated framebuffer
ttm:
- support init_on_free
- swapout fixes
panel:
- panel-edp: Support AUO B116XAB01.4
- Support Visionox R66451 plus DT bindings
- ld9040:
- Backlight support
- magic improved
- Kconfig fix
- Convert to of_device_get_match_data()
- Fix Kconfig dependencies
- simple:
- Set bpc value to fix warning
- Set connector type for AUO T215HVN01
- Support Innolux G156HCE-L01 plus DT bindings
- ili9881: Support TDO TL050HDV35 LCD panel plus DT bindings
- startek: Support KD070FHFID015 MIPI-DSI panel plus DT bindings
- sitronix-st7789v:
- Support Inanbo T28CP45TN89 plus DT bindings
- Support EDT ET028013DMA plus DT bindings
- Various cleanups
- edp: Add timings for N140HCA-EAC
- Allow panels and touchscreens to power sequence together
- Fix Innolux G156HCE-L01 LVDS clock
bridge:
- debugfs for chains support
- dw-hdmi:
- Improve support for YUV420 bus format
- CEC suspend/resume
- update EDID on HDMI detect
- dw-mipi-dsi: Fix enable/disable of DSI controller
- lt9611uxc: Use MODULE_FIRMWARE()
- ps8640: Remove broken EDID code
- samsung-dsim: Fix command transfer
- tc358764:
- Handle HS/VS polarity
- Use BIT() macro
- Various cleanups
- adv7511: Fix low refresh rate
- anx7625:
- Switch to macros instead of hardcoded values
- locking fixes
- tc358767: fix hardware delays
- sitronix-st7789v:
- Support panel orientation
- Support rotation property
- Add support for Jasonic JT240MHQS-HWT-EK-E3 plus DT bindings
amdgpu:
- SDMA 6.1.0 support
- HDP 6.1 support
- SMUIO 14.0 support
- PSP 14.0 support
- IH 6.1 support
- Lots of checkpatch cleanups
- GFX 9.4.3 updates
- Add USB PD and IFWI flashing documentation
- GPUVM updates
- RAS fixes
- DRR fixes
- FAMS fixes
- Virtual display fixes
- Soft IH fixes
- SMU13 fixes
- Rework PSP firmware loading for other IPs
- Kernel doc fixes
- DCN 3.0.1 fixes
- LTTPR fixes
- DP MST fixes
- DCN 3.1.6 fixes
- SMU 13.x fixes
- PSP 13.x fixes
- SubVP fixes
- GC 9.4.3 fixes
- Display bandwidth calculation fixes
- VCN4 secure submission fixes
- Allow building DC on RISC-V
- Add visible FB info to bo_print_info
- HBR3 fixes
- GFX9 MCBP fix
- GMC10 vmhub index fix
- GMC11 vmhub index fix
- Create a new doorbell manager
- SR-IOV fixes
- initial freesync panel replay support
- revert zpos properly until igt regression is fixed
- use TTM to manage doorbell BAR
- Expose both current and average power via hwmon if supported
amdkfd:
- Cleanup CRIU dma-buf handling
- Use KIQ to unmap HIQ
- GFX 9.4.3 debugger updates
- GFX 9.4.2 debugger fixes
- Enable cooperative groups for gfx11
- SVM fixes
- Convert older APUs to use dGPU path like newer APUs
- Drop IOMMUv2 path as it is no longer used
- TBA fix for aldebaran
i915:
- ICL+ DSI modeset sequence
- HDCP improvements
- MTL display fixes and cleanups
- HSW/BDW PSR1 restored
- Init DDI ports in VBT order
- General display refactors
- Start using plane scale factor for relative data rate
- Use shmem for dpt objects
- Expose RPS thresholds in sysfs
- Apply GuC SLPC min frequency softlimit correctly
- Extend Wa_14015795083 to TGL, RKL, DG1 and ADL
- Fix a VMA UAF for multi-gt platform
- Do not use stolen on MTL due to HW bug
- Check HuC and GuC version compatibility on MTL
- avoid infinite GPU waits due to premature release of request memory
- Fixes and updates for GSC memory allocation
- Display SDVO fixes
- Take stolen handling out of FBC code
- Make i915_coherent_map_type GT-centric
- Simplify shmem_create_from_object map_type
msm:
- SM6125 MDSS support
- DPU: SM6125 DPU support
- DSI: runtime PM support, burst mode support
- DSI PHY: SM6125 support in 14nm DSI PHY driver
- GPU: prepare for a7xx
- fix a690 firmware
- disable relocs on a6xx and newer
radeon:
- Lots of checkpatch cleanups
ast:
- improve device-model detection
- Represent BMC as virtual connector
- Report DP connection status
nouveau:
- add new exec/bind interface to support Vulkan
- document some getparam ioctls
- improve VRAM detection
- various fixes/cleanups
- workaround DPCD issues
ivpu:
- MMU updates
- debugfs support
- Support vpu4
virtio:
- add sync object support
atmel-hlcdc:
- Support inverted pixclock polarity
etnaviv:
- runtime PM cleanups
- hang handling fixes
exynos:
- use fbdev DMA helpers
- fix possible NULL ptr dereference
komeda:
- always attach encoder
omapdrm:
- use fbdev DMA helpers
ingenic:
- kconfig regmap fixes
loongson:
- support display controller
mediatek:
- Small mtk-dpi cleanups
- DisplayPort: support eDP and aux-bus
- Fix coverity issues
- Fix potential memory leak if vmap() fails
mgag200:
- minor fixes
mxsfb:
- support disabling overlay planes
panfrost:
- fix sync in IRQ handling
ssd130x:
- Support per-controller default resolution plus DT bindings
- Reduce memory-allocation overhead
- Improve intermediate buffer size computation
- Fix allocation of temporary buffers
- Fix pitch computation
- Fix shadow plane allocation
tegra:
- use fbdev DMA helpers
- Convert to devm_platform_ioremap_resource()
- support bridge/connector
- enable PM
tidss:
- Support TI AM625 plus DT bindings
- Implement new connector model plus driver updates
vkms:
- improve write back support
- docs fixes
- support gamma LUT
zynqmp-dpsub:
- misc fixes"
* tag 'drm-next-2023-08-30' of git://anongit.freedesktop.org/drm/drm: (1327 commits)
drm/gpuva_mgr: remove unused prev pointer in __drm_gpuva_sm_map()
drm/tests/drm_kunit_helpers: Place correct function name in the comment header
drm/nouveau: uapi: don't pass NO_PREFETCH flag implicitly
drm/nouveau: uvmm: fix unset region pointer on remap
drm/nouveau: sched: avoid job races between entities
drm/i915: Fix HPD polling, reenabling the output poll work as needed
drm: Add an HPD poll helper to reschedule the poll work
drm/i915: Fix TLB-Invalidation seqno store
drm/ttm/tests: Fix type conversion in ttm_pool_test
drm/msm/a6xx: Bail out early if setting GPU OOB fails
drm/msm/a6xx: Move LLC accessors to the common header
drm/msm/a6xx: Introduce a6xx_llc_read
drm/ttm/tests: Require MMU when testing
drm/panel: simple: Fix Innolux G156HCE-L01 LVDS clock
Revert "Revert "drm/amdgpu/display: change pipe policy for DCN 2.0""
drm/amdgpu: Add memory vendor information
drm/amd: flush any delayed gfxoff on suspend entry
drm/amdgpu: skip fence GFX interrupts disable/enable for S0ix
drm/amdgpu: Remove gfxoff check in GFX v9.4.3
drm/amd/pm: Update pci link speed for smu v13.0.6
...
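Two of the core items above, the GPU VA manager and drm_exec, are helper libraries rather than driver changes. Below is a minimal sketch of the drm_exec locking pattern named in the "gem" section, assuming the 6.6-era API in which drm_exec_init() takes only a flags argument; the function name and error-handling shape are illustrative, not taken from the merge:

	#include <drm/drm_exec.h>
	#include <drm/drm_gem.h>

	static int lock_two_objects(struct drm_gem_object *a, struct drm_gem_object *b)
	{
		struct drm_exec exec;
		int ret = 0;

		drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
		drm_exec_until_all_locked(&exec) {
			/* Take the first reservation lock; on contention the
			 * macro drops everything and restarts the block. */
			ret = drm_exec_lock_obj(&exec, a);
			drm_exec_retry_on_contention(&exec);
			if (ret)
				goto out;

			ret = drm_exec_lock_obj(&exec, b);
			drm_exec_retry_on_contention(&exec);
			if (ret)
				goto out;
		}

		/* ... both objects are locked here ... */
	out:
		drm_exec_fini(&exec);
		return ret;
	}

The point of the helper is that retry loop: on contention it backs off, drops all locks already taken, and restarts, replacing the hand-rolled deadlock-avoidance code drivers previously carried for multi-object locking.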
Diffstat (limited to 'drivers/accel/ivpu/ivpu_mmu_context.c')
-rw-r--r-- | drivers/accel/ivpu/ivpu_mmu_context.c | 302
1 file changed, 209 insertions, 93 deletions
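The diff below converts the ivpu MMU code from a three-level page-table layout (PGD, PMD, PTE) to a four-level one (PGD, PUD, PMD, PTE), widening the VPU virtual address space from 39 to 48 bits. As a rough, self-contained illustration (not driver code; the idx() helper is hypothetical), the new GENMASK index masks split an address into four 9-bit indices, each selecting one of the 512 u64 entries in a 4 KiB table:

	#include <stdint.h>
	#include <stdio.h>

	/* Shifts match the new masks in the diff: PGD GENMASK(47, 39),
	 * PUD GENMASK(38, 30), PMD GENMASK(29, 21), PTE GENMASK(20, 12). */
	static unsigned int idx(uint64_t vpu_addr, unsigned int shift)
	{
		return (vpu_addr >> shift) & 0x1ff; /* 9 bits -> 512 entries */
	}

	int main(void)
	{
		uint64_t vpu_addr = 0x0000804012345000ULL;

		printf("pgd=%u pud=%u pmd=%u pte=%u offset=0x%llx\n",
		       idx(vpu_addr, 39), idx(vpu_addr, 30), idx(vpu_addr, 21),
		       idx(vpu_addr, 12),
		       (unsigned long long)(vpu_addr & 0xfff));
		return 0;
	}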
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c
index 8ce9b12ac356..1d2e554e2c4a 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.c
+++ b/drivers/accel/ivpu/ivpu_mmu_context.c
@@ -11,10 +11,12 @@
 #include "ivpu_mmu.h"
 #include "ivpu_mmu_context.h"
 
-#define IVPU_MMU_PGD_INDEX_MASK          GENMASK(38, 30)
+#define IVPU_MMU_PGD_INDEX_MASK          GENMASK(47, 39)
+#define IVPU_MMU_PUD_INDEX_MASK          GENMASK(38, 30)
 #define IVPU_MMU_PMD_INDEX_MASK          GENMASK(29, 21)
 #define IVPU_MMU_PTE_INDEX_MASK          GENMASK(20, 12)
-#define IVPU_MMU_ENTRY_FLAGS_MASK        GENMASK(11, 0)
+#define IVPU_MMU_ENTRY_FLAGS_MASK        (BIT(52) | GENMASK(11, 0))
+#define IVPU_MMU_ENTRY_FLAG_CONT         BIT(52)
 #define IVPU_MMU_ENTRY_FLAG_NG           BIT(11)
 #define IVPU_MMU_ENTRY_FLAG_AF           BIT(10)
 #define IVPU_MMU_ENTRY_FLAG_USER         BIT(6)
@@ -22,10 +24,13 @@
 #define IVPU_MMU_ENTRY_FLAG_TYPE_PAGE    BIT(1)
 #define IVPU_MMU_ENTRY_FLAG_VALID        BIT(0)
 
-#define IVPU_MMU_PAGE_SIZE    SZ_4K
-#define IVPU_MMU_PTE_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PAGE_SIZE)
-#define IVPU_MMU_PMD_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PTE_MAP_SIZE)
-#define IVPU_MMU_PGTABLE_SIZE (IVPU_MMU_PGTABLE_ENTRIES * sizeof(u64))
+#define IVPU_MMU_PAGE_SIZE       SZ_4K
+#define IVPU_MMU_CONT_PAGES_SIZE (IVPU_MMU_PAGE_SIZE * 16)
+#define IVPU_MMU_PTE_MAP_SIZE    (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PAGE_SIZE)
+#define IVPU_MMU_PMD_MAP_SIZE    (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PTE_MAP_SIZE)
+#define IVPU_MMU_PUD_MAP_SIZE    (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PMD_MAP_SIZE)
+#define IVPU_MMU_PGD_MAP_SIZE    (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PUD_MAP_SIZE)
+#define IVPU_MMU_PGTABLE_SIZE    (IVPU_MMU_PGTABLE_ENTRIES * sizeof(u64))
 
 #define IVPU_MMU_DUMMY_ADDRESS 0xdeadb000
 #define IVPU_MMU_ENTRY_VALID   (IVPU_MMU_ENTRY_FLAG_TYPE_PAGE | IVPU_MMU_ENTRY_FLAG_VALID)
@@ -36,167 +41,268 @@
 static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
 {
 	dma_addr_t pgd_dma;
-	u64 *pgd;
 
-	pgd = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pgd_dma, GFP_KERNEL);
-	if (!pgd)
+	pgtable->pgd_dma_ptr = dma_alloc_coherent(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pgd_dma,
+						  GFP_KERNEL);
+	if (!pgtable->pgd_dma_ptr)
 		return -ENOMEM;
 
-	pgtable->pgd = pgd;
 	pgtable->pgd_dma = pgd_dma;
 
 	return 0;
 }
 
-static void ivpu_mmu_pgtable_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
+static void ivpu_mmu_pgtable_free(struct ivpu_device *vdev, u64 *cpu_addr, dma_addr_t dma_addr)
 {
-	int pgd_index, pmd_index;
+	if (cpu_addr)
+		dma_free_coherent(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, cpu_addr,
+				  dma_addr & ~IVPU_MMU_ENTRY_FLAGS_MASK);
+}
+
+static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
+{
+	int pgd_idx, pud_idx, pmd_idx;
+	dma_addr_t pud_dma, pmd_dma, pte_dma;
+	u64 *pud_dma_ptr, *pmd_dma_ptr, *pte_dma_ptr;
 
-	for (pgd_index = 0; pgd_index < IVPU_MMU_PGTABLE_ENTRIES; ++pgd_index) {
-		u64 **pmd_entries = pgtable->pgd_cpu_entries[pgd_index];
-		u64 *pmd = pgtable->pgd_entries[pgd_index];
+	for (pgd_idx = 0; pgd_idx < IVPU_MMU_PGTABLE_ENTRIES; ++pgd_idx) {
+		pud_dma_ptr = pgtable->pud_ptrs[pgd_idx];
+		pud_dma = pgtable->pgd_dma_ptr[pgd_idx];
 
-		if (!pmd_entries)
+		if (!pud_dma_ptr)
 			continue;
 
-		for (pmd_index = 0; pmd_index < IVPU_MMU_PGTABLE_ENTRIES; ++pmd_index) {
-			if (pmd_entries[pmd_index])
-				dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE,
-					    pmd_entries[pmd_index],
-					    pmd[pmd_index] & ~IVPU_MMU_ENTRY_FLAGS_MASK);
+		for (pud_idx = 0; pud_idx < IVPU_MMU_PGTABLE_ENTRIES; ++pud_idx) {
+			pmd_dma_ptr = pgtable->pmd_ptrs[pgd_idx][pud_idx];
+			pmd_dma = pgtable->pud_ptrs[pgd_idx][pud_idx];
+
+			if (!pmd_dma_ptr)
+				continue;
+
+			for (pmd_idx = 0; pmd_idx < IVPU_MMU_PGTABLE_ENTRIES; ++pmd_idx) {
+				pte_dma_ptr = pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx];
+				pte_dma = pgtable->pmd_ptrs[pgd_idx][pud_idx][pmd_idx];
+
+				ivpu_mmu_pgtable_free(vdev, pte_dma_ptr, pte_dma);
+			}
+
+			kfree(pgtable->pte_ptrs[pgd_idx][pud_idx]);
+			ivpu_mmu_pgtable_free(vdev, pmd_dma_ptr, pmd_dma);
 		}
 
-		kfree(pmd_entries);
-		dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pgtable->pgd_entries[pgd_index],
-			    pgtable->pgd[pgd_index] & ~IVPU_MMU_ENTRY_FLAGS_MASK);
+		kfree(pgtable->pmd_ptrs[pgd_idx]);
+		kfree(pgtable->pte_ptrs[pgd_idx]);
+		ivpu_mmu_pgtable_free(vdev, pud_dma_ptr, pud_dma);
 	}
 
-	dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pgtable->pgd,
-		    pgtable->pgd_dma & ~IVPU_MMU_ENTRY_FLAGS_MASK);
+	ivpu_mmu_pgtable_free(vdev, pgtable->pgd_dma_ptr, pgtable->pgd_dma);
+}
+
+static u64*
+ivpu_mmu_ensure_pud(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, int pgd_idx)
+{
+	u64 *pud_dma_ptr = pgtable->pud_ptrs[pgd_idx];
+	dma_addr_t pud_dma;
+
+	if (pud_dma_ptr)
+		return pud_dma_ptr;
+
+	pud_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pud_dma, GFP_KERNEL);
+	if (!pud_dma_ptr)
+		return NULL;
+
+	drm_WARN_ON(&vdev->drm, pgtable->pmd_ptrs[pgd_idx]);
+	pgtable->pmd_ptrs[pgd_idx] = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL);
+	if (!pgtable->pmd_ptrs[pgd_idx])
+		goto err_free_pud_dma_ptr;
+
+	drm_WARN_ON(&vdev->drm, pgtable->pte_ptrs[pgd_idx]);
+	pgtable->pte_ptrs[pgd_idx] = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL);
+	if (!pgtable->pte_ptrs[pgd_idx])
+		goto err_free_pmd_ptrs;
+
+	pgtable->pud_ptrs[pgd_idx] = pud_dma_ptr;
+	pgtable->pgd_dma_ptr[pgd_idx] = pud_dma | IVPU_MMU_ENTRY_VALID;
+
+	return pud_dma_ptr;
+
+err_free_pmd_ptrs:
+	kfree(pgtable->pmd_ptrs[pgd_idx]);
+
+err_free_pud_dma_ptr:
+	ivpu_mmu_pgtable_free(vdev, pud_dma_ptr, pud_dma);
+	return NULL;
 }
 
 static u64*
-ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, u64 pgd_index)
+ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, int pgd_idx,
+		    int pud_idx)
 {
-	u64 **pmd_entries;
+	u64 *pmd_dma_ptr = pgtable->pmd_ptrs[pgd_idx][pud_idx];
 	dma_addr_t pmd_dma;
-	u64 *pmd;
 
-	if (pgtable->pgd_entries[pgd_index])
-		return pgtable->pgd_entries[pgd_index];
+	if (pmd_dma_ptr)
+		return pmd_dma_ptr;
 
-	pmd = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pmd_dma, GFP_KERNEL);
-	if (!pmd)
+	pmd_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pmd_dma, GFP_KERNEL);
+	if (!pmd_dma_ptr)
 		return NULL;
 
-	pmd_entries = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL);
-	if (!pmd_entries)
-		goto err_free_pgd;
+	drm_WARN_ON(&vdev->drm, pgtable->pte_ptrs[pgd_idx][pud_idx]);
+	pgtable->pte_ptrs[pgd_idx][pud_idx] = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL);
+	if (!pgtable->pte_ptrs[pgd_idx][pud_idx])
+		goto err_free_pmd_dma_ptr;
 
-	pgtable->pgd_entries[pgd_index] = pmd;
-	pgtable->pgd_cpu_entries[pgd_index] = pmd_entries;
-	pgtable->pgd[pgd_index] = pmd_dma | IVPU_MMU_ENTRY_VALID;
+	pgtable->pmd_ptrs[pgd_idx][pud_idx] = pmd_dma_ptr;
+	pgtable->pud_ptrs[pgd_idx][pud_idx] = pmd_dma | IVPU_MMU_ENTRY_VALID;
 
-	return pmd;
+	return pmd_dma_ptr;
 
-err_free_pgd:
-	dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pmd, pmd_dma);
+err_free_pmd_dma_ptr:
+	ivpu_mmu_pgtable_free(vdev, pmd_dma_ptr, pmd_dma);
 	return NULL;
 }
 
 static u64*
 ivpu_mmu_ensure_pte(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
-		    int pgd_index, int pmd_index)
+		    int pgd_idx, int pud_idx, int pmd_idx)
 {
+	u64 *pte_dma_ptr = pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx];
 	dma_addr_t pte_dma;
-	u64 *pte;
 
-	if (pgtable->pgd_cpu_entries[pgd_index][pmd_index])
-		return pgtable->pgd_cpu_entries[pgd_index][pmd_index];
+	if (pte_dma_ptr)
+		return pte_dma_ptr;
 
-	pte = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pte_dma, GFP_KERNEL);
-	if (!pte)
+	pte_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pte_dma, GFP_KERNEL);
+	if (!pte_dma_ptr)
 		return NULL;
 
-	pgtable->pgd_cpu_entries[pgd_index][pmd_index] = pte;
-	pgtable->pgd_entries[pgd_index][pmd_index] = pte_dma | IVPU_MMU_ENTRY_VALID;
+	pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx] = pte_dma_ptr;
+	pgtable->pmd_ptrs[pgd_idx][pud_idx][pmd_idx] = pte_dma | IVPU_MMU_ENTRY_VALID;
 
-	return pte;
+	return pte_dma_ptr;
 }
 
 static int
 ivpu_mmu_context_map_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
-			  u64 vpu_addr, dma_addr_t dma_addr, int prot)
+			  u64 vpu_addr, dma_addr_t dma_addr, u64 prot)
 {
 	u64 *pte;
-	int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
-	int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
-	int pte_index = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
+	int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
+	int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr);
+	int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
+	int pte_idx = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
+
+	/* Allocate PUD - second level page table if needed */
+	if (!ivpu_mmu_ensure_pud(vdev, &ctx->pgtable, pgd_idx))
+		return -ENOMEM;
 
-	/* Allocate PMD - second level page table if needed */
-	if (!ivpu_mmu_ensure_pmd(vdev, &ctx->pgtable, pgd_index))
+	/* Allocate PMD - third level page table if needed */
+	if (!ivpu_mmu_ensure_pmd(vdev, &ctx->pgtable, pgd_idx, pud_idx))
 		return -ENOMEM;
 
-	/* Allocate PTE - third level page table if needed */
-	pte = ivpu_mmu_ensure_pte(vdev, &ctx->pgtable, pgd_index, pmd_index);
+	/* Allocate PTE - fourth level page table if needed */
+	pte = ivpu_mmu_ensure_pte(vdev, &ctx->pgtable, pgd_idx, pud_idx, pmd_idx);
 	if (!pte)
 		return -ENOMEM;
 
-	/* Update PTE - third level page table with DMA address */
-	pte[pte_index] = dma_addr | prot;
+	/* Update PTE */
+	pte[pte_idx] = dma_addr | prot;
+
+	return 0;
+}
+
+static int
+ivpu_mmu_context_map_cont_64k(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u64 vpu_addr,
+			      dma_addr_t dma_addr, u64 prot)
+{
+	size_t size = IVPU_MMU_CONT_PAGES_SIZE;
+
+	drm_WARN_ON(&vdev->drm, !IS_ALIGNED(vpu_addr, size));
+	drm_WARN_ON(&vdev->drm, !IS_ALIGNED(dma_addr, size));
+
+	prot |= IVPU_MMU_ENTRY_FLAG_CONT;
+
+	while (size) {
+		int ret = ivpu_mmu_context_map_page(vdev, ctx, vpu_addr, dma_addr, prot);
+
+		if (ret)
+			return ret;
+
+		size -= IVPU_MMU_PAGE_SIZE;
+		vpu_addr += IVPU_MMU_PAGE_SIZE;
+		dma_addr += IVPU_MMU_PAGE_SIZE;
+	}
 
 	return 0;
 }
 
 static void ivpu_mmu_context_unmap_page(struct ivpu_mmu_context *ctx, u64 vpu_addr)
 {
-	int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
-	int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
-	int pte_index = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
+	int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
+	int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr);
+	int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
+	int pte_idx = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
 
 	/* Update PTE with dummy physical address and clear flags */
-	ctx->pgtable.pgd_cpu_entries[pgd_index][pmd_index][pte_index] = IVPU_MMU_ENTRY_INVALID;
+	ctx->pgtable.pte_ptrs[pgd_idx][pud_idx][pmd_idx][pte_idx] = IVPU_MMU_ENTRY_INVALID;
 }
 
 static void
 ivpu_mmu_context_flush_page_tables(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size)
 {
+	struct ivpu_mmu_pgtable *pgtable = &ctx->pgtable;
 	u64 end_addr = vpu_addr + size;
-	u64 *pgd = ctx->pgtable.pgd;
 
 	/* Align to PMD entry (2 MB) */
 	vpu_addr &= ~(IVPU_MMU_PTE_MAP_SIZE - 1);
+
 	while (vpu_addr < end_addr) {
-		int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
-		u64 pmd_end = (pgd_index + 1) * (u64)IVPU_MMU_PMD_MAP_SIZE;
-		u64 *pmd = ctx->pgtable.pgd_entries[pgd_index];
-
-		while (vpu_addr < end_addr && vpu_addr < pmd_end) {
-			int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
-			u64 *pte = ctx->pgtable.pgd_cpu_entries[pgd_index][pmd_index];
-
-			clflush_cache_range(pte, IVPU_MMU_PGTABLE_SIZE);
-			vpu_addr += IVPU_MMU_PTE_MAP_SIZE;
+		int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
+		u64 pud_end = (pgd_idx + 1) * (u64)IVPU_MMU_PUD_MAP_SIZE;
+
+		while (vpu_addr < end_addr && vpu_addr < pud_end) {
+			int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr);
+			u64 pmd_end = (pud_idx + 1) * (u64)IVPU_MMU_PMD_MAP_SIZE;
+
+			while (vpu_addr < end_addr && vpu_addr < pmd_end) {
+				int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
+
+				clflush_cache_range(pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx],
						    IVPU_MMU_PGTABLE_SIZE);
+				vpu_addr += IVPU_MMU_PTE_MAP_SIZE;
+			}
+			clflush_cache_range(pgtable->pmd_ptrs[pgd_idx][pud_idx],
+					    IVPU_MMU_PGTABLE_SIZE);
 		}
 
-		clflush_cache_range(pmd, IVPU_MMU_PGTABLE_SIZE);
+		clflush_cache_range(pgtable->pud_ptrs[pgd_idx], IVPU_MMU_PGTABLE_SIZE);
 	}
 
-	clflush_cache_range(pgd, IVPU_MMU_PGTABLE_SIZE);
+	clflush_cache_range(pgtable->pgd_dma_ptr, IVPU_MMU_PGTABLE_SIZE);
 }
 
 static int
 ivpu_mmu_context_map_pages(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
-			   u64 vpu_addr, dma_addr_t dma_addr, size_t size, int prot)
+			   u64 vpu_addr, dma_addr_t dma_addr, size_t size, u64 prot)
 {
+	int map_size;
+	int ret;
+
 	while (size) {
-		int ret = ivpu_mmu_context_map_page(vdev, ctx, vpu_addr, dma_addr, prot);
+		if (!ivpu_disable_mmu_cont_pages && size >= IVPU_MMU_CONT_PAGES_SIZE &&
+		    IS_ALIGNED(vpu_addr | dma_addr, IVPU_MMU_CONT_PAGES_SIZE)) {
+			ret = ivpu_mmu_context_map_cont_64k(vdev, ctx, vpu_addr, dma_addr, prot);
+			map_size = IVPU_MMU_CONT_PAGES_SIZE;
+		} else {
+			ret = ivpu_mmu_context_map_page(vdev, ctx, vpu_addr, dma_addr, prot);
+			map_size = IVPU_MMU_PAGE_SIZE;
+		}
 
 		if (ret)
 			return ret;
 
-		vpu_addr += IVPU_MMU_PAGE_SIZE;
-		dma_addr += IVPU_MMU_PAGE_SIZE;
-		size -= IVPU_MMU_PAGE_SIZE;
+		vpu_addr += map_size;
+		dma_addr += map_size;
+		size -= map_size;
 	}
 
 	return 0;
@@ -216,8 +322,8 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
 			 u64 vpu_addr, struct sg_table *sgt, bool llc_coherent)
 {
 	struct scatterlist *sg;
-	int prot;
 	int ret;
+	u64 prot;
 	u64 i;
 
 	if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE))
@@ -237,7 +343,7 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
 	mutex_lock(&ctx->lock);
 
 	for_each_sgtable_dma_sg(sgt, sg, i) {
-		u64 dma_addr = sg_dma_address(sg) - sg->offset;
+		dma_addr_t dma_addr = sg_dma_address(sg) - sg->offset;
 		size_t size = sg_dma_len(sg) + sg->offset;
 
 		ret = ivpu_mmu_context_map_pages(vdev, ctx, vpu_addr, dma_addr, size, prot);
@@ -293,8 +399,14 @@ ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx,
 {
 	lockdep_assert_held(&ctx->lock);
 
-	return drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE,
-					   0, range->start, range->end, DRM_MM_INSERT_BEST);
+	if (!ivpu_disable_mmu_cont_pages && size >= IVPU_MMU_CONT_PAGES_SIZE) {
+		if (!drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_CONT_PAGES_SIZE, 0,
+						 range->start, range->end, DRM_MM_INSERT_BEST))
+			return 0;
+	}
+
+	return drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE, 0,
+					   range->start, range->end, DRM_MM_INSERT_BEST);
 }
 
 void
@@ -319,11 +431,11 @@ ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u3
 		return ret;
 
 	if (!context_id) {
-		start = vdev->hw->ranges.global_low.start;
-		end = vdev->hw->ranges.global_high.end;
+		start = vdev->hw->ranges.global.start;
+		end = vdev->hw->ranges.shave.end;
 	} else {
-		start = vdev->hw->ranges.user_low.start;
-		end = vdev->hw->ranges.user_high.end;
+		start = vdev->hw->ranges.user.start;
+		end = vdev->hw->ranges.dma.end;
 	}
 
 	drm_mm_init(&ctx->mm, start, end - start);
@@ -334,11 +446,15 @@ ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u3
 static void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
 {
-	drm_WARN_ON(&vdev->drm, !ctx->pgtable.pgd);
+	if (drm_WARN_ON(&vdev->drm, !ctx->pgtable.pgd_dma_ptr))
+		return;
 
 	mutex_destroy(&ctx->lock);
-	ivpu_mmu_pgtable_free(vdev, &ctx->pgtable);
+	ivpu_mmu_pgtables_free(vdev, &ctx->pgtable);
 	drm_mm_takedown(&ctx->mm);
+
+	ctx->pgtable.pgd_dma_ptr = NULL;
+	ctx->pgtable.pgd_dma = 0;
 }
 
 int ivpu_mmu_global_context_init(struct ivpu_device *vdev)
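The other mechanism this diff adds is the 64 KiB contiguous-mapping path: when at least IVPU_MMU_CONT_PAGES_SIZE (16 pages) remains to be mapped and both the VPU and DMA addresses are 64 KiB aligned, the 16 PTEs are written with IVPU_MMU_ENTRY_FLAG_CONT (bit 52) set, hinting to the MMU that the range can be cached as a single TLB entry. A small stand-alone sketch of the step-size selection, using hypothetical names (can_map_cont_64k, CONT_64K) rather than the driver's:

	#include <stdbool.h>
	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_4K  4096ULL
	#define CONT_64K (PAGE_4K * 16)	/* mirrors IVPU_MMU_CONT_PAGES_SIZE */

	/* Same test as the driver's map loop: enough bytes left, and both
	 * addresses aligned to the 64 KiB contiguous block. */
	static bool can_map_cont_64k(uint64_t vpu_addr, uint64_t dma_addr, size_t size)
	{
		return size >= CONT_64K && ((vpu_addr | dma_addr) & (CONT_64K - 1)) == 0;
	}

	int main(void)
	{
		uint64_t vpu = 0x10000, dma = 0x40000;	/* both 64 KiB aligned */
		size_t left = 3 * CONT_64K + PAGE_4K;	/* ends with one stray page */

		while (left) {
			size_t step = can_map_cont_64k(vpu, dma, left) ? CONT_64K : PAGE_4K;

			printf("map %zu bytes at vpu=0x%llx\n", step, (unsigned long long)vpu);
			vpu += step;
			dma += step;
			left -= step;
		}
		return 0;
	}

The same preference shows up in ivpu_mmu_context_insert_node_locked() above, which first tries a 64 KiB-aligned allocation so the mapping loop can actually hit the contiguous path, then falls back to 4 KiB alignment.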