summaryrefslogtreecommitdiff
path: root/drivers/accel/ivpu/ivpu_mmu_context.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-08-30 13:34:34 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-08-30 13:34:34 -0700
commit461f35f014466c4e26dca6be0f431f57297df3f2 (patch)
tree0bd2fded69ba0752ca16c304d3e1880d5f1eb30b /drivers/accel/ivpu/ivpu_mmu_context.c
parent53ea7f624fb91074c2f9458832ed74975ee5d64c (diff)
parent3698a75f5a98d0a6599e2878ab25d30a82dd836a (diff)
downloadlwn-461f35f014466c4e26dca6be0f431f57297df3f2.tar.gz
lwn-461f35f014466c4e26dca6be0f431f57297df3f2.zip
Merge tag 'drm-next-2023-08-30' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie: "The drm core grew a new generic gpu virtual address manager, and new execution locking helpers. These are used by nouveau now to provide uAPI support for the userspace Vulkan driver. AMD had a bunch of new IP core support, loads of refactoring around fbdev, but mostly just the usual amount of stuff across the board. core: - fix gfp flags in drmm_kmalloc gpuva: - add new generic GPU VA manager (for nouveau initially) syncobj: - add new DRM_IOCTL_SYNCOBJ_EVENTFD ioctl dma-buf: - acquire resv lock for mmap() in exporters - support dma-buf self import automatically - docs fixes backlight: - fix fbdev interactions atomic: - improve logging prime: - remove struct gem_prim_mmap plus driver updates gem: - drm_exec: add locking over multiple GEM objects - fix lockdep checking fbdev: - make fbdev userspace interfaces optional - use linux device instead of fbdev device - use deferred i/o helper macros in various drivers - Make FB core selectable without drivers - Remove obsolete flags FBINFO_DEFAULT and FBINFO_FLAG_DEFAULT - Add helper macros and Kconfig tokens for DMA-allocated framebuffer ttm: - support init_on_free - swapout fixes panel: - panel-edp: Support AUO B116XAB01.4 - Support Visionox R66451 plus DT bindings - ld9040: - Backlight support - magic improved - Kconfig fix - Convert to of_device_get_match_data() - Fix Kconfig dependencies - simple: - Set bpc value to fix warning - Set connector type for AUO T215HVN01 - Support Innolux G156HCE-L01 plus DT bindings - ili9881: Support TDO TL050HDV35 LCD panel plus DT bindings - startek: Support KD070FHFID015 MIPI-DSI panel plus DT bindings - sitronix-st7789v: - Support Inanbo T28CP45TN89 plus DT bindings - Support EDT ET028013DMA plus DT bindings - Various cleanups - edp: Add timings for N140HCA-EAC - Allow panels and touchscreens to power sequence together - Fix Innolux G156HCE-L01 LVDS clock bridge: - debugfs for chains support - dw-hdmi: - Improve support for YUV420 bus format - CEC suspend/resume - update EDID on HDMI detect - dw-mipi-dsi: Fix enable/disable of DSI controller - lt9611uxc: Use MODULE_FIRMWARE() - ps8640: Remove broken EDID code - samsung-dsim: Fix command transfer - tc358764: - Handle HS/VS polarity - Use BIT() macro - Various cleanups - adv7511: Fix low refresh rate - anx7625: - Switch to macros instead of hardcoded values - locking fixes - tc358767: fix hardware delays - sitronix-st7789v: - Support panel orientation - Support rotation property - Add support for Jasonic JT240MHQS-HWT-EK-E3 plus DT bindings amdgpu: - SDMA 6.1.0 support - HDP 6.1 support - SMUIO 14.0 support - PSP 14.0 support - IH 6.1 support - Lots of checkpatch cleanups - GFX 9.4.3 updates - Add USB PD and IFWI flashing documentation - GPUVM updates - RAS fixes - DRR fixes - FAMS fixes - Virtual display fixes - Soft IH fixes - SMU13 fixes - Rework PSP firmware loading for other IPs - Kernel doc fixes - DCN 3.0.1 fixes - LTTPR fixes - DP MST fixes - DCN 3.1.6 fixes - SMU 13.x fixes - PSP 13.x fixes - SubVP fixes - GC 9.4.3 fixes - Display bandwidth calculation fixes - VCN4 secure submission fixes - Allow building DC on RISC-V - Add visible FB info to bo_print_info - HBR3 fixes - GFX9 MCBP fix - GMC10 vmhub index fix - GMC11 vmhub index fix - Create a new doorbell manager - SR-IOV fixes - initial freesync panel replay support - revert zpos properly until igt regression is fixeed - use TTM to manage doorbell BAR - Expose both current and average power via hwmon if supported amdkfd: - Cleanup CRIU dma-buf handling - Use KIQ to unmap HIQ - GFX 9.4.3 debugger updates - GFX 9.4.2 debugger fixes - Enable cooperative groups fof gfx11 - SVM fixes - Convert older APUs to use dGPU path like newer APUs - Drop IOMMUv2 path as it is no longer used - TBA fix for aldebaran i915: - ICL+ DSI modeset sequence - HDCP improvements - MTL display fixes and cleanups - HSW/BDW PSR1 restored - Init DDI ports in VBT order - General display refactors - Start using plane scale factor for relative data rate - Use shmem for dpt objects - Expose RPS thresholds in sysfs - Apply GuC SLPC min frequency softlimit correctly - Extend Wa_14015795083 to TGL, RKL, DG1 and ADL - Fix a VMA UAF for multi-gt platform - Do not use stolen on MTL due to HW bug - Check HuC and GuC version compatibility on MTL - avoid infinite GPU waits due to premature release of request memory - Fixes and updates for GSC memory allocation - Display SDVO fixes - Take stolen handling out of FBC code - Make i915_coherent_map_type GT-centric - Simplify shmem_create_from_object map_type msm: - SM6125 MDSS support - DPU: SM6125 DPU support - DSI: runtime PM support, burst mode support - DSI PHY: SM6125 support in 14nm DSI PHY driver - GPU: prepare for a7xx - fix a690 firmware - disable relocs on a6xx and newer radeon: - Lots of checkpatch cleanups ast: - improve device-model detection - Represent BMV as virtual connector - Report DP connection status nouveau: - add new exec/bind interface to support Vulkan - document some getparam ioctls - improve VRAM detection - various fixes/cleanups - workraound DPCD issues ivpu: - MMU updates - debugfs support - Support vpu4 virtio: - add sync object support atmel-hlcdc: - Support inverted pixclock polarity etnaviv: - runtime PM cleanups - hang handling fixes exynos: - use fbdev DMA helpers - fix possible NULL ptr dereference komeda: - always attach encoder omapdrm: - use fbdev DMA helpers ingenic: - kconfig regmap fixes loongson: - support display controller mediatek: - Small mtk-dpi cleanups - DisplayPort: support eDP and aux-bus - Fix coverity issues - Fix potential memory leak if vmap() fail mgag200: - minor fixes mxsfb: - support disabling overlay planes panfrost: - fix sync in IRQ handling ssd130x: - Support per-controller default resolution plus DT bindings - Reduce memory-allocation overhead - Improve intermediate buffer size computation - Fix allocation of temporary buffers - Fix pitch computation - Fix shadow plane allocation tegra: - use fbdev DMA helpers - Convert to devm_platform_ioremap_resource() - support bridge/connector - enable PM tidss: - Support TI AM625 plus DT bindings - Implement new connector model plus driver updates vkms: - improve write back support - docs fixes - support gamma LUT zynqmp-dpsub: - misc fixes" * tag 'drm-next-2023-08-30' of git://anongit.freedesktop.org/drm/drm: (1327 commits) drm/gpuva_mgr: remove unused prev pointer in __drm_gpuva_sm_map() drm/tests/drm_kunit_helpers: Place correct function name in the comment header drm/nouveau: uapi: don't pass NO_PREFETCH flag implicitly drm/nouveau: uvmm: fix unset region pointer on remap drm/nouveau: sched: avoid job races between entities drm/i915: Fix HPD polling, reenabling the output poll work as needed drm: Add an HPD poll helper to reschedule the poll work drm/i915: Fix TLB-Invalidation seqno store drm/ttm/tests: Fix type conversion in ttm_pool_test drm/msm/a6xx: Bail out early if setting GPU OOB fails drm/msm/a6xx: Move LLC accessors to the common header drm/msm/a6xx: Introduce a6xx_llc_read drm/ttm/tests: Require MMU when testing drm/panel: simple: Fix Innolux G156HCE-L01 LVDS clock Revert "Revert "drm/amdgpu/display: change pipe policy for DCN 2.0"" drm/amdgpu: Add memory vendor information drm/amd: flush any delayed gfxoff on suspend entry drm/amdgpu: skip fence GFX interrupts disable/enable for S0ix drm/amdgpu: Remove gfxoff check in GFX v9.4.3 drm/amd/pm: Update pci link speed for smu v13.0.6 ...
Diffstat (limited to 'drivers/accel/ivpu/ivpu_mmu_context.c')
-rw-r--r--drivers/accel/ivpu/ivpu_mmu_context.c302
1 files changed, 209 insertions, 93 deletions
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c
index 8ce9b12ac356..1d2e554e2c4a 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.c
+++ b/drivers/accel/ivpu/ivpu_mmu_context.c
@@ -11,10 +11,12 @@
#include "ivpu_mmu.h"
#include "ivpu_mmu_context.h"
-#define IVPU_MMU_PGD_INDEX_MASK GENMASK(38, 30)
+#define IVPU_MMU_PGD_INDEX_MASK GENMASK(47, 39)
+#define IVPU_MMU_PUD_INDEX_MASK GENMASK(38, 30)
#define IVPU_MMU_PMD_INDEX_MASK GENMASK(29, 21)
#define IVPU_MMU_PTE_INDEX_MASK GENMASK(20, 12)
-#define IVPU_MMU_ENTRY_FLAGS_MASK GENMASK(11, 0)
+#define IVPU_MMU_ENTRY_FLAGS_MASK (BIT(52) | GENMASK(11, 0))
+#define IVPU_MMU_ENTRY_FLAG_CONT BIT(52)
#define IVPU_MMU_ENTRY_FLAG_NG BIT(11)
#define IVPU_MMU_ENTRY_FLAG_AF BIT(10)
#define IVPU_MMU_ENTRY_FLAG_USER BIT(6)
@@ -22,10 +24,13 @@
#define IVPU_MMU_ENTRY_FLAG_TYPE_PAGE BIT(1)
#define IVPU_MMU_ENTRY_FLAG_VALID BIT(0)
-#define IVPU_MMU_PAGE_SIZE SZ_4K
-#define IVPU_MMU_PTE_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PAGE_SIZE)
-#define IVPU_MMU_PMD_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PTE_MAP_SIZE)
-#define IVPU_MMU_PGTABLE_SIZE (IVPU_MMU_PGTABLE_ENTRIES * sizeof(u64))
+#define IVPU_MMU_PAGE_SIZE SZ_4K
+#define IVPU_MMU_CONT_PAGES_SIZE (IVPU_MMU_PAGE_SIZE * 16)
+#define IVPU_MMU_PTE_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PAGE_SIZE)
+#define IVPU_MMU_PMD_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PTE_MAP_SIZE)
+#define IVPU_MMU_PUD_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PMD_MAP_SIZE)
+#define IVPU_MMU_PGD_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PUD_MAP_SIZE)
+#define IVPU_MMU_PGTABLE_SIZE (IVPU_MMU_PGTABLE_ENTRIES * sizeof(u64))
#define IVPU_MMU_DUMMY_ADDRESS 0xdeadb000
#define IVPU_MMU_ENTRY_VALID (IVPU_MMU_ENTRY_FLAG_TYPE_PAGE | IVPU_MMU_ENTRY_FLAG_VALID)
@@ -36,167 +41,268 @@
static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
{
dma_addr_t pgd_dma;
- u64 *pgd;
- pgd = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pgd_dma, GFP_KERNEL);
- if (!pgd)
+ pgtable->pgd_dma_ptr = dma_alloc_coherent(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pgd_dma,
+ GFP_KERNEL);
+ if (!pgtable->pgd_dma_ptr)
return -ENOMEM;
- pgtable->pgd = pgd;
pgtable->pgd_dma = pgd_dma;
return 0;
}
-static void ivpu_mmu_pgtable_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
+static void ivpu_mmu_pgtable_free(struct ivpu_device *vdev, u64 *cpu_addr, dma_addr_t dma_addr)
{
- int pgd_index, pmd_index;
+ if (cpu_addr)
+ dma_free_coherent(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, cpu_addr,
+ dma_addr & ~IVPU_MMU_ENTRY_FLAGS_MASK);
+}
+
+static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
+{
+ int pgd_idx, pud_idx, pmd_idx;
+ dma_addr_t pud_dma, pmd_dma, pte_dma;
+ u64 *pud_dma_ptr, *pmd_dma_ptr, *pte_dma_ptr;
- for (pgd_index = 0; pgd_index < IVPU_MMU_PGTABLE_ENTRIES; ++pgd_index) {
- u64 **pmd_entries = pgtable->pgd_cpu_entries[pgd_index];
- u64 *pmd = pgtable->pgd_entries[pgd_index];
+ for (pgd_idx = 0; pgd_idx < IVPU_MMU_PGTABLE_ENTRIES; ++pgd_idx) {
+ pud_dma_ptr = pgtable->pud_ptrs[pgd_idx];
+ pud_dma = pgtable->pgd_dma_ptr[pgd_idx];
- if (!pmd_entries)
+ if (!pud_dma_ptr)
continue;
- for (pmd_index = 0; pmd_index < IVPU_MMU_PGTABLE_ENTRIES; ++pmd_index) {
- if (pmd_entries[pmd_index])
- dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE,
- pmd_entries[pmd_index],
- pmd[pmd_index] & ~IVPU_MMU_ENTRY_FLAGS_MASK);
+ for (pud_idx = 0; pud_idx < IVPU_MMU_PGTABLE_ENTRIES; ++pud_idx) {
+ pmd_dma_ptr = pgtable->pmd_ptrs[pgd_idx][pud_idx];
+ pmd_dma = pgtable->pud_ptrs[pgd_idx][pud_idx];
+
+ if (!pmd_dma_ptr)
+ continue;
+
+ for (pmd_idx = 0; pmd_idx < IVPU_MMU_PGTABLE_ENTRIES; ++pmd_idx) {
+ pte_dma_ptr = pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx];
+ pte_dma = pgtable->pmd_ptrs[pgd_idx][pud_idx][pmd_idx];
+
+ ivpu_mmu_pgtable_free(vdev, pte_dma_ptr, pte_dma);
+ }
+
+ kfree(pgtable->pte_ptrs[pgd_idx][pud_idx]);
+ ivpu_mmu_pgtable_free(vdev, pmd_dma_ptr, pmd_dma);
}
- kfree(pmd_entries);
- dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pgtable->pgd_entries[pgd_index],
- pgtable->pgd[pgd_index] & ~IVPU_MMU_ENTRY_FLAGS_MASK);
+ kfree(pgtable->pmd_ptrs[pgd_idx]);
+ kfree(pgtable->pte_ptrs[pgd_idx]);
+ ivpu_mmu_pgtable_free(vdev, pud_dma_ptr, pud_dma);
}
- dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pgtable->pgd,
- pgtable->pgd_dma & ~IVPU_MMU_ENTRY_FLAGS_MASK);
+ ivpu_mmu_pgtable_free(vdev, pgtable->pgd_dma_ptr, pgtable->pgd_dma);
+}
+
+static u64*
+ivpu_mmu_ensure_pud(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, int pgd_idx)
+{
+ u64 *pud_dma_ptr = pgtable->pud_ptrs[pgd_idx];
+ dma_addr_t pud_dma;
+
+ if (pud_dma_ptr)
+ return pud_dma_ptr;
+
+ pud_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pud_dma, GFP_KERNEL);
+ if (!pud_dma_ptr)
+ return NULL;
+
+ drm_WARN_ON(&vdev->drm, pgtable->pmd_ptrs[pgd_idx]);
+ pgtable->pmd_ptrs[pgd_idx] = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL);
+ if (!pgtable->pmd_ptrs[pgd_idx])
+ goto err_free_pud_dma_ptr;
+
+ drm_WARN_ON(&vdev->drm, pgtable->pte_ptrs[pgd_idx]);
+ pgtable->pte_ptrs[pgd_idx] = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL);
+ if (!pgtable->pte_ptrs[pgd_idx])
+ goto err_free_pmd_ptrs;
+
+ pgtable->pud_ptrs[pgd_idx] = pud_dma_ptr;
+ pgtable->pgd_dma_ptr[pgd_idx] = pud_dma | IVPU_MMU_ENTRY_VALID;
+
+ return pud_dma_ptr;
+
+err_free_pmd_ptrs:
+ kfree(pgtable->pmd_ptrs[pgd_idx]);
+
+err_free_pud_dma_ptr:
+ ivpu_mmu_pgtable_free(vdev, pud_dma_ptr, pud_dma);
+ return NULL;
}
static u64*
-ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, u64 pgd_index)
+ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, int pgd_idx,
+ int pud_idx)
{
- u64 **pmd_entries;
+ u64 *pmd_dma_ptr = pgtable->pmd_ptrs[pgd_idx][pud_idx];
dma_addr_t pmd_dma;
- u64 *pmd;
- if (pgtable->pgd_entries[pgd_index])
- return pgtable->pgd_entries[pgd_index];
+ if (pmd_dma_ptr)
+ return pmd_dma_ptr;
- pmd = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pmd_dma, GFP_KERNEL);
- if (!pmd)
+ pmd_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pmd_dma, GFP_KERNEL);
+ if (!pmd_dma_ptr)
return NULL;
- pmd_entries = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL);
- if (!pmd_entries)
- goto err_free_pgd;
+ drm_WARN_ON(&vdev->drm, pgtable->pte_ptrs[pgd_idx][pud_idx]);
+ pgtable->pte_ptrs[pgd_idx][pud_idx] = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL);
+ if (!pgtable->pte_ptrs[pgd_idx][pud_idx])
+ goto err_free_pmd_dma_ptr;
- pgtable->pgd_entries[pgd_index] = pmd;
- pgtable->pgd_cpu_entries[pgd_index] = pmd_entries;
- pgtable->pgd[pgd_index] = pmd_dma | IVPU_MMU_ENTRY_VALID;
+ pgtable->pmd_ptrs[pgd_idx][pud_idx] = pmd_dma_ptr;
+ pgtable->pud_ptrs[pgd_idx][pud_idx] = pmd_dma | IVPU_MMU_ENTRY_VALID;
- return pmd;
+ return pmd_dma_ptr;
-err_free_pgd:
- dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pmd, pmd_dma);
+err_free_pmd_dma_ptr:
+ ivpu_mmu_pgtable_free(vdev, pmd_dma_ptr, pmd_dma);
return NULL;
}
static u64*
ivpu_mmu_ensure_pte(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
- int pgd_index, int pmd_index)
+ int pgd_idx, int pud_idx, int pmd_idx)
{
+ u64 *pte_dma_ptr = pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx];
dma_addr_t pte_dma;
- u64 *pte;
- if (pgtable->pgd_cpu_entries[pgd_index][pmd_index])
- return pgtable->pgd_cpu_entries[pgd_index][pmd_index];
+ if (pte_dma_ptr)
+ return pte_dma_ptr;
- pte = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pte_dma, GFP_KERNEL);
- if (!pte)
+ pte_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pte_dma, GFP_KERNEL);
+ if (!pte_dma_ptr)
return NULL;
- pgtable->pgd_cpu_entries[pgd_index][pmd_index] = pte;
- pgtable->pgd_entries[pgd_index][pmd_index] = pte_dma | IVPU_MMU_ENTRY_VALID;
+ pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx] = pte_dma_ptr;
+ pgtable->pmd_ptrs[pgd_idx][pud_idx][pmd_idx] = pte_dma | IVPU_MMU_ENTRY_VALID;
- return pte;
+ return pte_dma_ptr;
}
static int
ivpu_mmu_context_map_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
- u64 vpu_addr, dma_addr_t dma_addr, int prot)
+ u64 vpu_addr, dma_addr_t dma_addr, u64 prot)
{
u64 *pte;
- int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
- int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
- int pte_index = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
+ int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
+ int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr);
+ int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
+ int pte_idx = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
+
+ /* Allocate PUD - second level page table if needed */
+ if (!ivpu_mmu_ensure_pud(vdev, &ctx->pgtable, pgd_idx))
+ return -ENOMEM;
- /* Allocate PMD - second level page table if needed */
- if (!ivpu_mmu_ensure_pmd(vdev, &ctx->pgtable, pgd_index))
+ /* Allocate PMD - third level page table if needed */
+ if (!ivpu_mmu_ensure_pmd(vdev, &ctx->pgtable, pgd_idx, pud_idx))
return -ENOMEM;
- /* Allocate PTE - third level page table if needed */
- pte = ivpu_mmu_ensure_pte(vdev, &ctx->pgtable, pgd_index, pmd_index);
+ /* Allocate PTE - fourth level page table if needed */
+ pte = ivpu_mmu_ensure_pte(vdev, &ctx->pgtable, pgd_idx, pud_idx, pmd_idx);
if (!pte)
return -ENOMEM;
- /* Update PTE - third level page table with DMA address */
- pte[pte_index] = dma_addr | prot;
+ /* Update PTE */
+ pte[pte_idx] = dma_addr | prot;
+
+ return 0;
+}
+
+static int
+ivpu_mmu_context_map_cont_64k(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u64 vpu_addr,
+ dma_addr_t dma_addr, u64 prot)
+{
+ size_t size = IVPU_MMU_CONT_PAGES_SIZE;
+
+ drm_WARN_ON(&vdev->drm, !IS_ALIGNED(vpu_addr, size));
+ drm_WARN_ON(&vdev->drm, !IS_ALIGNED(dma_addr, size));
+
+ prot |= IVPU_MMU_ENTRY_FLAG_CONT;
+
+ while (size) {
+ int ret = ivpu_mmu_context_map_page(vdev, ctx, vpu_addr, dma_addr, prot);
+
+ if (ret)
+ return ret;
+
+ size -= IVPU_MMU_PAGE_SIZE;
+ vpu_addr += IVPU_MMU_PAGE_SIZE;
+ dma_addr += IVPU_MMU_PAGE_SIZE;
+ }
return 0;
}
static void ivpu_mmu_context_unmap_page(struct ivpu_mmu_context *ctx, u64 vpu_addr)
{
- int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
- int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
- int pte_index = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
+ int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
+ int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr);
+ int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
+ int pte_idx = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
/* Update PTE with dummy physical address and clear flags */
- ctx->pgtable.pgd_cpu_entries[pgd_index][pmd_index][pte_index] = IVPU_MMU_ENTRY_INVALID;
+ ctx->pgtable.pte_ptrs[pgd_idx][pud_idx][pmd_idx][pte_idx] = IVPU_MMU_ENTRY_INVALID;
}
static void
ivpu_mmu_context_flush_page_tables(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size)
{
+ struct ivpu_mmu_pgtable *pgtable = &ctx->pgtable;
u64 end_addr = vpu_addr + size;
- u64 *pgd = ctx->pgtable.pgd;
/* Align to PMD entry (2 MB) */
vpu_addr &= ~(IVPU_MMU_PTE_MAP_SIZE - 1);
while (vpu_addr < end_addr) {
- int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
- u64 pmd_end = (pgd_index + 1) * (u64)IVPU_MMU_PMD_MAP_SIZE;
- u64 *pmd = ctx->pgtable.pgd_entries[pgd_index];
-
- while (vpu_addr < end_addr && vpu_addr < pmd_end) {
- int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
- u64 *pte = ctx->pgtable.pgd_cpu_entries[pgd_index][pmd_index];
-
- clflush_cache_range(pte, IVPU_MMU_PGTABLE_SIZE);
- vpu_addr += IVPU_MMU_PTE_MAP_SIZE;
+ int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
+ u64 pud_end = (pgd_idx + 1) * (u64)IVPU_MMU_PUD_MAP_SIZE;
+
+ while (vpu_addr < end_addr && vpu_addr < pud_end) {
+ int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr);
+ u64 pmd_end = (pud_idx + 1) * (u64)IVPU_MMU_PMD_MAP_SIZE;
+
+ while (vpu_addr < end_addr && vpu_addr < pmd_end) {
+ int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
+
+ clflush_cache_range(pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx],
+ IVPU_MMU_PGTABLE_SIZE);
+ vpu_addr += IVPU_MMU_PTE_MAP_SIZE;
+ }
+ clflush_cache_range(pgtable->pmd_ptrs[pgd_idx][pud_idx],
+ IVPU_MMU_PGTABLE_SIZE);
}
- clflush_cache_range(pmd, IVPU_MMU_PGTABLE_SIZE);
+ clflush_cache_range(pgtable->pud_ptrs[pgd_idx], IVPU_MMU_PGTABLE_SIZE);
}
- clflush_cache_range(pgd, IVPU_MMU_PGTABLE_SIZE);
+ clflush_cache_range(pgtable->pgd_dma_ptr, IVPU_MMU_PGTABLE_SIZE);
}
static int
ivpu_mmu_context_map_pages(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
- u64 vpu_addr, dma_addr_t dma_addr, size_t size, int prot)
+ u64 vpu_addr, dma_addr_t dma_addr, size_t size, u64 prot)
{
+ int map_size;
+ int ret;
+
while (size) {
- int ret = ivpu_mmu_context_map_page(vdev, ctx, vpu_addr, dma_addr, prot);
+ if (!ivpu_disable_mmu_cont_pages && size >= IVPU_MMU_CONT_PAGES_SIZE &&
+ IS_ALIGNED(vpu_addr | dma_addr, IVPU_MMU_CONT_PAGES_SIZE)) {
+ ret = ivpu_mmu_context_map_cont_64k(vdev, ctx, vpu_addr, dma_addr, prot);
+ map_size = IVPU_MMU_CONT_PAGES_SIZE;
+ } else {
+ ret = ivpu_mmu_context_map_page(vdev, ctx, vpu_addr, dma_addr, prot);
+ map_size = IVPU_MMU_PAGE_SIZE;
+ }
if (ret)
return ret;
- vpu_addr += IVPU_MMU_PAGE_SIZE;
- dma_addr += IVPU_MMU_PAGE_SIZE;
- size -= IVPU_MMU_PAGE_SIZE;
+ vpu_addr += map_size;
+ dma_addr += map_size;
+ size -= map_size;
}
return 0;
@@ -216,8 +322,8 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
u64 vpu_addr, struct sg_table *sgt, bool llc_coherent)
{
struct scatterlist *sg;
- int prot;
int ret;
+ u64 prot;
u64 i;
if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE))
@@ -237,7 +343,7 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
mutex_lock(&ctx->lock);
for_each_sgtable_dma_sg(sgt, sg, i) {
- u64 dma_addr = sg_dma_address(sg) - sg->offset;
+ dma_addr_t dma_addr = sg_dma_address(sg) - sg->offset;
size_t size = sg_dma_len(sg) + sg->offset;
ret = ivpu_mmu_context_map_pages(vdev, ctx, vpu_addr, dma_addr, size, prot);
@@ -293,8 +399,14 @@ ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx,
{
lockdep_assert_held(&ctx->lock);
- return drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE,
- 0, range->start, range->end, DRM_MM_INSERT_BEST);
+ if (!ivpu_disable_mmu_cont_pages && size >= IVPU_MMU_CONT_PAGES_SIZE) {
+ if (!drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_CONT_PAGES_SIZE, 0,
+ range->start, range->end, DRM_MM_INSERT_BEST))
+ return 0;
+ }
+
+ return drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE, 0,
+ range->start, range->end, DRM_MM_INSERT_BEST);
}
void
@@ -319,11 +431,11 @@ ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u3
return ret;
if (!context_id) {
- start = vdev->hw->ranges.global_low.start;
- end = vdev->hw->ranges.global_high.end;
+ start = vdev->hw->ranges.global.start;
+ end = vdev->hw->ranges.shave.end;
} else {
- start = vdev->hw->ranges.user_low.start;
- end = vdev->hw->ranges.user_high.end;
+ start = vdev->hw->ranges.user.start;
+ end = vdev->hw->ranges.dma.end;
}
drm_mm_init(&ctx->mm, start, end - start);
@@ -334,11 +446,15 @@ ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u3
static void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
{
- drm_WARN_ON(&vdev->drm, !ctx->pgtable.pgd);
+ if (drm_WARN_ON(&vdev->drm, !ctx->pgtable.pgd_dma_ptr))
+ return;
mutex_destroy(&ctx->lock);
- ivpu_mmu_pgtable_free(vdev, &ctx->pgtable);
+ ivpu_mmu_pgtables_free(vdev, &ctx->pgtable);
drm_mm_takedown(&ctx->mm);
+
+ ctx->pgtable.pgd_dma_ptr = NULL;
+ ctx->pgtable.pgd_dma = 0;
}
int ivpu_mmu_global_context_init(struct ivpu_device *vdev)