From 90ec7a76cc4ba65bfedeb8621cba09cd5a317d8f Mon Sep 17 00:00:00 2001 From: Vivek Gautam Date: Thu, 16 May 2019 15:00:20 +0530 Subject: iommu/io-pgtable-arm: Add support to use system cache Few Qualcomm platforms such as, sdm845 have an additional outer cache called as System cache, aka. Last level cache (LLC) that allows non-coherent devices to upgrade to using caching. This cache sits right before the DDR, and is tightly coupled with the memory controller. The clients using this cache request their slices from this system cache, make it active, and can then start using it. There is a fundamental assumption that non-coherent devices can't access caches. This change adds an exception where they *can* use some level of cache despite still being non-coherent overall. The coherent devices that use cacheable memory, and CPU make use of this system cache by default. Looking at memory types, we have following - a) Normal uncached :- MAIR 0x44, inner non-cacheable, outer non-cacheable; b) Normal cached :- MAIR 0xff, inner read write-back non-transient, outer read write-back non-transient; attribute setting for coherenet I/O devices. and, for non-coherent i/o devices that can allocate in system cache another type gets added - c) Normal sys-cached :- MAIR 0xf4, inner non-cacheable, outer read write-back non-transient Coherent I/O devices use system cache by marking the memory as normal cached. Non-coherent I/O devices should mark the memory as normal sys-cached in page tables to use system cache. Acked-by: Robin Murphy Signed-off-by: Vivek Gautam Signed-off-by: Will Deacon --- include/linux/iommu.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a815cf6f6f47..8ee3fbaf5855 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -41,6 +41,12 @@ * if the IOMMU page table format is equivalent. */ #define IOMMU_PRIV (1 << 5) +/* + * Non-coherent masters on few Qualcomm SoCs can use this page protection flag + * to set correct cacheability attributes to use an outer level of cache - + * last level cache, aka system cache. + */ +#define IOMMU_QCOM_SYS_CACHE (1 << 6) struct iommu_ops; struct iommu_group; -- cgit v1.2.3 From 4f41845b340783eaec9cc2840fe3cb9a00574054 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 25 Jun 2019 12:51:25 +0100 Subject: iommu/io-pgtable: Replace IO_PGTABLE_QUIRK_NO_DMA with specific flag IO_PGTABLE_QUIRK_NO_DMA is a bit of a misnomer, since it's really just an indication of whether or not the page-table walker for the IOMMU is coherent with the CPU caches. Since cache coherency is more than just a quirk, replace the flag with its own field in the io_pgtable_cfg structure. Cc: Bjorn Andersson Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 4 +--- drivers/iommu/arm-smmu.c | 4 +--- drivers/iommu/io-pgtable-arm-v7s.c | 10 +++++----- drivers/iommu/io-pgtable-arm.c | 19 ++++++++----------- drivers/iommu/ipmmu-vmsa.c | 1 + include/linux/io-pgtable.h | 11 ++++------- 6 files changed, 20 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 65de2458999f..8ff8f61d9e1c 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1789,13 +1789,11 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) .pgsize_bitmap = smmu->pgsize_bitmap, .ias = ias, .oas = oas, + .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY, .tlb = &arm_smmu_gather_ops, .iommu_dev = smmu->dev, }; - if (smmu->features & ARM_SMMU_FEAT_COHERENCY) - pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA; - if (smmu_domain->non_strict) pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT; diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 5e54cc0a28b3..009156bb6d42 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -895,13 +895,11 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, .pgsize_bitmap = smmu->pgsize_bitmap, .ias = ias, .oas = oas, + .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK, .tlb = smmu_domain->tlb_ops, .iommu_dev = smmu->dev, }; - if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) - pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA; - if (smmu_domain->non_strict) pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT; diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 9a8a8870e267..8454de93e356 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -215,7 +215,7 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, dev_err(dev, "Page table does not fit in PTE: %pa", &phys); goto out_free; } - if (table && !(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) { + if (table && !cfg->coherent_walk) { dma = dma_map_single(dev, table, size, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma)) goto out_free; @@ -249,7 +249,7 @@ static void __arm_v7s_free_table(void *table, int lvl, struct device *dev = cfg->iommu_dev; size_t size = ARM_V7S_TABLE_SIZE(lvl); - if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) + if (!cfg->coherent_walk) dma_unmap_single(dev, __arm_v7s_dma_addr(table), size, DMA_TO_DEVICE); if (lvl == 1) @@ -261,7 +261,7 @@ static void __arm_v7s_free_table(void *table, int lvl, static void __arm_v7s_pte_sync(arm_v7s_iopte *ptep, int num_entries, struct io_pgtable_cfg *cfg) { - if (cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) + if (cfg->coherent_walk) return; dma_sync_single_for_device(cfg->iommu_dev, __arm_v7s_dma_addr(ptep), @@ -727,7 +727,6 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, IO_PGTABLE_QUIRK_NO_PERMS | IO_PGTABLE_QUIRK_TLBI_ON_MAP | IO_PGTABLE_QUIRK_ARM_MTK_4GB | - IO_PGTABLE_QUIRK_NO_DMA | IO_PGTABLE_QUIRK_NON_STRICT)) return NULL; @@ -846,7 +845,8 @@ static int __init arm_v7s_do_selftests(void) .tlb = &dummy_tlb_ops, .oas = 32, .ias = 32, - .quirks = IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA, + .coherent_walk = true, + .quirks = IO_PGTABLE_QUIRK_ARM_NS, .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, }; unsigned int iova, size, iova_start; diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 2454ac11aa97..91d0a4228b58 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -252,7 +252,7 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp, return NULL; pages = page_address(p); - if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) { + if (!cfg->coherent_walk) { dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma)) goto out_free; @@ -278,7 +278,7 @@ out_free: static void __arm_lpae_free_pages(void *pages, size_t size, struct io_pgtable_cfg *cfg) { - if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) + if (!cfg->coherent_walk) dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages), size, DMA_TO_DEVICE); free_pages((unsigned long)pages, get_order(size)); @@ -296,7 +296,7 @@ static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte, { *ptep = pte; - if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) + if (!cfg->coherent_walk) __arm_lpae_sync_pte(ptep, cfg); } @@ -374,8 +374,7 @@ static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table, old = cmpxchg64_relaxed(ptep, curr, new); - if ((cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) || - (old & ARM_LPAE_PTE_SW_SYNC)) + if (cfg->coherent_walk || (old & ARM_LPAE_PTE_SW_SYNC)) return old; /* Even if it's not ours, there's no point waiting; just kick it */ @@ -416,8 +415,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, pte = arm_lpae_install_table(cptep, ptep, 0, cfg); if (pte) __arm_lpae_free_pages(cptep, tblsz, cfg); - } else if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) && - !(pte & ARM_LPAE_PTE_SW_SYNC)) { + } else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) { __arm_lpae_sync_pte(ptep, cfg); } @@ -799,7 +797,7 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) u64 reg; struct arm_lpae_io_pgtable *data; - if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA | + if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NON_STRICT)) return NULL; @@ -894,8 +892,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) struct arm_lpae_io_pgtable *data; /* The NS quirk doesn't apply at stage 2 */ - if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NO_DMA | - IO_PGTABLE_QUIRK_NON_STRICT)) + if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NON_STRICT)) return NULL; data = arm_lpae_alloc_pgtable(cfg); @@ -1230,7 +1227,7 @@ static int __init arm_lpae_do_selftests(void) struct io_pgtable_cfg cfg = { .tlb = &dummy_tlb_ops, .oas = 48, - .quirks = IO_PGTABLE_QUIRK_NO_DMA, + .coherent_walk = true, }; for (i = 0; i < ARRAY_SIZE(pgsize); ++i) { diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 9a380c10655e..12bcb95bdaa8 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -431,6 +431,7 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) * TODO: Add support for coherent walk through CCI with DVM and remove * cache handling. For now, delegate it to the io-pgtable code. */ + domain->cfg.coherent_walk = false; domain->cfg.iommu_dev = domain->mmu->root->dev; /* diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 76969a564831..b5a450a3bb47 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -44,6 +44,8 @@ struct iommu_gather_ops { * tables. * @ias: Input address (iova) size, in bits. * @oas: Output address (paddr) size, in bits. + * @coherent_walk A flag to indicate whether or not page table walks made + * by the IOMMU are coherent with the CPU caches. * @tlb: TLB management callbacks for this set of tables. * @iommu_dev: The device representing the DMA configuration for the * page table walker. @@ -68,11 +70,6 @@ struct io_pgtable_cfg { * when the SoC is in "4GB mode" and they can only access the high * remap of DRAM (0x1_00000000 to 0x1_ffffffff). * - * IO_PGTABLE_QUIRK_NO_DMA: Guarantees that the tables will only ever - * be accessed by a fully cache-coherent IOMMU or CPU (e.g. for a - * software-emulated IOMMU), such that pagetable updates need not - * be treated as explicit DMA data. - * * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs * on unmap, for DMA domains using the flush queue mechanism for * delayed invalidation. @@ -81,12 +78,12 @@ struct io_pgtable_cfg { #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) #define IO_PGTABLE_QUIRK_TLBI_ON_MAP BIT(2) #define IO_PGTABLE_QUIRK_ARM_MTK_4GB BIT(3) - #define IO_PGTABLE_QUIRK_NO_DMA BIT(4) - #define IO_PGTABLE_QUIRK_NON_STRICT BIT(5) + #define IO_PGTABLE_QUIRK_NON_STRICT BIT(4) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias; unsigned int oas; + bool coherent_walk; const struct iommu_gather_ops *tlb; struct device *iommu_dev; -- cgit v1.2.3