summaryrefslogtreecommitdiff
path: root/arch/arc/mm/cache.c
diff options
context:
space:
mode:
authorAlexey Brodkin <abrodkin@synopsys.com>2015-05-25 19:54:28 +0300
committerVineet Gupta <vgupta@synopsys.com>2015-08-20 18:11:17 +0530
commitf2b0b25a37a6db12580dcdfdf00f020e5e0e3a43 (patch)
tree13670a5cef1737ba025aea9301875e0504193f53 /arch/arc/mm/cache.c
parent2a4401687c11def29fe5a9b23ab98bf7ab1dce61 (diff)
downloadlwn-f2b0b25a37a6db12580dcdfdf00f020e5e0e3a43.tar.gz
lwn-f2b0b25a37a6db12580dcdfdf00f020e5e0e3a43.zip
ARCv2: Support IO Coherency and permutations involving L1 and L2 caches
In case of ARCv2 CPU there're could be following configurations that affect cache handling for data exchanged with peripherals via DMA: [1] Only L1 cache exists [2] Both L1 and L2 exist, but no IO coherency unit [3] L1, L2 caches and IO coherency unit exist Current implementation takes care of [1] and [2]. Moreover support of [2] is implemented with run-time check for SLC existence which is not super optimal. This patch introduces support of [3] and rework of DMA ops usage. Instead of doing run-time check every time a particular DMA op is executed we'll have 3 different implementations of DMA ops and select appropriate one during init. As for IOC support for it we need: [a] Implement empty DMA ops because IOC takes care of cache coherency with DMAed data [b] Route dma_alloc_coherent() via dma_alloc_noncoherent() This is required to make IOC work in first place and also serves as optimization as LD/ST to coherent buffers can be srviced from caches w/o going all the way to memory Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com> [vgupta: -Added some comments about IOC gains -Marked dma ops as static, -Massaged changelog a bit] Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Diffstat (limited to 'arch/arc/mm/cache.c')
-rw-r--r--arch/arc/mm/cache.c114
1 files changed, 98 insertions, 16 deletions
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 1cd6695b6ab5..25e7077d4c04 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -22,10 +22,15 @@
#include <asm/setup.h>
static int l2_line_sz;
+int ioc_exists;
void (*_cache_line_loop_ic_fn)(unsigned long paddr, unsigned long vaddr,
unsigned long sz, const int cacheop);
+void (*__dma_cache_wback_inv)(unsigned long start, unsigned long sz);
+void (*__dma_cache_inv)(unsigned long start, unsigned long sz);
+void (*__dma_cache_wback)(unsigned long start, unsigned long sz);
+
char *arc_cache_mumbojumbo(int c, char *buf, int len)
{
int n = 0;
@@ -50,6 +55,9 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len)
n += scnprintf(buf + n, len - n,
"SLC\t\t: %uK, %uB Line\n", p->sz_k, p->line_len);
+ if (ioc_exists)
+ n += scnprintf(buf + n, len - n, "IOC\t\t: exists\n");
+
return buf;
}
@@ -80,6 +88,14 @@ void read_decode_cache_bcr(void)
#endif
} slc_cfg;
+ struct bcr_clust_cfg {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8;
+#else
+ unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7;
+#endif
+ } cbcr;
+
p_ic = &cpuinfo_arc700[cpu].icache;
READ_BCR(ARC_REG_IC_BCR, ibcr);
@@ -133,6 +149,10 @@ slc_chk:
p_slc->sz_k = 128 << slc_cfg.sz;
l2_line_sz = p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64;
}
+
+ READ_BCR(ARC_REG_CLUSTER_BCR, cbcr);
+ if (cbcr.c)
+ ioc_exists = 1;
}
/*
@@ -516,11 +536,6 @@ noinline void slc_op(unsigned long paddr, unsigned long sz, const int op)
#endif
}
-static inline int need_slc_flush(void)
-{
- return is_isa_arcv2() && l2_line_sz;
-}
-
/***********************************************************
* Exported APIs
*/
@@ -569,30 +584,74 @@ void flush_dcache_page(struct page *page)
}
EXPORT_SYMBOL(flush_dcache_page);
-void dma_cache_wback_inv(unsigned long start, unsigned long sz)
+/*
+ * DMA ops for systems with L1 cache only
+ * Make memory coherent with L1 cache by flushing/invalidating L1 lines
+ */
+static void __dma_cache_wback_inv_l1(unsigned long start, unsigned long sz)
{
__dc_line_op_k(start, sz, OP_FLUSH_N_INV);
+}
- if (need_slc_flush())
- slc_op(start, sz, OP_FLUSH_N_INV);
+static void __dma_cache_inv_l1(unsigned long start, unsigned long sz)
+{
+ __dc_line_op_k(start, sz, OP_INV);
}
-EXPORT_SYMBOL(dma_cache_wback_inv);
-void dma_cache_inv(unsigned long start, unsigned long sz)
+static void __dma_cache_wback_l1(unsigned long start, unsigned long sz)
+{
+ __dc_line_op_k(start, sz, OP_FLUSH);
+}
+
+/*
+ * DMA ops for systems with both L1 and L2 caches, but without IOC
+ * Both L1 and L2 lines need to be explicity flushed/invalidated
+ */
+static void __dma_cache_wback_inv_slc(unsigned long start, unsigned long sz)
+{
+ __dc_line_op_k(start, sz, OP_FLUSH_N_INV);
+ slc_op(start, sz, OP_FLUSH_N_INV);
+}
+
+static void __dma_cache_inv_slc(unsigned long start, unsigned long sz)
{
__dc_line_op_k(start, sz, OP_INV);
+ slc_op(start, sz, OP_INV);
+}
- if (need_slc_flush())
- slc_op(start, sz, OP_INV);
+static void __dma_cache_wback_slc(unsigned long start, unsigned long sz)
+{
+ __dc_line_op_k(start, sz, OP_FLUSH);
+ slc_op(start, sz, OP_FLUSH);
+}
+
+/*
+ * DMA ops for systems with IOC
+ * IOC hardware snoops all DMA traffic keeping the caches consistent with
+ * memory - eliding need for any explicit cache maintenance of DMA buffers
+ */
+static void __dma_cache_wback_inv_ioc(unsigned long start, unsigned long sz) {}
+static void __dma_cache_inv_ioc(unsigned long start, unsigned long sz) {}
+static void __dma_cache_wback_ioc(unsigned long start, unsigned long sz) {}
+
+/*
+ * Exported DMA API
+ */
+void dma_cache_wback_inv(unsigned long start, unsigned long sz)
+{
+ __dma_cache_wback_inv(start, sz);
+}
+EXPORT_SYMBOL(dma_cache_wback_inv);
+
+void dma_cache_inv(unsigned long start, unsigned long sz)
+{
+ __dma_cache_inv(start, sz);
}
EXPORT_SYMBOL(dma_cache_inv);
void dma_cache_wback(unsigned long start, unsigned long sz)
{
- __dc_line_op_k(start, sz, OP_FLUSH);
-
- if (need_slc_flush())
- slc_op(start, sz, OP_FLUSH);
+ __dma_cache_wback(start, sz);
}
EXPORT_SYMBOL(dma_cache_wback);
@@ -848,4 +907,27 @@ void arc_cache_init(void)
panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
}
}
+
+ if (is_isa_arcv2() && ioc_exists) {
+ /* IO coherency base - 0x8z */
+ write_aux_reg(ARC_REG_IO_COH_AP0_BASE, 0x80000);
+ /* IO coherency aperture size - 512Mb: 0x8z-0xAz */
+ write_aux_reg(ARC_REG_IO_COH_AP0_SIZE, 0x11);
+ /* Enable partial writes */
+ write_aux_reg(ARC_REG_IO_COH_PARTIAL, 1);
+ /* Enable IO coherency */
+ write_aux_reg(ARC_REG_IO_COH_ENABLE, 1);
+
+ __dma_cache_wback_inv = __dma_cache_wback_inv_ioc;
+ __dma_cache_inv = __dma_cache_inv_ioc;
+ __dma_cache_wback = __dma_cache_wback_ioc;
+ } else if (is_isa_arcv2() && l2_line_sz) {
+ __dma_cache_wback_inv = __dma_cache_wback_inv_slc;
+ __dma_cache_inv = __dma_cache_inv_slc;
+ __dma_cache_wback = __dma_cache_wback_slc;
+ } else {
+ __dma_cache_wback_inv = __dma_cache_wback_inv_l1;
+ __dma_cache_inv = __dma_cache_inv_l1;
+ __dma_cache_wback = __dma_cache_wback_l1;
+ }
}