author		Ahmad Fatoum <a.fatoum@pengutronix.de>	2019-04-25 16:32:30 +0200
committer	Sascha Hauer <s.hauer@pengutronix.de>	2019-04-29 08:58:37 +0200
commit		a44ddaad6928885d5070255d3b0a2765e2227684 (patch)
tree		af99bf5b39e4e04da25b5905ce73666aebe85f74 /arch/arm/cpu
parent		b402f62dc09ee2a9823627200d02d79588bcf0f9 (diff)
ARM: cache-armv7: start invalidation from outer levels
On 25/4/19 11:57, Lucas Stach wrote:
> [T]he sequence that could go wrong in Barebox is as follows:
>
> 1. CPU core starts invalidating at L1 cache level
> 2. HW prefetcher decides that a specific address should be brought into
>    the L1 cache.
> 3. HW prefetcher finds a valid block for the requested address in L2
>    cache and moves cached data from L2 to L1.
> 4. Only now CPU core invalidates L2 cache.
>
> In the above sequence we now have invalid data in the L1 cache line.
> The correct sequence will avoid this issue:
>
> 1. CPU core starts invalidating at L2 cache level
> 2. HW prefetcher decides that a specific address should be brought into
>    the L1 cache.
> 3. HW prefetcher sees invalid tags for the requested address in L2
>    cache and brings in the data from external memory.
> 4. CPU core invalidates L1 cache, discarding the prefetched data.

The ARM Cortex-A Series Programmer's Guide addresses this issue in the
SMP context[1]:

> If another core were to access the affected address between those
> two actions, a coherency problem can occur. Such problems can be
> avoided by following two simple rules.
>
> * When cleaning, always clean the innermost (L1) cache first and
>   then clean the outer cache(s).
> * When invalidating, always invalidate the outermost cache first
>   and the L1 cache last.

The current code correctly iterates from inner to outer cache levels
when flushing/cleaning (r8 == 0), but invalidation (r8 == 1) wrongly
proceeds in the same inner-to-outer direction. Adjust the invalidation
iteration order to start from the outermost cache instead.

Equivalent C code (+ marks the lines this patch adds):

 enum cache_op { CACHE_FLUSH = 0, CACHE_INVAL = 1 };
 register enum cache_op operation asm("r8");
 register int i asm("r12");
 register int limit asm("r3") = max_cache_level << 1; // e.g. 4 with L2 max

+if (operation == CACHE_FLUSH) {
 	i = 0;
+} else {
+	i = limit - 2;
+}

 bool loop_again;
 do {
 	/* [snip] */
+	if (operation == CACHE_FLUSH) {
 		i += 2;
 		loop_again = limit > i;
+	} else {
+		loop_again = i > 0;
+		i -= 2;
+	}
 } while (loop_again);

[1]: 18.6 "TLB and cache maintenance broadcast", Version 4.0

Suggested-by: Lucas Stach <l.stach@pengutronix.de>
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
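[Editor's note: a minimal C sketch of the two ordering rules quoted above.
clean_dcache_level() and inval_dcache_level() are hypothetical per-level
set/way primitives used for illustration only; they are not barebox
functions.]

	enum cache_op { CACHE_FLUSH = 0, CACHE_INVAL = 1 };

	void clean_dcache_level(int level);	/* hypothetical helper */
	void inval_dcache_level(int level);	/* hypothetical helper */

	static void dcache_all_levels(enum cache_op op, int levels)
	{
		int level;

		if (op == CACHE_FLUSH) {
			/* rule 1: clean innermost (L1) first, then outer */
			for (level = 0; level < levels; level++)
				clean_dcache_level(level);
		} else {
			/* rule 2: invalidate outermost first, L1 last */
			for (level = levels - 1; level >= 0; level--)
				inval_dcache_level(level);
		}
	}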
Diffstat (limited to 'arch/arm/cpu')
-rw-r--r--	arch/arm/cpu/cache-armv7.S | 12
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/arch/arm/cpu/cache-armv7.S b/arch/arm/cpu/cache-armv7.S
index 9487dd03f6..6a8aff8bb1 100644
--- a/arch/arm/cpu/cache-armv7.S
+++ b/arch/arm/cpu/cache-armv7.S
@@ -83,7 +83,10 @@ hierarchical:
 		ands	r3, r0, #0x7000000	@ extract loc from clidr
 		mov	r3, r3, lsr #23		@ left align loc bit field
 		beq	finished		@ if loc is 0, then no need to clean
-		mov	r12, #0			@ start clean at cache level 0
+		cmp	r8, #0
+THUMB(		ite	eq			)
+		moveq	r12, #0
+		subne	r12, r3, #2		@ start invalidate at outermost cache level
 loop1:
 		add	r2, r12, r12, lsr #1	@ work out 3x current cache level
 		mov	r1, r0, lsr r2		@ extract cache type bits from clidr
@@ -118,8 +121,15 @@ THUMB(		ite	eq			)
 		subs	r7, r7, #1		@ decrement the index
 		bge	loop2
 skip:
+		cmp	r8, #0
+		bne	inval_check
 		add	r12, r12, #2		@ increment cache number
 		cmp	r3, r12
+		b	loop_end_check
+inval_check:
+		cmp	r12, #0
+		sub	r12, r12, #2		@ decrement cache number
+loop_end_check:
 		dsb				@ work-around Cortex-A7 erratum 814220
 		bgt	loop1
 finished:
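[Editor's note: a hedged C rendering of the patched loop for reference.
It assumes ARMv7 CP15 access via inline asm; v7_dcache_all() is an
illustrative name, not the barebox entry point, and the per-level
set/way walk is elided. Here op plays the role of r8, i of r12, and
limit of r3 in the assembly above.]

	#include <stdint.h>

	enum cache_op { CACHE_FLUSH = 0, CACHE_INVAL = 1 };

	static inline uint32_t read_clidr(void)
	{
		uint32_t clidr;
		/* MRC p15, 1, <Rt>, c0, c0, 1: read CLIDR */
		asm volatile("mrc p15, 1, %0, c0, c0, 1" : "=r" (clidr));
		return clidr;
	}

	static void v7_dcache_all(enum cache_op op)
	{
		uint32_t clidr = read_clidr();
		int limit = ((clidr >> 24) & 0x7) << 1;	/* 2 * level of coherency */
		int i, start, step;

		if (!limit)
			return;			/* LoC == 0: nothing to do */

		if (op == CACHE_FLUSH) {
			start = 0;		/* clean: inner to outer */
			step = 2;
		} else {
			start = limit - 2;	/* invalidate: outer to inner */
			step = -2;
		}

		for (i = start; i >= 0 && i < limit; i += step) {
			int ctype = (clidr >> (i + (i >> 1))) & 0x7;

			if (ctype < 2)
				continue;	/* no data cache at this level */
			/* select the level via CSSELR, read CCSIDR, then walk
			 * sets/ways with DCCISW (flush) or DCISW (inval) */
		}
	}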