diff options
author | Guo Ren <guoren@linux.alibaba.com> | 2022-04-13 15:27:52 +0800 |
---|---|---|
committer | Guo Ren <guoren@linux.alibaba.com> | 2022-04-25 13:51:43 +0800 |
commit | c5acdf12cc24d34ea3f9426472dcb3f5d581b1e5 (patch) | |
tree | f122a87ac51348ecdf351f70664582c6462e5393 /arch/csky | |
parent | 6b160e0513e9f0b0c0b7ff4d5b964822d1750ce9 (diff) | |
download | lwn-c5acdf12cc24d34ea3f9426472dcb3f5d581b1e5.tar.gz lwn-c5acdf12cc24d34ea3f9426472dcb3f5d581b1e5.zip |
csky: atomic: Add conditional atomic operations' optimization
Add conditional atomic operations' optimization:
- arch_atomic_fetch_add_unless
- arch_atomic_inc_unless_negative
- arch_atomic_dec_unless_positive
- arch_atomic_dec_if_positive
Comments by Boqun:
FWIW, you probably need to make sure that a barrier instruction inside
an lr/sc loop is a good thing. IIUC, the execution time of a barrier
instruction is determined by the status of store buffers and invalidate
queues (and probably other stuffs), so it may increase the execution
time of the lr/sc loop, and make it unlikely to succeed. But this really
depends on how the arch executes these instructions.
Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Diffstat (limited to 'arch/csky')
-rw-r--r-- | arch/csky/include/asm/atomic.h | 95 |
1 files changed, 95 insertions, 0 deletions
diff --git a/arch/csky/include/asm/atomic.h b/arch/csky/include/asm/atomic.h index 56c9dc8e91b3..60406ef9c2bb 100644 --- a/arch/csky/include/asm/atomic.h +++ b/arch/csky/include/asm/atomic.h @@ -100,6 +100,101 @@ ATOMIC_OPS(xor) #undef ATOMIC_FETCH_OP +static __always_inline int +arch_atomic_fetch_add_unless(atomic_t *v, int a, int u) +{ + int prev, tmp; + + __asm__ __volatile__ ( + RELEASE_FENCE + "1: ldex.w %0, (%3) \n" + " cmpne %0, %4 \n" + " bf 2f \n" + " mov %1, %0 \n" + " add %1, %2 \n" + " stex.w %1, (%3) \n" + " bez %1, 1b \n" + FULL_FENCE + "2:\n" + : "=&r" (prev), "=&r" (tmp) + : "r" (a), "r" (&v->counter), "r" (u) + : "memory"); + + return prev; +} +#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless + +static __always_inline bool +arch_atomic_inc_unless_negative(atomic_t *v) +{ + int rc, tmp; + + __asm__ __volatile__ ( + RELEASE_FENCE + "1: ldex.w %0, (%2) \n" + " movi %1, 0 \n" + " blz %0, 2f \n" + " movi %1, 1 \n" + " addi %0, 1 \n" + " stex.w %0, (%2) \n" + " bez %0, 1b \n" + FULL_FENCE + "2:\n" + : "=&r" (tmp), "=&r" (rc) + : "r" (&v->counter) + : "memory"); + + return tmp ? true : false; + +} +#define arch_atomic_inc_unless_negative arch_atomic_inc_unless_negative + +static __always_inline bool +arch_atomic_dec_unless_positive(atomic_t *v) +{ + int rc, tmp; + + __asm__ __volatile__ ( + RELEASE_FENCE + "1: ldex.w %0, (%2) \n" + " movi %1, 0 \n" + " bhz %0, 2f \n" + " movi %1, 1 \n" + " subi %0, 1 \n" + " stex.w %0, (%2) \n" + " bez %0, 1b \n" + FULL_FENCE + "2:\n" + : "=&r" (tmp), "=&r" (rc) + : "r" (&v->counter) + : "memory"); + + return tmp ? true : false; +} +#define arch_atomic_dec_unless_positive arch_atomic_dec_unless_positive + +static __always_inline int +arch_atomic_dec_if_positive(atomic_t *v) +{ + int dec, tmp; + + __asm__ __volatile__ ( + RELEASE_FENCE + "1: ldex.w %0, (%2) \n" + " subi %1, %0, 1 \n" + " blz %1, 2f \n" + " stex.w %1, (%2) \n" + " bez %1, 1b \n" + FULL_FENCE + "2:\n" + : "=&r" (dec), "=&r" (tmp) + : "r" (&v->counter) + : "memory"); + + return dec - 1; +} +#define arch_atomic_dec_if_positive arch_atomic_dec_if_positive + #define ATOMIC_OP() \ static __always_inline \ int arch_atomic_xchg_relaxed(atomic_t *v, int n) \ |