From: Willy Tarreau Date: Tue, 2 Mar 2021 06:08:34 +0000 (+0100) Subject: REORG: atomic: reimplement pl_cpu_relax() from atomic-ops.h X-Git-Tag: v2.4-dev11~32 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=958ae26c3558f0a5cdcb7a92cc535f1cd1ac9a64;p=thirdparty%2Fhaproxy.git REORG: atomic: reimplement pl_cpu_relax() from atomic-ops.h There is some confusion here as we need to place some cpu_relax statements in some loops where it's not easily possible to condition them on the use of threads. That's what atomic.h already does. So let's take the various pl_cpu_relax() implementations from there and place them in atomic.h under the name __ha_cpu_relax() and let them adapt to the presence or absence of threads and to the architecture (currently only x86 and aarch64 use a barrier instruction, though it's very likely that arm would work well with a cache-flushing ISB instruction as well). This time they were implemented as expressions returning 1 rather than statements, in order to ease their placement as the loop condition or the continuation expression inside "for" loops. We should probably do the same with barriers and a few such other ones.
--- diff --git a/include/haproxy/atomic.h b/include/haproxy/atomic.h index e21d7a3c82..ed55165e91 100644 --- a/include/haproxy/atomic.h +++ b/include/haproxy/atomic.h @@ -152,6 +152,7 @@ #define __ha_barrier_store() do { } while (0) #define __ha_barrier_full() do { } while (0) #define __ha_compiler_barrier() do { } while (0) +#define __ha_cpu_relax() ({ 1; }) #else /* !USE_THREAD */ @@ -395,6 +396,9 @@ __ha_cas_dw(void *target, void *compare, const void *set) return (ret); } +/* short-lived CPU relaxation */ +#define __ha_cpu_relax() ({ asm volatile("rep;nop\n"); 1; }) + #elif defined(__arm__) && (defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)) static __inline void @@ -457,6 +461,9 @@ static __inline int __ha_cas_dw(void *target, void *compare, const void *set) return (tmp); } +/* short-lived CPU relaxation */ +#define __ha_cpu_relax() ({ asm volatile(""); 1; }) + #elif defined (__aarch64__) static __inline void @@ -498,6 +505,11 @@ __ha_barrier_atomic_full(void) __asm __volatile("dmb ish" ::: "memory"); } +/* short-lived CPU relaxation; this was shown to improve fairness on + * modern ARMv8 cores such as Neoverse N1. + */ +#define __ha_cpu_relax() ({ asm volatile("isb" ::: "memory"); 1; }) + static __inline int __ha_cas_dw(void *target, void *compare, void *set) { void *value[2]; @@ -534,6 +546,9 @@ static __inline int __ha_cas_dw(void *target, void *compare, void *set) #define __ha_barrier_full __sync_synchronize /* Note: there is no generic DWCAS */ +/* short-lived CPU relaxation */ +#define __ha_cpu_relax() ({ asm volatile(""); 1; }) + #endif /* end of arch-specific barrier/dwcas */ static inline void __ha_compiler_barrier(void)