]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
x86/percpu: Convert this_percpu_xchg_op() from asm() to C code, to generate better...
authorUros Bizjak <ubizjak@gmail.com>
Wed, 20 Mar 2024 08:30:40 +0000 (09:30 +0100)
committerIngo Molnar <mingo@kernel.org>
Wed, 20 Mar 2024 11:29:02 +0000 (12:29 +0100)
Rewrite percpu_xchg_op() using generic percpu primitives instead
of using asm. The new implementation is similar to local_xchg() and
allows the compiler to perform various optimizations: e.g. the
compiler is able to create a fast path through the loop, according
to likely/unlikely annotations in percpu_try_cmpxchg_op().

No functional changes intended.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20240320083127.493250-1-ubizjak@gmail.com
arch/x86/include/asm/percpu.h

index 44958ebaf626e20c970acaacaad012f93cba2671..de991e6d050aad12b3ddff88ce4226b8ff900a9e 100644 (file)
@@ -230,25 +230,15 @@ do {                                                                      \
 })
 
 /*
- * xchg is implemented using cmpxchg without a lock prefix. xchg is
- * expensive due to the implied lock prefix.  The processor cannot prefetch
- * cachelines if xchg is used.
+ * this_cpu_xchg() is implemented using cmpxchg without a lock prefix.
+ * xchg is expensive due to the implied lock prefix. The processor
+ * cannot prefetch cachelines if xchg is used.
  */
-#define percpu_xchg_op(size, qual, _var, _nval)                                \
+#define this_percpu_xchg_op(_var, _nval)                               \
 ({                                                                     \
-       __pcpu_type_##size pxo_old__;                                   \
-       __pcpu_type_##size pxo_new__ = __pcpu_cast_##size(_nval);       \
-       asm qual (__pcpu_op2_##size("mov", __percpu_arg([var]),         \
-                                   "%[oval]")                          \
-                 "\n1:\t"                                              \
-                 __pcpu_op2_##size("cmpxchg", "%[nval]",               \
-                                   __percpu_arg([var]))                \
-                 "\n\tjnz 1b"                                          \
-                 : [oval] "=&a" (pxo_old__),                           \
-                   [var] "+m" (__my_cpu_var(_var))                     \
-                 : [nval] __pcpu_reg_##size(, pxo_new__)               \
-                 : "memory");                                          \
-       (typeof(_var))(unsigned long) pxo_old__;                        \
+       typeof(_var) pxo_old__ = this_cpu_read(_var);                   \
+       do { } while (!this_cpu_try_cmpxchg(_var, &pxo_old__, _nval));  \
+       pxo_old__;                                                      \
 })
 
 /*
@@ -534,9 +524,9 @@ do {                                                                        \
 #define this_cpu_or_1(pcp, val)                percpu_to_op(1, volatile, "or", (pcp), val)
 #define this_cpu_or_2(pcp, val)                percpu_to_op(2, volatile, "or", (pcp), val)
 #define this_cpu_or_4(pcp, val)                percpu_to_op(4, volatile, "or", (pcp), val)
-#define this_cpu_xchg_1(pcp, nval)     percpu_xchg_op(1, volatile, pcp, nval)
-#define this_cpu_xchg_2(pcp, nval)     percpu_xchg_op(2, volatile, pcp, nval)
-#define this_cpu_xchg_4(pcp, nval)     percpu_xchg_op(4, volatile, pcp, nval)
+#define this_cpu_xchg_1(pcp, nval)     this_percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_2(pcp, nval)     this_percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_4(pcp, nval)     this_percpu_xchg_op(pcp, nval)
 
 #define raw_cpu_add_return_1(pcp, val)         percpu_add_return_op(1, , pcp, val)
 #define raw_cpu_add_return_2(pcp, val)         percpu_add_return_op(2, , pcp, val)
@@ -575,7 +565,7 @@ do {                                                                        \
 #define this_cpu_and_8(pcp, val)               percpu_to_op(8, volatile, "and", (pcp), val)
 #define this_cpu_or_8(pcp, val)                        percpu_to_op(8, volatile, "or", (pcp), val)
 #define this_cpu_add_return_8(pcp, val)                percpu_add_return_op(8, volatile, pcp, val)
-#define this_cpu_xchg_8(pcp, nval)             percpu_xchg_op(8, volatile, pcp, nval)
+#define this_cpu_xchg_8(pcp, nval)             this_percpu_xchg_op(pcp, nval)
 #define this_cpu_cmpxchg_8(pcp, oval, nval)    percpu_cmpxchg_op(8, volatile, pcp, oval, nval)
 #define this_cpu_try_cmpxchg_8(pcp, ovalp, nval)       percpu_try_cmpxchg_op(8, volatile, pcp, ovalp, nval)
 #endif