]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
x86/locking: Use asm_inline for {,try_}cmpxchg{64,128} emulations
authorUros Bizjak <ubizjak@gmail.com>
Fri, 14 Feb 2025 15:07:47 +0000 (16:07 +0100)
committerIngo Molnar <mingo@kernel.org>
Fri, 21 Feb 2025 14:56:08 +0000 (15:56 +0100)
According to:

  https://gcc.gnu.org/onlinedocs/gcc/Size-of-an-asm.html

the usage of asm pseudo directives in the asm template can confuse
the compiler to wrongly estimate the size of the generated
code.

The ALTERNATIVE macro expands to several asm pseudo directives,
so its usage in {,try_}cmpxchg{64,128} causes instruction length estimate
to fail by an order of magnitude (the specially instrumented compiler
reports the estimated length of these asm templates to be more than 20
instructions long).

This incorrect estimate further causes unoptimal inlining
decisions, unoptimal instruction scheduling and unoptimal code block
alignments for functions that use these locking primitives.

Use asm_inline instead:

  https://gcc.gnu.org/pipermail/gcc-patches/2018-December/512349.html

which is a feature that makes GCC pretend some inline assembler code
is tiny (while it would think it is huge), instead of just asm.

For code size estimation, the size of the asm is then taken as
the minimum size of one instruction, ignoring how many instructions
compiler thinks it is.

The effect of this patch on x86_64 target is minor, since 128-bit
functions are rarely used on this target. The code size of the resulting
defconfig object file stays the same:

      text       data     bss      dec         hex filename
  27456612    4638523  814148 32909283     1f627e3 vmlinux-old.o
  27456612    4638523  814148 32909283     1f627e3 vmlinux-new.o

but the patch has minor effect on code layout due to the different
scheduling decisions in functions containing changed macros.

There is no effect on the x64_32 target, the code size of the resulting
defconfig object file and the code layout stays the same:

      text       data     bss      dec         hex filename
  18883870    2679275 1707916 23271061     1631695 vmlinux-old.o
  18883870    2679275 1707916 23271061     1631695 vmlinux-new.o

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20250214150929.5780-2-ubizjak@gmail.com
arch/x86/include/asm/cmpxchg_32.h
arch/x86/include/asm/percpu.h

index fd1282a783ddbff9ba9fa77d7cb9c2351865020c..95b5f990ca88e2e9bdfe8206cf12c98502da934f 100644 (file)
@@ -91,12 +91,14 @@ static __always_inline bool __try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp,
        union __u64_halves o = { .full = (_old), },                     \
                           n = { .full = (_new), };                     \
                                                                        \
-       asm volatile(ALTERNATIVE(_lock_loc                              \
-                                "call cmpxchg8b_emu",                  \
-                                _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \
-                    : ALT_OUTPUT_SP("+a" (o.low), "+d" (o.high))       \
-                    : "b" (n.low), "c" (n.high), [ptr] "S" (_ptr)      \
-                    : "memory");                                       \
+       asm_inline volatile(                                            \
+               ALTERNATIVE(_lock_loc                                   \
+                           "call cmpxchg8b_emu",                       \
+                           _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \
+               : ALT_OUTPUT_SP("+a" (o.low), "+d" (o.high))            \
+               : "b" (n.low), "c" (n.high),                            \
+                 [ptr] "S" (_ptr)                                      \
+               : "memory");                                            \
                                                                        \
        o.full;                                                         \
 })
@@ -119,14 +121,16 @@ static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64
                           n = { .full = (_new), };                     \
        bool ret;                                                       \
                                                                        \
-       asm volatile(ALTERNATIVE(_lock_loc                              \
-                                "call cmpxchg8b_emu",                  \
-                                _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \
-                    CC_SET(e)                                          \
-                    : ALT_OUTPUT_SP(CC_OUT(e) (ret),                   \
-                                    "+a" (o.low), "+d" (o.high))       \
-                    : "b" (n.low), "c" (n.high), [ptr] "S" (_ptr)      \
-                    : "memory");                                       \
+       asm_inline volatile(                                            \
+               ALTERNATIVE(_lock_loc                                   \
+                           "call cmpxchg8b_emu",                       \
+                           _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \
+               CC_SET(e)                                               \
+               : ALT_OUTPUT_SP(CC_OUT(e) (ret),                        \
+                               "+a" (o.low), "+d" (o.high))            \
+               : "b" (n.low), "c" (n.high),                            \
+                 [ptr] "S" (_ptr)                                      \
+               : "memory");                                            \
                                                                        \
        if (unlikely(!ret))                                             \
                *(_oldp) = o.full;                                      \
index 0ab991fba7de38c6c67fe5b7b027336c5f2ed449..08f5f61690b7ac585ddc00386082e8d22c8aee69 100644 (file)
@@ -348,15 +348,14 @@ do {                                                                      \
        old__.var = _oval;                                              \
        new__.var = _nval;                                              \
                                                                        \
-       asm qual (ALTERNATIVE("call this_cpu_cmpxchg8b_emu",            \
-                             "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
-                 : ALT_OUTPUT_SP([var] "+m" (__my_cpu_var(_var)),      \
-                                 "+a" (old__.low),                     \
-                                 "+d" (old__.high))                    \
-                 : "b" (new__.low),                                    \
-                   "c" (new__.high),                                   \
-                   "S" (&(_var))                                       \
-                 : "memory");                                          \
+       asm_inline qual (                                               \
+               ALTERNATIVE("call this_cpu_cmpxchg8b_emu",              \
+                           "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
+               : ALT_OUTPUT_SP([var] "+m" (__my_cpu_var(_var)),        \
+                               "+a" (old__.low), "+d" (old__.high))    \
+               : "b" (new__.low), "c" (new__.high),                    \
+                 "S" (&(_var))                                         \
+               : "memory");                                            \
                                                                        \
        old__.var;                                                      \
 })
@@ -378,17 +377,16 @@ do {                                                                      \
        old__.var = *_oval;                                             \
        new__.var = _nval;                                              \
                                                                        \
-       asm qual (ALTERNATIVE("call this_cpu_cmpxchg8b_emu",            \
-                             "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
-                 CC_SET(z)                                             \
-                 : ALT_OUTPUT_SP(CC_OUT(z) (success),                  \
-                                 [var] "+m" (__my_cpu_var(_var)),      \
-                                 "+a" (old__.low),                     \
-                                 "+d" (old__.high))                    \
-                 : "b" (new__.low),                                    \
-                   "c" (new__.high),                                   \
-                   "S" (&(_var))                                       \
-                 : "memory");                                          \
+       asm_inline qual (                                               \
+               ALTERNATIVE("call this_cpu_cmpxchg8b_emu",              \
+                           "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
+               CC_SET(z)                                               \
+               : ALT_OUTPUT_SP(CC_OUT(z) (success),                    \
+                               [var] "+m" (__my_cpu_var(_var)),        \
+                               "+a" (old__.low), "+d" (old__.high))    \
+               : "b" (new__.low), "c" (new__.high),                    \
+                 "S" (&(_var))                                         \
+               : "memory");                                            \
        if (unlikely(!success))                                         \
                *_oval = old__.var;                                     \
                                                                        \
@@ -419,15 +417,14 @@ do {                                                                      \
        old__.var = _oval;                                              \
        new__.var = _nval;                                              \
                                                                        \
-       asm qual (ALTERNATIVE("call this_cpu_cmpxchg16b_emu",           \
-                             "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
-                 : ALT_OUTPUT_SP([var] "+m" (__my_cpu_var(_var)),      \
-                                 "+a" (old__.low),                     \
-                                 "+d" (old__.high))                    \
-                 : "b" (new__.low),                                    \
-                   "c" (new__.high),                                   \
-                   "S" (&(_var))                                       \
-                 : "memory");                                          \
+       asm_inline qual (                                               \
+               ALTERNATIVE("call this_cpu_cmpxchg16b_emu",             \
+                           "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
+               : ALT_OUTPUT_SP([var] "+m" (__my_cpu_var(_var)),        \
+                               "+a" (old__.low), "+d" (old__.high))    \
+               : "b" (new__.low), "c" (new__.high),                    \
+                 "S" (&(_var))                                         \
+               : "memory");                                            \
                                                                        \
        old__.var;                                                      \
 })
@@ -449,19 +446,19 @@ do {                                                                      \
        old__.var = *_oval;                                             \
        new__.var = _nval;                                              \
                                                                        \
-       asm qual (ALTERNATIVE("call this_cpu_cmpxchg16b_emu",           \
-                             "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
-                 CC_SET(z)                                             \
-                 : ALT_OUTPUT_SP(CC_OUT(z) (success),                  \
-                                 [var] "+m" (__my_cpu_var(_var)),      \
-                                 "+a" (old__.low),                     \
-                                 "+d" (old__.high))                    \
-                 : "b" (new__.low),                                    \
-                   "c" (new__.high),                                   \
-                   "S" (&(_var))                                       \
-                 : "memory");                                          \
+       asm_inline qual (                                               \
+               ALTERNATIVE("call this_cpu_cmpxchg16b_emu",             \
+                           "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
+               CC_SET(z)                                               \
+               : ALT_OUTPUT_SP(CC_OUT(z) (success),                    \
+                               [var] "+m" (__my_cpu_var(_var)),        \
+                               "+a" (old__.low), "+d" (old__.high))    \
+               : "b" (new__.low), "c" (new__.high),                    \
+                 "S" (&(_var))                                         \
+               : "memory");                                            \
        if (unlikely(!success))                                         \
                *_oval = old__.var;                                     \
+                                                                       \
        likely(success);                                                \
 })