git.ipfire.org Git - thirdparty/linux.git/commitdiff
s390/percpu: Use new percpu code section for arch_this_cpu_add()
author: Heiko Carstens <hca@linux.ibm.com>
Tue, 26 May 2026 05:56:58 +0000 (07:56 +0200)
committer: Alexander Gordeev <agordeev@linux.ibm.com>
Wed, 3 Jun 2026 13:32:46 +0000 (15:32 +0200)
Convert arch_this_cpu_add() to make use of the new percpu code section
infrastructure.

With this the text size of the kernel image is reduced by ~76kb
(defconfig). Also more than 5300 generated preempt_schedule_notrace()
function calls within the kernel image (modules not counted) are removed.

With:

DEFINE_PER_CPU(long, foo);
void bar(long a) { this_cpu_add(foo, a); }

Old arch_this_cpu_add() looks like this:

00000000000000c0 <bar>:
  c0:   c0 04 00 00 00 00       jgnop   c0 <bar>
  c6:   eb 01 03 a8 00 6a       asi     936,1
  cc:   c4 18 00 00 00 00       lgrl    %r1,cc <bar+0xc>
                        ce: R_390_GOTENT        foo+0x2
  d2:   e3 10 03 b8 00 08       ag      %r1,952
  d8:   eb 22 10 00 00 e8       laag    %r2,%r2,0(%r1)
  de:   eb ff 03 a8 00 6e       alsi    936,-1
  e4:   a7 a4 00 05             jhe     ee <bar+0x2e>
  e8:   c0 f4 00 00 00 00       jg      e8 <bar+0x28>
                        ea: R_390_PC32DBL       __s390_indirect_jump_r14+0x2
  ee:   c0 f4 00 00 00 00       jg      ee <bar+0x2e>
                        f0: R_390_PLT32DBL      preempt_schedule_notrace+0x2

New arch_this_cpu_add() looks like this:

00000000000000c0 <bar>:
  c0:   c0 04 00 00 00 00       jgnop   c0 <bar>
  c6:   c4 38 00 00 00 00       lgrl    %r3,c6 <bar+0x6>
                        c8: R_390_GOTENT        foo+0x2
  cc:   b9 04 00 43             lgr     %r4,%r3
  d0:   eb 00 43 c0 00 52       mviy    960(%r0),4
  d6:   e3 40 03 b8 00 08       ag      %r4,952
  dc:   eb 52 40 00 00 e8       laag    %r5,%r2,0(%r4)
  e2:   eb 00 03 c0 00 52       mviy    960,0
  e8:   c0 f4 00 00 00 00       jg      e8 <bar+0x28>
                        ea: R_390_PC32DBL       __s390_indirect_jump_r14+0x2

Note that the conditional function call is removed.

Acked-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
arch/s390/include/asm/percpu.h

index 79d5a4460b18aa47691686edd9ee13729bdf8251..9140d81b7efce0e983bf00274f19f7e45a390ef4 100644 (file)
 
 #else /* MARCH_HAS_Z196_FEATURES */
 
-#define arch_this_cpu_add(pcp, val, op1, op2, szcast)                  \
-do {                                                                   \
-       typedef typeof(pcp) pcp_op_T__;                                 \
-       pcp_op_T__ val__ = (val);                                       \
-       pcp_op_T__ old__, *ptr__;                                       \
-       preempt_disable_notrace();                                      \
-       ptr__ = raw_cpu_ptr(&(pcp));                            \
-       if (__builtin_constant_p(val__) &&                              \
-           ((szcast)val__ > -129) && ((szcast)val__ < 128)) {          \
-               asm volatile(                                           \
-                       op2 "   %[ptr__],%[val__]"                      \
-                       : [ptr__] "+Q" (*ptr__)                         \
-                       : [val__] "i" ((szcast)val__)                   \
-                       : "cc");                                        \
-       } else {                                                        \
-               asm volatile(                                           \
-                       op1 "   %[old__],%[val__],%[ptr__]"             \
-                       : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__)   \
-                       : [val__] "d" (val__)                           \
-                       : "cc");                                        \
-       }                                                               \
-       preempt_enable_notrace();                                       \
+#define arch_this_cpu_add(pcp, val, op1, op2, szcast)                          \
+do {   /* Add val to this CPU's instance of pcp; no preempt_disable/enable calls are emitted here. */ \
+       unsigned long lc_pcpr, lc_pcpo; /* lowcore field offsets, passed to the asm as displacements */ \
+       typedef typeof(pcp) pcp_op_T__;                                         \
+       pcp_op_T__ val__ = (val);       /* val is evaluated exactly once */     \
+       pcp_op_T__ old__, *ptr__;       /* old__ is only an asm output; it is never read afterwards */ \
+       /* NOTE(review): PERCPU_PTR, MVIY_PERCPU, AG_ALT and MVIY_ALT are defined outside this hunk; the step comments below follow the disassembly quoted in the commit message - confirm against their definitions. */ \
+       lc_pcpr = offsetof(struct lowcore, percpu_register);                    \
+       lc_pcpo = offsetof(struct lowcore, percpu_offset);                      \
+       ptr__ = PERCPU_PTR(&(pcp));                                             \
+       if (__builtin_constant_p(val__) &&      /* compile-time constant ... */ \
+           ((szcast)val__ > -129) && ((szcast)val__ < 128)) { /* ... in [-128, 127]: use the immediate form op2 */ \
+               asm volatile(                                                   \
+                       MVIY_PERCPU("%[disppcpr]", "%[dispaltpcpr]", "%[ptr__]") /* presumably stores the number of the register holding ptr__ into lowcore percpu_register */ \
+                       AG_ALT("%[disppcpo]", "%[dispaltpcpo]", "%[ptr__]")     /* presumably adds lowcore percpu_offset to ptr__ */ \
+                       op2 "   0(%[ptr__]),%[val__]\n" /* add the immediate to the variable at 0(ptr__) */ \
+                       MVIY_ALT("%[disppcpr]", "%[dispaltpcpr]")               /* presumably resets lowcore percpu_register to 0 */ \
+                       : [ptr__] "+&a" (ptr__), "+m" (*ptr__), /* ptr__: read-write early-clobber address register; *ptr__: memory that is updated */ \
+                         "=m" (((struct lowcore *)0)->percpu_register) /* declares the lowcore field as written by the asm */ \
+                       : [val__] "i" ((szcast)val__),                          \
+                         [disppcpr] "i" (lc_pcpr),                             \
+                         [disppcpo] "i" (lc_pcpo),                             \
+                         [dispaltpcpr] "i" (lc_pcpr + LOWCORE_ALT_ADDRESS),    /* same field offsets, relative to LOWCORE_ALT_ADDRESS */ \
+                         [dispaltpcpo] "i" (lc_pcpo + LOWCORE_ALT_ADDRESS),    \
+                         "m" (((struct lowcore *)0)->percpu_offset)            /* declares the lowcore field as read by the asm */ \
+                       : "cc");                                                \
+       } else {        /* general case: val__ is passed in a register and op1 is used */ \
+               asm volatile(                                                   \
+                       MVIY_PERCPU("%[disppcpr]", "%[dispaltpcpr]", "%[ptr__]") /* same bracketing sequence as in the immediate branch */ \
+                       AG_ALT("%[disppcpo]", "%[dispaltpcpo]", "%[ptr__]")     \
+                       op1 "   %[old__],%[val__],0(%[ptr__])\n"        /* register form; its first operand is the output old__ */ \
+                       MVIY_ALT("%[disppcpr]", "%[dispaltpcpr]")               \
+                       : [old__] "=&d" (old__),        /* early-clobber so it cannot share a register with an input */ \
+                         [ptr__] "+&a" (ptr__),  "+m" (*ptr__),                \
+                         "=m" (((struct lowcore *)0)->percpu_register)         \
+                       : [val__] "d" (val__),                                  \
+                         [disppcpr] "i" (lc_pcpr),                             \
+                         [disppcpo] "i" (lc_pcpo),                             \
+                         [dispaltpcpr] "i" (lc_pcpr + LOWCORE_ALT_ADDRESS),    \
+                         [dispaltpcpo] "i" (lc_pcpo + LOWCORE_ALT_ADDRESS),    \
+                         "m" (((struct lowcore *)0)->percpu_offset)            \
+                       : "cc");        /* both instruction forms are declared to clobber the condition code */ \
+       }                                                                       \
 } while (0)
 
 /* Instantiates arch_this_cpu_add() with "laa" as the register form (op1), "asi" as the small-immediate form (op2) and int as the immediate cast; the _4 suffix presumably denotes 4-byte per-CPU variables. */
 #define this_cpu_add_4(pcp, val) arch_this_cpu_add(pcp, val, "laa", "asi", int)