s390/percpu: Provide arch_raw_cpu_ptr()
author    Heiko Carstens <hca@linux.ibm.com>
          Tue, 17 Mar 2026 19:54:28 +0000 (20:54 +0100)
committer Vasily Gorbik <gor@linux.ibm.com>
          Tue, 24 Mar 2026 20:00:41 +0000 (21:00 +0100)
Provide an s390 specific arch_raw_cpu_ptr() implementation which avoids
the detour via get_lowcore() when reading the percpu offset from the
lowcore. The inline assembly is implemented with an alternative so that
a relocated lowcore (where the percpu offset is located at a different
absolute address) is handled correctly.

This turns code like this

  102f78:       a7 39 00 00             lghi    %r3,0
  102f7c:       e3 20 33 b8 00 08       ag      %r2,952(%r3)

which first loads a zero base address into %r3 and then adds the percpu
offset to %r2, into a single instruction

  102f7c:       e3 20 03 b8 00 08       ag      %r2,952(%r0)

and also avoids the need for a base register, thus reducing register
pressure.

With defconfig, bloat-o-meter -t provides this result:

add/remove: 12/26 grow/shrink: 183/3391 up/down: 14880/-41950 (-27070)
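
For reference, a report like the above is typically produced with the
in-tree script (the vmlinux file names here are assumptions):

  ./scripts/bloat-o-meter -t vmlinux.old vmlinux.new

where the -t option limits the comparison to text section symbols.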

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
arch/s390/include/asm/percpu.h

index 5899f57f17d16fd066eb82c1a635f44be0ab2017..b18a96f3a3345b8cd500f308408bad0fa98b68fa 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
  */
 #define __my_cpu_offset get_lowcore()->percpu_offset
 
+#define arch_raw_cpu_ptr(_ptr)                                         \
+({                                                                     \
+       unsigned long lc_percpu, tcp_ptr__;                             \
+                                                                       \
+       tcp_ptr__ = (__force unsigned long)(_ptr);                      \
+       lc_percpu = offsetof(struct lowcore, percpu_offset);            \
+       asm_inline volatile(                                            \
+       ALTERNATIVE("ag         %[__ptr__],%[offzero](%%r0)\n",         \
+                   "ag         %[__ptr__],%[offalt](%%r0)\n",          \
+                   ALT_FEATURE(MFEATURE_LOWCORE))                      \
+       : [__ptr__] "+d" (tcp_ptr__)                                    \
+       : [offzero] "i" (lc_percpu),                                    \
+         [offalt] "i" (lc_percpu + LOWCORE_ALT_ADDRESS),               \
+         "m" (((struct lowcore *)0)->percpu_offset)                    \
+       : "cc");                                                        \
+       (TYPEOF_UNQUAL(*(_ptr)) __force __kernel *)tcp_ptr__;           \
+})
+
 /*
  * We use a compare-and-swap loop since that uses less cpu cycles than
  * disabling and enabling interrupts like the generic variant would do.
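
As a usage illustration (not part of the patch; the variable and function
names below are hypothetical), a regular per-cpu access ends up in
raw_cpu_ptr() and therefore in the new arch_raw_cpu_ptr(), so the per-cpu
address is now formed by a single "ag" instead of lghi + ag:

  #include <linux/percpu.h>

  static DEFINE_PER_CPU(unsigned long, demo_counter);

  static void demo_inc(void)
  {
          /* raw_cpu_ptr() expands to arch_raw_cpu_ptr() on s390; callers
           * are expected to have preemption disabled as usual.
           */
          unsigned long *p = raw_cpu_ptr(&demo_counter);

          (*p)++;
  }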