Provide an inline assembly which uses alternatives to avoid the need
for a base register when reading preempt_count() from lowcore. Use the
LLGT instruction, which loads only the least significant 31 bits of
preempt_count and thus masks out the encoded PREEMPT_NEED_RESCHED bit.
Generated code is changed from
000000000046e5d0 <vfree>:
46e5d0: c0 04 00 00 00 00 jgnop 46e5d0 <vfree>
46e5d6: a7 39 00 00 lghi %r3,0
46e5da: 58 10 33 a8 l %r1,936(%r3)
46e5de: c0 1b 00 ff ff 00 nilf %r1,16776960
46e5e4: a7 74 00 11 jne 46e606 <vfree+0x36>
to something like this:
000000000046e5d0 <vfree>:
46e5d0: c0 04 00 00 00 00 jgnop 46e5d0 <vfree>
46e5d6: e3 10 03 a8 00 17 llgt %r1,936
46e5dc: ec 41 28 b7 00 55 risbgz %r4,%r1,40,55
46e5e2: a7 74 00 0f jne 46e600 <vfree+0x30>
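For reference (not from the patch): the risbgz bit range 40-55 uses
IBM bit numbering, where bit 0 is the MSB of the 64-bit register, so
it applies the same 0x00ffff00 mask that the nilf immediate 16776960
encodes above. A small sketch to verify the arithmetic:

  #include <assert.h>
  #include <stdint.h>

  /* risbgz rX,rY,40,55: keep bits 40..55 (IBM numbering), zero the rest */
  static uint64_t risbgz_40_55(uint64_t val)
  {
          uint64_t mask = 0;

          for (int i = 40; i <= 55; i++)
                  mask |= 1ULL << (63 - i);
          return val & mask;
  }

  int main(void)
  {
          assert(risbgz_40_55(~0ULL) == 16776960); /* 0x00ffff00 */
          return 0;
  }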
Overall savings are only 82 bytes according to bloat-o-meter, due to
different inlining decisions and the small number of preempt_count()
users in the kernel.
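For reference, bloat-o-meter ships with the kernel tree and compares
symbol sizes of two builds, e.g. (file names are examples):

  ./scripts/bloat-o-meter vmlinux.old vmlinux.new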
Reviewed-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
#include <asm/cmpxchg.h>
#include <asm/march.h>
-/* We use the MSB mostly because its available */
+/*
+ * Use the MSB so that preempt_count can be read with LLGT, which
+ * loads the least significant 31 bits with a single instruction.
+ */
#define PREEMPT_NEED_RESCHED 0x80000000
/*
*/
static __always_inline int preempt_count(void)
{
- return READ_ONCE(get_lowcore()->preempt_count) & ~PREEMPT_NEED_RESCHED;
+ unsigned long lc_preempt, count;
+
+ BUILD_BUG_ON(sizeof_field(struct lowcore, preempt_count) != sizeof(int));
+ lc_preempt = offsetof(struct lowcore, preempt_count);
+ /* READ_ONCE(get_lowcore()->preempt_count) & ~PREEMPT_NEED_RESCHED */
+ asm_inline(
+ ALTERNATIVE("llgt %[count],%[offzero](%%r0)\n",
+ "llgt %[count],%[offalt](%%r0)\n",
+ ALT_FEATURE(MFEATURE_LOWCORE))
+ : [count] "=d" (count)
+ : [offzero] "i" (lc_preempt),
+ [offalt] "i" (lc_preempt + LOWCORE_ALT_ADDRESS),
+ "m" (((struct lowcore *)0)->preempt_count));
+ return count;
}
static __always_inline void preempt_count_set(int pc)