When available, use the cpu_id field from __rseq_abi on Linux to
implement sched_getcpu(). Fall-back on the vgetcpu vDSO if unavailable.
Benchmarks:
x86-64: Intel E5-2630 v3@2.40GHz, 16-core, hyperthreading
glibc sched_getcpu(): 13.7 ns (baseline)
glibc sched_getcpu() using rseq: 2.5 ns (speedup: 5.5x)
inline load cpuid from __rseq_abi TLS: 0.8 ns (speedup: 17.1x)
#include <errno.h>
#include <sched.h>
#include <sysdep.h>
+#include <atomic.h>
#include <sysdep-vdso.h>
+#include <sys/rseq.h>
-int
-sched_getcpu (void)
+static int
+vsyscall_sched_getcpu (void)
{
unsigned int cpu;
int r = -1;
#endif
return r == -1 ? r : cpu;
}
+
+#ifdef RSEQ_SIG
+int
+sched_getcpu (void)
+{
+ int cpu_id = atomic_load_relaxed (&__rseq_abi.cpu_id);
+
+ return cpu_id >= 0 ? cpu_id : vsyscall_sched_getcpu ();
+}
+#else /* RSEQ_SIG */
+int
+sched_getcpu (void)
+{
+ return vsyscall_sched_getcpu ();
+}
+#endif /* RSEQ_SIG */