From 27346b01aaa1e5f65dcd737e96bc1a4d46a6dd2a Mon Sep 17 00:00:00 2001
From: Willy Tarreau
Date: Wed, 10 Oct 2018 19:05:56 +0200
Subject: [PATCH] OPTIM: tools: optimize my_ffsl() for x86_64

This call is now used quite a bit in the fd cache, to decide which cache
to add/remove the fd to/from, when waking up a task for a single thread
in __task_wakeup(), in fd_cant_recv() and in fd_process_cached_events(),
and we can replace it with a single instruction, removing ~30 instructions
and ~80 bytes from the inner loop of some of these functions.

In addition the test for zero value was replaced with a comment saying
that it is illegal and leads to an undefined behaviour. The code does
not make use of this useless case today.
---
 include/common/standard.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/include/common/standard.h b/include/common/standard.h
index 5c7d15229e..fabe97276b 100644
--- a/include/common/standard.h
+++ b/include/common/standard.h
@@ -802,13 +802,16 @@ static inline unsigned int my_popcountl(unsigned long a)
 }
 
 /* Simple ffs implementation. It returns the position of the lowest bit set to
- * one. */
+ * one. It is illegal to call it with a==0 (undefined result).
+ */
 static inline unsigned int my_ffsl(unsigned long a)
 {
-	unsigned int cnt;
+	unsigned long cnt;
 
-	if (!a)
-		return 0;
+#if defined(__x86_64__)
+	__asm__("bsf %1,%0\n" : "=r" (cnt) : "rm" (a));
+	cnt++; /* bsf gives a 0-based bit index, positions here are 1-based */
+#else
 
 	cnt = 1;
 #if LONG_MAX > 0x7FFFFFFFL /* 64bits */
@@ -837,6 +840,7 @@ static inline unsigned int my_ffsl(unsigned long a)
 		a >>= 1;
 		cnt += 1;
 	}
+#endif /* x86_64 */
 
 	return cnt;
 }
-- 
2.39.5