]>
Commit | Line | Data |
---|---|---|
1fd55d47 GKH |
1 | From f8ae107eef209bff29a5816bc1aad40d5cd69a80 Mon Sep 17 00:00:00 2001 |
2 | From: Peter Zijlstra <peterz@infradead.org> | |
3 | Date: Fri, 17 Nov 2017 15:28:08 -0800 | |
4 | Subject: lib/int_sqrt: optimize initial value compute | |
5 | ||
6 | From: Peter Zijlstra <peterz@infradead.org> | |
7 | ||
8 | commit f8ae107eef209bff29a5816bc1aad40d5cd69a80 upstream. | |
9 | ||
10 | The initial value (@m) compute is: | |
11 | ||
12 | m = 1UL << (BITS_PER_LONG - 2); | |
13 | while (m > x) | |
14 | m >>= 2; | |
15 | ||
16 | Which is a linear search for the highest even bit smaller or equal to @x | |
17 | We can implement this using a binary search using __fls() (or better when | |
18 | its hardware implemented). | |
19 | ||
20 | m = 1UL << (__fls(x) & ~1UL); | |
21 | ||
22 | Especially for small values of @x; which are the more common arguments | |
23 | when doing a CDF on idle times; the linear search is near to worst case, | |
24 | while the binary search of __fls() is a constant 6 (or 5 on 32bit) | |
25 | branches. | |
26 | ||
27 | cycles: branches: branch-misses: | |
28 | ||
29 | PRE: | |
30 | ||
31 | hot: 43.633557 +- 0.034373 45.333132 +- 0.002277 0.023529 +- 0.000681 | |
32 | cold: 207.438411 +- 0.125840 45.333132 +- 0.002277 6.976486 +- 0.004219 | |
33 | ||
34 | SOFTWARE FLS: | |
35 | ||
36 | hot: 29.576176 +- 0.028850 26.666730 +- 0.004511 0.019463 +- 0.000663 | |
37 | cold: 165.947136 +- 0.188406 26.666746 +- 0.004511 6.133897 +- 0.004386 | |
38 | ||
39 | HARDWARE FLS: | |
40 | ||
41 | hot: 24.720922 +- 0.025161 20.666784 +- 0.004509 0.020836 +- 0.000677 | |
42 | cold: 132.777197 +- 0.127471 20.666776 +- 0.004509 5.080285 +- 0.003874 | |
43 | ||
44 | Averages computed over all values <128k using a LFSR to generate order. | |
45 | Cold numbers have a LFSR based branch trace buffer 'confuser' ran between | |
46 | each int_sqrt() invocation. | |
47 | ||
48 | Link: http://lkml.kernel.org/r/20171020164644.936577234@infradead.org | |
49 | Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> | |
50 | Suggested-by: Joe Perches <joe@perches.com> | |
51 | Acked-by: Will Deacon <will.deacon@arm.com> | |
52 | Acked-by: Linus Torvalds <torvalds@linux-foundation.org> | |
53 | Cc: Anshul Garg <aksgarg1989@gmail.com> | |
54 | Cc: Davidlohr Bueso <dave@stgolabs.net> | |
55 | Cc: David Miller <davem@davemloft.net> | |
56 | Cc: Ingo Molnar <mingo@kernel.org> | |
57 | Cc: Kees Cook <keescook@chromium.org> | |
58 | Cc: Matthew Wilcox <mawilcox@microsoft.com> | |
59 | Cc: Michael Davidson <md@google.com> | |
60 | Cc: Thomas Gleixner <tglx@linutronix.de> | |
61 | Signed-off-by: Andrew Morton <akpm@linux-foundation.org> | |
62 | Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> | |
63 | Cc: Joe Perches <joe@perches.com> | |
64 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
65 | ||
66 | --- | |
67 | lib/int_sqrt.c | 6 ++---- | |
68 | 1 file changed, 2 insertions(+), 4 deletions(-) | |
69 | ||
70 | --- a/lib/int_sqrt.c | |
71 | +++ b/lib/int_sqrt.c | |
72 | @@ -8,6 +8,7 @@ | |
73 | ||
74 | #include <linux/kernel.h> | |
75 | #include <linux/export.h> | |
76 | +#include <linux/bitops.h> | |
77 | ||
78 | /** | |
79 | * int_sqrt - rough approximation to sqrt | |
80 | @@ -22,10 +23,7 @@ unsigned long int_sqrt(unsigned long x) | |
81 | if (x <= 1) | |
82 | return x; | |
83 | ||
84 | - m = 1UL << (BITS_PER_LONG - 2); | |
85 | - while (m > x) | |
86 | - m >>= 2; | |
87 | - | |
88 | + m = 1UL << (__fls(x) & ~1UL); | |
89 | while (m != 0) { | |
90 | b = y + m; | |
91 | y >>= 1; |