The generated asm code shows multiple conversions. GCC has always been
terribly bad at dealing with chars, which are converted to ints for
every operation and zero-extended again after each operation. But here,
in addition, there are conversions before and after the call to flsnz8().
Let's just declare the variables as unsigned long and use flsnz_long() to
process them without any conversion. This shortens the code and makes it
slightly faster.
Note that the fls operations could make use of __builtin_clz() on
gcc 4.6 and above, and it would be useful to implement native support
for ARM as well.
This is cbtree commit
1f0f83ba26f2279c8bba0080a2e09a803dddde47.
This is ebtree commit
9c38dcae22a84f0b0d9c5a56facce1ca2ad0aaef.
return r+1;
}
+static inline long flsnz_long(unsigned long x) /* 1-based position of the highest set bit of x; like the other flsnz variants, the result is undefined for x == 0 */
+{
+ long r; /* receives the bit index written by bsr */
+ __asm__("bsr %1,%0\n"
+ : "=r" (r) : "rm" (x)); /* output r must be a register; input x may be a register or memory */
+ return r + 1; /* bsr gives a 0-based bit index, callers expect a 1-based position */
+}
+
#else
// returns 1 to 32 for 1<<0 to 1<<31. Undefined for 0.
#define flsnz(___a) ({ \
return flsnz8_generic(x);
}
+#define flsnz_long(x) ((sizeof(long) > 4) ? flsnz64(x) : flsnz32(x)) /* generic fallback: picks the 64-bit or 32-bit variant from the width of long */
#endif
int ignore)
{
int beg;
- unsigned char c;
+ unsigned long c, d;
beg = ignore >> 3;
* or at the first zero we encounter on either side.
*/
while (1) {
- unsigned char d;
-
c = a[beg];
d = b[beg];
beg++;
* identical bits. Note that low bit numbers are assigned to high positions
* in the byte, as we compare them as strings.
*/
- return (beg << 3) - flsnz8(c);
+ return (beg << 3) - flsnz_long(c);
}
static forceinline int cmp_bits(const unsigned char *a, const unsigned char *b, unsigned int pos)