the following testcase:
bool f(const std::vector<bool>& v, std::size_t x) {
return v[x];
}
is compiled as:
f(std::vector<bool, std::allocator<bool> > const&, unsigned long):
testq %rsi, %rsi
leaq 63(%rsi), %rax
movq (%rdi), %rdx
cmovns %rsi, %rax
sarq $6, %rax
leaq (%rdx,%rax,8), %rdx
movq %rsi, %rax
sarq $63, %rax
shrq $58, %rax
addq %rax, %rsi
andl $63, %esi
subq %rax, %rsi
jns .L2
addq $64, %rsi
subq $8, %rdx
.L2:
movl $1, %eax
shlx %rsi, %rax, %rax
andq (%rdx), %rax
setne %al
ret
which is quite expensive for simple bit access in a bitmap. The reason is that
the bit access is implemented using iterators
return begin()[__n];
Which in turn cares about situation where __n is negative yielding the extra
conditional.
_GLIBCXX20_CONSTEXPR
void
_M_incr(ptrdiff_t __i)
{
_M_assume_normalized();
difference_type __n = __i + _M_offset;
_M_p += __n / int(_S_word_bit);
__n = __n % int(_S_word_bit);
if (__n < 0)
{
__n += int(_S_word_bit);
--_M_p;
}
_M_offset = static_cast<unsigned int>(__n);
}
While we can use __builtin_unreachable to declare that __n is in range
0...max_size () but I think it is better to implement it directly, since
resulting code is shorter and much easier to optimize.
We now porduce:
.LFB1248:
.cfi_startproc
movq (%rdi), %rax
movq %rsi, %rdx
shrq $6, %rdx
andq (%rax,%rdx,8), %rsi
andl $63, %esi
setne %al
ret
Testcase suggests
movq (%rdi), %rax
movl %esi, %ecx
shrq $5, %rsi # does still need to be 64-bit
movl (%rax,%rsi,4), %eax
btl %ecx, %eax
setb %al
retq
Which is still one instruction shorter.
libstdc++-v3/ChangeLog:
PR target/80813
* include/bits/stl_bvector.h (vector<bool, _Alloc>::operator []): Do
not use iterators.
gcc/testsuite/ChangeLog:
PR target/80813
* g++.dg/tree-ssa/bvector-3.C: New test.
--- /dev/null
+// { dg-do compile }
+// { dg-options "-O2 -fdump-tree-optimized" }
+// { dg-skip-if "requires hosted libstdc++ for vector" { ! hostedlib } }
+
+#include <vector>
+bool f(const std::vector<bool>& v, std::size_t x) {
+ return v[x];
+}
+// All references to src should be optimized out, so there should be no name for it
+// { dg-final { scan-tree-dump-not "if \\(" optimized } }
operator[](size_type __n)
{
__glibcxx_requires_subscript(__n);
- return begin()[__n];
+ return _Bit_reference (this->_M_impl._M_start._M_p
+ + __n / int(_S_word_bit),
+ 1UL << __n % int(_S_word_bit));
}
_GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
operator[](size_type __n) const
{
__glibcxx_requires_subscript(__n);
- return begin()[__n];
+ return _Bit_reference (this->_M_impl._M_start._M_p
+ + __n / int(_S_word_bit),
+ 1UL << __n % int(_S_word_bit));
}
protected: