]>
git.ipfire.org Git - thirdparty/gcc.git/commit
sve: optimize add reduction patterns
The following loop does a conditional reduction using an add:
#include <stdint.h>
int32_t f (int32_t *restrict array, int len, int min)
{
int32_t iSum = 0;
for (int i=0; i<len; i++) {
if (array[i] >= min)
iSum += array[i];
}
return iSum;
}
For this we currently generate:
mov z1.b, #0
mov z2.s, w2
mov z3.d, z1.d
ptrue p2.b, all
ld1w z0.s, p0/z, [x0, x3, lsl 2]
cmpge p1.s, p2/z, z0.s, z2.s
add x3, x3, x4
sel z0.s, p1, z0.s, z3.s
add z1.s, p0/m, z1.s, z0.s
whilelo p0.s, w3, w1
where the SEL is unneeded, as it is selecting between 0 and a value. This can be
optimized to just do the conditional add on p1 (the compare result) instead of
p0 (the loop predicate). After this patch we generate:
mov z2.s, w2
mov z0.b, #0
ptrue p1.b, all
ld1w z1.s, p0/z, [x0, x3, lsl 2]
cmpge p0.s, p0/z, z1.s, z2.s
add x3, x3, x4
add z0.s, p0/m, z0.s, z1.s
whilelo p0.s, w3, w1
and so we drop the SEL and the 0 move.
gcc/ChangeLog:
* match.pd: New rule.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/sve/pred-cond-reduc.c: New test.