]> git.ipfire.org Git - thirdparty/gcc.git/commit
AArch64: Implement widen_[us]sum using dotproduct for SVE [PR122069]
author Tamar Christina <tamar.christina@arm.com>
Sat, 18 Oct 2025 07:21:56 +0000 (08:21 +0100)
committer Tamar Christina <tamar.christina@arm.com>
Sat, 18 Oct 2025 07:24:17 +0000 (08:24 +0100)
commit 2f719014bfec1d21eceb409db6eff78eb92942f3
tree 04aaeefafde375db9d93884da935088c06d003b0
parent bb80fb6e9bd26f04d4654248184f93d62dc53a76
AArch64: Implement widen_[us]sum using dotproduct for SVE [PR122069]

This patch implements support for using dotproduct to do sum reductions by
changing += a into += (a * 1), i.e. we seed the multiplication with 1.

Given the example

int foo_int(unsigned char *x, unsigned char * restrict y) {
  int sum = 0;
  for (int i = 0; i < 8000; i++)
     sum += char_abs(x[i] - y[i]);
  return sum;
}

we used to generate

.L2:
        ld1b    z1.b, p7/z, [x0, x2]
        ld1b    z29.b, p7/z, [x1, x2]
        sub     z29.b, z1.b, z29.b
        uunpklo z0.h, z29.b
        uunpkhi z29.h, z29.b
        uunpklo z30.s, z0.h
        add     z31.s, p6/m, z31.s, z30.s
        uunpkhi z0.s, z0.h
        add     z31.s, p5/m, z31.s, z0.s
        uunpklo z28.s, z29.h
        add     z31.s, p4/m, z31.s, z28.s
        uunpkhi z29.s, z29.h
        add     z31.s, p3/m, z31.s, z29.s
        add     x2, x2, x7
        whilelo p7.b, w2, w3
        whilelo p3.s, w2, w6
        whilelo p4.s, w2, w5
        whilelo p5.s, w2, w4
        whilelo p6.s, w2, w3
        b.any   .L2
        ptrue   p7.b, all
        uaddv   d31, p7, z31.s

but with +dotprod we now generate

.L3:
        ld1b    z30.b, p7/z, [x5, x2]
        ld1b    z29.b, p7/z, [x1, x2]
        sub     z30.b, z30.b, z29.b
        udot    z31.s, z30.b, z28.b
        mov     x3, x2
        add     x2, x2, x6
        cmp     w2, w0
        bls     .L3
        incb    x3
        uaddv   d31, p7, z31.s

gcc/ChangeLog:

PR middle-end/122069
* config/aarch64/aarch64-sve.md (widen_<sur>sum<mode><vsi2qi>3): New.

gcc/testsuite/ChangeLog:

PR middle-end/122069
* gcc.target/aarch64/sve/pr122069_1.c: New test.
* gcc.target/aarch64/sve/pr122069_2.c: New test.
gcc/config/aarch64/aarch64-sve.md
gcc/testsuite/gcc.target/aarch64/sve/pr122069_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/pr122069_2.c [new file with mode: 0644]