This patch adds the missing (u)int8_t types to the binop tests.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/binop/shift-run.c: Adapt for
(u)int8_t.
* gcc.target/riscv/rvv/autovec/binop/shift-rv32gcv.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/shift-rv64gcv.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/shift-template.h: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vadd-run.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vadd-rv32gcv.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vadd-rv64gcv.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vadd-template.h: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vand-run.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vand-rv32gcv.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vand-rv64gcv.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vand-template.h: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vdiv-run.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vdiv-rv32gcv.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vdiv-rv64gcv.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vdiv-template.h: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vmax-run.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vmax-rv32gcv.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vmax-rv64gcv.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vmax-template.h: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vmin-run.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vmin-rv32gcv.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vmin-rv64gcv.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vmin-template.h: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vmul-run.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vmul-rv64gcv.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vmul-template.h: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vor-run.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vor-rv32gcv.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vor-rv64gcv.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vor-template.h: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vrem-run.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vrem-rv32gcv.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vrem-rv64gcv.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vrem-template.h: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vsub-run.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vsub-rv32gcv.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vsub-rv64gcv.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vsub-template.h: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vxor-run.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vxor-rv32gcv.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vxor-rv64gcv.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vxor-template.h: Ditto.
assert (as##TYPE[i] == (VAL >> (i % 4)));
#define RUN_ALL() \
+ RUN(int8_t, 1) \
+ RUN(uint8_t, 2) \
RUN(int16_t, 1) \
RUN(uint16_t, 2) \
RUN(int32_t, 3) \
RUN(uint32_t, 4) \
RUN(int64_t, 5) \
RUN(uint64_t, 6) \
+ RUN2(int8_t, -7) \
+ RUN2(uint8_t, 8) \
RUN2(int16_t, -7) \
RUN2(uint16_t, 8) \
RUN2(int32_t, -9) \
#include "shift-template.h"
-/* TODO: For int16_t and uint16_t we need widening/promotion patterns.
- We don't check the assembler number since lacking patterns make
- auto-vectorization inconsistent in LMUL = 1/2/4/8. */
-
-/* { dg-final { scan-assembler {\tvsll\.vv} } } */
-/* { dg-final { scan-assembler {\tvsrl\.vv} } } */
-/* { dg-final { scan-assembler {\tvsra\.vv} } } */
+/* { dg-final { scan-assembler-times {\tvsll\.vv} 8 } } */
+/* { dg-final { scan-assembler-times {\tvsrl\.vv} 4 } } */
+/* { dg-final { scan-assembler-times {\tvsra\.vv} 4 } } */
#include "shift-template.h"
-/* { dg-final { scan-assembler-times {\tvsll\.vv} 6 } } */
-/* { dg-final { scan-assembler-times {\tvsrl\.vv} 3 } } */
-/* { dg-final { scan-assembler-times {\tvsra\.vv} 3 } } */
+/* { dg-final { scan-assembler-times {\tvsll\.vv} 8 } } */
+/* { dg-final { scan-assembler-times {\tvsrl\.vv} 4 } } */
+/* { dg-final { scan-assembler-times {\tvsra\.vv} 4 } } */
dst[i] = a[i] >> b[i]; \
}
-/* *int8_t not autovec currently. */
#define TEST_ALL() \
+ TEST1_TYPE(int8_t) \
+ TEST1_TYPE(uint8_t) \
TEST1_TYPE(int16_t) \
TEST1_TYPE(uint16_t) \
TEST1_TYPE(int32_t) \
TEST1_TYPE(uint32_t) \
TEST1_TYPE(int64_t) \
TEST1_TYPE(uint64_t) \
+ TEST2_TYPE(int8_t) \
+ TEST2_TYPE(uint8_t) \
TEST2_TYPE(int16_t) \
TEST2_TYPE(uint16_t) \
TEST2_TYPE(int32_t) \
assert (aim##TYPE[i] == VAL - 16);
#define RUN_ALL() \
+ RUN(int8_t, -1) \
+ RUN(uint8_t, 2) \
RUN(int16_t, -1) \
RUN(uint16_t, 2) \
RUN(int32_t, -3) \
RUN(uint32_t, 4) \
RUN(int64_t, -5) \
RUN(uint64_t, 6) \
+ RUN2(int8_t, -7) \
+ RUN2(uint8_t, 8) \
RUN2(int16_t, -7) \
RUN2(uint16_t, 8) \
RUN2(int32_t, -9) \
RUN2(uint32_t, 10) \
RUN2(int64_t, -11) \
RUN2(uint64_t, 12) \
+ RUN3M(int8_t, 13) \
+ RUN3(uint8_t, 14) \
RUN3M(int16_t, 13) \
RUN3(uint16_t, 14) \
RUN3M(int32_t, 15) \
#include "vadd-template.h"
-/* { dg-final { scan-assembler-times {\tvadd\.vv} 12 } } */
-/* { dg-final { scan-assembler-times {\tvadd\.vi} 6 } } */
+/* { dg-final { scan-assembler-times {\tvadd\.vv} 16 } } */
+/* { dg-final { scan-assembler-times {\tvadd\.vi} 8 } } */
#include "vadd-template.h"
-/* { dg-final { scan-assembler-times {\tvadd\.vv} 12 } } */
-/* { dg-final { scan-assembler-times {\tvadd\.vi} 6 } } */
+/* { dg-final { scan-assembler-times {\tvadd\.vv} 16 } } */
+/* { dg-final { scan-assembler-times {\tvadd\.vi} 8 } } */
dst[i] = a[i] - 16; \
}
-/* *int8_t not autovec currently. */
#define TEST_ALL() \
+ TEST_TYPE(int8_t) \
+ TEST_TYPE(uint8_t) \
TEST_TYPE(int16_t) \
TEST_TYPE(uint16_t) \
TEST_TYPE(int32_t) \
TEST_TYPE(uint32_t) \
TEST_TYPE(int64_t) \
TEST_TYPE(uint64_t) \
+ TEST2_TYPE(int8_t) \
+ TEST2_TYPE(uint8_t) \
TEST2_TYPE(int16_t) \
TEST2_TYPE(uint16_t) \
TEST2_TYPE(int32_t) \
TEST2_TYPE(uint32_t) \
TEST2_TYPE(int64_t) \
TEST2_TYPE(uint64_t) \
+ TEST3M_TYPE(int8_t) \
+ TEST3_TYPE(uint8_t) \
TEST3M_TYPE(int16_t) \
TEST3_TYPE(uint16_t) \
TEST3M_TYPE(int32_t) \
assert (aim##TYPE[i] == (VAL & -16));
#define RUN_ALL() \
+ RUN(int8_t, -1) \
+ RUN(uint8_t, 2) \
RUN(int16_t, -1) \
RUN(uint16_t, 2) \
RUN(int32_t, -3) \
RUN(uint32_t, 4) \
RUN(int64_t, -5) \
RUN(uint64_t, 6) \
+ RUN2(int8_t, -7) \
+ RUN2(uint8_t, 8) \
RUN2(int16_t, -7) \
RUN2(uint16_t, 8) \
RUN2(int32_t, -9) \
RUN2(uint32_t, 10) \
RUN2(int64_t, -11) \
RUN2(uint64_t, 12) \
+ RUN3M(int8_t, 13) \
+ RUN3(uint8_t, 14) \
RUN3M(int16_t, 13) \
RUN3(uint16_t, 14) \
RUN3M(int32_t, 15) \
#include "vand-template.h"
-/* { dg-final { scan-assembler-times {\tvand\.vv} 12 } } */
-/* { dg-final { scan-assembler-times {\tvand\.vi} 6 } } */
+/* { dg-final { scan-assembler-times {\tvand\.vv} 16 } } */
+/* { dg-final { scan-assembler-times {\tvand\.vi} 8 } } */
#include "vand-template.h"
-/* { dg-final { scan-assembler-times {\tvand\.vv} 12 } } */
-/* { dg-final { scan-assembler-times {\tvand\.vi} 6 } } */
+/* { dg-final { scan-assembler-times {\tvand\.vv} 16 } } */
+/* { dg-final { scan-assembler-times {\tvand\.vi} 8 } } */
dst[i] = a[i] & -16; \
}
-/* *int8_t not autovec currently. */
#define TEST_ALL() \
+ TEST_TYPE(int8_t) \
+ TEST_TYPE(uint8_t) \
TEST_TYPE(int16_t) \
TEST_TYPE(uint16_t) \
TEST_TYPE(int32_t) \
TEST_TYPE(uint32_t) \
TEST_TYPE(int64_t) \
TEST_TYPE(uint64_t) \
+ TEST2_TYPE(int8_t) \
+ TEST2_TYPE(uint8_t) \
TEST2_TYPE(int16_t) \
TEST2_TYPE(uint16_t) \
TEST2_TYPE(int32_t) \
TEST2_TYPE(uint32_t) \
TEST2_TYPE(int64_t) \
TEST2_TYPE(uint64_t) \
+ TEST3M_TYPE(int8_t) \
+ TEST3_TYPE(uint8_t) \
TEST3M_TYPE(int16_t) \
TEST3_TYPE(uint16_t) \
TEST3M_TYPE(int32_t) \
assert (as##TYPE[i] == 5);
#define RUN_ALL() \
+ RUN(int8_t, -1) \
+ RUN(uint8_t, 2) \
RUN(int16_t, -1) \
RUN(uint16_t, 2) \
RUN(int32_t, -3) \
RUN(uint32_t, 4) \
RUN(int64_t, -5) \
RUN(uint64_t, 6) \
+ RUN2(int8_t, -7) \
+ RUN2(uint8_t, 8) \
RUN2(int16_t, -7) \
RUN2(uint16_t, 8) \
RUN2(int32_t, -9) \
#include "vdiv-template.h"
/* Currently we use an epilogue loop which also contains vdivs. Therefore we
- expect 10 vdiv[u]s instead of 6. */
+ expect 14 vdiv[u]s instead of 8. */
-/* { dg-final { scan-assembler-times {\tvdiv\.vv} 10 } } */
-/* { dg-final { scan-assembler-times {\tvdivu\.vv} 10 } } */
+/* { dg-final { scan-assembler-times {\tvdiv\.vv} 14 } } */
+/* { dg-final { scan-assembler-times {\tvdivu\.vv} 14 } } */
#include "vdiv-template.h"
/* Currently we use an epilogue loop which also contains vdivs. Therefore we
- expect 10 vdiv[u]s instead of 6. */
+ expect 14 vdiv[u]s instead of 8. */
-/* { dg-final { scan-assembler-times {\tvdiv\.vv} 10 } } */
-/* { dg-final { scan-assembler-times {\tvdivu\.vv} 10 } } */
+/* { dg-final { scan-assembler-times {\tvdiv\.vv} 14 } } */
+/* { dg-final { scan-assembler-times {\tvdivu\.vv} 14 } } */
}
#define TEST_ALL() \
+ TEST_TYPE(int8_t) \
+ TEST_TYPE(uint8_t) \
TEST_TYPE(int16_t) \
TEST_TYPE(uint16_t) \
TEST_TYPE(int32_t) \
TEST_TYPE(uint32_t) \
TEST_TYPE(int64_t) \
TEST_TYPE(uint64_t) \
+ TEST2_TYPE(int8_t) \
+ TEST2_TYPE(uint8_t) \
TEST2_TYPE(int16_t) \
TEST2_TYPE(uint16_t) \
TEST2_TYPE(int32_t) \
assert (as##TYPE[i] == 0 > VAL ? 0 : VAL);
#define RUN_ALL() \
+ RUN(int8_t, -1) \
+ RUN(uint8_t, 2) \
RUN(int16_t, -1) \
RUN(uint16_t, 2) \
RUN(int32_t, -3) \
RUN(uint32_t, 4) \
RUN(int64_t, -5) \
RUN(uint64_t, 6) \
+ RUN2(int8_t, -7) \
+ RUN2(uint8_t, 8) \
RUN2(int16_t, -7) \
RUN2(uint16_t, 8) \
RUN2(int32_t, -9) \
#include "vmax-template.h"
-/* { dg-final { scan-assembler-times {\tvmax\.vv} 6 } } */
-/* { dg-final { scan-assembler-times {\tvmaxu\.vv} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmax\.vv} 8 } } */
+/* { dg-final { scan-assembler-times {\tvmaxu\.vv} 8 } } */
#include "vmax-template.h"
-/* { dg-final { scan-assembler-times {\tvmax\.vv} 6 } } */
-/* { dg-final { scan-assembler-times {\tvmaxu\.vv} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmax\.vv} 8 } } */
+/* { dg-final { scan-assembler-times {\tvmaxu\.vv} 8 } } */
dst[i] = a[i] > b ? a[i] : b; \
}
-/* *int8_t not autovec currently. */
#define TEST_ALL() \
+ TEST_TYPE(int8_t) \
+ TEST_TYPE(uint8_t) \
TEST_TYPE(int16_t) \
TEST_TYPE(uint16_t) \
TEST_TYPE(int32_t) \
TEST_TYPE(uint32_t) \
TEST_TYPE(int64_t) \
TEST_TYPE(uint64_t) \
+ TEST2_TYPE(int8_t) \
+ TEST2_TYPE(uint8_t) \
TEST2_TYPE(int16_t) \
TEST2_TYPE(uint16_t) \
TEST2_TYPE(int32_t) \
assert (as##TYPE[i] == 0 < VAL ? 0 : VAL);
#define RUN_ALL() \
+ RUN(int8_t, -1) \
+ RUN(uint8_t, 2) \
RUN(int16_t, -1) \
RUN(uint16_t, 2) \
RUN(int32_t, -3) \
RUN(uint32_t, 4) \
RUN(int64_t, -5) \
RUN(uint64_t, 6) \
+ RUN2(int8_t, -7) \
+ RUN2(uint8_t, 8) \
RUN2(int16_t, -7) \
RUN2(uint16_t, 8) \
RUN2(int32_t, -9) \
#include "vmin-template.h"
-/* { dg-final { scan-assembler-times {\tvmin\.vv} 6 } } */
-/* { dg-final { scan-assembler-times {\tvminu\.vv} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmin\.vv} 8 } } */
+/* { dg-final { scan-assembler-times {\tvminu\.vv} 8 } } */
#include "vmin-template.h"
-/* { dg-final { scan-assembler-times {\tvmin\.vv} 6 } } */
-/* { dg-final { scan-assembler-times {\tvminu\.vv} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmin\.vv} 8 } } */
+/* { dg-final { scan-assembler-times {\tvminu\.vv} 8 } } */
dst[i] = a[i] < b ? a[i] : b; \
}
-/* *int8_t not autovec currently. */
#define TEST_ALL() \
+ TEST_TYPE(int8_t) \
+ TEST_TYPE(uint8_t) \
TEST_TYPE(int16_t) \
TEST_TYPE(uint16_t) \
TEST_TYPE(int32_t) \
TEST_TYPE(uint32_t) \
TEST_TYPE(int64_t) \
TEST_TYPE(uint64_t) \
+ TEST2_TYPE(int8_t) \
+ TEST2_TYPE(uint8_t) \
TEST2_TYPE(int16_t) \
TEST2_TYPE(uint16_t) \
TEST2_TYPE(int32_t) \
a##TYPE[i] = 2; \
b##TYPE[i] = VAL; \
} \
- vadd_##TYPE (a##TYPE, a##TYPE, b##TYPE, SZ); \
+ vmul_##TYPE (a##TYPE, a##TYPE, b##TYPE, SZ); \
for (int i = 0; i < SZ; i++) \
assert (a##TYPE[i] == 2 * VAL);
TYPE as##TYPE[SZ]; \
for (int i = 0; i < SZ; i++) \
as##TYPE[i] = 3; \
- vadds_##TYPE (as##TYPE, as##TYPE, VAL, SZ); \
+ vmuls_##TYPE (as##TYPE, as##TYPE, VAL, SZ); \
for (int i = 0; i < SZ; i++) \
assert (as##TYPE[i] == 3 * VAL);
#define RUN_ALL() \
+ RUN(int8_t, -1) \
+ RUN(uint8_t, 2) \
RUN(int16_t, -1) \
RUN(uint16_t, 2) \
RUN(int32_t, -3) \
RUN(uint32_t, 4) \
RUN(int64_t, -5) \
RUN(uint64_t, 6) \
+ RUN2(int8_t, -7) \
+ RUN2(uint8_t, 8) \
RUN2(int16_t, -7) \
RUN2(uint16_t, 8) \
RUN2(int32_t, -9) \
#include "vmul-template.h"
-/* { dg-final { scan-assembler-times {\tvmul\.vv} 12 } } */
+/* { dg-final { scan-assembler-times {\tvmul\.vv} 16 } } */
#include "vmul-template.h"
-/* { dg-final { scan-assembler-times {\tvmul\.vv} 12 } } */
+/* { dg-final { scan-assembler-times {\tvmul\.vv} 16 } } */
#define TEST_TYPE(TYPE) \
__attribute__((noipa)) \
- void vadd_##TYPE (TYPE *dst, TYPE *a, TYPE *b, int n) \
+ void vmul_##TYPE (TYPE *dst, TYPE *a, TYPE *b, int n) \
{ \
for (int i = 0; i < n; i++) \
dst[i] = a[i] * b[i]; \
#define TEST2_TYPE(TYPE) \
__attribute__((noipa)) \
- void vadds_##TYPE (TYPE *dst, TYPE *a, TYPE b, int n) \
+ void vmuls_##TYPE (TYPE *dst, TYPE *a, TYPE b, int n) \
{ \
for (int i = 0; i < n; i++) \
dst[i] = a[i] * b; \
}
-/* *int8_t not autovec currently. */
#define TEST_ALL() \
+ TEST_TYPE(int8_t) \
+ TEST_TYPE(uint8_t) \
TEST_TYPE(int16_t) \
TEST_TYPE(uint16_t) \
TEST_TYPE(int32_t) \
TEST_TYPE(uint32_t) \
TEST_TYPE(int64_t) \
TEST_TYPE(uint64_t) \
+ TEST2_TYPE(int8_t) \
+ TEST2_TYPE(uint8_t) \
TEST2_TYPE(int16_t) \
TEST2_TYPE(uint16_t) \
TEST2_TYPE(int32_t) \
assert (aim##TYPE[i] == (VAL | -16));
#define RUN_ALL() \
+ RUN(int8_t, -1) \
+ RUN(uint8_t, 2) \
RUN(int16_t, -1) \
RUN(uint16_t, 2) \
RUN(int32_t, -3) \
RUN(uint32_t, 4) \
RUN(int64_t, -5) \
RUN(uint64_t, 6) \
+ RUN2(int8_t, -7) \
+ RUN2(uint8_t, 8) \
RUN2(int16_t, -7) \
RUN2(uint16_t, 8) \
RUN2(int32_t, -9) \
RUN2(uint32_t, 10) \
RUN2(int64_t, -11) \
RUN2(uint64_t, 12) \
+ RUN3M(int8_t, 13) \
+ RUN3(uint8_t, 14) \
RUN3M(int16_t, 13) \
RUN3(uint16_t, 14) \
RUN3M(int32_t, 15) \
#include "vor-template.h"
-/* { dg-final { scan-assembler-times {\tvor\.vv} 12 } } */
-/* { dg-final { scan-assembler-times {\tvor\.vi} 6 } } */
+/* { dg-final { scan-assembler-times {\tvor\.vv} 16 } } */
+/* { dg-final { scan-assembler-times {\tvor\.vi} 8 } } */
#include "vor-template.h"
-/* { dg-final { scan-assembler-times {\tvor\.vv} 12 } } */
-/* { dg-final { scan-assembler-times {\tvor\.vi} 6 } } */
+/* { dg-final { scan-assembler-times {\tvor\.vv} 16 } } */
+/* { dg-final { scan-assembler-times {\tvor\.vi} 8 } } */
dst[i] = a[i] | -16; \
}
-/* *int8_t not autovec currently. */
#define TEST_ALL() \
+ TEST_TYPE(int8_t) \
+ TEST_TYPE(uint8_t) \
TEST_TYPE(int16_t) \
TEST_TYPE(uint16_t) \
TEST_TYPE(int32_t) \
TEST_TYPE(uint32_t) \
TEST_TYPE(int64_t) \
TEST_TYPE(uint64_t) \
+ TEST2_TYPE(int8_t) \
+ TEST2_TYPE(uint8_t) \
TEST2_TYPE(int16_t) \
TEST2_TYPE(uint16_t) \
TEST2_TYPE(int32_t) \
TEST2_TYPE(uint32_t) \
TEST2_TYPE(int64_t) \
TEST2_TYPE(uint64_t) \
+ TEST3M_TYPE(int8_t) \
+ TEST3_TYPE(uint8_t) \
TEST3M_TYPE(int16_t) \
TEST3_TYPE(uint16_t) \
TEST3M_TYPE(int32_t) \
assert (as##TYPE[i] == 89 % VAL);
#define RUN_ALL() \
+ RUN(int8_t, -1) \
+ RUN(uint8_t, 2) \
RUN(int16_t, -1) \
RUN(uint16_t, 2) \
RUN(int32_t, -3) \
RUN(uint32_t, 4) \
RUN(int64_t, -5) \
RUN(uint64_t, 6) \
+ RUN2(int8_t, -7) \
+ RUN2(uint8_t, 8) \
RUN2(int16_t, -7) \
RUN2(uint16_t, 8) \
RUN2(int32_t, -9) \
#include "vrem-template.h"
/* Currently we use an epilogue loop which also contains vrems. Therefore we
- expect 10 vrem[u]s instead of 6. */
+ expect 14 vrem[u]s instead of 8. */
-/* { dg-final { scan-assembler-times {\tvrem\.vv} 10 } } */
-/* { dg-final { scan-assembler-times {\tvremu\.vv} 10 } } */
+/* { dg-final { scan-assembler-times {\tvrem\.vv} 14 } } */
+/* { dg-final { scan-assembler-times {\tvremu\.vv} 14 } } */
#include "vrem-template.h"
/* Currently we use an epilogue loop which also contains vrems. Therefore we
- expect 10 vrem[u]s instead of 6. */
+ expect 14 vrem[u]s instead of 8. */
-/* { dg-final { scan-assembler-times {\tvrem\.vv} 10 } } */
-/* { dg-final { scan-assembler-times {\tvremu\.vv} 10 } } */
+/* { dg-final { scan-assembler-times {\tvrem\.vv} 14 } } */
+/* { dg-final { scan-assembler-times {\tvremu\.vv} 14 } } */
dst[i] = a[i] % b; \
}
-/* *int8_t not autovec currently. */
#define TEST_ALL() \
+ TEST_TYPE(int8_t) \
+ TEST_TYPE(uint8_t) \
TEST_TYPE(int16_t) \
TEST_TYPE(uint16_t) \
TEST_TYPE(int32_t) \
TEST_TYPE(uint32_t) \
TEST_TYPE(int64_t) \
TEST_TYPE(uint64_t) \
+ TEST2_TYPE(int8_t) \
+ TEST2_TYPE(uint8_t) \
TEST2_TYPE(int16_t) \
TEST2_TYPE(uint16_t) \
TEST2_TYPE(int32_t) \
TYPE b##TYPE[SZ]; \
for (int i = 0; i < SZ; i++) \
{ \
- a##TYPE[i] = 999; \
+ a##TYPE[i] = 123; \
b##TYPE[i] = VAL; \
} \
vsub_##TYPE (a##TYPE, a##TYPE, b##TYPE, SZ); \
for (int i = 0; i < SZ; i++) \
- assert (a##TYPE[i] == 999 - VAL);
+ assert (a##TYPE[i] == 123 - VAL);
#define RUN2(TYPE,VAL) \
TYPE as##TYPE[SZ]; \
for (int i = 0; i < SZ; i++) \
- as##TYPE[i] = 999; \
+ as##TYPE[i] = 123; \
vsubs_##TYPE (as##TYPE, as##TYPE, VAL, SZ); \
for (int i = 0; i < SZ; i++) \
- assert (as##TYPE[i] == 999 - VAL);
+ assert (as##TYPE[i] == 123 - VAL);
#define RUN3(TYPE) \
TYPE as2##TYPE[SZ]; \
assert (as3##TYPE[i] == (TYPE)(15 - (i * -17 + 667)));
#define RUN_ALL() \
+ RUN(int8_t, 1) \
+ RUN(uint8_t, 2) \
RUN(int16_t, 1) \
RUN(uint16_t, 2) \
RUN(int32_t, 3) \
RUN(uint32_t, 4) \
RUN(int64_t, 5) \
RUN(uint64_t, 6) \
+ RUN2(int8_t, 7) \
+ RUN2(uint8_t, 8) \
RUN2(int16_t, 7) \
RUN2(uint16_t, 8) \
RUN2(int32_t, 9) \
RUN2(uint32_t, 10) \
RUN2(int64_t, 11) \
RUN2(uint64_t, 12) \
+ RUN3(int8_t) \
+ RUN3(uint8_t) \
RUN3(int16_t) \
RUN3(uint16_t) \
RUN3(int32_t) \
RUN3(uint32_t) \
RUN3(int64_t) \
RUN3(uint64_t) \
+ RUN4(int8_t) \
+ RUN4(uint8_t) \
RUN4(int16_t) \
RUN4(uint16_t) \
RUN4(int32_t) \
#include "vsub-template.h"
-/* { dg-final { scan-assembler-times {\tvsub\.vv} 12 } } */
-/* { dg-final { scan-assembler-times {\tvrsub\.vi} 12 } } */
+/* { dg-final { scan-assembler-times {\tvsub\.vv} 16 } } */
+/* { dg-final { scan-assembler-times {\tvrsub\.vi} 16 } } */
#include "vsub-template.h"
-/* { dg-final { scan-assembler-times {\tvsub\.vv} 12 } } */
-/* { dg-final { scan-assembler-times {\tvrsub\.vi} 12 } } */
+/* { dg-final { scan-assembler-times {\tvsub\.vv} 16 } } */
+/* { dg-final { scan-assembler-times {\tvrsub\.vi} 16 } } */
dst[i] = 15 - a[i]; \
}
-/* *int8_t not autovec currently. */
#define TEST_ALL() \
+ TEST_TYPE(int8_t) \
+ TEST_TYPE(uint8_t) \
TEST_TYPE(int16_t) \
TEST_TYPE(uint16_t) \
TEST_TYPE(int32_t) \
TEST_TYPE(uint32_t) \
TEST_TYPE(int64_t) \
TEST_TYPE(uint64_t) \
+ TEST2_TYPE(int8_t) \
+
+ TEST2_TYPE(uint8_t) \
TEST2_TYPE(int16_t) \
TEST2_TYPE(uint16_t) \
TEST2_TYPE(int32_t) \
TEST2_TYPE(uint32_t) \
TEST2_TYPE(int64_t) \
TEST2_TYPE(uint64_t)
+
+ TEST3_TYPE(int8_t) \
+ TEST3_TYPE(uint8_t) \
TEST3_TYPE(int16_t) \
TEST3_TYPE(uint16_t) \
TEST3_TYPE(int32_t) \
TEST3_TYPE(uint32_t) \
TEST3_TYPE(int64_t) \
TEST3_TYPE(uint64_t) \
+
+ TEST4_TYPE(int8_t) \
+ TEST4_TYPE(uint8_t) \
TEST4_TYPE(int16_t) \
TEST4_TYPE(uint16_t) \
TEST4_TYPE(int32_t) \
assert (aim##TYPE[i] == (VAL ^ -16));
#define RUN_ALL() \
+ RUN(int8_t, -1) \
+ RUN(uint8_t, 2) \
RUN(int16_t, -1) \
RUN(uint16_t, 2) \
RUN(int32_t, -3) \
RUN(uint32_t, 4) \
RUN(int64_t, -5) \
RUN(uint64_t, 6) \
+ RUN2(int8_t, -7) \
+ RUN2(uint8_t, 8) \
RUN2(int16_t, -7) \
RUN2(uint16_t, 8) \
RUN2(int32_t, -9) \
RUN2(uint32_t, 10) \
RUN2(int64_t, -11) \
RUN2(uint64_t, 12) \
+ RUN3M(int8_t, 13) \
+ RUN3(uint8_t, 14) \
RUN3M(int16_t, 13) \
RUN3(uint16_t, 14) \
RUN3M(int32_t, 15) \
#include "vxor-template.h"
-/* { dg-final { scan-assembler-times {\tvxor\.vv} 12 } } */
-/* { dg-final { scan-assembler-times {\tvxor\.vi} 6 } } */
+/* { dg-final { scan-assembler-times {\tvxor\.vv} 16 } } */
+/* { dg-final { scan-assembler-times {\tvxor\.vi} 8 } } */
#include "vxor-template.h"
-/* { dg-final { scan-assembler-times {\tvxor\.vv} 12 } } */
-/* { dg-final { scan-assembler-times {\tvxor\.vi} 6 } } */
+/* { dg-final { scan-assembler-times {\tvxor\.vv} 16 } } */
+/* { dg-final { scan-assembler-times {\tvxor\.vi} 8 } } */
dst[i] = a[i] ^ -16; \
}
-/* *int8_t not autovec currently. */
#define TEST_ALL() \
+ TEST_TYPE(int8_t) \
+ TEST_TYPE(uint8_t) \
TEST_TYPE(int16_t) \
TEST_TYPE(uint16_t) \
TEST_TYPE(int32_t) \
TEST_TYPE(uint32_t) \
TEST_TYPE(int64_t) \
TEST_TYPE(uint64_t) \
+ TEST2_TYPE(int8_t) \
+ TEST2_TYPE(uint8_t) \
TEST2_TYPE(int16_t) \
TEST2_TYPE(uint16_t) \
TEST2_TYPE(int32_t) \
TEST2_TYPE(uint32_t) \
TEST2_TYPE(int64_t) \
TEST2_TYPE(uint64_t) \
+ TEST3M_TYPE(int8_t) \
+ TEST3_TYPE(uint8_t) \
TEST3M_TYPE(int16_t) \
TEST3_TYPE(uint16_t) \
TEST3M_TYPE(int32_t) \