/* { dg-final { scan-assembler-times {vminu.vx} 2 } } */
/* { dg-final { scan-assembler-times {vsaddu.vx} 1 } } */
/* { dg-final { scan-assembler-times {vssubu.vx} 1 } } */
-/* { dg-final { scan-assembler-times {vaaddu.vx} 1 { target { no-opts "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m2" "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m4" } } } } */
+/* { dg-final { scan-assembler-times {vaaddu.vx} 2 { target { no-opts {
+ "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m2"
+ "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m4"
+ } } } } } */
/* { dg-final { scan-assembler-times {vminu.vx} 2 } } */
/* { dg-final { scan-assembler-times {vsaddu.vx} 1 } } */
/* { dg-final { scan-assembler-times {vssubu.vx} 1 } } */
-/* { dg-final { scan-assembler-times {vaaddu.vx} 1 } } */
+/* { dg-final { scan-assembler-times {vaaddu.vx} 2 } } */
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8)
+DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8)
/* { dg-final { scan-assembler {vadd.vx} } } */
/* { dg-final { scan-assembler {vsub.vx} } } */
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X4)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X4)
DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X4)
+DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X4)
/* { dg-final { scan-assembler {vadd.vx} } } */
/* { dg-final { scan-assembler {vsub.vx} } } */
/* { dg-final { scan-assembler {vremu.vx} } } */
/* { dg-final { scan-assembler {vmaxu.vx} } } */
/* { dg-final { scan-assembler {vminu.vx} } } */
+/* { dg-final { scan-assembler {vsaddu.vx} } } */
/* { dg-final { scan-assembler {vssubu.vx} } } */
/* { dg-final { scan-assembler {vaaddu.vx} } } */
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY)
DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY)
+DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY)
/* { dg-final { scan-assembler {vadd.vx} } } */
/* { dg-final { scan-assembler {vsub.vx} } } */
/* { dg-final { scan-assembler {vminu.vx} } } */
/* { dg-final { scan-assembler-not {vsaddu.vx} } } */
/* { dg-final { scan-assembler-not {vssubu.vx} } } */
-/* { dg-final { scan-assembler {vaaddu.vx} { target { no-opts "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m2" "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m4" } } } } */
+/* { dg-final { scan-assembler {vaaddu.vx} { target { no-opts {
+ "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m2"
+ "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m4"
+ } } } } } */
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8)
+DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8)
/* { dg-final { scan-assembler {vadd.vx} } } */
/* { dg-final { scan-assembler {vsub.vx} } } */
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8)
+DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8)
/* { dg-final { scan-assembler {vadd.vx} } } */
/* { dg-final { scan-assembler {vsub.vx} } } */
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X4)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X4)
DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X4)
+DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X4)
/* { dg-final { scan-assembler {vadd.vx} } } */
/* { dg-final { scan-assembler {vsub.vx} } } */
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY)
DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY)
+DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY)
/* { dg-final { scan-assembler {vadd.vx} } } */
/* { dg-final { scan-assembler {vsub.vx} } } */
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8)
+DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8)
/* { dg-final { scan-assembler {vadd.vx} } } */
/* { dg-final { scan-assembler {vsub.vx} } } */
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8)
+DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8)
/* { dg-final { scan-assembler {vadd.vx} } } */
/* { dg-final { scan-assembler {vsub.vx} } } */
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X4)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X4)
DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X4)
+DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X4)
/* { dg-final { scan-assembler {vadd.vx} } } */
/* { dg-final { scan-assembler {vsub.vx} } } */
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY)
DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY)
+DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY)
/* { dg-final { scan-assembler-not {vadd.vx} } } */
/* { dg-final { scan-assembler-not {vsub.vx} } } */
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8)
+DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8)
/* { dg-final { scan-assembler {vadd.vx} } } */
/* { dg-final { scan-assembler {vsub.vx} } } */
DEF_AVG_FLOOR(int16_t, int32_t)
DEF_AVG_FLOOR(int32_t, int64_t)
+#define DEF_AVG_CEIL(NT, WT) \
+NT \
+test_##NT##_avg_ceil(NT x, NT y) \
+{ \
+ return (NT)(((WT)x + (WT)y + 1) >> 1); \
+}
+
+DEF_AVG_CEIL(uint8_t, uint16_t)
+DEF_AVG_CEIL(uint16_t, uint32_t)
+DEF_AVG_CEIL(uint32_t, uint64_t)
+
#ifdef HAS_INT128
DEF_AVG_FLOOR(uint64_t, uint128_t)
DEF_AVG_FLOOR(int64_t, int128_t)
+
+ DEF_AVG_CEIL(uint64_t, uint128_t)
#endif
#define AVG_FLOOR_FUNC(T) test_##T##_avg_floor
#define AVG_FLOOR_FUNC_WRAP(T) AVG_FLOOR_FUNC(T)
+#define AVG_CEIL_FUNC(T) test_##T##_avg_ceil
+#define AVG_CEIL_FUNC_WRAP(T) AVG_CEIL_FUNC(T)
+
#define TEST_BINARY_VX_SIGNED_0(T) \
DEF_VX_BINARY_CASE_0_WRAP(T, +, add) \
DEF_VX_BINARY_CASE_0_WRAP(T, -, sub) \
DEF_VX_BINARY_CASE_2_WRAP(T, SAT_U_ADD_FUNC(T), sat_add) \
DEF_VX_BINARY_CASE_2_WRAP(T, SAT_U_SUB_FUNC(T), sat_sub) \
DEF_VX_BINARY_CASE_2_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor) \
+ DEF_VX_BINARY_CASE_2_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil) \
#endif
},
};
+uint8_t TEST_BINARY_DATA(uint8_t, avg_ceil)[][3][N] =
+{
+ {
+ { 0 },
+ {
+ 2, 2, 2, 2,
+ 1, 1, 1, 1,
+ 0, 0, 0, 0,
+ 4, 4, 4, 4,
+ },
+ {
+ 1, 1, 1, 1,
+ 1, 1, 1, 1,
+ 0, 0, 0, 0,
+ 2, 2, 2, 2,
+ },
+ },
+ {
+ { 127 },
+ {
+ 127, 127, 127, 127,
+ 128, 128, 128, 128,
+ 255, 255, 255, 255,
+ 1, 1, 1, 1,
+ },
+ {
+ 127, 127, 127, 127,
+ 128, 128, 128, 128,
+ 191, 191, 191, 191,
+ 64, 64, 64, 64,
+ },
+ },
+ {
+ { 255 },
+ {
+ 0, 0, 0, 0,
+ 255, 255, 255, 255,
+ 254, 254, 254, 254,
+ 1, 1, 1, 1,
+ },
+ {
+ 128, 128, 128, 128,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 128, 128, 128, 128,
+ },
+ },
+};
+
+uint16_t TEST_BINARY_DATA(uint16_t, avg_ceil)[][3][N] =
+{
+ {
+ { 0 },
+ {
+ 2, 2, 2, 2,
+ 1, 1, 1, 1,
+ 0, 0, 0, 0,
+ 4, 4, 4, 4,
+ },
+ {
+ 1, 1, 1, 1,
+ 1, 1, 1, 1,
+ 0, 0, 0, 0,
+ 2, 2, 2, 2,
+ },
+ },
+ {
+ { 32767 },
+ {
+ 32767, 32767, 32767, 32767,
+ 32768, 32768, 32768, 32768,
+ 65535, 65535, 65535, 65535,
+ 1, 1, 1, 1,
+ },
+ {
+ 32767, 32767, 32767, 32767,
+ 32768, 32768, 32768, 32768,
+ 49151, 49151, 49151, 49151,
+ 16384, 16384, 16384, 16384,
+ },
+ },
+ {
+ { 65535 },
+ {
+ 0, 0, 0, 0,
+ 65535, 65535, 65535, 65535,
+ 65534, 65534, 65534, 65534,
+ 1, 1, 1, 1,
+ },
+ {
+ 32768, 32768, 32768, 32768,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 32768, 32768, 32768, 32768,
+ },
+ },
+};
+
+uint32_t TEST_BINARY_DATA(uint32_t, avg_ceil)[][3][N] =
+{
+ {
+ { 0 },
+ {
+ 2, 2, 2, 2,
+ 1, 1, 1, 1,
+ 0, 0, 0, 0,
+ 4, 4, 4, 4,
+ },
+ {
+ 1, 1, 1, 1,
+ 1, 1, 1, 1,
+ 0, 0, 0, 0,
+ 2, 2, 2, 2,
+ },
+ },
+ {
+ { 2147483647 },
+ {
+ 2147483647, 2147483647, 2147483647, 2147483647,
+ 2147483648, 2147483648, 2147483648, 2147483648,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 1, 1, 1, 1,
+ },
+ {
+ 2147483647, 2147483647, 2147483647, 2147483647,
+ 2147483648, 2147483648, 2147483648, 2147483648,
+ 3221225471, 3221225471, 3221225471, 3221225471,
+ 1073741824, 1073741824, 1073741824, 1073741824,
+ },
+ },
+ {
+ { 4294967295 },
+ {
+ 0, 0, 0, 0,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967294, 4294967294, 4294967294, 4294967294,
+ 1, 1, 1, 1,
+ },
+ {
+ 2147483648, 2147483648, 2147483648, 2147483648,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 2147483648, 2147483648, 2147483648, 2147483648,
+ },
+ },
+};
+
+uint64_t TEST_BINARY_DATA(uint64_t, avg_ceil)[][3][N] =
+{
+ {
+ { 0 },
+ {
+ 2, 2, 2, 2,
+ 1, 1, 1, 1,
+ 0, 0, 0, 0,
+ 4, 4, 4, 4,
+ },
+ {
+ 1, 1, 1, 1,
+ 1, 1, 1, 1,
+ 0, 0, 0, 0,
+ 2, 2, 2, 2,
+ },
+ },
+ {
+ { 9223372036854775807ull },
+ {
+ 9223372036854775807ull, 9223372036854775807ull, 9223372036854775807ull, 9223372036854775807ull,
+ 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull,
+ 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull,
+ 1, 1, 1, 1,
+ },
+ {
+ 9223372036854775807ull, 9223372036854775807ull, 9223372036854775807ull, 9223372036854775807ull,
+ 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull,
+ 13835058055282163711ull, 13835058055282163711ull, 13835058055282163711ull, 13835058055282163711ull,
+ 4611686018427387904ull, 4611686018427387904ull, 4611686018427387904ull, 4611686018427387904ull,
+ },
+ },
+ {
+ { 18446744073709551615ull },
+ {
+ 0, 0, 0, 0,
+ 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull,
+ 18446744073709551614ull, 18446744073709551614ull, 18446744073709551614ull, 18446744073709551614ull,
+ 1, 1, 1, 1,
+ },
+ {
+ 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull,
+ 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull,
+ 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull,
+ 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull,
+ },
+ },
+};
+
#endif
--- /dev/null
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99 --param=gpr2vr-cost=0" } */
+
+#include "vx_binary.h"
+#include "vx_binary_data.h"
+
+#define T uint16_t
+#define NAME avg_ceil
+#define FUNC AVG_CEIL_FUNC_WRAP(T)
+#define TEST_DATA TEST_BINARY_DATA_WRAP(T, NAME)
+
+DEF_VX_BINARY_CASE_2_WRAP(T, FUNC, NAME)
+
+#define TEST_RUN(T, NAME, out, in, x, n) \
+ RUN_VX_BINARY_CASE_2_WRAP(T, NAME, FUNC, out, in, x, n)
+
+#include "vx_binary_run.h"
--- /dev/null
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99 --param=gpr2vr-cost=0" } */
+
+#include "vx_binary.h"
+#include "vx_binary_data.h"
+
+#define T uint32_t
+#define NAME avg_ceil
+#define FUNC AVG_CEIL_FUNC_WRAP(T)
+#define TEST_DATA TEST_BINARY_DATA_WRAP(T, NAME)
+
+DEF_VX_BINARY_CASE_2_WRAP(T, FUNC, NAME)
+
+#define TEST_RUN(T, NAME, out, in, x, n) \
+ RUN_VX_BINARY_CASE_2_WRAP(T, NAME, FUNC, out, in, x, n)
+
+#include "vx_binary_run.h"
--- /dev/null
+/* { dg-do run { target { riscv_v && rv64 } } } */
+/* { dg-additional-options "-std=c99 --param=gpr2vr-cost=0" } */
+
+#include "vx_binary.h"
+#include "vx_binary_data.h"
+
+#define T uint64_t
+#define NAME avg_ceil
+#define FUNC AVG_CEIL_FUNC_WRAP(T)
+#define TEST_DATA TEST_BINARY_DATA_WRAP(T, NAME)
+
+DEF_VX_BINARY_CASE_2_WRAP(T, FUNC, NAME)
+
+#define TEST_RUN(T, NAME, out, in, x, n) \
+ RUN_VX_BINARY_CASE_2_WRAP(T, NAME, FUNC, out, in, x, n)
+
+#include "vx_binary_run.h"
--- /dev/null
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99 --param=gpr2vr-cost=0" } */
+
+#include "vx_binary.h"
+#include "vx_binary_data.h"
+
+#define T uint8_t
+#define NAME avg_ceil
+#define FUNC AVG_CEIL_FUNC_WRAP(T)
+#define TEST_DATA TEST_BINARY_DATA_WRAP(T, NAME)
+
+DEF_VX_BINARY_CASE_2_WRAP(T, FUNC, NAME)
+
+#define TEST_RUN(T, NAME, out, in, x, n) \
+ RUN_VX_BINARY_CASE_2_WRAP(T, NAME, FUNC, out, in, x, n)
+
+#include "vx_binary_run.h"