DEF_VX_BINARY_CASE_0_WRAP(T, *, mul)
DEF_VX_BINARY_CASE_0_WRAP(T, /, div)
DEF_VX_BINARY_CASE_0_WRAP(T, %, rem)
+DEF_VX_BINARY_CASE_2_WRAP(T, MAX_FUNC_0_WARP(T), max) /* Loop over test_<T>_max_0; the vmax scan below expects exactly one match.  */
/* { dg-final { scan-assembler-times {vadd.vx} 1 } } */
/* { dg-final { scan-assembler-times {vsub.vx} 1 } } */
/* { dg-final { scan-assembler-times {vmul.vx} 1 } } */
/* { dg-final { scan-assembler-times {vdiv.vx} 1 } } */
/* { dg-final { scan-assembler-times {vrem.vx} 1 } } */
+/* { dg-final { scan-assembler-times {vmax.vx} 1 } } */
DEF_VX_BINARY_CASE_0_WRAP(T, *, mul)
DEF_VX_BINARY_CASE_0_WRAP(T, /, div)
DEF_VX_BINARY_CASE_0_WRAP(T, %, rem)
+DEF_VX_BINARY_CASE_2_WRAP(T, MAX_FUNC_0_WARP(T), max) /* Loop over test_<T>_max_0; the vmax scan below expects exactly one match.  */
/* { dg-final { scan-assembler-times {vadd.vx} 1 } } */
/* { dg-final { scan-assembler-times {vsub.vx} 1 } } */
/* { dg-final { scan-assembler-times {vmul.vx} 1 } } */
/* { dg-final { scan-assembler-times {vdiv.vx} 1 } } */
/* { dg-final { scan-assembler-times {vrem.vx} 1 } } */
+/* { dg-final { scan-assembler-times {vmax.vx} 1 } } */
DEF_VX_BINARY_CASE_0_WRAP(T, *, mul)
DEF_VX_BINARY_CASE_0_WRAP(T, /, div)
DEF_VX_BINARY_CASE_0_WRAP(T, %, rem)
+DEF_VX_BINARY_CASE_2_WRAP(T, MAX_FUNC_0_WARP(T), max) /* Loop over test_<T>_max_0; the vmax scan below expects exactly one match.  */
/* { dg-final { scan-assembler-times {vadd.vx} 1 } } */
/* { dg-final { scan-assembler-times {vsub.vx} 1 } } */
/* { dg-final { scan-assembler-times {vmul.vx} 1 } } */
/* { dg-final { scan-assembler-times {vdiv.vx} 1 } } */
/* { dg-final { scan-assembler-times {vrem.vx} 1 } } */
+/* { dg-final { scan-assembler-times {vmax.vx} 1 } } */
DEF_VX_BINARY_CASE_0_WRAP(T, *, mul)
DEF_VX_BINARY_CASE_0_WRAP(T, /, div)
DEF_VX_BINARY_CASE_0_WRAP(T, %, rem)
+DEF_VX_BINARY_CASE_2_WRAP(T, MAX_FUNC_0_WARP(T), max) /* Loop over test_<T>_max_0; the vmax scan below expects exactly one match.  */
/* { dg-final { scan-assembler-times {vadd.vx} 1 } } */
/* { dg-final { scan-assembler-times {vsub.vx} 1 } } */
/* { dg-final { scan-assembler-times {vmul.vx} 1 } } */
/* { dg-final { scan-assembler-times {vdiv.vx} 1 } } */
/* { dg-final { scan-assembler-times {vrem.vx} 1 } } */
+/* { dg-final { scan-assembler-times {vmax.vx} 1 } } */
DEF_VX_BINARY_CASE_0_WRAP(T, *, mul)
DEF_VX_BINARY_CASE_0_WRAP(T, /, div)
DEF_VX_BINARY_CASE_0_WRAP(T, %, rem)
+DEF_VX_BINARY_CASE_2_WRAP(T, MAX_FUNC_0_WARP(T), max) /* Loop over test_<T>_max_0; the vmax scan below requires no match.  */
/* { dg-final { scan-assembler-not {vadd.vx} } } */
/* { dg-final { scan-assembler-not {vsub.vx} } } */
/* { dg-final { scan-assembler-not {vmul.vx} } } */
/* { dg-final { scan-assembler-not {vdiv.vx} } } */
/* { dg-final { scan-assembler-not {vrem.vx} } } */
+/* { dg-final { scan-assembler-not {vmax.vx} } } */
DEF_VX_BINARY_CASE_0_WRAP(T, *, mul)
DEF_VX_BINARY_CASE_0_WRAP(T, /, div)
DEF_VX_BINARY_CASE_0_WRAP(T, %, rem)
+DEF_VX_BINARY_CASE_2_WRAP(T, MAX_FUNC_0_WARP(T), max) /* Loop over test_<T>_max_0; the vmax scan below requires no match.  */
/* { dg-final { scan-assembler-not {vadd.vx} } } */
/* { dg-final { scan-assembler-not {vsub.vx} } } */
/* { dg-final { scan-assembler-not {vmul.vx} } } */
/* { dg-final { scan-assembler-not {vdiv.vx} } } */
/* { dg-final { scan-assembler-not {vrem.vx} } } */
+/* { dg-final { scan-assembler-not {vmax.vx} } } */
DEF_VX_BINARY_CASE_0_WRAP(T, *, mul)
DEF_VX_BINARY_CASE_0_WRAP(T, /, div)
DEF_VX_BINARY_CASE_0_WRAP(T, %, rem)
+DEF_VX_BINARY_CASE_2_WRAP(T, MAX_FUNC_0_WARP(T), max) /* Loop over test_<T>_max_0; the vmax scan below requires no match.  */
/* { dg-final { scan-assembler-not {vadd.vx} } } */
/* { dg-final { scan-assembler-not {vsub.vx} } } */
/* { dg-final { scan-assembler-not {vmul.vx} } } */
/* { dg-final { scan-assembler-not {vdiv.vx} } } */
/* { dg-final { scan-assembler-not {vrem.vx} } } */
+/* { dg-final { scan-assembler-not {vmax.vx} } } */
DEF_VX_BINARY_CASE_0_WRAP(T, *, mul)
DEF_VX_BINARY_CASE_0_WRAP(T, /, div)
DEF_VX_BINARY_CASE_0_WRAP(T, %, rem)
+DEF_VX_BINARY_CASE_2_WRAP(T, MAX_FUNC_0_WARP(T), max) /* Loop over test_<T>_max_0; the vmax scan below requires no match.  */
/* { dg-final { scan-assembler-not {vadd.vx} } } */
/* { dg-final { scan-assembler-not {vsub.vx} } } */
/* { dg-final { scan-assembler-not {vmul.vx} } } */
/* { dg-final { scan-assembler-not {vdiv.vx} } } */
/* { dg-final { scan-assembler-not {vrem.vx} } } */
+/* { dg-final { scan-assembler-not {vmax.vx} } } */
DEF_VX_BINARY_CASE_0_WRAP(T, *, mul)
DEF_VX_BINARY_CASE_0_WRAP(T, /, div)
DEF_VX_BINARY_CASE_0_WRAP(T, %, rem)
+DEF_VX_BINARY_CASE_2_WRAP(T, MAX_FUNC_0_WARP(T), max) /* Loop over test_<T>_max_0; the vmax scan below requires no match.  */
/* { dg-final { scan-assembler-not {vadd.vx} } } */
/* { dg-final { scan-assembler-not {vsub.vx} } } */
/* { dg-final { scan-assembler-not {vmul.vx} } } */
/* { dg-final { scan-assembler-not {vdiv.vx} } } */
/* { dg-final { scan-assembler-not {vrem.vx} } } */
+/* { dg-final { scan-assembler-not {vmax.vx} } } */
DEF_VX_BINARY_CASE_0_WRAP(T, *, mul)
DEF_VX_BINARY_CASE_0_WRAP(T, /, div)
DEF_VX_BINARY_CASE_0_WRAP(T, %, rem)
+DEF_VX_BINARY_CASE_2_WRAP(T, MAX_FUNC_0_WARP(T), max) /* Loop over test_<T>_max_0; the vmax scan below requires no match.  */
/* { dg-final { scan-assembler-not {vadd.vx} } } */
/* { dg-final { scan-assembler-not {vsub.vx} } } */
/* { dg-final { scan-assembler-not {vmul.vx} } } */
/* { dg-final { scan-assembler-not {vdiv.vx} } } */
/* { dg-final { scan-assembler-not {vrem.vx} } } */
+/* { dg-final { scan-assembler-not {vmax.vx} } } */
DEF_VX_BINARY_CASE_0_WRAP(T, *, mul)
DEF_VX_BINARY_CASE_0_WRAP(T, /, div)
DEF_VX_BINARY_CASE_0_WRAP(T, %, rem)
+DEF_VX_BINARY_CASE_2_WRAP(T, MAX_FUNC_0_WARP(T), max) /* Loop over test_<T>_max_0; the vmax scan below requires no match.  */
/* { dg-final { scan-assembler-not {vadd.vx} } } */
/* { dg-final { scan-assembler-not {vsub.vx} } } */
/* { dg-final { scan-assembler-not {vmul.vx} } } */
/* { dg-final { scan-assembler-not {vdiv.vx} } } */
/* { dg-final { scan-assembler-not {vrem.vx} } } */
+/* { dg-final { scan-assembler-not {vmax.vx} } } */
DEF_VX_BINARY_CASE_0_WRAP(T, *, mul)
DEF_VX_BINARY_CASE_0_WRAP(T, /, div)
DEF_VX_BINARY_CASE_0_WRAP(T, %, rem)
+DEF_VX_BINARY_CASE_2_WRAP(T, MAX_FUNC_0_WARP(T), max) /* Loop over test_<T>_max_0; the vmax scan below requires no match.  */
/* { dg-final { scan-assembler-not {vadd.vx} } } */
/* { dg-final { scan-assembler-not {vsub.vx} } } */
/* { dg-final { scan-assembler-not {vmul.vx} } } */
/* { dg-final { scan-assembler-not {vdiv.vx} } } */
/* { dg-final { scan-assembler-not {vrem.vx} } } */
+/* { dg-final { scan-assembler-not {vmax.vx} } } */
#define DEF_VX_BINARY_REVERSE_CASE_1_WRAP(T, OP, NAME, BODY) \
DEF_VX_BINARY_REVERSE_CASE_1(T, OP, NAME, BODY)
+#define DEF_MAX_0(T) /* Defines test_<T>_max_0: larger of a and b, written with '>'.  */ \
+static inline T \
+test_##T##_max_0 (T a, T b) \
+{ \
+ return a > b ? a : b; /* Strict compare: b is returned when a == b.  */ \
+}
+
+#define DEF_MAX_1(T) /* Defines test_<T>_max_1: larger of a and b, written with '>='.  */ \
+static inline T \
+test_##T##_max_1 (T a, T b) \
+{ \
+ return a >= b ? a : b; /* Non-strict compare: a is returned when a == b.  */ \
+}
+
+DEF_MAX_0(int8_t) /* '>' helpers for the four signed widths: test_int8_t_max_0 ... test_int64_t_max_0.  */
+DEF_MAX_0(int16_t)
+DEF_MAX_0(int32_t)
+DEF_MAX_0(int64_t)
+
+DEF_MAX_1(int8_t) /* '>=' helpers for the same widths: test_int8_t_max_1 ... test_int64_t_max_1.  */
+DEF_MAX_1(int16_t)
+DEF_MAX_1(int32_t)
+DEF_MAX_1(int64_t)
+
+#define MAX_FUNC_0(T) test_##T##_max_0 /* Name of the helper emitted by DEF_MAX_0 (T).  */
+#define MAX_FUNC_0_WARP(T) MAX_FUNC_0(T) /* Indirection so a macro argument T is expanded before the paste.  */
+
+#define MAX_FUNC_1(T) test_##T##_max_1 /* Name of the helper emitted by DEF_MAX_1 (T).  */
+#define MAX_FUNC_1_WARP(T) MAX_FUNC_1(T) /* Indirection so a macro argument T is expanded before the paste.  */
+
+#define DEF_VX_BINARY_CASE_2(T, FUNC, NAME) /* Defines a loop storing FUNC (in[i], x) to out[i] for i < n.  */ \
+void \
+test_vx_binary_##NAME##_##FUNC##_##T##_case_2 (T * restrict out, /* FUNC is pasted into the name, so it must be one identifier.  */ \
+ T * restrict in, \
+ T x, unsigned n) \
+{ \
+ for (unsigned i = 0; i < n; i++) \
+ out[i] = FUNC (in[i], x); /* The second operand x is the same for every element.  */ \
+}
+#define DEF_VX_BINARY_CASE_2_WRAP(T, FUNC, NAME) /* Expands macro arguments first, then forwards to DEF_VX_BINARY_CASE_2.  */ \
+ DEF_VX_BINARY_CASE_2(T, FUNC, NAME)
+#define RUN_VX_BINARY_CASE_2(T, NAME, FUNC, out, in, x, n) /* Calls the function defined by DEF_VX_BINARY_CASE_2 (T, FUNC, NAME).  */ \
+ test_vx_binary_##NAME##_##FUNC##_##T##_case_2(out, in, x, n)
+#define RUN_VX_BINARY_CASE_2_WRAP(T, NAME, FUNC, out, in, x, n) /* Expands macro arguments first; note NAME precedes FUNC here.  */ \
+ RUN_VX_BINARY_CASE_2(T, NAME, FUNC, out, in, x, n)
+
+#define DEF_VX_BINARY_CASE_3(T, FUNC, NAME, BODY) /* Defines a while loop whose stores come from BODY (FUNC).  */ \
+void \
+test_vx_binary_##NAME##_##FUNC##_##T##_case_3 (T * restrict out, \
+ T * restrict in, \
+ T x, unsigned n) \
+{ \
+ unsigned k = 0; /* Not advanced here; BODY must increase k or the loop never ends.  */ \
+ T tmp = x + 3; \
+ \
+ while (k < n) \
+ { \
+ tmp = tmp ^ 0x82; /* The scalar operand is recomputed on every iteration.  */ \
+ BODY(FUNC) /* No bounds check: BODY may index past n when n is not a multiple of its step.  */ \
+ } \
+}
+#define DEF_VX_BINARY_CASE_3_WRAP(T, FUNC, NAME, BODY) /* Expands macro arguments first, then forwards to DEF_VX_BINARY_CASE_3.  */ \
+ DEF_VX_BINARY_CASE_3(T, FUNC, NAME, BODY)
+
+#define VX_BINARY_FUNC_BODY(func) /* Two stores; relies on out, in, k and tmp being in scope at the use site.  */ \
+ out[k + 0] = func (in[k + 0], tmp); \
+ out[k + 1] = func (in[k + 1], tmp); \
+ k += 2; /* Bare statements, not one compound statement: use only inside a braced block.  */
+
+#define VX_BINARY_FUNC_BODY_X4(op) /* 4 stores, k advances by 4; op is the function name passed on as func.  */ \
+ VX_BINARY_FUNC_BODY(op) \
+ VX_BINARY_FUNC_BODY(op)
+
+#define VX_BINARY_FUNC_BODY_X8(op) /* 8 stores, k advances by 8.  */ \
+ VX_BINARY_FUNC_BODY_X4(op) \
+ VX_BINARY_FUNC_BODY_X4(op)
+
+#define VX_BINARY_FUNC_BODY_X16(op) /* 16 stores, k advances by 16.  */ \
+ VX_BINARY_FUNC_BODY_X8(op) \
+ VX_BINARY_FUNC_BODY_X8(op)
+
+#define VX_BINARY_FUNC_BODY_X32(op) /* 32 stores, k advances by 32.  */ \
+ VX_BINARY_FUNC_BODY_X16(op) \
+ VX_BINARY_FUNC_BODY_X16(op)
+
+#define VX_BINARY_FUNC_BODY_X64(op) /* 64 stores, k advances by 64.  */ \
+ VX_BINARY_FUNC_BODY_X32(op) \
+ VX_BINARY_FUNC_BODY_X32(op)
+
+#define VX_BINARY_FUNC_BODY_X128(op) /* 128 stores, k advances by 128.  */ \
+ VX_BINARY_FUNC_BODY_X64(op) \
+ VX_BINARY_FUNC_BODY_X64(op)
+
#endif
},
};
+int8_t TEST_BINARY_DATA(int8_t, max)[][3][N] = /* Three cases, each made of three rows of N int8_t values.  */
+{
+ {
+ { 0 }, /* Row 0: only element 0 is given, the rest are zero-initialized.  NOTE(review): presumably the scalar operand; confirm in vx_binary_run.h.  */
+ { /* Row 1: NOTE(review): presumably the vector input.  */
+ 2, 2, 2, 2,
+ 1, 1, 1, 1,
+ -1, -1, -1, -1,
+ -2, -2, -2, -2,
+ },
+ { /* Row 2: equals max of each row-1 value and row 0's first value.  */
+ 2, 2, 2, 2,
+ 1, 1, 1, 1,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ },
+ },
+ {
+ { 127 }, /* Largest int8_t value: every row-2 entry is 127.  */
+ {
+ 127, 127, 127, 127,
+ -1, -1, -1, -1,
+ -128, -128, -128, -128,
+ -2, -2, -2, -2,
+ },
+ {
+ 127, 127, 127, 127,
+ 127, 127, 127, 127,
+ 127, 127, 127, 127,
+ 127, 127, 127, 127,
+ },
+ },
+ {
+ { -128 }, /* Smallest int8_t value: row 2 repeats row 1 unchanged.  */
+ {
+ -128, -128, -128, -128,
+ 1, 1, 1, 1,
+ 127, 127, 127, 127,
+ 2, 2, 2, 2,
+ },
+ {
+ -128, -128, -128, -128,
+ 1, 1, 1, 1,
+ 127, 127, 127, 127,
+ 2, 2, 2, 2,
+ },
+ },
+};
+
+int16_t TEST_BINARY_DATA(int16_t, max)[][3][N] = /* Three cases, each made of three rows of N int16_t values.  */
+{
+ {
+ { 0 }, /* Row 0: only element 0 is given, the rest are zero-initialized.  NOTE(review): presumably the scalar operand; confirm in vx_binary_run.h.  */
+ { /* Row 1: NOTE(review): presumably the vector input.  */
+ 2, 2, 2, 2,
+ 1, 1, 1, 1,
+ -1, -1, -1, -1,
+ -2, -2, -2, -2,
+ },
+ { /* Row 2: equals max of each row-1 value and row 0's first value.  */
+ 2, 2, 2, 2,
+ 1, 1, 1, 1,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ },
+ },
+ {
+ { 32767 }, /* Largest int16_t value: every row-2 entry is 32767.  */
+ {
+ 32767, 32767, 32767, 32767,
+ -1, -1, -1, -1,
+ -32768, -32768, -32768, -32768,
+ -2, -2, -2, -2,
+ },
+ {
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767,
+ },
+ },
+ {
+ { -32768 }, /* Smallest int16_t value: row 2 repeats row 1 unchanged.  */
+ {
+ -32768, -32768, -32768, -32768,
+ 1, 1, 1, 1,
+ 32767, 32767, 32767, 32767,
+ 2, 2, 2, 2,
+ },
+ {
+ -32768, -32768, -32768, -32768,
+ 1, 1, 1, 1,
+ 32767, 32767, 32767, 32767,
+ 2, 2, 2, 2,
+ },
+ },
+};
+
+int32_t TEST_BINARY_DATA(int32_t, max)[][3][N] = /* Three cases, each made of three rows of N int32_t values.  */
+{
+ {
+ { 0 }, /* Row 0: only element 0 is given, the rest are zero-initialized.  NOTE(review): presumably the scalar operand; confirm in vx_binary_run.h.  */
+ { /* Row 1: NOTE(review): presumably the vector input.  */
+ 2, 2, 2, 2,
+ 1, 1, 1, 1,
+ -1, -1, -1, -1,
+ -2, -2, -2, -2,
+ },
+ { /* Row 2: equals max of each row-1 value and row 0's first value.  */
+ 2, 2, 2, 2,
+ 1, 1, 1, 1,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ },
+ },
+ {
+ { 2147483647 }, /* Largest int32_t value: every row-2 entry is 2147483647.  */
+ {
+ 2147483647, 2147483647, 2147483647, 2147483647,
+ -1, -1, -1, -1,
+ -2147483648, -2147483648, -2147483648, -2147483648,
+ -2, -2, -2, -2,
+ },
+ {
+ 2147483647, 2147483647, 2147483647, 2147483647,
+ 2147483647, 2147483647, 2147483647, 2147483647,
+ 2147483647, 2147483647, 2147483647, 2147483647,
+ 2147483647, 2147483647, 2147483647, 2147483647,
+ },
+ },
+ {
+ { -2147483648 }, /* Smallest int32_t value: row 2 repeats row 1 unchanged.  */
+ {
+ -2147483648, -2147483648, -2147483648, -2147483648,
+ 1, 1, 1, 1,
+ 2147483647, 2147483647, 2147483647, 2147483647,
+ 2, 2, 2, 2,
+ },
+ {
+ -2147483648, -2147483648, -2147483648, -2147483648,
+ 1, 1, 1, 1,
+ 2147483647, 2147483647, 2147483647, 2147483647,
+ 2, 2, 2, 2,
+ },
+ },
+};
+
+int64_t TEST_BINARY_DATA(int64_t, max)[][3][N] = /* Three cases, each made of three rows of N int64_t values.  */
+{
+ {
+ { 0 }, /* Row 0: only element 0 is given, the rest are zero-initialized.  NOTE(review): presumably the scalar operand; confirm in vx_binary_run.h.  */
+ { /* Row 1: NOTE(review): presumably the vector input.  */
+ 2, 2, 2, 2,
+ 1, 1, 1, 1,
+ -1, -1, -1, -1,
+ -2, -2, -2, -2,
+ },
+ { /* Row 2: equals max of each row-1 value and row 0's first value.  */
+ 2, 2, 2, 2,
+ 1, 1, 1, 1,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ },
+ },
+ {
+ { 9223372036854775807ll }, /* Largest int64_t value: every row-2 entry is this value.  */
+ {
+ 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll,
+ -1, -1, -1, -1,
+ -9223372036854775808ull, -9223372036854775808ull, -9223372036854775808ull, -9223372036854775808ull, /* The magnitude does not fit a signed 64-bit literal, hence ull; the initializer then relies on the unsigned-to-signed conversion yielding the minimum value.  */
+ -2, -2, -2, -2,
+ },
+ {
+ 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll,
+ 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll,
+ 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll,
+ 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll,
+ },
+ },
+ {
+ { -9223372036854775808ull }, /* Intended as the smallest int64_t value: row 2 repeats row 1 unchanged.  */
+ {
+ -9223372036854775808ull, -9223372036854775808ull, -9223372036854775808ull, -9223372036854775808ull,
+ 1, 1, 1, 1,
+ 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll,
+ 2, 2, 2, 2,
+ },
+ {
+ -9223372036854775808ull, -9223372036854775808ull, -9223372036854775808ull, -9223372036854775808ull,
+ 1, 1, 1, 1,
+ 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll,
+ 2, 2, 2, 2,
+ },
+ },
+};
+
#endif
--- /dev/null
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99 --param=gpr2vr-cost=0" } */
+
+#include "vx_binary.h"
+#include "vx_binary_data.h"
+
+#define T int16_t /* Element type exercised by this run test.  */
+#define NAME max
+#define FUNC MAX_FUNC_0_WARP(T) /* Expands to test_int16_t_max_0, the '>' form of max.  */
+#define TEST_DATA TEST_BINARY_DATA_WRAP(T, NAME) /* NOTE(review): presumably names the int16_t max table; the macro is not visible here.  */
+
+DEF_VX_BINARY_CASE_2_WRAP(T, FUNC, max) /* Defines the loop function that TEST_RUN calls.  */
+
+#define TEST_RUN(T, NAME, out, in, x, n) /* T and NAME are parameters here; FUNC is the file-level macro above.  */ \
+ RUN_VX_BINARY_CASE_2_WRAP(T, NAME, FUNC, out, in, x, n)
+
+#include "vx_binary_run.h" /* NOTE(review): presumably supplies main and drives TEST_RUN over TEST_DATA; confirm in that header.  */
--- /dev/null
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99 --param=gpr2vr-cost=0" } */
+
+#include "vx_binary.h"
+#include "vx_binary_data.h"
+
+#define T int32_t /* Element type exercised by this run test.  */
+#define NAME max
+#define FUNC MAX_FUNC_0_WARP(T) /* Expands to test_int32_t_max_0, the '>' form of max.  */
+#define TEST_DATA TEST_BINARY_DATA_WRAP(T, NAME) /* NOTE(review): presumably names the int32_t max table; the macro is not visible here.  */
+
+DEF_VX_BINARY_CASE_2_WRAP(T, FUNC, max) /* Defines the loop function that TEST_RUN calls.  */
+
+#define TEST_RUN(T, NAME, out, in, x, n) /* T and NAME are parameters here; FUNC is the file-level macro above.  */ \
+ RUN_VX_BINARY_CASE_2_WRAP(T, NAME, FUNC, out, in, x, n)
+
+#include "vx_binary_run.h" /* NOTE(review): presumably supplies main and drives TEST_RUN over TEST_DATA; confirm in that header.  */
--- /dev/null
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99 --param=gpr2vr-cost=0" } */
+
+#include "vx_binary.h"
+#include "vx_binary_data.h"
+
+#define T int64_t /* Element type exercised by this run test.  */
+#define NAME max
+#define FUNC MAX_FUNC_0_WARP(T) /* Expands to test_int64_t_max_0, the '>' form of max.  */
+#define TEST_DATA TEST_BINARY_DATA_WRAP(T, NAME) /* NOTE(review): presumably names the int64_t max table; the macro is not visible here.  */
+
+DEF_VX_BINARY_CASE_2_WRAP(T, FUNC, max) /* Defines the loop function that TEST_RUN calls.  */
+
+#define TEST_RUN(T, NAME, out, in, x, n) /* T and NAME are parameters here; FUNC is the file-level macro above.  */ \
+ RUN_VX_BINARY_CASE_2_WRAP(T, NAME, FUNC, out, in, x, n)
+
+#include "vx_binary_run.h" /* NOTE(review): presumably supplies main and drives TEST_RUN over TEST_DATA; confirm in that header.  */
--- /dev/null
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99 --param=gpr2vr-cost=0" } */
+
+#include "vx_binary.h"
+#include "vx_binary_data.h"
+
+#define T int8_t /* Element type exercised by this run test.  */
+#define NAME max
+#define FUNC MAX_FUNC_0_WARP(T) /* Expands to test_int8_t_max_0, the '>' form of max.  */
+#define TEST_DATA TEST_BINARY_DATA_WRAP(T, NAME) /* NOTE(review): presumably names the int8_t max table; the macro is not visible here.  */
+
+DEF_VX_BINARY_CASE_2_WRAP(T, FUNC, max) /* Defines the loop function that TEST_RUN calls.  */
+
+#define TEST_RUN(T, NAME, out, in, x, n) /* T and NAME are parameters here; FUNC is the file-level macro above.  */ \
+ RUN_VX_BINARY_CASE_2_WRAP(T, NAME, FUNC, out, in, x, n)
+
+#include "vx_binary_run.h" /* NOTE(review): presumably supplies main and drives TEST_RUN over TEST_DATA; confirm in that header.  */