} \
}
+#define DEF_VEC_SAT_U_ADD_FMT_9(WT, T) \
+void __attribute__((noinline)) \
+vec_sat_u_add_##WT##_##T##_fmt_9 (T *out, T *op_1, T *op_2, unsigned limit) \
+{ \
+ unsigned i; \
+ T max = -1; \
+ for (i = 0; i < limit; i++) \
+ { \
+ T x = op_1[i]; \
+ T y = op_2[i]; \
+ WT val = (WT)x + (WT)y; \
+ out[i] = val > max ? max : (T)val; \
+ } \
+}
+#define DEF_VEC_SAT_U_ADD_FMT_9_WRAP(WT, T) DEF_VEC_SAT_U_ADD_FMT_9(WT, T)
+
#define RUN_VEC_SAT_U_ADD_FMT_1(T, out, op_1, op_2, N) \
vec_sat_u_add_##T##_fmt_1(out, op_1, op_2, N)
#define RUN_VEC_SAT_U_ADD_FMT_8(T, out, op_1, op_2, N) \
vec_sat_u_add_##T##_fmt_8(out, op_1, op_2, N)
+#define RUN_VEC_SAT_U_ADD_FMT_9_FROM_U16(T, out, op_1, op_2, N) \
+ vec_sat_u_add_uint16_t_##T##_fmt_9(out, op_1, op_2, N)
+#define RUN_VEC_SAT_U_ADD_FMT_9_FROM_U16_WRAP(T, out, op_1, op_2, N) \
+ RUN_VEC_SAT_U_ADD_FMT_9_FROM_U16(T, out, op_1, op_2, N)
+
+#define RUN_VEC_SAT_U_ADD_FMT_9_FROM_U32(T, out, op_1, op_2, N) \
+ vec_sat_u_add_uint32_t_##T##_fmt_9(out, op_1, op_2, N)
+#define RUN_VEC_SAT_U_ADD_FMT_9_FROM_U32_WRAP(T, out, op_1, op_2, N) \
+ RUN_VEC_SAT_U_ADD_FMT_9_FROM_U32(T, out, op_1, op_2, N)
+
+#define RUN_VEC_SAT_U_ADD_FMT_9_FROM_U64(T, out, op_1, op_2, N) \
+ vec_sat_u_add_uint64_t_##T##_fmt_9(out, op_1, op_2, N)
+#define RUN_VEC_SAT_U_ADD_FMT_9_FROM_U64_WRAP(T, out, op_1, op_2, N) \
+ RUN_VEC_SAT_U_ADD_FMT_9_FROM_U64(T, out, op_1, op_2, N)
+
#define DEF_VEC_SAT_U_ADD_IMM_FMT_1(T, IMM) \
T __attribute__((noinline)) \
vec_sat_u_add_imm##IMM##_##T##_fmt_1 (T *out, T *in, unsigned limit) \
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -ftree-vectorize -fdump-tree-optimized" } */
+
+#include "vec_sat_arith.h"
+
+DEF_VEC_SAT_U_ADD_FMT_9(uint32_t, uint16_t)
+
+/* { dg-final { scan-tree-dump-times ".SAT_ADD " 2 "optimized" } } */
+/* { dg-final { scan-assembler-times {vsaddu\.vv} 1 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -ftree-vectorize -fdump-tree-optimized" } */
+
+#include "vec_sat_arith.h"
+
+DEF_VEC_SAT_U_ADD_FMT_9(uint64_t, uint16_t)
+
+/* { dg-final { scan-tree-dump-times ".SAT_ADD " 2 "optimized" } } */
+/* { dg-final { scan-assembler-times {vsaddu\.vv} 1 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -ftree-vectorize -fdump-tree-optimized" } */
+
+#include "vec_sat_arith.h"
+
+DEF_VEC_SAT_U_ADD_FMT_9(uint64_t, uint32_t)
+
+/* { dg-final { scan-tree-dump-times ".SAT_ADD " 2 "optimized" } } */
+/* { dg-final { scan-assembler-times {vsaddu\.vv} 1 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -ftree-vectorize -fdump-tree-optimized" } */
+
+#include "vec_sat_arith.h"
+
+DEF_VEC_SAT_U_ADD_FMT_9(uint16_t, uint8_t)
+
+/* { dg-final { scan-tree-dump-times ".SAT_ADD " 2 "optimized" } } */
+/* { dg-final { scan-assembler-times {vsaddu\.vv} 1 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -ftree-vectorize -fdump-tree-optimized" } */
+
+#include "vec_sat_arith.h"
+
+DEF_VEC_SAT_U_ADD_FMT_9(uint32_t, uint8_t)
+
+/* { dg-final { scan-tree-dump-times ".SAT_ADD " 2 "optimized" } } */
+/* { dg-final { scan-assembler-times {vsaddu\.vv} 1 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -ftree-vectorize -fdump-tree-optimized" } */
+
+#include "vec_sat_arith.h"
+
+DEF_VEC_SAT_U_ADD_FMT_9(uint64_t, uint8_t)
+
+/* { dg-final { scan-tree-dump-times ".SAT_ADD " 2 "optimized" } } */
+/* { dg-final { scan-assembler-times {vsaddu\.vv} 1 } } */
--- /dev/null
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "vec_sat_arith.h"
+
+#define T uint16_t
+#define WT uint32_t
+#define N 16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_9_FROM_U32_WRAP
+
+DEF_VEC_SAT_U_ADD_FMT_9_WRAP(WT, T)
+
+T test_data[][3][N] = {
+ {
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_0 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_1 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* expect */
+ },
+ {
+ {
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ },
+ {
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ },
+ {
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ },
+ },
+ {
+ {
+ 0, 0, 1, 0,
+ 1, 2, 3, 0,
+ 1, 2, 3, 4,
+ 5, 65534, 65535, 9,
+ },
+ {
+ 0, 1, 1, 65534,
+ 65534, 65534, 65534, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 9,
+ },
+ {
+ 0, 1, 2, 65534,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 18,
+ },
+ },
+};
+
+#include "vec_sat_binary_vvv_run.h"
--- /dev/null
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "vec_sat_arith.h"
+
+#define T uint16_t
+#define WT uint64_t
+#define N 16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_9_FROM_U64_WRAP
+
+DEF_VEC_SAT_U_ADD_FMT_9_WRAP(WT, T)
+
+T test_data[][3][N] = {
+ {
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_0 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_1 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* expect */
+ },
+ {
+ {
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ },
+ {
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ },
+ {
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ },
+ },
+ {
+ {
+ 0, 0, 1, 0,
+ 1, 2, 3, 0,
+ 1, 2, 3, 4,
+ 5, 65534, 65535, 9,
+ },
+ {
+ 0, 1, 1, 65534,
+ 65534, 65534, 65534, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 9,
+ },
+ {
+ 0, 1, 2, 65534,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 65535,
+ 65535, 65535, 65535, 18,
+ },
+ },
+};
+
+#include "vec_sat_binary_vvv_run.h"
--- /dev/null
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "vec_sat_arith.h"
+
+#define T uint32_t
+#define WT uint64_t
+#define N 16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_9_FROM_U64_WRAP
+
+DEF_VEC_SAT_U_ADD_FMT_9_WRAP(WT, T)
+
+T test_data[][3][N] = {
+ {
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_0 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_1 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* expect */
+ },
+ {
+ {
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ },
+ {
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ },
+ {
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ },
+ },
+ {
+ {
+ 0, 0, 1, 0,
+ 1, 2, 3, 0,
+ 1, 2, 3, 4,
+ 5, 4294967294, 4294967295, 9,
+ },
+ {
+ 0, 1, 1, 4294967294,
+ 4294967294, 4294967294, 4294967294, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 9,
+ },
+ {
+ 0, 1, 2, 4294967294,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 4294967295,
+ 4294967295, 4294967295, 4294967295, 18,
+ },
+ },
+};
+
+#include "vec_sat_binary_vvv_run.h"
--- /dev/null
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "vec_sat_arith.h"
+
+#define T uint8_t
+#define WT uint16_t
+#define N 16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_9_FROM_U16_WRAP
+
+DEF_VEC_SAT_U_ADD_FMT_9_WRAP(WT, T)
+
+T test_data[][3][N] = {
+ {
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_0 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_1 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* expect */
+ },
+ {
+ {
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ },
+ {
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ },
+ {
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ },
+ },
+ {
+ {
+ 0, 0, 1, 0,
+ 1, 2, 3, 0,
+ 1, 2, 3, 4,
+ 5, 254, 255, 9,
+ },
+ {
+ 0, 1, 1, 254,
+ 254, 254, 254, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 9,
+ },
+ {
+ 0, 1, 2, 254,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 18,
+ },
+ },
+};
+
+#include "vec_sat_binary_vvv_run.h"
--- /dev/null
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "vec_sat_arith.h"
+
+#define T uint8_t
+#define WT uint32_t
+#define N 16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_9_FROM_U32_WRAP
+
+DEF_VEC_SAT_U_ADD_FMT_9_WRAP(WT, T)
+
+T test_data[][3][N] = {
+ {
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_0 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_1 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* expect */
+ },
+ {
+ {
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ },
+ {
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ },
+ {
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ },
+ },
+ {
+ {
+ 0, 0, 1, 0,
+ 1, 2, 3, 0,
+ 1, 2, 3, 4,
+ 5, 254, 255, 9,
+ },
+ {
+ 0, 1, 1, 254,
+ 254, 254, 254, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 9,
+ },
+ {
+ 0, 1, 2, 254,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 18,
+ },
+ },
+};
+
+#include "vec_sat_binary_vvv_run.h"
--- /dev/null
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "vec_sat_arith.h"
+
+#define T uint8_t
+#define WT uint16_t
+#define N 16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_9_FROM_U16_WRAP
+
+DEF_VEC_SAT_U_ADD_FMT_9_WRAP(WT, T)
+
+T test_data[][3][N] = {
+ {
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_0 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* arg_1 */
+ {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ }, /* expect */
+ },
+ {
+ {
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ },
+ {
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ },
+ {
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ },
+ },
+ {
+ {
+ 0, 0, 1, 0,
+ 1, 2, 3, 0,
+ 1, 2, 3, 4,
+ 5, 254, 255, 9,
+ },
+ {
+ 0, 1, 1, 254,
+ 254, 254, 254, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 9,
+ },
+ {
+ 0, 1, 2, 254,
+ 255, 255, 255, 255,
+ 255, 255, 255, 255,
+ 255, 255, 255, 18,
+ },
+ },
+};
+
+#include "vec_sat_binary_vvv_run.h"