return COSTS_N_INSNS (2);
}
-/* Return the cost of the vector binary rtx like add, minus, mult.
- The cost of scalar2vr_cost will be appended if there one of the
- op comes from the VEC_DUPLICATE. */
-
-static int
-get_vector_binary_rtx_cost (rtx x, int scalar2vr_cost)
-{
- gcc_assert (riscv_v_ext_mode_p (GET_MODE (x)));
-
- rtx neg;
- rtx op_0;
- rtx op_1;
-
- if (GET_CODE (x) == UNSPEC)
- {
- op_0 = XVECEXP (x, 0, 0);
- op_1 = XVECEXP (x, 0, 1);
- }
- else
- {
- op_0 = XEXP (x, 0);
- op_1 = XEXP (x, 1);
- }
-
- if (GET_CODE (op_0) == VEC_DUPLICATE
- || GET_CODE (op_1) == VEC_DUPLICATE)
- return (scalar2vr_cost + 1) * COSTS_N_INSNS (1);
- else if (GET_CODE (neg = op_0) == NEG
- && (GET_CODE (op_1) == VEC_DUPLICATE
- || GET_CODE (XEXP (neg, 0)) == VEC_DUPLICATE))
- return (scalar2vr_cost + 1) * COSTS_N_INSNS (1);
- else
- return COSTS_N_INSNS (1);
-}
-
/* Implement TARGET_RTX_COSTS. */
#define SINGLE_SHIFT_COST 1
{
case SET:
{
- switch (GET_CODE (x))
+ if (GET_CODE (x) == VEC_DUPLICATE)
+ *total = (scalar2vr_cost + 1) * COSTS_N_INSNS (1);
+ else
{
- case VEC_DUPLICATE:
- *total = gr2vr_cost * COSTS_N_INSNS (1);
- break;
- case IF_THEN_ELSE:
- {
- rtx op = XEXP (x, 1);
+ int vec_dup_count = 0;
+ subrtx_var_iterator::array_type array;
- switch (GET_CODE (op))
- {
- case DIV:
- case UDIV:
- case MOD:
- case UMOD:
- case US_PLUS:
- case US_MINUS:
- case SS_PLUS:
- case SS_MINUS:
- *total = get_vector_binary_rtx_cost (op, scalar2vr_cost);
- break;
- case UNSPEC:
- {
- switch (XINT (op, 1))
- {
- case UNSPEC_VAADDU:
- case UNSPEC_VAADD:
- *total
- = get_vector_binary_rtx_cost (op, scalar2vr_cost);
- break;
- default:
- *total = COSTS_N_INSNS (1);
- break;
- }
- }
- break;
- default:
- *total = COSTS_N_INSNS (1);
- break;
- }
- }
- break;
- case PLUS:
- case MINUS:
- case AND:
- case IOR:
- case XOR:
- case MULT:
- case SMAX:
- case UMAX:
- case SMIN:
- case UMIN:
- {
- rtx op;
- rtx op_0 = XEXP (x, 0);
- rtx op_1 = XEXP (x, 1);
+ FOR_EACH_SUBRTX_VAR (iter, array, x, ALL)
+ if (GET_CODE (*iter) == VEC_DUPLICATE)
+ vec_dup_count++;
- if (GET_CODE (op = op_0) == MULT
- || GET_CODE (op = op_1) == MULT)
- *total = get_vector_binary_rtx_cost (op, scalar2vr_cost);
- else
- *total = get_vector_binary_rtx_cost (x, scalar2vr_cost);
- }
- break;
- default:
- *total = COSTS_N_INSNS (1);
- break;
+ int total_vec_dup_cost = vec_dup_count * scalar2vr_cost;
+
+ *total = COSTS_N_INSNS (1) * (total_vec_dup_cost + 1);
}
}
break;
/* { dg-final { scan-assembler-times {\tvadd\.vv} 16 } } */
/* { dg-final { scan-assembler-times {\tvadd\.vi} 8 } } */
-/* { dg-final { scan-assembler-times {\tvfadd\.vv} 3 } } */
-/* { dg-final { scan-assembler-times {\tvfadd\.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvfadd\.vv} 9 } } */
+/* { dg-final { scan-assembler-not {\tvfadd\.vf} } } */
/* { dg-final { scan-tree-dump-times "\.COND_LEN_ADD" 9 "optimized" } } */
/* { dg-final { scan-assembler-times {\tvadd\.vv} 16 } } */
/* { dg-final { scan-assembler-times {\tvadd\.vi} 8 } } */
-/* { dg-final { scan-assembler-times {\tvfadd\.vv} 3 } } */
-/* { dg-final { scan-assembler-times {\tvfadd\.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvfadd\.vv} 9 } } */
+/* { dg-final { scan-assembler-not {\tvfadd\.vf} } } */
/* { dg-final { scan-tree-dump-times "\.COND_LEN_ADD" 9 "optimized" } } */
#include "vmul-template.h"
/* { dg-final { scan-assembler-times {\tvmul\.vv} 16 } } */
-/* { dg-final { scan-assembler-times {\tvfmul\.vv} 3 } } */
-/* { dg-final { scan-assembler-times {\tvfmul\.vf} 3 } } */
+/* { dg-final { scan-assembler-times {\tvfmul\.vv} 6 } } */
+/* { dg-final { scan-assembler-not {\tvfmul\.vf} } } */
/* { dg-final { scan-tree-dump-times "\.COND_LEN_MUL" 6 "optimized" } } */
#include "vmul-template.h"
/* { dg-final { scan-assembler-times {\tvmul\.vv} 16 } } */
-/* { dg-final { scan-assembler-times {\tvfmul\.vv} 3 } } */
-/* { dg-final { scan-assembler-times {\tvfmul\.vf} 3 } } */
+/* { dg-final { scan-assembler-times {\tvfmul\.vv} 6 } } */
+/* { dg-final { scan-assembler-not {\tvfmul\.vf} } } */
/* { dg-final { scan-tree-dump-times "\.COND_LEN_MUL" 6 "optimized" } } */
/* { dg-final { scan-assembler-times {\tvsub\.vv} 16 } } */
/* { dg-final { scan-assembler-times {\tvrsub\.vi} 16 } } */
-/* { dg-final { scan-assembler-times {\tvfsub\.vv} 3 } } */
-/* { dg-final { scan-assembler-times {\tvfsub\.vf} 3 } } */
-/* { dg-final { scan-assembler-times {\tvfrsub\.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvfsub\.vv} 12 } } */
+/* { dg-final { scan-assembler-not {\tvfsub\.vf} } } */
+/* { dg-final { scan-assembler-not {\tvfrsub\.vf} } } */
/* { dg-final { scan-tree-dump-times "\.COND_LEN_SUB" 12 "optimized" } } */
/* Do not expect vfrsub for now, because we do not properly
/* { dg-final { scan-assembler-times {\tvsub\.vv} 16 } } */
/* { dg-final { scan-assembler-times {\tvrsub\.vi} 16 } } */
-/* { dg-final { scan-assembler-times {\tvfsub\.vv} 3 } } */
-/* { dg-final { scan-assembler-times {\tvfsub\.vf} 3 } } */
-/* { dg-final { scan-assembler-times {\tvfrsub\.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvfsub\.vv} 12 } } */
+/* { dg-final { scan-assembler-not {\tvfsub\.vf} } } */
+/* { dg-final { scan-assembler-not {\tvfrsub\.vf} } } */
/* { dg-final { scan-tree-dump-times "\.COND_LEN_SUB" 12 "optimized" } } */
/* Do not expect vfrsub for now, because we do not properly
#include "cond_copysign-template.h"
-/* { dg-final { scan-assembler-times {\tvfsgnj\.vv} 3 } } */
-/* { dg-final { scan-assembler-times {\tvfsgnj\.vf} 3 } } */
+/* { dg-final { scan-assembler-times {\tvfsgnj\.vv} 6 } } */
+/* { dg-final { scan-assembler-not {\tvfsgnj\.vf} } } */
/* 1. The vectorizer wraps scalar variants of copysign into vector constants which
expand cannot handle currently.
2. match.pd convert .COPYSIGN (1, b) + COND_MUL to AND + XOR currently. */
/* { dg-final { scan-assembler-times {\tvfsgnjx\.vv} 6 { xfail riscv*-*-* } } } */
-/* { dg-final { scan-assembler-times {\tvfsgnjn\.vv} 3 } } */
-/* { dg-final { scan-assembler-times {\tvfsgnjn\.vf} 3 } } */
+/* { dg-final { scan-assembler-times {\tvfsgnjn\.vv} 6 } } */
+/* { dg-final { scan-assembler-not {\tvfsgnjn\.vf} } } */
/* { dg-final { scan-assembler-not {\tvmerge\.vvm} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
#include "cond_copysign-template.h"
-/* { dg-final { scan-assembler-times {\tvfsgnj\.vv} 3 } } */
-/* { dg-final { scan-assembler-times {\tvfsgnj\.vf} 3 } } */
+/* { dg-final { scan-assembler-times {\tvfsgnj\.vv} 6 } } */
+/* { dg-final { scan-assembler-not {\tvfsgnj\.vf} } } */
/* 1. The vectorizer wraps scalar variants of copysign into vector constants which
expand cannot handle currently.
2. match.pd convert .COPYSIGN (1, b) + COND_MUL to AND + XOR currently. */
/* { dg-final { scan-assembler-times {\tvfsgnjx\.vv} 6 { xfail riscv*-*-* } } } */
-/* { dg-final { scan-assembler-times {\tvfsgnjn\.vv} 3 } } */
-/* { dg-final { scan-assembler-times {\tvfsgnjn\.vf} 3 } } */
+/* { dg-final { scan-assembler-times {\tvfsgnjn\.vv} 6 } } */
+/* { dg-final { scan-assembler-not {\tvfsgnjn\.vf} } } */
/* { dg-final { scan-assembler-not {\tvmerge\.vvm} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
TEST_ALL (DEF_LOOP)
-/* { dg-final { scan-assembler-times {vfadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 0 } } */
-/* { dg-final { scan-assembler-times {vfadd\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} 18 } } */
+/* { dg-final { scan-assembler-times {vfadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 18 } } */
+/* { dg-final { scan-assembler-not {vfadd\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
TEST_ALL (DEF_LOOP)
-/* { dg-final { scan-assembler-times {vfadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 0 } } */
-/* { dg-final { scan-assembler-times {vfadd\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} 12 } } */
+/* { dg-final { scan-assembler-times {vfadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 12 } } */
+/* { dg-final { scan-assembler-not {vfadd\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
TEST_ALL (DEF_LOOP)
-/* { dg-final { scan-assembler-times {vfadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 0 } } */
-/* { dg-final { scan-assembler-times {vfadd\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} 18 } } */
+/* { dg-final { scan-assembler-times {vfadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 18 } } */
+/* { dg-final { scan-assembler-not {vfadd\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
TEST_ALL (DEF_LOOP)
-/* { dg-final { scan-assembler-times {vfadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 0 } } */
-/* { dg-final { scan-assembler-times {vfadd\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} 18 } } */
+/* { dg-final { scan-assembler-times {vfadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 18 } } */
+/* { dg-final { scan-assembler-not {vfadd\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
TEST_ALL (DEF_LOOP)
-/* { dg-final { scan-assembler-times {vmadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 1 } } */
-/* { dg-final { scan-assembler-times {vmadd\.vx\s+v[0-9]+,[a-x][0-9]+,v[0-9]+,v0.t} 3 } } */
-/* { dg-final { scan-assembler-times {vnmsub\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 1 } } */
-/* { dg-final { scan-assembler-times {vnmsub\.vx\s+v[0-9]+,[a-x][0-9]+,v[0-9]+,v0.t} 3 } } */
-/* { dg-final { scan-assembler-times {vfmadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 0 } } */
-/* { dg-final { scan-assembler-times {vfmadd\.vf\s+v[0-9]+,fa[0-9],v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-times {vmadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 4 } } */
+/* { dg-final { scan-assembler-not {vmadd\.vx\s+v[0-9]+,[a-x][0-9]+,v[0-9]+,v0.t} } } */
+/* { dg-final { scan-assembler-times {vnmsub\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 4 } } */
+/* { dg-final { scan-assembler-not {vnmsub\.vx\s+v[0-9]+,[a-x][0-9]+,v[0-9]+,v0.t} } } */
+/* { dg-final { scan-assembler-times {vfmadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-not {vfmadd\.vf\s+v[0-9]+,fa[0-9],v[0-9]+,v0.t} } } */
/* { dg-final { scan-assembler-times {vfnmsub\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
TEST_ALL (DEF_LOOP)
-/* { dg-final { scan-assembler-times {vmacc\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 1 } } */
-/* { dg-final { scan-assembler-times {vmacc\.vx\s+v[0-9]+,[a-x][0-9]+,v[0-9]+,v0.t} 3 } } */
-/* { dg-final { scan-assembler-times {vnmsac\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 1 } } */
-/* { dg-final { scan-assembler-times {vnmsac\.vx\s+v[0-9]+,[a-x][0-9]+,v[0-9]+,v0.t} 3 } } */
-/* { dg-final { scan-assembler-times {vfmacc\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 0 } } */
-/* { dg-final { scan-assembler-times {vfmacc\.vf\s+v[0-9]+,fa[0-9],v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-times {vmacc\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 4 } } */
+/* { dg-final { scan-assembler-not {vmacc\.vx\s+v[0-9]+,[a-x][0-9]+,v[0-9]+,v0.t} } } */
+/* { dg-final { scan-assembler-times {vnmsac\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 4 } } */
+/* { dg-final { scan-assembler-not {vnmsac\.vx\s+v[0-9]+,[a-x][0-9]+,v[0-9]+,v0.t} } } */
+/* { dg-final { scan-assembler-times {vfmacc\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-not {vfmacc\.vf\s+v[0-9]+,fa[0-9],v[0-9]+,v0.t} } } */
/* { dg-final { scan-assembler-times {vfnmsac\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
TEST_ALL (DEF_LOOP)
-/* { dg-final { scan-assembler-times {vmacc\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 1 } } */
-/* { dg-final { scan-assembler-times {vmacc\.vx\s+v[0-9]+,[a-x][0-9]+,v[0-9]+,v0.t} 3 } } */
-/* { dg-final { scan-assembler-times {vnmsac\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 1 } } */
-/* { dg-final { scan-assembler-times {vnmsac\.vx\s+v[0-9]+,[a-x][0-9]+,v[0-9]+,v0.t} 3 } } */
-/* { dg-final { scan-assembler-times {vfmacc\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 0 } } */
-/* { dg-final { scan-assembler-times {vfmacc\.vf\s+v[0-9]+,fa[0-9],v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-times {vmacc\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 4 } } */
+/* { dg-final { scan-assembler-not {vmacc\.vx\s+v[0-9]+,[a-x][0-9]+,v[0-9]+,v0.t} } } */
+/* { dg-final { scan-assembler-times {vnmsac\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 4 } } */
+/* { dg-final { scan-assembler-not {vnmsac\.vx\s+v[0-9]+,[a-x][0-9]+,v[0-9]+,v0.t} } } */
+/* { dg-final { scan-assembler-times {vfmacc\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-not {vfmacc\.vf\s+v[0-9]+,fa[0-9],v[0-9]+,v0.t} } } */
/* { dg-final { scan-assembler-times {vfnmsac\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 } } */
/* { dg-final { scan-assembler-times {\tvf?merge\.v[vxi]m\t} 14 } } */
TEST_ALL (DEF_LOOP)
-/* { dg-final { scan-assembler-times {vmacc\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 1 } } */
-/* { dg-final { scan-assembler-times {vmacc\.vx\s+v[0-9]+,[a-x][0-9]+,v[0-9]+,v0.t} 3 } } */
-/* { dg-final { scan-assembler-times {vnmsac\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 1 } } */
-/* { dg-final { scan-assembler-times {vnmsac\.vx\s+v[0-9]+,[a-x][0-9]+,v[0-9]+,v0.t} 3 } } */
-/* { dg-final { scan-assembler-times {vfmacc\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 0 } } */
-/* { dg-final { scan-assembler-times {vfmacc\.vf\s+v[0-9]+,fa[0-9],v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-times {vmacc\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 4 } } */
+/* { dg-final { scan-assembler-not {vmacc\.vx\s+v[0-9]+,[a-x][0-9]+,v[0-9]+,v0.t} } } */
+/* { dg-final { scan-assembler-times {vnmsac\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 4 } } */
+/* { dg-final { scan-assembler-not {vnmsac\.vx\s+v[0-9]+,[a-x][0-9]+,v[0-9]+,v0.t} } } */
+/* { dg-final { scan-assembler-times {vfmacc\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-not {vfmacc\.vf\s+v[0-9]+,fa[0-9],v[0-9]+,v0.t} } } */
/* { dg-final { scan-assembler-times {vfnmsac\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 } } */
/* NOTE: 14 vmerge is need for other purpose. */
/* { dg-final { scan-assembler-times {\tvf?merge\.v[vxi]m\t} 14 } } */
TEST_ALL (DEF_LOOP)
-/* { dg-final { scan-assembler-times {vmacc\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 1 } } */
-/* { dg-final { scan-assembler-times {vmacc\.vx\s+v[0-9]+,[a-x][0-9]+,v[0-9]+,v0.t} 3 } } */
-/* { dg-final { scan-assembler-times {vnmsac\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 1 } } */
-/* { dg-final { scan-assembler-times {vnmsac\.vx\s+v[0-9]+,[a-x][0-9]+,v[0-9]+,v0.t} 3 } } */
-/* { dg-final { scan-assembler-times {vfmacc\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 0 } } */
-/* { dg-final { scan-assembler-times {vfmacc\.vf\s+v[0-9]+,fa[0-9],v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-times {vmacc\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 4 } } */
+/* { dg-final { scan-assembler-not {vmacc\.vx\s+v[0-9]+,[a-x][0-9]+,v[0-9]+,v0.t} } } */
+/* { dg-final { scan-assembler-times {vnmsac\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 4 } } */
+/* { dg-final { scan-assembler-not {vnmsac\.vx\s+v[0-9]+,[a-x][0-9]+,v[0-9]+,v0.t} } } */
+/* { dg-final { scan-assembler-times {vfmacc\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-not {vfmacc\.vf\s+v[0-9]+,fa[0-9],v[0-9]+,v0.t} } } */
/* { dg-final { scan-assembler-times {vfnmsac\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 } } */
/* NOTE: 14 vmerge is need for other purpose. */
/* { dg-final { scan-assembler-times {\tvf?merge\.v[vxi]m\t} 14 } } */
TEST_ALL (DEF_LOOP)
-/* { dg-final { scan-assembler-times {vfmax\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 2 } } */
-/* { dg-final { scan-assembler-times {vfmax\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} 4 } } */
+/* { dg-final { scan-assembler-times {vfmax\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 6 } } */
+/* { dg-final { scan-assembler-not {vfmax\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
TEST_ALL (DEF_LOOP)
-/* { dg-final { scan-assembler-times {vfmax\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 2 } } */
-/* { dg-final { scan-assembler-times {vfmax\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} 4 } } */
+/* { dg-final { scan-assembler-times {vfmax\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 6 } } */
+/* { dg-final { scan-assembler-not {vfmax\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
TEST_ALL (DEF_LOOP)
-/* { dg-final { scan-assembler-times {vfmax\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 2 } } */
-/* { dg-final { scan-assembler-times {vfmax\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} 4 } } */
+/* { dg-final { scan-assembler-times {vfmax\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 6 } } */
+/* { dg-final { scan-assembler-not {vfmax\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
TEST_ALL (DEF_LOOP)
-/* { dg-final { scan-assembler-times {vfmax\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 2 } } */
-/* { dg-final { scan-assembler-times {vfmax\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} 4 } } */
+/* { dg-final { scan-assembler-times {vfmax\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 6 } } */
+/* { dg-final { scan-assembler-not {vfmax\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
TEST_ALL (DEF_LOOP)
-/* { dg-final { scan-assembler-times {vfmax\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 1 } } */
-/* { dg-final { scan-assembler-times {vfmax\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} 2 } } */
+/* { dg-final { scan-assembler-times {vfmax\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-not {vfmax\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
TEST_ALL (DEF_LOOP)
-/* { dg-final { scan-assembler-times {vfmax\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 1 } } */
-/* { dg-final { scan-assembler-times {vfmax\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} 2 } } */
+/* { dg-final { scan-assembler-times {vfmax\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-not {vfmax\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
TEST_ALL (DEF_LOOP)
-/* { dg-final { scan-assembler-times {vfmax\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 1 } } */
-/* { dg-final { scan-assembler-times {vfmax\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} 2 } } */
+/* { dg-final { scan-assembler-times {vfmax\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-not {vfmax\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
TEST_ALL (DEF_LOOP)
-/* { dg-final { scan-assembler-times {vfmax\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 1 } } */
-/* { dg-final { scan-assembler-times {vfmax\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} 2 } } */
+/* { dg-final { scan-assembler-times {vfmax\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-not {vfmax\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
#define FN(X) __builtin_fmin##X
#include "cond_fmax-1.c"
-/* { dg-final { scan-assembler-times {vfmin\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 2 } } */
-/* { dg-final { scan-assembler-times {vfmin\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} 4 } } */
+/* { dg-final { scan-assembler-times {vfmin\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 6 } } */
+/* { dg-final { scan-assembler-not {vfmin\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
#define FN(X) __builtin_fmin##X
#include "cond_fmax-2.c"
-/* { dg-final { scan-assembler-times {vfmin\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 2 } } */
-/* { dg-final { scan-assembler-times {vfmin\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} 4 } } */
+/* { dg-final { scan-assembler-times {vfmin\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 6 } } */
+/* { dg-final { scan-assembler-not {vfmin\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
#define FN(X) __builtin_fmin##X
#include "cond_fmax-3.c"
-/* { dg-final { scan-assembler-times {vfmin\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 2 } } */
-/* { dg-final { scan-assembler-times {vfmin\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} 4 } } */
+/* { dg-final { scan-assembler-times {vfmin\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 6 } } */
+/* { dg-final { scan-assembler-not {vfmin\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
#define FN(X) __builtin_fmin##X
#include "cond_fmax-4.c"
-/* { dg-final { scan-assembler-times {vfmin\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 2 } } */
-/* { dg-final { scan-assembler-times {vfmin\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} 4 } } */
+/* { dg-final { scan-assembler-times {vfmin\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 6 } } */
+/* { dg-final { scan-assembler-not {vfmin\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
#define FN(X) __builtin_fmin##X
#include "cond_fmax_zvfh-1.c"
-/* { dg-final { scan-assembler-times {vfmin\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 1 } } */
-/* { dg-final { scan-assembler-times {vfmin\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} 2 } } */
+/* { dg-final { scan-assembler-times {vfmin\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-not {vfmin\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
#define FN(X) __builtin_fmin##X
#include "cond_fmax_zvfh-2.c"
-/* { dg-final { scan-assembler-times {vfmin\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 1 } } */
-/* { dg-final { scan-assembler-times {vfmin\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} 2 } } */
+/* { dg-final { scan-assembler-times {vfmin\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-not {vfmin\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
#define FN(X) __builtin_fmin##X
#include "cond_fmax_zvfh-3.c"
-/* { dg-final { scan-assembler-times {vfmin\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 1 } } */
-/* { dg-final { scan-assembler-times {vfmin\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} 2 } } */
+/* { dg-final { scan-assembler-times {vfmin\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-not {vfmin\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
#define FN(X) __builtin_fmin##X
#include "cond_fmax_zvfh-4.c"
-/* { dg-final { scan-assembler-times {vfmin\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 1 } } */
-/* { dg-final { scan-assembler-times {vfmin\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} 2 } } */
+/* { dg-final { scan-assembler-times {vfmin\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-not {vfmin\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
TEST_ALL (DEF_LOOP)
/* { dg-final { scan-assembler-times {vfnmacc\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 { xfail riscv*-*-* } } } */
-/* { dg-final { scan-assembler-times {vfmsub\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 0 } } */
-/* { dg-final { scan-assembler-times {vfmsub\.vf\s+v[0-9]+,fa[0-9],v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-times {vfmsub\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-not {vfmsub\.vf\s+v[0-9]+,fa[0-9],v[0-9]+,v0.t} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
TEST_ALL (DEF_LOOP)
/* { dg-final { scan-assembler-times {vfnmacc\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 { xfail riscv*-*-* } } } */
-/* { dg-final { scan-assembler-times {vfmsac\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 0 } } */
-/* { dg-final { scan-assembler-times {vfmsac\.vf\s+v[0-9]+,fa[0-9],v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-times {vfmsac\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-not {vfmsac\.vf\s+v[0-9]+,fa[0-9],v[0-9]+,v0.t} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
TEST_ALL (DEF_LOOP)
/* { dg-final { scan-assembler-times {vfnmacc\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 { xfail riscv*-*-* } } } */
-/* { dg-final { scan-assembler-times {vfmsac\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 0 } } */
-/* { dg-final { scan-assembler-times {vfmsac\.vf\s+v[0-9]+,fa[0-9],v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-times {vfmsac\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-not {vfmsac\.vf\s+v[0-9]+,fa[0-9],v[0-9]+,v0.t} } } */
/* NOTE: 3 vmerge is need for other purpose. */
/* { dg-final { scan-assembler-times {\tvf?merge\.v[vxi]m\t} 3 } } */
TEST_ALL (DEF_LOOP)
/* { dg-final { scan-assembler-times {vfnmacc\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 { xfail riscv*-*-* } } } */
-/* { dg-final { scan-assembler-times {vfmsac\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 0 } } */
-/* { dg-final { scan-assembler-times {vfmsac\.vf\s+v[0-9]+,fa[0-9],v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-times {vfmsac\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-not {vfmsac\.vf\s+v[0-9]+,fa[0-9],v[0-9]+,v0.t} } } */
/* NOTE: 3 vmerge is need for other purpose. */
/* { dg-final { scan-assembler-times {\tvf?merge\.v[vxi]m\t} 3 } } */
TEST_ALL (DEF_LOOP)
/* { dg-final { scan-assembler-times {vfnmacc\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 { xfail riscv*-*-* } } } */
-/* { dg-final { scan-assembler-times {vfmsac\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 0 } } */
-/* { dg-final { scan-assembler-times {vfmsac\.vf\s+v[0-9]+,fa[0-9],v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-times {vfmsac\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 3 } } */
+/* { dg-final { scan-assembler-not {vfmsac\.vf\s+v[0-9]+,fa[0-9],v[0-9]+,v0.t} } } */
/* NOTE: 3 vmerge is need for other purpose. */
/* { dg-final { scan-assembler-times {\tvf?merge\.v[vxi]m\t} 3 } } */
TEST_ALL (DEF_LOOP)
-/* { dg-final { scan-assembler-times {vfmul\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 0 } } */
-/* { dg-final { scan-assembler-times {vfmul\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} 9 } } */
+/* { dg-final { scan-assembler-times {vfmul\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 9 } } */
+/* { dg-final { scan-assembler-not {vfmul\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
TEST_ALL (DEF_LOOP)
-/* { dg-final { scan-assembler-times {vfmul\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 0 } } */
-/* { dg-final { scan-assembler-times {vfmul\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} 6 } } */
+/* { dg-final { scan-assembler-times {vfmul\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 6 } } */
+/* { dg-final { scan-assembler-not {vfmul\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
TEST_ALL (DEF_LOOP)
-/* { dg-final { scan-assembler-times {vfmul\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 0 } } */
-/* { dg-final { scan-assembler-times {vfmul\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} 9 } } */
+/* { dg-final { scan-assembler-times {vfmul\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 9 } } */
+/* { dg-final { scan-assembler-not {vfmul\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
TEST_ALL (DEF_LOOP)
-/* { dg-final { scan-assembler-times {vfmul\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 0 } } */
-/* { dg-final { scan-assembler-times {vfmul\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} 9 } } */
+/* { dg-final { scan-assembler-times {vfmul\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 9 } } */
+/* { dg-final { scan-assembler-not {vfmul\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
TEST_ALL (DEF_LOOP)
-/* { dg-final { scan-assembler-times {vfmul\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 0 } } */
-/* { dg-final { scan-assembler-times {vfmul\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} 9 } } */
+/* { dg-final { scan-assembler-times {vfmul\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0.t} 9 } } */
+/* { dg-final { scan-assembler-not {vfmul\.vf\s+v[0-9]+,v[0-9]+,fa[0-9],v0.t} } } */
/* { dg-final { scan-assembler-not {\tvf?merge\.v[vxi]m\t} } } */
#include "vf_mulop.h"
-DEF_VF_MULOP_CASE_1 (_Float16, +, +, add, VF_MULOP_BODY_X16)
-DEF_VF_MULOP_CASE_1 (_Float16, -, +, sub, VF_MULOP_BODY_X16)
-DEF_VF_MULOP_CASE_1 (_Float16, +, -, nadd, VF_MULOP_BODY_X16)
-DEF_VF_MULOP_CASE_1 (_Float16, -, -, nsub, VF_MULOP_BODY_X16)
+DEF_VF_MULOP_CASE_1 (_Float16, +, +, add, VF_MULOP_BODY_X128)
+DEF_VF_MULOP_CASE_1 (_Float16, -, +, sub, VF_MULOP_BODY_X128)
+DEF_VF_MULOP_CASE_1 (_Float16, +, -, nadd, VF_MULOP_BODY_X128)
+DEF_VF_MULOP_CASE_1 (_Float16, -, -, nsub, VF_MULOP_BODY_X128)
DEF_VF_MULOP_ACC_CASE_1 (_Float16, +, +, acc, VF_MULOP_ACC_BODY_X128)
DEF_VF_MULOP_ACC_CASE_1 (_Float16, -, +, sac, VF_MULOP_ACC_BODY_X128)
DEF_VF_MULOP_ACC_CASE_1 (_Float16, +, -, nacc, VF_MULOP_ACC_BODY_X128)
#include "vf_mulop.h"
-DEF_VF_MULOP_CASE_1 (float, +, +, add, VF_MULOP_BODY_X16)
-DEF_VF_MULOP_CASE_1 (float, -, +, sub, VF_MULOP_BODY_X16)
-DEF_VF_MULOP_CASE_1 (float, +, -, nadd, VF_MULOP_BODY_X16)
-DEF_VF_MULOP_CASE_1 (float, -, -, nsub, VF_MULOP_BODY_X16)
+DEF_VF_MULOP_CASE_1 (float, +, +, add, VF_MULOP_BODY_X128)
+DEF_VF_MULOP_CASE_1 (float, -, +, sub, VF_MULOP_BODY_X128)
+DEF_VF_MULOP_CASE_1 (float, +, -, nadd, VF_MULOP_BODY_X128)
+DEF_VF_MULOP_CASE_1 (float, -, -, nsub, VF_MULOP_BODY_X128)
DEF_VF_MULOP_ACC_CASE_1 (float, +, +, acc, VF_MULOP_ACC_BODY_X128)
DEF_VF_MULOP_ACC_CASE_1 (float, -, +, sac, VF_MULOP_ACC_BODY_X128)
DEF_VF_MULOP_ACC_CASE_1 (float, +, -, nacc, VF_MULOP_ACC_BODY_X128)
#include "vf_mulop.h"
-DEF_VF_MULOP_CASE_1 (double, +, +, add, VF_MULOP_BODY_X16)
-DEF_VF_MULOP_CASE_1 (double, -, +, sub, VF_MULOP_BODY_X16)
-DEF_VF_MULOP_CASE_1 (double, +, -, nadd, VF_MULOP_BODY_X16)
-DEF_VF_MULOP_CASE_1 (double, -, -, nsub, VF_MULOP_BODY_X16)
+DEF_VF_MULOP_CASE_1 (double, +, +, add, VF_MULOP_BODY_X128)
+DEF_VF_MULOP_CASE_1 (double, -, +, sub, VF_MULOP_BODY_X128)
+DEF_VF_MULOP_CASE_1 (double, +, -, nadd, VF_MULOP_BODY_X128)
+DEF_VF_MULOP_CASE_1 (double, -, -, nsub, VF_MULOP_BODY_X128)
DEF_VF_MULOP_ACC_CASE_1 (double, +, +, acc, VF_MULOP_ACC_BODY_X128)
DEF_VF_MULOP_ACC_CASE_1 (double, -, +, sac, VF_MULOP_ACC_BODY_X128)
DEF_VF_MULOP_ACC_CASE_1 (double, +, -, nacc, VF_MULOP_ACC_BODY_X128)
DEF_VX_BINARY_CASE_1_WRAP(T, |, or, VX_BINARY_BODY_X8)
DEF_VX_BINARY_CASE_1_WRAP(T, ^, xor, VX_BINARY_BODY_X8)
DEF_VX_BINARY_CASE_1_WRAP(T, *, mul, VX_BINARY_BODY_X8)
-DEF_VX_BINARY_CASE_1_WRAP(T, /, div, VX_BINARY_BODY_X8)
-DEF_VX_BINARY_CASE_1_WRAP(T, %, rem, VX_BINARY_BODY_X8)
+DEF_VX_BINARY_CASE_1_WRAP(T, /, div, VX_BINARY_BODY_X4)
+DEF_VX_BINARY_CASE_1_WRAP(T, %, rem, VX_BINARY_BODY_X4)
DEF_VX_BINARY_CASE_3_WRAP(T, MAX_FUNC_0_WARP(T), max, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, MAX_FUNC_1_WARP(T), max, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_0_WARP(T), min, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_1_WRAP(T, ^, xor, VX_BINARY_BODY_X16)
DEF_VX_BINARY_CASE_1_WRAP(T, *, mul, VX_BINARY_BODY_X16)
DEF_VX_BINARY_CASE_1_WRAP(T, /, div, VX_BINARY_BODY_X8)
-DEF_VX_BINARY_CASE_1_WRAP(T, %, rem, VX_BINARY_BODY_X8)
+DEF_VX_BINARY_CASE_1_WRAP(T, %, rem, VX_BINARY_BODY_X4)
DEF_VX_BINARY_CASE_3_WRAP(T, MAX_FUNC_0_WARP(T), max, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, MAX_FUNC_1_WARP(T), max, VX_BINARY_FUNC_BODY_X8)
DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_0_WARP(T), min, VX_BINARY_FUNC_BODY_X8)