|| is_ptrue (pg, f.type_suffix (0).element_bytes)))
return gimple_build_assign (f.lhs, build_zero_cst (TREE_TYPE (f.lhs)));
- return NULL;
+ /* If one of the operands is a uniform power of 2, fold to a left shift
+ by immediate. */
+ tree op1_cst = uniform_integer_cst_p (op1);
+ tree op2_cst = uniform_integer_cst_p (op2);
+ tree shift_op1, shift_op2;
+ if (op1_cst && integer_pow2p (op1_cst)
+ && (f.pred != PRED_m
+ || is_ptrue (pg, f.type_suffix (0).element_bytes)))
+ {
+ shift_op1 = op2;
+ shift_op2 = op1_cst;
+ }
+ else if (op2_cst && integer_pow2p (op2_cst))
+ {
+ shift_op1 = op1;
+ shift_op2 = op2_cst;
+ }
+ else
+ return NULL;
+
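+ /* Folding a multiplication by 1 into LSL #0 would be no improvement,
+ so leave it alone. */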
+ if (integer_onep (shift_op2))
+ return NULL;
+
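+ /* svlsl takes an unsigned shift amount, so convert the log2 of the
+ multiplier to the corresponding unsigned element type. */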
+ shift_op2 = wide_int_to_tree (unsigned_type_for (TREE_TYPE (shift_op2)),
+ tree_log2 (shift_op2));
+ function_instance instance ("svlsl", functions::svlsl,
+ shapes::binary_uint_opt_n, MODE_n,
+ f.type_suffix_ids, GROUP_none, f.pred);
+ gcall *call = f.redirect_call (instance);
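+ /* Only the shifted operand and the shift amount change; the governing
+ predicate in argument 0 is retained from the original call. */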
+ gimple_call_set_arg (call, 1, shift_op1);
+ gimple_call_set_arg (call, 2, shift_op2);
+ return call;
}
};
#include "test_sve_acle.h"
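+/* The largest positive power of 2 representable in int16_t.  */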
+#define MAXPOW (1ULL << 14)
+
/*
** mul_s16_m_tied1:
** mul z0\.h, p0/m, z0\.h, z1\.h
z0 = svmul_m (p0, z1, x0))
/*
-** mul_2_s16_m_tied1:
-** mov (z[0-9]+\.h), #2
+** mul_4dupop1_s16_m_tied2:
+** mov (z[0-9]+)\.h, #4
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, \1
+** mul z0\.h, p0/m, z0\.h, \2\.h
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1_s16_m_tied2, svint16_t,
+ z0 = svmul_m (p0, svdup_s16 (4), z0),
+ z0 = svmul_m (p0, svdup_s16 (4), z0))
+
+/*
+** mul_4dupop1ptrue_s16_m_tied2:
+** lsl z0\.h, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1ptrue_s16_m_tied2, svint16_t,
+ z0 = svmul_m (svptrue_b16 (), svdup_s16 (4), z0),
+ z0 = svmul_m (svptrue_b16 (), svdup_s16 (4), z0))
+
+/*
+** mul_4dupop2_s16_m_tied1:
+** lsl z0\.h, p0/m, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_s16_m_tied1, svint16_t,
+ z0 = svmul_m (p0, z0, svdup_s16 (4)),
+ z0 = svmul_m (p0, z0, svdup_s16 (4)))
+
+/*
+** mul_4nop2_s16_m_tied1:
+** lsl z0\.h, p0/m, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_s16_m_tied1, svint16_t,
+ z0 = svmul_n_s16_m (p0, z0, 4),
+ z0 = svmul_m (p0, z0, 4))
+
+/*
+** mul_maxpownop2_s16_m_tied1:
+** lsl z0\.h, p0/m, z0\.h, #14
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_s16_m_tied1, svint16_t,
+ z0 = svmul_n_s16_m (p0, z0, MAXPOW),
+ z0 = svmul_m (p0, z0, MAXPOW))
+
+/*
+** mul_intminnop2_s16_m_tied1:
+** lsl z0\.h, p0/m, z0\.h, #15
+** ret
+*/
+TEST_UNIFORM_Z (mul_intminnop2_s16_m_tied1, svint16_t,
+ z0 = svmul_n_s16_m (p0, z0, INT16_MIN),
+ z0 = svmul_m (p0, z0, INT16_MIN))
+
+/*
+** mul_1_s16_m_tied1:
+** sel z0\.h, p0, z0\.h, z0\.h
+** ret
+*/
+TEST_UNIFORM_Z (mul_1_s16_m_tied1, svint16_t,
+ z0 = svmul_n_s16_m (p0, z0, 1),
+ z0 = svmul_m (p0, z0, 1))
+
+/*
+** mul_3_s16_m_tied1:
+** mov (z[0-9]+\.h), #3
** mul z0\.h, p0/m, z0\.h, \1
** ret
*/
-TEST_UNIFORM_Z (mul_2_s16_m_tied1, svint16_t,
- z0 = svmul_n_s16_m (p0, z0, 2),
- z0 = svmul_m (p0, z0, 2))
+TEST_UNIFORM_Z (mul_3_s16_m_tied1, svint16_t,
+ z0 = svmul_n_s16_m (p0, z0, 3),
+ z0 = svmul_m (p0, z0, 3))
/*
-** mul_2_s16_m_untied:
-** mov (z[0-9]+\.h), #2
+** mul_4dupop2_s16_m_untied:
+** movprfx z0, z1
+** lsl z0\.h, p0/m, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_s16_m_untied, svint16_t,
+ z0 = svmul_m (p0, z1, svdup_s16 (4)),
+ z0 = svmul_m (p0, z1, svdup_s16 (4)))
+
+/*
+** mul_4nop2_s16_m_untied:
+** movprfx z0, z1
+** lsl z0\.h, p0/m, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_s16_m_untied, svint16_t,
+ z0 = svmul_n_s16_m (p0, z1, 4),
+ z0 = svmul_m (p0, z1, 4))
+
+/*
+** mul_maxpownop2_s16_m_untied:
+** movprfx z0, z1
+** lsl z0\.h, p0/m, z0\.h, #14
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_s16_m_untied, svint16_t,
+ z0 = svmul_n_s16_m (p0, z1, MAXPOW),
+ z0 = svmul_m (p0, z1, MAXPOW))
+
+/*
+** mul_3_s16_m_untied:
+** mov (z[0-9]+\.h), #3
** movprfx z0, z1
** mul z0\.h, p0/m, z0\.h, \1
** ret
*/
-TEST_UNIFORM_Z (mul_2_s16_m_untied, svint16_t,
- z0 = svmul_n_s16_m (p0, z1, 2),
- z0 = svmul_m (p0, z1, 2))
+TEST_UNIFORM_Z (mul_3_s16_m_untied, svint16_t,
+ z0 = svmul_n_s16_m (p0, z1, 3),
+ z0 = svmul_m (p0, z1, 3))
/*
** mul_m1_s16_m:
z0 = svmul_z (p0, z1, x0))
/*
-** mul_2_s16_z_tied1:
-** mov (z[0-9]+\.h), #2
+** mul_4dupop1_s16_z_tied2:
+** movprfx z0\.h, p0/z, z0\.h
+** lsl z0\.h, p0/m, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1_s16_z_tied2, svint16_t,
+ z0 = svmul_z (p0, svdup_s16 (4), z0),
+ z0 = svmul_z (p0, svdup_s16 (4), z0))
+
+/*
+** mul_4dupop1ptrue_s16_z_tied2:
+** lsl z0\.h, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1ptrue_s16_z_tied2, svint16_t,
+ z0 = svmul_z (svptrue_b16 (), svdup_s16 (4), z0),
+ z0 = svmul_z (svptrue_b16 (), svdup_s16 (4), z0))
+
+/*
+** mul_4dupop2_s16_z_tied1:
+** movprfx z0\.h, p0/z, z0\.h
+** lsl z0\.h, p0/m, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_s16_z_tied1, svint16_t,
+ z0 = svmul_z (p0, z0, svdup_s16 (4)),
+ z0 = svmul_z (p0, z0, svdup_s16 (4)))
+
+/*
+** mul_4nop2_s16_z_tied1:
+** movprfx z0\.h, p0/z, z0\.h
+** lsl z0\.h, p0/m, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_s16_z_tied1, svint16_t,
+ z0 = svmul_n_s16_z (p0, z0, 4),
+ z0 = svmul_z (p0, z0, 4))
+
+/*
+** mul_maxpownop2_s16_z_tied1:
+** movprfx z0\.h, p0/z, z0\.h
+** lsl z0\.h, p0/m, z0\.h, #14
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_s16_z_tied1, svint16_t,
+ z0 = svmul_n_s16_z (p0, z0, MAXPOW),
+ z0 = svmul_z (p0, z0, MAXPOW))
+
+/*
+** mul_intminnop2_s16_z_tied1:
+** movprfx z0\.h, p0/z, z0\.h
+** lsl z0\.h, p0/m, z0\.h, #15
+** ret
+*/
+TEST_UNIFORM_Z (mul_intminnop2_s16_z_tied1, svint16_t,
+ z0 = svmul_n_s16_z (p0, z0, INT16_MIN),
+ z0 = svmul_z (p0, z0, INT16_MIN))
+
+/*
+** mul_1_s16_z_tied1:
+** mov (z[0-9]+\.h), #1
+** movprfx z0\.h, p0/z, z0\.h
+** mul z0\.h, p0/m, z0\.h, \1
+** ret
+*/
+TEST_UNIFORM_Z (mul_1_s16_z_tied1, svint16_t,
+ z0 = svmul_n_s16_z (p0, z0, 1),
+ z0 = svmul_z (p0, z0, 1))
+
+/*
+** mul_3_s16_z_tied1:
+** mov (z[0-9]+\.h), #3
** movprfx z0\.h, p0/z, z0\.h
** mul z0\.h, p0/m, z0\.h, \1
** ret
*/
-TEST_UNIFORM_Z (mul_2_s16_z_tied1, svint16_t,
- z0 = svmul_n_s16_z (p0, z0, 2),
- z0 = svmul_z (p0, z0, 2))
+TEST_UNIFORM_Z (mul_3_s16_z_tied1, svint16_t,
+ z0 = svmul_n_s16_z (p0, z0, 3),
+ z0 = svmul_z (p0, z0, 3))
+
+/*
+** mul_4dupop2_s16_z_untied:
+** movprfx z0\.h, p0/z, z1\.h
+** lsl z0\.h, p0/m, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_s16_z_untied, svint16_t,
+ z0 = svmul_z (p0, z1, svdup_s16 (4)),
+ z0 = svmul_z (p0, z1, svdup_s16 (4)))
+
+/*
+** mul_4nop2_s16_z_untied:
+** movprfx z0\.h, p0/z, z1\.h
+** lsl z0\.h, p0/m, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_s16_z_untied, svint16_t,
+ z0 = svmul_n_s16_z (p0, z1, 4),
+ z0 = svmul_z (p0, z1, 4))
+
+/*
+** mul_maxpownop2_s16_z_untied:
+** movprfx z0\.h, p0/z, z1\.h
+** lsl z0\.h, p0/m, z0\.h, #14
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_s16_z_untied, svint16_t,
+ z0 = svmul_n_s16_z (p0, z1, MAXPOW),
+ z0 = svmul_z (p0, z1, MAXPOW))
/*
-** mul_2_s16_z_untied:
-** mov (z[0-9]+\.h), #2
+** mul_3_s16_z_untied:
+** mov (z[0-9]+\.h), #3
** (
** movprfx z0\.h, p0/z, z1\.h
** mul z0\.h, p0/m, z0\.h, \1
** )
** ret
*/
-TEST_UNIFORM_Z (mul_2_s16_z_untied, svint16_t,
- z0 = svmul_n_s16_z (p0, z1, 2),
- z0 = svmul_z (p0, z1, 2))
+TEST_UNIFORM_Z (mul_3_s16_z_untied, svint16_t,
+ z0 = svmul_n_s16_z (p0, z1, 3),
+ z0 = svmul_z (p0, z1, 3))
/*
** mul_s16_x_tied1:
z0 = svmul_x (p0, z1, x0))
/*
-** mul_2_s16_x_tied1:
-** mul z0\.h, z0\.h, #2
+** mul_4dupop1_s16_x_tied2:
+** lsl z0\.h, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1_s16_x_tied2, svint16_t,
+ z0 = svmul_x (p0, svdup_s16 (4), z0),
+ z0 = svmul_x (p0, svdup_s16 (4), z0))
+
+/*
+** mul_4dupop1ptrue_s16_x_tied2:
+** lsl z0\.h, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1ptrue_s16_x_tied2, svint16_t,
+ z0 = svmul_x (svptrue_b16 (), svdup_s16 (4), z0),
+ z0 = svmul_x (svptrue_b16 (), svdup_s16 (4), z0))
+
+/*
+** mul_4dupop2_s16_x_tied1:
+** lsl z0\.h, z0\.h, #2
** ret
*/
-TEST_UNIFORM_Z (mul_2_s16_x_tied1, svint16_t,
- z0 = svmul_n_s16_x (p0, z0, 2),
- z0 = svmul_x (p0, z0, 2))
+TEST_UNIFORM_Z (mul_4dupop2_s16_x_tied1, svint16_t,
+ z0 = svmul_x (p0, z0, svdup_s16 (4)),
+ z0 = svmul_x (p0, z0, svdup_s16 (4)))
/*
-** mul_2_s16_x_untied:
+** mul_4nop2_s16_x_tied1:
+** lsl z0\.h, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_s16_x_tied1, svint16_t,
+ z0 = svmul_n_s16_x (p0, z0, 4),
+ z0 = svmul_x (p0, z0, 4))
+
+/*
+** mul_maxpownop2_s16_x_tied1:
+** lsl z0\.h, z0\.h, #14
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_s16_x_tied1, svint16_t,
+ z0 = svmul_n_s16_x (p0, z0, MAXPOW),
+ z0 = svmul_x (p0, z0, MAXPOW))
+
+/*
+** mul_intminnop2_s16_x_tied1:
+** lsl z0\.h, z0\.h, #15
+** ret
+*/
+TEST_UNIFORM_Z (mul_intminnop2_s16_x_tied1, svint16_t,
+ z0 = svmul_n_s16_x (p0, z0, INT16_MIN),
+ z0 = svmul_x (p0, z0, INT16_MIN))
+
+/*
+** mul_1_s16_x_tied1:
+** ret
+*/
+TEST_UNIFORM_Z (mul_1_s16_x_tied1, svint16_t,
+ z0 = svmul_n_s16_x (p0, z0, 1),
+ z0 = svmul_x (p0, z0, 1))
+
+/*
+** mul_3_s16_x_tied1:
+** mul z0\.h, z0\.h, #3
+** ret
+*/
+TEST_UNIFORM_Z (mul_3_s16_x_tied1, svint16_t,
+ z0 = svmul_n_s16_x (p0, z0, 3),
+ z0 = svmul_x (p0, z0, 3))
+
+/*
+** mul_4dupop2_s16_x_untied:
+** lsl z0\.h, z1\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_s16_x_untied, svint16_t,
+ z0 = svmul_x (p0, z1, svdup_s16 (4)),
+ z0 = svmul_x (p0, z1, svdup_s16 (4)))
+
+/*
+** mul_4nop2_s16_x_untied:
+** lsl z0\.h, z1\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_s16_x_untied, svint16_t,
+ z0 = svmul_n_s16_x (p0, z1, 4),
+ z0 = svmul_x (p0, z1, 4))
+
+/*
+** mul_maxpownop2_s16_x_untied:
+** lsl z0\.h, z1\.h, #14
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_s16_x_untied, svint16_t,
+ z0 = svmul_n_s16_x (p0, z1, MAXPOW),
+ z0 = svmul_x (p0, z1, MAXPOW))
+
+/*
+** mul_3_s16_x_untied:
** movprfx z0, z1
-** mul z0\.h, z0\.h, #2
+** mul z0\.h, z0\.h, #3
** ret
*/
-TEST_UNIFORM_Z (mul_2_s16_x_untied, svint16_t,
- z0 = svmul_n_s16_x (p0, z1, 2),
- z0 = svmul_x (p0, z1, 2))
+TEST_UNIFORM_Z (mul_3_s16_x_untied, svint16_t,
+ z0 = svmul_n_s16_x (p0, z1, 3),
+ z0 = svmul_x (p0, z1, 3))
/*
** mul_127_s16_x:
/*
** mul_128_s16_x:
-** mov (z[0-9]+\.h), #128
-** mul z0\.h, p0/m, z0\.h, \1
+** lsl z0\.h, z0\.h, #7
** ret
*/
TEST_UNIFORM_Z (mul_128_s16_x, svint16_t,
#include "test_sve_acle.h"
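+/* The largest positive power of 2 representable in int32_t.  */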
+#define MAXPOW (1ULL << 30)
+
/*
** mul_s32_m_tied1:
** mul z0\.s, p0/m, z0\.s, z1\.s
z0 = svmul_m (p0, z1, x0))
/*
-** mul_2_s32_m_tied1:
-** mov (z[0-9]+\.s), #2
+** mul_4dupop1_s32_m_tied2:
+** mov (z[0-9]+)\.s, #4
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, \1
+** mul z0\.s, p0/m, z0\.s, \2\.s
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1_s32_m_tied2, svint32_t,
+ z0 = svmul_m (p0, svdup_s32 (4), z0),
+ z0 = svmul_m (p0, svdup_s32 (4), z0))
+
+/*
+** mul_4dupop1ptrue_s32_m_tied2:
+** lsl z0\.s, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1ptrue_s32_m_tied2, svint32_t,
+ z0 = svmul_m (svptrue_b32 (), svdup_s32 (4), z0),
+ z0 = svmul_m (svptrue_b32 (), svdup_s32 (4), z0))
+
+/*
+** mul_4dupop2_s32_m_tied1:
+** lsl z0\.s, p0/m, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_s32_m_tied1, svint32_t,
+ z0 = svmul_m (p0, z0, svdup_s32 (4)),
+ z0 = svmul_m (p0, z0, svdup_s32 (4)))
+
+/*
+** mul_4nop2_s32_m_tied1:
+** lsl z0\.s, p0/m, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_s32_m_tied1, svint32_t,
+ z0 = svmul_n_s32_m (p0, z0, 4),
+ z0 = svmul_m (p0, z0, 4))
+
+/*
+** mul_maxpownop2_s32_m_tied1:
+** lsl z0\.s, p0/m, z0\.s, #30
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_s32_m_tied1, svint32_t,
+ z0 = svmul_n_s32_m (p0, z0, MAXPOW),
+ z0 = svmul_m (p0, z0, MAXPOW))
+
+/*
+** mul_intminnop2_s32_m_tied1:
+** lsl z0\.s, p0/m, z0\.s, #31
+** ret
+*/
+TEST_UNIFORM_Z (mul_intminnop2_s32_m_tied1, svint32_t,
+ z0 = svmul_n_s32_m (p0, z0, INT32_MIN),
+ z0 = svmul_m (p0, z0, INT32_MIN))
+
+/*
+** mul_1_s32_m_tied1:
+** sel z0\.s, p0, z0\.s, z0\.s
+** ret
+*/
+TEST_UNIFORM_Z (mul_1_s32_m_tied1, svint32_t,
+ z0 = svmul_n_s32_m (p0, z0, 1),
+ z0 = svmul_m (p0, z0, 1))
+
+/*
+** mul_3_s32_m_tied1:
+** mov (z[0-9]+\.s), #3
** mul z0\.s, p0/m, z0\.s, \1
** ret
*/
-TEST_UNIFORM_Z (mul_2_s32_m_tied1, svint32_t,
- z0 = svmul_n_s32_m (p0, z0, 2),
- z0 = svmul_m (p0, z0, 2))
+TEST_UNIFORM_Z (mul_3_s32_m_tied1, svint32_t,
+ z0 = svmul_n_s32_m (p0, z0, 3),
+ z0 = svmul_m (p0, z0, 3))
/*
-** mul_2_s32_m_untied:
-** mov (z[0-9]+\.s), #2
+** mul_4dupop2_s32_m_untied:
+** movprfx z0, z1
+** lsl z0\.s, p0/m, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_s32_m_untied, svint32_t,
+ z0 = svmul_m (p0, z1, svdup_s32 (4)),
+ z0 = svmul_m (p0, z1, svdup_s32 (4)))
+
+/*
+** mul_4nop2_s32_m_untied:
+** movprfx z0, z1
+** lsl z0\.s, p0/m, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_s32_m_untied, svint32_t,
+ z0 = svmul_n_s32_m (p0, z1, 4),
+ z0 = svmul_m (p0, z1, 4))
+
+/*
+** mul_maxpownop2_s32_m_untied:
+** movprfx z0, z1
+** lsl z0\.s, p0/m, z0\.s, #30
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_s32_m_untied, svint32_t,
+ z0 = svmul_n_s32_m (p0, z1, MAXPOW),
+ z0 = svmul_m (p0, z1, MAXPOW))
+
+/*
+** mul_3_s32_m_untied:
+** mov (z[0-9]+\.s), #3
** movprfx z0, z1
** mul z0\.s, p0/m, z0\.s, \1
** ret
*/
-TEST_UNIFORM_Z (mul_2_s32_m_untied, svint32_t,
- z0 = svmul_n_s32_m (p0, z1, 2),
- z0 = svmul_m (p0, z1, 2))
+TEST_UNIFORM_Z (mul_3_s32_m_untied, svint32_t,
+ z0 = svmul_n_s32_m (p0, z1, 3),
+ z0 = svmul_m (p0, z1, 3))
/*
** mul_m1_s32_m:
z0 = svmul_z (p0, z1, x0))
/*
-** mul_2_s32_z_tied1:
-** mov (z[0-9]+\.s), #2
+** mul_4dupop1_s32_z_tied2:
+** movprfx z0\.s, p0/z, z0\.s
+** lsl z0\.s, p0/m, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1_s32_z_tied2, svint32_t,
+ z0 = svmul_z (p0, svdup_s32 (4), z0),
+ z0 = svmul_z (p0, svdup_s32 (4), z0))
+
+/*
+** mul_4dupop1ptrue_s32_z_tied2:
+** lsl z0\.s, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1ptrue_s32_z_tied2, svint32_t,
+ z0 = svmul_z (svptrue_b32 (), svdup_s32 (4), z0),
+ z0 = svmul_z (svptrue_b32 (), svdup_s32 (4), z0))
+
+/*
+** mul_4dupop2_s32_z_tied1:
+** movprfx z0\.s, p0/z, z0\.s
+** lsl z0\.s, p0/m, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_s32_z_tied1, svint32_t,
+ z0 = svmul_z (p0, z0, svdup_s32 (4)),
+ z0 = svmul_z (p0, z0, svdup_s32 (4)))
+
+/*
+** mul_4nop2_s32_z_tied1:
+** movprfx z0\.s, p0/z, z0\.s
+** lsl z0\.s, p0/m, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_s32_z_tied1, svint32_t,
+ z0 = svmul_n_s32_z (p0, z0, 4),
+ z0 = svmul_z (p0, z0, 4))
+
+/*
+** mul_maxpownop2_s32_z_tied1:
+** movprfx z0\.s, p0/z, z0\.s
+** lsl z0\.s, p0/m, z0\.s, #30
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_s32_z_tied1, svint32_t,
+ z0 = svmul_n_s32_z (p0, z0, MAXPOW),
+ z0 = svmul_z (p0, z0, MAXPOW))
+
+/*
+** mul_intminnop2_s32_z_tied1:
+** movprfx z0\.s, p0/z, z0\.s
+** lsl z0\.s, p0/m, z0\.s, #31
+** ret
+*/
+TEST_UNIFORM_Z (mul_intminnop2_s32_z_tied1, svint32_t,
+ z0 = svmul_n_s32_z (p0, z0, INT32_MIN),
+ z0 = svmul_z (p0, z0, INT32_MIN))
+
+/*
+** mul_1_s32_z_tied1:
+** mov (z[0-9]+\.s), #1
+** movprfx z0\.s, p0/z, z0\.s
+** mul z0\.s, p0/m, z0\.s, \1
+** ret
+*/
+TEST_UNIFORM_Z (mul_1_s32_z_tied1, svint32_t,
+ z0 = svmul_n_s32_z (p0, z0, 1),
+ z0 = svmul_z (p0, z0, 1))
+
+/*
+** mul_3_s32_z_tied1:
+** mov (z[0-9]+\.s), #3
** movprfx z0\.s, p0/z, z0\.s
** mul z0\.s, p0/m, z0\.s, \1
** ret
*/
-TEST_UNIFORM_Z (mul_2_s32_z_tied1, svint32_t,
- z0 = svmul_n_s32_z (p0, z0, 2),
- z0 = svmul_z (p0, z0, 2))
+TEST_UNIFORM_Z (mul_3_s32_z_tied1, svint32_t,
+ z0 = svmul_n_s32_z (p0, z0, 3),
+ z0 = svmul_z (p0, z0, 3))
+
+/*
+** mul_4dupop2_s32_z_untied:
+** movprfx z0\.s, p0/z, z1\.s
+** lsl z0\.s, p0/m, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_s32_z_untied, svint32_t,
+ z0 = svmul_z (p0, z1, svdup_s32 (4)),
+ z0 = svmul_z (p0, z1, svdup_s32 (4)))
+
+/*
+** mul_4nop2_s32_z_untied:
+** movprfx z0\.s, p0/z, z1\.s
+** lsl z0\.s, p0/m, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_s32_z_untied, svint32_t,
+ z0 = svmul_n_s32_z (p0, z1, 4),
+ z0 = svmul_z (p0, z1, 4))
+
+/*
+** mul_maxpownop2_s32_z_untied:
+** movprfx z0\.s, p0/z, z1\.s
+** lsl z0\.s, p0/m, z0\.s, #30
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_s32_z_untied, svint32_t,
+ z0 = svmul_n_s32_z (p0, z1, MAXPOW),
+ z0 = svmul_z (p0, z1, MAXPOW))
/*
-** mul_2_s32_z_untied:
-** mov (z[0-9]+\.s), #2
+** mul_3_s32_z_untied:
+** mov (z[0-9]+\.s), #3
** (
** movprfx z0\.s, p0/z, z1\.s
** mul z0\.s, p0/m, z0\.s, \1
** )
** ret
*/
-TEST_UNIFORM_Z (mul_2_s32_z_untied, svint32_t,
- z0 = svmul_n_s32_z (p0, z1, 2),
- z0 = svmul_z (p0, z1, 2))
+TEST_UNIFORM_Z (mul_3_s32_z_untied, svint32_t,
+ z0 = svmul_n_s32_z (p0, z1, 3),
+ z0 = svmul_z (p0, z1, 3))
/*
** mul_s32_x_tied1:
z0 = svmul_x (p0, z1, x0))
/*
-** mul_2_s32_x_tied1:
-** mul z0\.s, z0\.s, #2
+** mul_4dupop1_s32_x_tied2:
+** lsl z0\.s, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1_s32_x_tied2, svint32_t,
+ z0 = svmul_x (p0, svdup_s32 (4), z0),
+ z0 = svmul_x (p0, svdup_s32 (4), z0))
+
+/*
+** mul_4dupop1ptrue_s32_x_tied2:
+** lsl z0\.s, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1ptrue_s32_x_tied2, svint32_t,
+ z0 = svmul_x (svptrue_b32 (), svdup_s32 (4), z0),
+ z0 = svmul_x (svptrue_b32 (), svdup_s32 (4), z0))
+
+/*
+** mul_4dupop2_s32_x_tied1:
+** lsl z0\.s, z0\.s, #2
** ret
*/
-TEST_UNIFORM_Z (mul_2_s32_x_tied1, svint32_t,
- z0 = svmul_n_s32_x (p0, z0, 2),
- z0 = svmul_x (p0, z0, 2))
+TEST_UNIFORM_Z (mul_4dupop2_s32_x_tied1, svint32_t,
+ z0 = svmul_x (p0, z0, svdup_s32 (4)),
+ z0 = svmul_x (p0, z0, svdup_s32 (4)))
/*
-** mul_2_s32_x_untied:
+** mul_4nop2_s32_x_tied1:
+** lsl z0\.s, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_s32_x_tied1, svint32_t,
+ z0 = svmul_n_s32_x (p0, z0, 4),
+ z0 = svmul_x (p0, z0, 4))
+
+/*
+** mul_maxpownop2_s32_x_tied1:
+** lsl z0\.s, z0\.s, #30
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_s32_x_tied1, svint32_t,
+ z0 = svmul_n_s32_x (p0, z0, MAXPOW),
+ z0 = svmul_x (p0, z0, MAXPOW))
+
+/*
+** mul_intminnop2_s32_x_tied1:
+** lsl z0\.s, z0\.s, #31
+** ret
+*/
+TEST_UNIFORM_Z (mul_intminnop2_s32_x_tied1, svint32_t,
+ z0 = svmul_n_s32_x (p0, z0, INT32_MIN),
+ z0 = svmul_x (p0, z0, INT32_MIN))
+
+/*
+** mul_1_s32_x_tied1:
+** ret
+*/
+TEST_UNIFORM_Z (mul_1_s32_x_tied1, svint32_t,
+ z0 = svmul_n_s32_x (p0, z0, 1),
+ z0 = svmul_x (p0, z0, 1))
+
+/*
+** mul_3_s32_x_tied1:
+** mul z0\.s, z0\.s, #3
+** ret
+*/
+TEST_UNIFORM_Z (mul_3_s32_x_tied1, svint32_t,
+ z0 = svmul_n_s32_x (p0, z0, 3),
+ z0 = svmul_x (p0, z0, 3))
+
+/*
+** mul_4dupop2_s32_x_untied:
+** lsl z0\.s, z1\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_s32_x_untied, svint32_t,
+ z0 = svmul_x (p0, z1, svdup_s32 (4)),
+ z0 = svmul_x (p0, z1, svdup_s32 (4)))
+
+/*
+** mul_4nop2_s32_x_untied:
+** lsl z0\.s, z1\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_s32_x_untied, svint32_t,
+ z0 = svmul_n_s32_x (p0, z1, 4),
+ z0 = svmul_x (p0, z1, 4))
+
+/*
+** mul_maxpownop2_s32_x_untied:
+** lsl z0\.s, z1\.s, #30
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_s32_x_untied, svint32_t,
+ z0 = svmul_n_s32_x (p0, z1, MAXPOW),
+ z0 = svmul_x (p0, z1, MAXPOW))
+
+/*
+** mul_3_s32_x_untied:
** movprfx z0, z1
-** mul z0\.s, z0\.s, #2
+** mul z0\.s, z0\.s, #3
** ret
*/
-TEST_UNIFORM_Z (mul_2_s32_x_untied, svint32_t,
- z0 = svmul_n_s32_x (p0, z1, 2),
- z0 = svmul_x (p0, z1, 2))
+TEST_UNIFORM_Z (mul_3_s32_x_untied, svint32_t,
+ z0 = svmul_n_s32_x (p0, z1, 3),
+ z0 = svmul_x (p0, z1, 3))
/*
** mul_127_s32_x:
/*
** mul_128_s32_x:
-** mov (z[0-9]+\.s), #128
-** mul z0\.s, p0/m, z0\.s, \1
+** lsl z0\.s, z0\.s, #7
** ret
*/
TEST_UNIFORM_Z (mul_128_s32_x, svint32_t,
#include "test_sve_acle.h"
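+/* The largest positive power of 2 representable in int64_t.  */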
+#define MAXPOW (1ULL << 62)
+
/*
** mul_s64_m_tied1:
** mul z0\.d, p0/m, z0\.d, z1\.d
z0 = svmul_n_s64_m (p0, z1, x0),
z0 = svmul_m (p0, z1, x0))
+/*
+** mul_4dupop1_s64_m_tied2:
+** mov (z[0-9]+)\.d, #4
+** mov (z[0-9]+\.d), z0\.d
+** movprfx z0, \1
+** mul z0\.d, p0/m, z0\.d, \2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1_s64_m_tied2, svint64_t,
+ z0 = svmul_m (p0, svdup_s64 (4), z0),
+ z0 = svmul_m (p0, svdup_s64 (4), z0))
+
+/*
+** mul_4dupop1ptrue_s64_m_tied2:
+** lsl z0\.d, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1ptrue_s64_m_tied2, svint64_t,
+ z0 = svmul_m (svptrue_b64 (), svdup_s64 (4), z0),
+ z0 = svmul_m (svptrue_b64 (), svdup_s64 (4), z0))
+
+/*
+** mul_4dupop2_s64_m_tied1:
+** lsl z0\.d, p0/m, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_s64_m_tied1, svint64_t,
+ z0 = svmul_m (p0, z0, svdup_s64 (4)),
+ z0 = svmul_m (p0, z0, svdup_s64 (4)))
+
+/*
+** mul_4nop2_s64_m_tied1:
+** lsl z0\.d, p0/m, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_s64_m_tied1, svint64_t,
+ z0 = svmul_n_s64_m (p0, z0, 4),
+ z0 = svmul_m (p0, z0, 4))
+
+/*
+** mul_maxpownop2_s64_m_tied1:
+** lsl z0\.d, p0/m, z0\.d, #62
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_s64_m_tied1, svint64_t,
+ z0 = svmul_n_s64_m (p0, z0, MAXPOW),
+ z0 = svmul_m (p0, z0, MAXPOW))
+
+/*
+** mul_intminnop2_s64_m_tied1:
+** lsl z0\.d, p0/m, z0\.d, #63
+** ret
+*/
+TEST_UNIFORM_Z (mul_intminnop2_s64_m_tied1, svint64_t,
+ z0 = svmul_n_s64_m (p0, z0, INT64_MIN),
+ z0 = svmul_m (p0, z0, INT64_MIN))
+
+/*
+** mul_1_s64_m_tied1:
+** sel z0\.d, p0, z0\.d, z0\.d
+** ret
+*/
+TEST_UNIFORM_Z (mul_1_s64_m_tied1, svint64_t,
+ z0 = svmul_n_s64_m (p0, z0, 1),
+ z0 = svmul_m (p0, z0, 1))
+
/*
** mul_2_s64_m_tied1:
-** mov (z[0-9]+\.d), #2
-** mul z0\.d, p0/m, z0\.d, \1
+** lsl z0\.d, p0/m, z0\.d, #1
** ret
*/
TEST_UNIFORM_Z (mul_2_s64_m_tied1, svint64_t,
z0 = svmul_m (p0, z0, 2))
/*
-** mul_2_s64_m_untied:
-** mov (z[0-9]+\.d), #2
+** mul_3_s64_m_tied1:
+** mov (z[0-9]+\.d), #3
+** mul z0\.d, p0/m, z0\.d, \1
+** ret
+*/
+TEST_UNIFORM_Z (mul_3_s64_m_tied1, svint64_t,
+ z0 = svmul_n_s64_m (p0, z0, 3),
+ z0 = svmul_m (p0, z0, 3))
+
+/*
+** mul_4dupop2_s64_m_untied:
+** movprfx z0, z1
+** lsl z0\.d, p0/m, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_s64_m_untied, svint64_t,
+ z0 = svmul_m (p0, z1, svdup_s64 (4)),
+ z0 = svmul_m (p0, z1, svdup_s64 (4)))
+
+/*
+** mul_4nop2_s64_m_untied:
+** movprfx z0, z1
+** lsl z0\.d, p0/m, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_s64_m_untied, svint64_t,
+ z0 = svmul_n_s64_m (p0, z1, 4),
+ z0 = svmul_m (p0, z1, 4))
+
+/*
+** mul_maxpownop2_s64_m_untied:
+** movprfx z0, z1
+** lsl z0\.d, p0/m, z0\.d, #62
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_s64_m_untied, svint64_t,
+ z0 = svmul_n_s64_m (p0, z1, MAXPOW),
+ z0 = svmul_m (p0, z1, MAXPOW))
+
+/*
+** mul_3_s64_m_untied:
+** mov (z[0-9]+\.d), #3
** movprfx z0, z1
** mul z0\.d, p0/m, z0\.d, \1
** ret
*/
-TEST_UNIFORM_Z (mul_2_s64_m_untied, svint64_t,
- z0 = svmul_n_s64_m (p0, z1, 2),
- z0 = svmul_m (p0, z1, 2))
+TEST_UNIFORM_Z (mul_3_s64_m_untied, svint64_t,
+ z0 = svmul_n_s64_m (p0, z1, 3),
+ z0 = svmul_m (p0, z1, 3))
/*
** mul_m1_s64_m:
z0 = svmul_z (p0, z1, x0))
/*
-** mul_2_s64_z_tied1:
-** mov (z[0-9]+\.d), #2
+** mul_4dupop1_s64_z_tied2:
** movprfx z0\.d, p0/z, z0\.d
-** mul z0\.d, p0/m, z0\.d, \1
+** lsl z0\.d, p0/m, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1_s64_z_tied2, svint64_t,
+ z0 = svmul_z (p0, svdup_s64 (4), z0),
+ z0 = svmul_z (p0, svdup_s64 (4), z0))
+
+/*
+** mul_4dupop1ptrue_s64_z_tied2:
+** lsl z0\.d, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1ptrue_s64_z_tied2, svint64_t,
+ z0 = svmul_z (svptrue_b64 (), svdup_s64 (4), z0),
+ z0 = svmul_z (svptrue_b64 (), svdup_s64 (4), z0))
+
+/*
+** mul_4dupop2_s64_z_tied1:
+** movprfx z0\.d, p0/z, z0\.d
+** lsl z0\.d, p0/m, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_s64_z_tied1, svint64_t,
+ z0 = svmul_z (p0, z0, svdup_s64 (4)),
+ z0 = svmul_z (p0, z0, svdup_s64 (4)))
+
+/*
+** mul_4nop2_s64_z_tied1:
+** movprfx z0\.d, p0/z, z0\.d
+** lsl z0\.d, p0/m, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_s64_z_tied1, svint64_t,
+ z0 = svmul_n_s64_z (p0, z0, 4),
+ z0 = svmul_z (p0, z0, 4))
+
+/*
+** mul_maxpownop2_s64_z_tied1:
+** movprfx z0\.d, p0/z, z0\.d
+** lsl z0\.d, p0/m, z0\.d, #62
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_s64_z_tied1, svint64_t,
+ z0 = svmul_n_s64_z (p0, z0, MAXPOW),
+ z0 = svmul_z (p0, z0, MAXPOW))
+
+/*
+** mul_intminnop2_s64_z_tied1:
+** movprfx z0\.d, p0/z, z0\.d
+** lsl z0\.d, p0/m, z0\.d, #63
+** ret
+*/
+TEST_UNIFORM_Z (mul_intminnop2_s64_z_tied1, svint64_t,
+ z0 = svmul_n_s64_z (p0, z0, INT64_MIN),
+ z0 = svmul_z (p0, z0, INT64_MIN))
+
+/*
+** mul_1_s64_z_tied1:
+** mov (z[0-9]+\.d), #1
+** movprfx z0\.d, p0/z, z0\.d
+** mul z0\.d, p0/m, z0\.d, \1
+** ret
+*/
+TEST_UNIFORM_Z (mul_1_s64_z_tied1, svint64_t,
+ z0 = svmul_n_s64_z (p0, z0, 1),
+ z0 = svmul_z (p0, z0, 1))
+
+/*
+** mul_2_s64_z_tied1:
+** movprfx z0\.d, p0/z, z0\.d
+** lsl z0\.d, p0/m, z0\.d, #1
** ret
*/
TEST_UNIFORM_Z (mul_2_s64_z_tied1, svint64_t,
z0 = svmul_z (p0, z0, 2))
/*
-** mul_2_s64_z_untied:
-** mov (z[0-9]+\.d), #2
+** mul_3_s64_z_tied1:
+** mov (z[0-9]+\.d), #3
+** movprfx z0\.d, p0/z, z0\.d
+** mul z0\.d, p0/m, z0\.d, \1
+** ret
+*/
+TEST_UNIFORM_Z (mul_3_s64_z_tied1, svint64_t,
+ z0 = svmul_n_s64_z (p0, z0, 3),
+ z0 = svmul_z (p0, z0, 3))
+
+/*
+** mul_4dupop2_s64_z_untied:
+** movprfx z0\.d, p0/z, z1\.d
+** lsl z0\.d, p0/m, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_s64_z_untied, svint64_t,
+ z0 = svmul_z (p0, z1, svdup_s64 (4)),
+ z0 = svmul_z (p0, z1, svdup_s64 (4)))
+
+/*
+** mul_4nop2_s64_z_untied:
+** movprfx z0\.d, p0/z, z1\.d
+** lsl z0\.d, p0/m, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_s64_z_untied, svint64_t,
+ z0 = svmul_n_s64_z (p0, z1, 4),
+ z0 = svmul_z (p0, z1, 4))
+
+/*
+** mul_maxpownop2_s64_z_untied:
+** movprfx z0\.d, p0/z, z1\.d
+** lsl z0\.d, p0/m, z0\.d, #62
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_s64_z_untied, svint64_t,
+ z0 = svmul_n_s64_z (p0, z1, MAXPOW),
+ z0 = svmul_z (p0, z1, MAXPOW))
+
+/*
+** mul_3_s64_z_untied:
+** mov (z[0-9]+\.d), #3
** (
** movprfx z0\.d, p0/z, z1\.d
** mul z0\.d, p0/m, z0\.d, \1
** )
** ret
*/
-TEST_UNIFORM_Z (mul_2_s64_z_untied, svint64_t,
- z0 = svmul_n_s64_z (p0, z1, 2),
- z0 = svmul_z (p0, z1, 2))
+TEST_UNIFORM_Z (mul_3_s64_z_untied, svint64_t,
+ z0 = svmul_n_s64_z (p0, z1, 3),
+ z0 = svmul_z (p0, z1, 3))
/*
** mul_s64_x_tied1:
z0 = svmul_n_s64_x (p0, z1, x0),
z0 = svmul_x (p0, z1, x0))
+/*
+** mul_4dupop1_s64_x_tied2:
+** lsl z0\.d, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1_s64_x_tied2, svint64_t,
+ z0 = svmul_x (p0, svdup_s64 (4), z0),
+ z0 = svmul_x (p0, svdup_s64 (4), z0))
+
+/*
+** mul_4dupop1ptrue_s64_x_tied2:
+** lsl z0\.d, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1ptrue_s64_x_tied2, svint64_t,
+ z0 = svmul_x (svptrue_b64 (), svdup_s64 (4), z0),
+ z0 = svmul_x (svptrue_b64 (), svdup_s64 (4), z0))
+
+/*
+** mul_4dupop2_s64_x_tied1:
+** lsl z0\.d, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_s64_x_tied1, svint64_t,
+ z0 = svmul_x (p0, z0, svdup_s64 (4)),
+ z0 = svmul_x (p0, z0, svdup_s64 (4)))
+
+/*
+** mul_4nop2_s64_x_tied1:
+** lsl z0\.d, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_s64_x_tied1, svint64_t,
+ z0 = svmul_n_s64_x (p0, z0, 4),
+ z0 = svmul_x (p0, z0, 4))
+
+/*
+** mul_maxpownop2_s64_x_tied1:
+** lsl z0\.d, z0\.d, #62
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_s64_x_tied1, svint64_t,
+ z0 = svmul_n_s64_x (p0, z0, MAXPOW),
+ z0 = svmul_x (p0, z0, MAXPOW))
+
+/*
+** mul_intminnop2_s64_x_tied1:
+** lsl z0\.d, z0\.d, #63
+** ret
+*/
+TEST_UNIFORM_Z (mul_intminnop2_s64_x_tied1, svint64_t,
+ z0 = svmul_n_s64_x (p0, z0, INT64_MIN),
+ z0 = svmul_x (p0, z0, INT64_MIN))
+
+/*
+** mul_1_s64_x_tied1:
+** ret
+*/
+TEST_UNIFORM_Z (mul_1_s64_x_tied1, svint64_t,
+ z0 = svmul_n_s64_x (p0, z0, 1),
+ z0 = svmul_x (p0, z0, 1))
+
/*
** mul_2_s64_x_tied1:
-** mul z0\.d, z0\.d, #2
+** add z0\.d, z0\.d, z0\.d
** ret
*/
TEST_UNIFORM_Z (mul_2_s64_x_tied1, svint64_t,
z0 = svmul_x (p0, z0, 2))
/*
-** mul_2_s64_x_untied:
+** mul_3_s64_x_tied1:
+** mul z0\.d, z0\.d, #3
+** ret
+*/
+TEST_UNIFORM_Z (mul_3_s64_x_tied1, svint64_t,
+ z0 = svmul_n_s64_x (p0, z0, 3),
+ z0 = svmul_x (p0, z0, 3))
+
+/*
+** mul_4dupop2_s64_x_untied:
+** lsl z0\.d, z1\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_s64_x_untied, svint64_t,
+ z0 = svmul_x (p0, z1, svdup_s64 (4)),
+ z0 = svmul_x (p0, z1, svdup_s64 (4)))
+
+/*
+** mul_4nop2_s64_x_untied:
+** lsl z0\.d, z1\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_s64_x_untied, svint64_t,
+ z0 = svmul_n_s64_x (p0, z1, 4),
+ z0 = svmul_x (p0, z1, 4))
+
+/*
+** mul_maxpownop2_s64_x_untied:
+** lsl z0\.d, z1\.d, #62
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_s64_x_untied, svint64_t,
+ z0 = svmul_n_s64_x (p0, z1, MAXPOW),
+ z0 = svmul_x (p0, z1, MAXPOW))
+
+/*
+** mul_3_s64_x_untied:
** movprfx z0, z1
-** mul z0\.d, z0\.d, #2
+** mul z0\.d, z0\.d, #3
** ret
*/
-TEST_UNIFORM_Z (mul_2_s64_x_untied, svint64_t,
- z0 = svmul_n_s64_x (p0, z1, 2),
- z0 = svmul_x (p0, z1, 2))
+TEST_UNIFORM_Z (mul_3_s64_x_untied, svint64_t,
+ z0 = svmul_n_s64_x (p0, z1, 3),
+ z0 = svmul_x (p0, z1, 3))
/*
** mul_127_s64_x:
/*
** mul_128_s64_x:
-** mov (z[0-9]+\.d), #128
-** mul z0\.d, p0/m, z0\.d, \1
+** lsl z0\.d, z0\.d, #7
** ret
*/
TEST_UNIFORM_Z (mul_128_s64_x, svint64_t,
#include "test_sve_acle.h"
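+/* The largest positive power of 2 representable in int8_t.  */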
+#define MAXPOW (1 << 6)
+
/*
** mul_s8_m_tied1:
** mul z0\.b, p0/m, z0\.b, z1\.b
z0 = svmul_m (p0, z1, x0))
/*
-** mul_2_s8_m_tied1:
-** mov (z[0-9]+\.b), #2
+** mul_4dupop1_s8_m_tied2:
+** mov (z[0-9]+)\.b, #4
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, \1
+** mul z0\.b, p0/m, z0\.b, \2\.b
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1_s8_m_tied2, svint8_t,
+ z0 = svmul_m (p0, svdup_s8 (4), z0),
+ z0 = svmul_m (p0, svdup_s8 (4), z0))
+
+/*
+** mul_4dupop1ptrue_s8_m_tied2:
+** lsl z0\.b, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1ptrue_s8_m_tied2, svint8_t,
+ z0 = svmul_m (svptrue_b8 (), svdup_s8 (4), z0),
+ z0 = svmul_m (svptrue_b8 (), svdup_s8 (4), z0))
+
+/*
+** mul_4dupop2_s8_m_tied1:
+** lsl z0\.b, p0/m, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_s8_m_tied1, svint8_t,
+ z0 = svmul_m (p0, z0, svdup_s8 (4)),
+ z0 = svmul_m (p0, z0, svdup_s8 (4)))
+
+/*
+** mul_4nop2_s8_m_tied1:
+** lsl z0\.b, p0/m, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_s8_m_tied1, svint8_t,
+ z0 = svmul_n_s8_m (p0, z0, 4),
+ z0 = svmul_m (p0, z0, 4))
+
+/*
+** mul_maxpownop2_s8_m_tied1:
+** lsl z0\.b, p0/m, z0\.b, #6
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_s8_m_tied1, svint8_t,
+ z0 = svmul_n_s8_m (p0, z0, MAXPOW),
+ z0 = svmul_m (p0, z0, MAXPOW))
+
+/*
+** mul_intminnop2_s8_m_tied1:
+** lsl z0\.b, p0/m, z0\.b, #7
+** ret
+*/
+TEST_UNIFORM_Z (mul_intminnop2_s8_m_tied1, svint8_t,
+ z0 = svmul_n_s8_m (p0, z0, INT8_MIN),
+ z0 = svmul_m (p0, z0, INT8_MIN))
+
+/*
+** mul_1_s8_m_tied1:
+** sel z0\.b, p0, z0\.b, z0\.b
+** ret
+*/
+TEST_UNIFORM_Z (mul_1_s8_m_tied1, svint8_t,
+ z0 = svmul_n_s8_m (p0, z0, 1),
+ z0 = svmul_m (p0, z0, 1))
+
+/*
+** mul_3_s8_m_tied1:
+** mov (z[0-9]+\.b), #3
** mul z0\.b, p0/m, z0\.b, \1
** ret
*/
-TEST_UNIFORM_Z (mul_2_s8_m_tied1, svint8_t,
- z0 = svmul_n_s8_m (p0, z0, 2),
- z0 = svmul_m (p0, z0, 2))
+TEST_UNIFORM_Z (mul_3_s8_m_tied1, svint8_t,
+ z0 = svmul_n_s8_m (p0, z0, 3),
+ z0 = svmul_m (p0, z0, 3))
+
+/*
+** mul_4dupop2_s8_m_untied:
+** movprfx z0, z1
+** lsl z0\.b, p0/m, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_s8_m_untied, svint8_t,
+ z0 = svmul_m (p0, z1, svdup_s8 (4)),
+ z0 = svmul_m (p0, z1, svdup_s8 (4)))
/*
-** mul_2_s8_m_untied:
-** mov (z[0-9]+\.b), #2
+** mul_4nop2_s8_m_untied:
+** movprfx z0, z1
+** lsl z0\.b, p0/m, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_s8_m_untied, svint8_t,
+ z0 = svmul_n_s8_m (p0, z1, 4),
+ z0 = svmul_m (p0, z1, 4))
+
+/*
+** mul_maxpownop2_s8_m_untied:
+** movprfx z0, z1
+** lsl z0\.b, p0/m, z0\.b, #6
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_s8_m_untied, svint8_t,
+ z0 = svmul_n_s8_m (p0, z1, MAXPOW),
+ z0 = svmul_m (p0, z1, MAXPOW))
+
+/*
+** mul_3_s8_m_untied:
+** mov (z[0-9]+\.b), #3
** movprfx z0, z1
** mul z0\.b, p0/m, z0\.b, \1
** ret
*/
-TEST_UNIFORM_Z (mul_2_s8_m_untied, svint8_t,
- z0 = svmul_n_s8_m (p0, z1, 2),
- z0 = svmul_m (p0, z1, 2))
+TEST_UNIFORM_Z (mul_3_s8_m_untied, svint8_t,
+ z0 = svmul_n_s8_m (p0, z1, 3),
+ z0 = svmul_m (p0, z1, 3))
/*
** mul_m1_s8_m:
-** mov (z[0-9]+\.b), #-1
-** mul z0\.b, p0/m, z0\.b, \1
+** mov (z[0-9]+)\.b, #-1
+** mul z0\.b, p0/m, z0\.b, \1\.b
** ret
*/
TEST_UNIFORM_Z (mul_m1_s8_m, svint8_t,
z0 = svmul_z (p0, z1, x0))
/*
-** mul_2_s8_z_tied1:
-** mov (z[0-9]+\.b), #2
+** mul_4dupop1_s8_z_tied2:
+** movprfx z0\.b, p0/z, z0\.b
+** lsl z0\.b, p0/m, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1_s8_z_tied2, svint8_t,
+ z0 = svmul_z (p0, svdup_s8 (4), z0),
+ z0 = svmul_z (p0, svdup_s8 (4), z0))
+
+/*
+** mul_4dupop1ptrue_s8_z_tied2:
+** lsl z0\.b, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1ptrue_s8_z_tied2, svint8_t,
+ z0 = svmul_z (svptrue_b8 (), svdup_s8 (4), z0),
+ z0 = svmul_z (svptrue_b8 (), svdup_s8 (4), z0))
+
+/*
+** mul_4dupop2_s8_z_tied1:
+** movprfx z0\.b, p0/z, z0\.b
+** lsl z0\.b, p0/m, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_s8_z_tied1, svint8_t,
+ z0 = svmul_z (p0, z0, svdup_s8 (4)),
+ z0 = svmul_z (p0, z0, svdup_s8 (4)))
+
+/*
+** mul_4nop2_s8_z_tied1:
+** movprfx z0\.b, p0/z, z0\.b
+** lsl z0\.b, p0/m, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_s8_z_tied1, svint8_t,
+ z0 = svmul_n_s8_z (p0, z0, 4),
+ z0 = svmul_z (p0, z0, 4))
+
+/*
+** mul_maxpownop2_s8_z_tied1:
+** movprfx z0\.b, p0/z, z0\.b
+** lsl z0\.b, p0/m, z0\.b, #6
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_s8_z_tied1, svint8_t,
+ z0 = svmul_n_s8_z (p0, z0, MAXPOW),
+ z0 = svmul_z (p0, z0, MAXPOW))
+
+/*
+** mul_intminnop2_s8_z_tied1:
+** movprfx z0\.b, p0/z, z0\.b
+** lsl z0\.b, p0/m, z0\.b, #7
+** ret
+*/
+TEST_UNIFORM_Z (mul_intminnop2_s8_z_tied1, svint8_t,
+ z0 = svmul_n_s8_z (p0, z0, INT8_MIN),
+ z0 = svmul_z (p0, z0, INT8_MIN))
+
+/*
+** mul_1_s8_z_tied1:
+** mov (z[0-9]+\.b), #1
+** movprfx z0\.b, p0/z, z0\.b
+** mul z0\.b, p0/m, z0\.b, \1
+** ret
+*/
+TEST_UNIFORM_Z (mul_1_s8_z_tied1, svint8_t,
+ z0 = svmul_n_s8_z (p0, z0, 1),
+ z0 = svmul_z (p0, z0, 1))
+
+/*
+** mul_3_s8_z_tied1:
+** mov (z[0-9]+\.b), #3
** movprfx z0\.b, p0/z, z0\.b
** mul z0\.b, p0/m, z0\.b, \1
** ret
*/
-TEST_UNIFORM_Z (mul_2_s8_z_tied1, svint8_t,
- z0 = svmul_n_s8_z (p0, z0, 2),
- z0 = svmul_z (p0, z0, 2))
+TEST_UNIFORM_Z (mul_3_s8_z_tied1, svint8_t,
+ z0 = svmul_n_s8_z (p0, z0, 3),
+ z0 = svmul_z (p0, z0, 3))
+
+/*
+** mul_4dupop2_s8_z_untied:
+** movprfx z0\.b, p0/z, z1\.b
+** lsl z0\.b, p0/m, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_s8_z_untied, svint8_t,
+ z0 = svmul_z (p0, z1, svdup_s8 (4)),
+ z0 = svmul_z (p0, z1, svdup_s8 (4)))
/*
-** mul_2_s8_z_untied:
-** mov (z[0-9]+\.b), #2
+** mul_4nop2_s8_z_untied:
+** movprfx z0\.b, p0/z, z1\.b
+** lsl z0\.b, p0/m, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_s8_z_untied, svint8_t,
+ z0 = svmul_n_s8_z (p0, z1, 4),
+ z0 = svmul_z (p0, z1, 4))
+
+/*
+** mul_maxpownop2_s8_z_untied:
+** movprfx z0\.b, p0/z, z1\.b
+** lsl z0\.b, p0/m, z0\.b, #6
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_s8_z_untied, svint8_t,
+ z0 = svmul_n_s8_z (p0, z1, MAXPOW),
+ z0 = svmul_z (p0, z1, MAXPOW))
+
+/*
+** mul_3_s8_z_untied:
+** mov (z[0-9]+\.b), #3
** (
** movprfx z0\.b, p0/z, z1\.b
** mul z0\.b, p0/m, z0\.b, \1
** )
** ret
*/
-TEST_UNIFORM_Z (mul_2_s8_z_untied, svint8_t,
- z0 = svmul_n_s8_z (p0, z1, 2),
- z0 = svmul_z (p0, z1, 2))
+TEST_UNIFORM_Z (mul_3_s8_z_untied, svint8_t,
+ z0 = svmul_n_s8_z (p0, z1, 3),
+ z0 = svmul_z (p0, z1, 3))
/*
** mul_s8_x_tied1:
z0 = svmul_x (p0, z1, x0))
/*
-** mul_2_s8_x_tied1:
-** mul z0\.b, z0\.b, #2
+** mul_4dupop1_s8_x_tied2:
+** lsl z0\.b, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1_s8_x_tied2, svint8_t,
+ z0 = svmul_x (p0, svdup_s8 (4), z0),
+ z0 = svmul_x (p0, svdup_s8 (4), z0))
+
+/*
+** mul_4dupop1ptrue_s8_x_tied2:
+** lsl z0\.b, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1ptrue_s8_x_tied2, svint8_t,
+ z0 = svmul_x (svptrue_b8 (), svdup_s8 (4), z0),
+ z0 = svmul_x (svptrue_b8 (), svdup_s8 (4), z0))
+
+/*
+** mul_4dupop2_s8_x_tied1:
+** lsl z0\.b, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_s8_x_tied1, svint8_t,
+ z0 = svmul_x (p0, z0, svdup_s8 (4)),
+ z0 = svmul_x (p0, z0, svdup_s8 (4)))
+
+/*
+** mul_4nop2_s8_x_tied1:
+** lsl z0\.b, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_s8_x_tied1, svint8_t,
+ z0 = svmul_n_s8_x (p0, z0, 4),
+ z0 = svmul_x (p0, z0, 4))
+
+/*
+** mul_maxpownop2_s8_x_tied1:
+** lsl z0\.b, z0\.b, #6
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_s8_x_tied1, svint8_t,
+ z0 = svmul_n_s8_x (p0, z0, MAXPOW),
+ z0 = svmul_x (p0, z0, MAXPOW))
+
+/*
+** mul_intminnop2_s8_x_tied1:
+** lsl z0\.b, z0\.b, #7
+** ret
+*/
+TEST_UNIFORM_Z (mul_intminnop2_s8_x_tied1, svint8_t,
+ z0 = svmul_n_s8_x (p0, z0, INT8_MIN),
+ z0 = svmul_x (p0, z0, INT8_MIN))
+
+/*
+** mul_1_s8_x_tied1:
+** ret
+*/
+TEST_UNIFORM_Z (mul_1_s8_x_tied1, svint8_t,
+ z0 = svmul_n_s8_x (p0, z0, 1),
+ z0 = svmul_x (p0, z0, 1))
+
+/*
+** mul_3_s8_x_tied1:
+** mul z0\.b, z0\.b, #3
+** ret
+*/
+TEST_UNIFORM_Z (mul_3_s8_x_tied1, svint8_t,
+ z0 = svmul_n_s8_x (p0, z0, 3),
+ z0 = svmul_x (p0, z0, 3))
+
+/*
+** mul_4dupop2_s8_x_untied:
+** lsl z0\.b, z1\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_s8_x_untied, svint8_t,
+ z0 = svmul_x (p0, z1, svdup_s8 (4)),
+ z0 = svmul_x (p0, z1, svdup_s8 (4)))
+
+/*
+** mul_4nop2_s8_x_untied:
+** lsl z0\.b, z1\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_s8_x_untied, svint8_t,
+ z0 = svmul_n_s8_x (p0, z1, 4),
+ z0 = svmul_x (p0, z1, 4))
+
+/*
+** mul_maxpownop2_s8_x_untied:
+** lsl z0\.b, z1\.b, #6
** ret
*/
-TEST_UNIFORM_Z (mul_2_s8_x_tied1, svint8_t,
- z0 = svmul_n_s8_x (p0, z0, 2),
- z0 = svmul_x (p0, z0, 2))
+TEST_UNIFORM_Z (mul_maxpownop2_s8_x_untied, svint8_t,
+ z0 = svmul_n_s8_x (p0, z1, MAXPOW),
+ z0 = svmul_x (p0, z1, MAXPOW))
/*
-** mul_2_s8_x_untied:
+** mul_3_s8_x_untied:
** movprfx z0, z1
-** mul z0\.b, z0\.b, #2
+** mul z0\.b, z0\.b, #3
** ret
*/
-TEST_UNIFORM_Z (mul_2_s8_x_untied, svint8_t,
- z0 = svmul_n_s8_x (p0, z1, 2),
- z0 = svmul_x (p0, z1, 2))
+TEST_UNIFORM_Z (mul_3_s8_x_untied, svint8_t,
+ z0 = svmul_n_s8_x (p0, z1, 3),
+ z0 = svmul_x (p0, z1, 3))
/*
** mul_127_s8_x:
/*
** mul_128_s8_x:
-** mul z0\.b, z0\.b, #-128
+** lsl z0\.b, z0\.b, #7
** ret
*/
TEST_UNIFORM_Z (mul_128_s8_x, svint8_t,
/*
** mul_m128_s8_x:
-** mul z0\.b, z0\.b, #-128
+** lsl z0\.b, z0\.b, #7
** ret
*/
TEST_UNIFORM_Z (mul_m128_s8_x, svint8_t,
#include "test_sve_acle.h"
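+/* The largest power of 2 representable in uint16_t.  */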
+#define MAXPOW (1ULL << 15)
+
/*
** mul_u16_m_tied1:
** mul z0\.h, p0/m, z0\.h, z1\.h
z0 = svmul_m (p0, z1, x0))
/*
-** mul_2_u16_m_tied1:
-** mov (z[0-9]+\.h), #2
+** mul_4dupop1_u16_m_tied2:
+** mov (z[0-9]+)\.h, #4
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, \1
+** mul z0\.h, p0/m, z0\.h, \2\.h
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1_u16_m_tied2, svuint16_t,
+ z0 = svmul_m (p0, svdup_u16 (4), z0),
+ z0 = svmul_m (p0, svdup_u16 (4), z0))
+
+/*
+** mul_4dupop1ptrue_u16_m_tied2:
+** lsl z0\.h, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1ptrue_u16_m_tied2, svuint16_t,
+ z0 = svmul_m (svptrue_b16 (), svdup_u16 (4), z0),
+ z0 = svmul_m (svptrue_b16 (), svdup_u16 (4), z0))
+
+/*
+** mul_4dupop2_u16_m_tied1:
+** lsl z0\.h, p0/m, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_u16_m_tied1, svuint16_t,
+ z0 = svmul_m (p0, z0, svdup_u16 (4)),
+ z0 = svmul_m (p0, z0, svdup_u16 (4)))
+
+/*
+** mul_4nop2_u16_m_tied1:
+** lsl z0\.h, p0/m, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_u16_m_tied1, svuint16_t,
+ z0 = svmul_n_u16_m (p0, z0, 4),
+ z0 = svmul_m (p0, z0, 4))
+
+/*
+** mul_maxpownop2_u16_m_tied1:
+** lsl z0\.h, p0/m, z0\.h, #15
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_u16_m_tied1, svuint16_t,
+ z0 = svmul_n_u16_m (p0, z0, MAXPOW),
+ z0 = svmul_m (p0, z0, MAXPOW))
+
+/*
+** mul_1_u16_m_tied1:
+** sel z0\.h, p0, z0\.h, z0\.h
+** ret
+*/
+TEST_UNIFORM_Z (mul_1_u16_m_tied1, svuint16_t,
+ z0 = svmul_n_u16_m (p0, z0, 1),
+ z0 = svmul_m (p0, z0, 1))
+
+/*
+** mul_3_u16_m_tied1:
+** mov (z[0-9]+\.h), #3
** mul z0\.h, p0/m, z0\.h, \1
** ret
*/
-TEST_UNIFORM_Z (mul_2_u16_m_tied1, svuint16_t,
- z0 = svmul_n_u16_m (p0, z0, 2),
- z0 = svmul_m (p0, z0, 2))
+TEST_UNIFORM_Z (mul_3_u16_m_tied1, svuint16_t,
+ z0 = svmul_n_u16_m (p0, z0, 3),
+ z0 = svmul_m (p0, z0, 3))
+
+/*
+** mul_4dupop2_u16_m_untied:
+** movprfx z0, z1
+** lsl z0\.h, p0/m, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_u16_m_untied, svuint16_t,
+ z0 = svmul_m (p0, z1, svdup_u16 (4)),
+ z0 = svmul_m (p0, z1, svdup_u16 (4)))
/*
-** mul_2_u16_m_untied:
-** mov (z[0-9]+\.h), #2
+** mul_4nop2_u16_m_untied:
+** movprfx z0, z1
+** lsl z0\.h, p0/m, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_u16_m_untied, svuint16_t,
+ z0 = svmul_n_u16_m (p0, z1, 4),
+ z0 = svmul_m (p0, z1, 4))
+
+/*
+** mul_maxpownop2_u16_m_untied:
+** movprfx z0, z1
+** lsl z0\.h, p0/m, z0\.h, #15
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_u16_m_untied, svuint16_t,
+ z0 = svmul_n_u16_m (p0, z1, MAXPOW),
+ z0 = svmul_m (p0, z1, MAXPOW))
+
+/*
+** mul_3_u16_m_untied:
+** mov (z[0-9]+\.h), #3
** movprfx z0, z1
** mul z0\.h, p0/m, z0\.h, \1
** ret
*/
-TEST_UNIFORM_Z (mul_2_u16_m_untied, svuint16_t,
- z0 = svmul_n_u16_m (p0, z1, 2),
- z0 = svmul_m (p0, z1, 2))
+TEST_UNIFORM_Z (mul_3_u16_m_untied, svuint16_t,
+ z0 = svmul_n_u16_m (p0, z1, 3),
+ z0 = svmul_m (p0, z1, 3))
/*
** mul_m1_u16_m:
z0 = svmul_z (p0, z1, x0))
/*
-** mul_2_u16_z_tied1:
-** mov (z[0-9]+\.h), #2
+** mul_4dupop1_u16_z_tied2:
+** movprfx z0\.h, p0/z, z0\.h
+** lsl z0\.h, p0/m, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1_u16_z_tied2, svuint16_t,
+ z0 = svmul_z (p0, svdup_u16 (4), z0),
+ z0 = svmul_z (p0, svdup_u16 (4), z0))
+
+/*
+** mul_4dupop1ptrue_u16_z_tied2:
+** lsl z0\.h, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1ptrue_u16_z_tied2, svuint16_t,
+ z0 = svmul_z (svptrue_b16 (), svdup_u16 (4), z0),
+ z0 = svmul_z (svptrue_b16 (), svdup_u16 (4), z0))
+
+/*
+** mul_4dupop2_u16_z_tied1:
+** movprfx z0\.h, p0/z, z0\.h
+** lsl z0\.h, p0/m, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_u16_z_tied1, svuint16_t,
+ z0 = svmul_z (p0, z0, svdup_u16 (4)),
+ z0 = svmul_z (p0, z0, svdup_u16 (4)))
+
+/*
+** mul_4nop2_u16_z_tied1:
+** movprfx z0\.h, p0/z, z0\.h
+** lsl z0\.h, p0/m, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_u16_z_tied1, svuint16_t,
+ z0 = svmul_n_u16_z (p0, z0, 4),
+ z0 = svmul_z (p0, z0, 4))
+
+/*
+** mul_maxpownop2_u16_z_tied1:
+** movprfx z0\.h, p0/z, z0\.h
+** lsl z0\.h, p0/m, z0\.h, #15
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_u16_z_tied1, svuint16_t,
+ z0 = svmul_n_u16_z (p0, z0, MAXPOW),
+ z0 = svmul_z (p0, z0, MAXPOW))
+
+/*
+** mul_1_u16_z_tied1:
+** mov (z[0-9]+\.h), #1
+** movprfx z0\.h, p0/z, z0\.h
+** mul z0\.h, p0/m, z0\.h, \1
+** ret
+*/
+TEST_UNIFORM_Z (mul_1_u16_z_tied1, svuint16_t,
+ z0 = svmul_n_u16_z (p0, z0, 1),
+ z0 = svmul_z (p0, z0, 1))
+
+/*
+** mul_3_u16_z_tied1:
+** mov (z[0-9]+\.h), #3
** movprfx z0\.h, p0/z, z0\.h
** mul z0\.h, p0/m, z0\.h, \1
** ret
*/
-TEST_UNIFORM_Z (mul_2_u16_z_tied1, svuint16_t,
- z0 = svmul_n_u16_z (p0, z0, 2),
- z0 = svmul_z (p0, z0, 2))
+TEST_UNIFORM_Z (mul_3_u16_z_tied1, svuint16_t,
+ z0 = svmul_n_u16_z (p0, z0, 3),
+ z0 = svmul_z (p0, z0, 3))
+
+/*
+** mul_4dupop2_u16_z_untied:
+** movprfx z0\.h, p0/z, z1\.h
+** lsl z0\.h, p0/m, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_u16_z_untied, svuint16_t,
+ z0 = svmul_z (p0, z1, svdup_u16 (4)),
+ z0 = svmul_z (p0, z1, svdup_u16 (4)))
+
+/*
+** mul_4nop2_u16_z_untied:
+** movprfx z0\.h, p0/z, z1\.h
+** lsl z0\.h, p0/m, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_u16_z_untied, svuint16_t,
+ z0 = svmul_n_u16_z (p0, z1, 4),
+ z0 = svmul_z (p0, z1, 4))
+
+/*
+** mul_maxpownop2_u16_z_untied:
+** movprfx z0\.h, p0/z, z1\.h
+** lsl z0\.h, p0/m, z0\.h, #15
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_u16_z_untied, svuint16_t,
+ z0 = svmul_n_u16_z (p0, z1, MAXPOW),
+ z0 = svmul_z (p0, z1, MAXPOW))
/*
-** mul_2_u16_z_untied:
-** mov (z[0-9]+\.h), #2
+** mul_3_u16_z_untied:
+** mov (z[0-9]+\.h), #3
** (
** movprfx z0\.h, p0/z, z1\.h
** mul z0\.h, p0/m, z0\.h, \1
** )
** ret
*/
-TEST_UNIFORM_Z (mul_2_u16_z_untied, svuint16_t,
- z0 = svmul_n_u16_z (p0, z1, 2),
- z0 = svmul_z (p0, z1, 2))
+TEST_UNIFORM_Z (mul_3_u16_z_untied, svuint16_t,
+ z0 = svmul_n_u16_z (p0, z1, 3),
+ z0 = svmul_z (p0, z1, 3))
/*
** mul_u16_x_tied1:
z0 = svmul_x (p0, z1, x0))
/*
-** mul_2_u16_x_tied1:
-** mul z0\.h, z0\.h, #2
+** mul_4dupop1_u16_x_tied2:
+** lsl z0\.h, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1_u16_x_tied2, svuint16_t,
+ z0 = svmul_x (p0, svdup_u16 (4), z0),
+ z0 = svmul_x (p0, svdup_u16 (4), z0))
+
+/*
+** mul_4dupop1ptrue_u16_x_tied2:
+** lsl z0\.h, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1ptrue_u16_x_tied2, svuint16_t,
+ z0 = svmul_x (svptrue_b16 (), svdup_u16 (4), z0),
+ z0 = svmul_x (svptrue_b16 (), svdup_u16 (4), z0))
+
+/*
+** mul_4dupop2_u16_x_tied1:
+** lsl z0\.h, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_u16_x_tied1, svuint16_t,
+ z0 = svmul_x (p0, z0, svdup_u16 (4)),
+ z0 = svmul_x (p0, z0, svdup_u16 (4)))
+
+/*
+** mul_4nop2_u16_x_tied1:
+** lsl z0\.h, z0\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_u16_x_tied1, svuint16_t,
+ z0 = svmul_n_u16_x (p0, z0, 4),
+ z0 = svmul_x (p0, z0, 4))
+
+/*
+** mul_maxpownop2_u16_x_tied1:
+** lsl z0\.h, z0\.h, #15
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_u16_x_tied1, svuint16_t,
+ z0 = svmul_n_u16_x (p0, z0, MAXPOW),
+ z0 = svmul_x (p0, z0, MAXPOW))
+
+/*
+** mul_1_u16_x_tied1:
+** ret
+*/
+TEST_UNIFORM_Z (mul_1_u16_x_tied1, svuint16_t,
+ z0 = svmul_n_u16_x (p0, z0, 1),
+ z0 = svmul_x (p0, z0, 1))
+
+/*
+** mul_3_u16_x_tied1:
+** mul z0\.h, z0\.h, #3
+** ret
+*/
+TEST_UNIFORM_Z (mul_3_u16_x_tied1, svuint16_t,
+ z0 = svmul_n_u16_x (p0, z0, 3),
+ z0 = svmul_x (p0, z0, 3))
+
+/*
+** mul_4dupop2_u16_x_untied:
+** lsl z0\.h, z1\.h, #2
** ret
*/
-TEST_UNIFORM_Z (mul_2_u16_x_tied1, svuint16_t,
- z0 = svmul_n_u16_x (p0, z0, 2),
- z0 = svmul_x (p0, z0, 2))
+TEST_UNIFORM_Z (mul_4dupop2_u16_x_untied, svuint16_t,
+ z0 = svmul_x (p0, z1, svdup_u16 (4)),
+ z0 = svmul_x (p0, z1, svdup_u16 (4)))
/*
-** mul_2_u16_x_untied:
+** mul_4nop2_u16_x_untied:
+** lsl z0\.h, z1\.h, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_u16_x_untied, svuint16_t,
+ z0 = svmul_n_u16_x (p0, z1, 4),
+ z0 = svmul_x (p0, z1, 4))
+
+/*
+** mul_maxpownop2_u16_x_untied:
+** lsl z0\.h, z1\.h, #15
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_u16_x_untied, svuint16_t,
+ z0 = svmul_n_u16_x (p0, z1, MAXPOW),
+ z0 = svmul_x (p0, z1, MAXPOW))
+
+/*
+** mul_3_u16_x_untied:
** movprfx z0, z1
-** mul z0\.h, z0\.h, #2
+** mul z0\.h, z0\.h, #3
** ret
*/
-TEST_UNIFORM_Z (mul_2_u16_x_untied, svuint16_t,
- z0 = svmul_n_u16_x (p0, z1, 2),
- z0 = svmul_x (p0, z1, 2))
+TEST_UNIFORM_Z (mul_3_u16_x_untied, svuint16_t,
+ z0 = svmul_n_u16_x (p0, z1, 3),
+ z0 = svmul_x (p0, z1, 3))
/*
** mul_127_u16_x:
/*
** mul_128_u16_x:
-** mov (z[0-9]+\.h), #128
-** mul z0\.h, p0/m, z0\.h, \1
+** lsl z0\.h, z0\.h, #7
** ret
*/
TEST_UNIFORM_Z (mul_128_u16_x, svuint16_t,
#include "test_sve_acle.h"
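+/* The largest power of 2 representable in uint32_t.  */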
+#define MAXPOW (1ULL << 31)
+
/*
** mul_u32_m_tied1:
** mul z0\.s, p0/m, z0\.s, z1\.s
z0 = svmul_m (p0, z1, x0))
/*
-** mul_2_u32_m_tied1:
-** mov (z[0-9]+\.s), #2
+** mul_4dupop1_u32_m_tied2:
+** mov (z[0-9]+)\.s, #4
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, \1
+** mul z0\.s, p0/m, z0\.s, \2\.s
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1_u32_m_tied2, svuint32_t,
+ z0 = svmul_m (p0, svdup_u32 (4), z0),
+ z0 = svmul_m (p0, svdup_u32 (4), z0))
+
+/*
+** mul_4dupop1ptrue_u32_m_tied2:
+** lsl z0\.s, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1ptrue_u32_m_tied2, svuint32_t,
+ z0 = svmul_m (svptrue_b32 (), svdup_u32 (4), z0),
+ z0 = svmul_m (svptrue_b32 (), svdup_u32 (4), z0))
+
+/*
+** mul_4dupop2_u32_m_tied1:
+** lsl z0\.s, p0/m, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_u32_m_tied1, svuint32_t,
+ z0 = svmul_m (p0, z0, svdup_u32 (4)),
+ z0 = svmul_m (p0, z0, svdup_u32 (4)))
+
+/*
+** mul_4nop2_u32_m_tied1:
+** lsl z0\.s, p0/m, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_u32_m_tied1, svuint32_t,
+ z0 = svmul_n_u32_m (p0, z0, 4),
+ z0 = svmul_m (p0, z0, 4))
+
+/*
+** mul_maxpownop2_u32_m_tied1:
+** lsl z0\.s, p0/m, z0\.s, #31
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_u32_m_tied1, svuint32_t,
+ z0 = svmul_n_u32_m (p0, z0, MAXPOW),
+ z0 = svmul_m (p0, z0, MAXPOW))
+
+/*
+** mul_1_u32_m_tied1:
+** sel z0\.s, p0, z0\.s, z0\.s
+** ret
+*/
+TEST_UNIFORM_Z (mul_1_u32_m_tied1, svuint32_t,
+ z0 = svmul_n_u32_m (p0, z0, 1),
+ z0 = svmul_m (p0, z0, 1))
+
+/*
+** mul_3_u32_m_tied1:
+** mov (z[0-9]+\.s), #3
** mul z0\.s, p0/m, z0\.s, \1
** ret
*/
-TEST_UNIFORM_Z (mul_2_u32_m_tied1, svuint32_t,
- z0 = svmul_n_u32_m (p0, z0, 2),
- z0 = svmul_m (p0, z0, 2))
+TEST_UNIFORM_Z (mul_3_u32_m_tied1, svuint32_t,
+ z0 = svmul_n_u32_m (p0, z0, 3),
+ z0 = svmul_m (p0, z0, 3))
+
+/*
+** mul_4dupop2_u32_m_untied:
+** movprfx z0, z1
+** lsl z0\.s, p0/m, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_u32_m_untied, svuint32_t,
+ z0 = svmul_m (p0, z1, svdup_u32 (4)),
+ z0 = svmul_m (p0, z1, svdup_u32 (4)))
/*
-** mul_2_u32_m_untied:
-** mov (z[0-9]+\.s), #2
+** mul_4nop2_u32_m_untied:
+** movprfx z0, z1
+** lsl z0\.s, p0/m, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_u32_m_untied, svuint32_t,
+ z0 = svmul_n_u32_m (p0, z1, 4),
+ z0 = svmul_m (p0, z1, 4))
+
+/*
+** mul_maxpownop2_u32_m_untied:
+** movprfx z0, z1
+** lsl z0\.s, p0/m, z0\.s, #31
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_u32_m_untied, svuint32_t,
+ z0 = svmul_n_u32_m (p0, z1, MAXPOW),
+ z0 = svmul_m (p0, z1, MAXPOW))
+
+/*
+** mul_3_u32_m_untied:
+** mov (z[0-9]+\.s), #3
** movprfx z0, z1
** mul z0\.s, p0/m, z0\.s, \1
** ret
*/
-TEST_UNIFORM_Z (mul_2_u32_m_untied, svuint32_t,
- z0 = svmul_n_u32_m (p0, z1, 2),
- z0 = svmul_m (p0, z1, 2))
+TEST_UNIFORM_Z (mul_3_u32_m_untied, svuint32_t,
+ z0 = svmul_n_u32_m (p0, z1, 3),
+ z0 = svmul_m (p0, z1, 3))
/*
** mul_m1_u32_m:
z0 = svmul_z (p0, z1, x0))
/*
-** mul_2_u32_z_tied1:
-** mov (z[0-9]+\.s), #2
+** mul_4dupop1_u32_z_tied2:
+** movprfx z0\.s, p0/z, z0\.s
+** lsl z0\.s, p0/m, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1_u32_z_tied2, svuint32_t,
+ z0 = svmul_z (p0, svdup_u32 (4), z0),
+ z0 = svmul_z (p0, svdup_u32 (4), z0))
+
+/*
+** mul_4dupop1ptrue_u32_z_tied2:
+** lsl z0\.s, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1ptrue_u32_z_tied2, svuint32_t,
+ z0 = svmul_z (svptrue_b32 (), svdup_u32 (4), z0),
+ z0 = svmul_z (svptrue_b32 (), svdup_u32 (4), z0))
+
+/*
+** mul_4dupop2_u32_z_tied1:
+** movprfx z0\.s, p0/z, z0\.s
+** lsl z0\.s, p0/m, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_u32_z_tied1, svuint32_t,
+ z0 = svmul_z (p0, z0, svdup_u32 (4)),
+ z0 = svmul_z (p0, z0, svdup_u32 (4)))
+
+/*
+** mul_4nop2_u32_z_tied1:
+** movprfx z0\.s, p0/z, z0\.s
+** lsl z0\.s, p0/m, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_u32_z_tied1, svuint32_t,
+ z0 = svmul_n_u32_z (p0, z0, 4),
+ z0 = svmul_z (p0, z0, 4))
+
+/*
+** mul_maxpownop2_u32_z_tied1:
+** movprfx z0\.s, p0/z, z0\.s
+** lsl z0\.s, p0/m, z0\.s, #31
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_u32_z_tied1, svuint32_t,
+ z0 = svmul_n_u32_z (p0, z0, MAXPOW),
+ z0 = svmul_z (p0, z0, MAXPOW))
+
+/*
+** mul_1_u32_z_tied1:
+** mov (z[0-9]+\.s), #1
+** movprfx z0\.s, p0/z, z0\.s
+** mul z0\.s, p0/m, z0\.s, \1
+** ret
+*/
+TEST_UNIFORM_Z (mul_1_u32_z_tied1, svuint32_t,
+ z0 = svmul_n_u32_z (p0, z0, 1),
+ z0 = svmul_z (p0, z0, 1))
+
+/*
+** mul_3_u32_z_tied1:
+** mov (z[0-9]+\.s), #3
** movprfx z0\.s, p0/z, z0\.s
** mul z0\.s, p0/m, z0\.s, \1
** ret
*/
-TEST_UNIFORM_Z (mul_2_u32_z_tied1, svuint32_t,
- z0 = svmul_n_u32_z (p0, z0, 2),
- z0 = svmul_z (p0, z0, 2))
+TEST_UNIFORM_Z (mul_3_u32_z_tied1, svuint32_t,
+ z0 = svmul_n_u32_z (p0, z0, 3),
+ z0 = svmul_z (p0, z0, 3))
+
+/*
+** mul_4dupop2_u32_z_untied:
+** movprfx z0\.s, p0/z, z1\.s
+** lsl z0\.s, p0/m, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_u32_z_untied, svuint32_t,
+ z0 = svmul_z (p0, z1, svdup_u32 (4)),
+ z0 = svmul_z (p0, z1, svdup_u32 (4)))
+
+/*
+** mul_4nop2_u32_z_untied:
+** movprfx z0\.s, p0/z, z1\.s
+** lsl z0\.s, p0/m, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_u32_z_untied, svuint32_t,
+ z0 = svmul_n_u32_z (p0, z1, 4),
+ z0 = svmul_z (p0, z1, 4))
+
+/*
+** mul_maxpownop2_u32_z_untied:
+** movprfx z0\.s, p0/z, z1\.s
+** lsl z0\.s, p0/m, z0\.s, #31
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_u32_z_untied, svuint32_t,
+ z0 = svmul_n_u32_z (p0, z1, MAXPOW),
+ z0 = svmul_z (p0, z1, MAXPOW))
/*
-** mul_2_u32_z_untied:
-** mov (z[0-9]+\.s), #2
+** mul_3_u32_z_untied:
+** mov (z[0-9]+\.s), #3
** (
** movprfx z0\.s, p0/z, z1\.s
** mul z0\.s, p0/m, z0\.s, \1
** )
** ret
*/
-TEST_UNIFORM_Z (mul_2_u32_z_untied, svuint32_t,
- z0 = svmul_n_u32_z (p0, z1, 2),
- z0 = svmul_z (p0, z1, 2))
+TEST_UNIFORM_Z (mul_3_u32_z_untied, svuint32_t,
+ z0 = svmul_n_u32_z (p0, z1, 3),
+ z0 = svmul_z (p0, z1, 3))
/*
** mul_u32_x_tied1:
z0 = svmul_x (p0, z1, x0))
/*
-** mul_2_u32_x_tied1:
-** mul z0\.s, z0\.s, #2
+** mul_4dupop1_u32_x_tied2:
+** lsl z0\.s, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1_u32_x_tied2, svuint32_t,
+ z0 = svmul_x (p0, svdup_u32 (4), z0),
+ z0 = svmul_x (p0, svdup_u32 (4), z0))
+
+/*
+** mul_4dupop1ptrue_u32_x_tied2:
+** lsl z0\.s, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1ptrue_u32_x_tied2, svuint32_t,
+ z0 = svmul_x (svptrue_b32 (), svdup_u32 (4), z0),
+ z0 = svmul_x (svptrue_b32 (), svdup_u32 (4), z0))
+
+/*
+** mul_4dupop2_u32_x_tied1:
+** lsl z0\.s, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_u32_x_tied1, svuint32_t,
+ z0 = svmul_x (p0, z0, svdup_u32 (4)),
+ z0 = svmul_x (p0, z0, svdup_u32 (4)))
+
+/*
+** mul_4nop2_u32_x_tied1:
+** lsl z0\.s, z0\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_u32_x_tied1, svuint32_t,
+ z0 = svmul_n_u32_x (p0, z0, 4),
+ z0 = svmul_x (p0, z0, 4))
+
+/*
+** mul_maxpownop2_u32_x_tied1:
+** lsl z0\.s, z0\.s, #31
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_u32_x_tied1, svuint32_t,
+ z0 = svmul_n_u32_x (p0, z0, MAXPOW),
+ z0 = svmul_x (p0, z0, MAXPOW))
+
+/*
+** mul_1_u32_x_tied1:
+** ret
+*/
+TEST_UNIFORM_Z (mul_1_u32_x_tied1, svuint32_t,
+ z0 = svmul_n_u32_x (p0, z0, 1),
+ z0 = svmul_x (p0, z0, 1))
+
+/*
+** mul_3_u32_x_tied1:
+** mul z0\.s, z0\.s, #3
+** ret
+*/
+TEST_UNIFORM_Z (mul_3_u32_x_tied1, svuint32_t,
+ z0 = svmul_n_u32_x (p0, z0, 3),
+ z0 = svmul_x (p0, z0, 3))
+
+/*
+** mul_4dupop2_u32_x_untied:
+** lsl z0\.s, z1\.s, #2
** ret
*/
-TEST_UNIFORM_Z (mul_2_u32_x_tied1, svuint32_t,
- z0 = svmul_n_u32_x (p0, z0, 2),
- z0 = svmul_x (p0, z0, 2))
+TEST_UNIFORM_Z (mul_4dupop2_u32_x_untied, svuint32_t,
+ z0 = svmul_x (p0, z1, svdup_u32 (4)),
+ z0 = svmul_x (p0, z1, svdup_u32 (4)))
/*
-** mul_2_u32_x_untied:
+** mul_4nop2_u32_x_untied:
+** lsl z0\.s, z1\.s, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_u32_x_untied, svuint32_t,
+ z0 = svmul_n_u32_x (p0, z1, 4),
+ z0 = svmul_x (p0, z1, 4))
+
+/*
+** mul_maxpownop2_u32_x_untied:
+** lsl z0\.s, z1\.s, #31
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_u32_x_untied, svuint32_t,
+ z0 = svmul_n_u32_x (p0, z1, MAXPOW),
+ z0 = svmul_x (p0, z1, MAXPOW))
+
+/*
+** mul_3_u32_x_untied:
** movprfx z0, z1
-** mul z0\.s, z0\.s, #2
+** mul z0\.s, z0\.s, #3
** ret
*/
-TEST_UNIFORM_Z (mul_2_u32_x_untied, svuint32_t,
- z0 = svmul_n_u32_x (p0, z1, 2),
- z0 = svmul_x (p0, z1, 2))
+TEST_UNIFORM_Z (mul_3_u32_x_untied, svuint32_t,
+ z0 = svmul_n_u32_x (p0, z1, 3),
+ z0 = svmul_x (p0, z1, 3))
/*
** mul_127_u32_x:
/*
** mul_128_u32_x:
-** mov (z[0-9]+\.s), #128
-** mul z0\.s, p0/m, z0\.s, \1
+** lsl z0\.s, z0\.s, #7
** ret
*/
TEST_UNIFORM_Z (mul_128_u32_x, svuint32_t,
#include "test_sve_acle.h"
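+/* The largest power of 2 representable in uint64_t.  */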
+#define MAXPOW (1ULL << 63)
+
/*
** mul_u64_m_tied1:
** mul z0\.d, p0/m, z0\.d, z1\.d
z0 = svmul_n_u64_m (p0, z1, x0),
z0 = svmul_m (p0, z1, x0))
+/*
+** mul_4dupop1_u64_m_tied2:
+** mov (z[0-9]+)\.d, #4
+** mov (z[0-9]+\.d), z0\.d
+** movprfx z0, \1
+** mul z0\.d, p0/m, z0\.d, \2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1_u64_m_tied2, svuint64_t,
+ z0 = svmul_m (p0, svdup_u64 (4), z0),
+ z0 = svmul_m (p0, svdup_u64 (4), z0))
+
+/*
+** mul_4dupop1ptrue_u64_m_tied2:
+** lsl z0\.d, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1ptrue_u64_m_tied2, svuint64_t,
+ z0 = svmul_m (svptrue_b64 (), svdup_u64 (4), z0),
+ z0 = svmul_m (svptrue_b64 (), svdup_u64 (4), z0))
+
+/*
+** mul_4dupop2_u64_m_tied1:
+** lsl z0\.d, p0/m, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_u64_m_tied1, svuint64_t,
+ z0 = svmul_m (p0, z0, svdup_u64 (4)),
+ z0 = svmul_m (p0, z0, svdup_u64 (4)))
+
+/*
+** mul_4nop2_u64_m_tied1:
+** lsl z0\.d, p0/m, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_u64_m_tied1, svuint64_t,
+ z0 = svmul_n_u64_m (p0, z0, 4),
+ z0 = svmul_m (p0, z0, 4))
+
+/*
+** mul_maxpownop2_u64_m_tied1:
+** lsl z0\.d, p0/m, z0\.d, #63
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_u64_m_tied1, svuint64_t,
+ z0 = svmul_n_u64_m (p0, z0, MAXPOW),
+ z0 = svmul_m (p0, z0, MAXPOW))
+
+/*
+** mul_1_u64_m_tied1:
+** sel z0\.d, p0, z0\.d, z0\.d
+** ret
+*/
+TEST_UNIFORM_Z (mul_1_u64_m_tied1, svuint64_t,
+ z0 = svmul_n_u64_m (p0, z0, 1),
+ z0 = svmul_m (p0, z0, 1))
+
/*
** mul_2_u64_m_tied1:
-** mov (z[0-9]+\.d), #2
-** mul z0\.d, p0/m, z0\.d, \1
+** lsl z0\.d, p0/m, z0\.d, #1
** ret
*/
TEST_UNIFORM_Z (mul_2_u64_m_tied1, svuint64_t,
z0 = svmul_m (p0, z0, 2))
/*
-** mul_2_u64_m_untied:
-** mov (z[0-9]+\.d), #2
+** mul_3_u64_m_tied1:
+** mov (z[0-9]+\.d), #3
+** mul z0\.d, p0/m, z0\.d, \1
+** ret
+*/
+TEST_UNIFORM_Z (mul_3_u64_m_tied1, svuint64_t,
+ z0 = svmul_n_u64_m (p0, z0, 3),
+ z0 = svmul_m (p0, z0, 3))
+
+/*
+** mul_4dupop2_u64_m_untied:
+** movprfx z0, z1
+** lsl z0\.d, p0/m, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_u64_m_untied, svuint64_t,
+ z0 = svmul_m (p0, z1, svdup_u64 (4)),
+ z0 = svmul_m (p0, z1, svdup_u64 (4)))
+
+/*
+** mul_4nop2_u64_m_untied:
+** movprfx z0, z1
+** lsl z0\.d, p0/m, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_u64_m_untied, svuint64_t,
+ z0 = svmul_n_u64_m (p0, z1, 4),
+ z0 = svmul_m (p0, z1, 4))
+
+/*
+** mul_maxpownop2_u64_m_untied:
+** movprfx z0, z1
+** lsl z0\.d, p0/m, z0\.d, #63
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_u64_m_untied, svuint64_t,
+ z0 = svmul_n_u64_m (p0, z1, MAXPOW),
+ z0 = svmul_m (p0, z1, MAXPOW))
+
+/*
+** mul_3_u64_m_untied:
+** mov (z[0-9]+\.d), #3
** movprfx z0, z1
** mul z0\.d, p0/m, z0\.d, \1
** ret
*/
-TEST_UNIFORM_Z (mul_2_u64_m_untied, svuint64_t,
- z0 = svmul_n_u64_m (p0, z1, 2),
- z0 = svmul_m (p0, z1, 2))
+TEST_UNIFORM_Z (mul_3_u64_m_untied, svuint64_t,
+ z0 = svmul_n_u64_m (p0, z1, 3),
+ z0 = svmul_m (p0, z1, 3))
/*
** mul_m1_u64_m:
z0 = svmul_z (p0, z1, x0))
/*
-** mul_2_u64_z_tied1:
-** mov (z[0-9]+\.d), #2
+** mul_4dupop1_u64_z_tied2:
** movprfx z0\.d, p0/z, z0\.d
-** mul z0\.d, p0/m, z0\.d, \1
+** lsl z0\.d, p0/m, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1_u64_z_tied2, svuint64_t,
+ z0 = svmul_z (p0, svdup_u64 (4), z0),
+ z0 = svmul_z (p0, svdup_u64 (4), z0))
+
+/*
+** mul_4dupop1ptrue_u64_z_tied2:
+** lsl z0\.d, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1ptrue_u64_z_tied2, svuint64_t,
+ z0 = svmul_z (svptrue_b64 (), svdup_u64 (4), z0),
+ z0 = svmul_z (svptrue_b64 (), svdup_u64 (4), z0))
+
+/*
+** mul_4dupop2_u64_z_tied1:
+** movprfx z0\.d, p0/z, z0\.d
+** lsl z0\.d, p0/m, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_u64_z_tied1, svuint64_t,
+ z0 = svmul_z (p0, z0, svdup_u64 (4)),
+ z0 = svmul_z (p0, z0, svdup_u64 (4)))
+
+/*
+** mul_4nop2_u64_z_tied1:
+** movprfx z0\.d, p0/z, z0\.d
+** lsl z0\.d, p0/m, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_u64_z_tied1, svuint64_t,
+ z0 = svmul_n_u64_z (p0, z0, 4),
+ z0 = svmul_z (p0, z0, 4))
+
+/*
+** mul_maxpownop2_u64_z_tied1:
+** movprfx z0\.d, p0/z, z0\.d
+** lsl z0\.d, p0/m, z0\.d, #63
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_u64_z_tied1, svuint64_t,
+ z0 = svmul_n_u64_z (p0, z0, MAXPOW),
+ z0 = svmul_z (p0, z0, MAXPOW))
+
+/*
+** mul_1_u64_z_tied1:
+** mov (z[0-9]+\.d), #1
+** movprfx z0\.d, p0/z, z0\.d
+** mul z0\.d, p0/m, z0\.d, \1
+** ret
+*/
+TEST_UNIFORM_Z (mul_1_u64_z_tied1, svuint64_t,
+ z0 = svmul_n_u64_z (p0, z0, 1),
+ z0 = svmul_z (p0, z0, 1))
+
+/*
+** mul_2_u64_z_tied1:
+** movprfx z0\.d, p0/z, z0\.d
+** lsl z0\.d, p0/m, z0\.d, #1
** ret
*/
TEST_UNIFORM_Z (mul_2_u64_z_tied1, svuint64_t,
z0 = svmul_z (p0, z0, 2))
/*
-** mul_2_u64_z_untied:
-** mov (z[0-9]+\.d), #2
+** mul_3_u64_z_tied1:
+** mov (z[0-9]+\.d), #3
+** movprfx z0\.d, p0/z, z0\.d
+** mul z0\.d, p0/m, z0\.d, \1
+** ret
+*/
+TEST_UNIFORM_Z (mul_3_u64_z_tied1, svuint64_t,
+ z0 = svmul_n_u64_z (p0, z0, 3),
+ z0 = svmul_z (p0, z0, 3))
+
+/*
+** mul_4dupop2_u64_z_untied:
+** movprfx z0\.d, p0/z, z1\.d
+** lsl z0\.d, p0/m, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_u64_z_untied, svuint64_t,
+ z0 = svmul_z (p0, z1, svdup_u64 (4)),
+ z0 = svmul_z (p0, z1, svdup_u64 (4)))
+
+/*
+** mul_4nop2_u64_z_untied:
+** movprfx z0\.d, p0/z, z1\.d
+** lsl z0\.d, p0/m, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_u64_z_untied, svuint64_t,
+ z0 = svmul_n_u64_z (p0, z1, 4),
+ z0 = svmul_z (p0, z1, 4))
+
+/*
+** mul_maxpownop2_u64_z_untied:
+** movprfx z0\.d, p0/z, z1\.d
+** lsl z0\.d, p0/m, z0\.d, #63
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_u64_z_untied, svuint64_t,
+ z0 = svmul_n_u64_z (p0, z1, MAXPOW),
+ z0 = svmul_z (p0, z1, MAXPOW))
+
+/*
+** mul_3_u64_z_untied:
+** mov (z[0-9]+\.d), #3
** (
** movprfx z0\.d, p0/z, z1\.d
** mul z0\.d, p0/m, z0\.d, \1
** )
** ret
*/
-TEST_UNIFORM_Z (mul_2_u64_z_untied, svuint64_t,
- z0 = svmul_n_u64_z (p0, z1, 2),
- z0 = svmul_z (p0, z1, 2))
+TEST_UNIFORM_Z (mul_3_u64_z_untied, svuint64_t,
+ z0 = svmul_n_u64_z (p0, z1, 3),
+ z0 = svmul_z (p0, z1, 3))
/*
** mul_u64_x_tied1:
z0 = svmul_n_u64_x (p0, z1, x0),
z0 = svmul_x (p0, z1, x0))
+/*
+** mul_4dupop1_u64_x_tied2:
+** lsl z0\.d, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1_u64_x_tied2, svuint64_t,
+ z0 = svmul_x (p0, svdup_u64 (4), z0),
+ z0 = svmul_x (p0, svdup_u64 (4), z0))
+
+/*
+** mul_4dupop1ptrue_u64_x_tied2:
+** lsl z0\.d, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1ptrue_u64_x_tied2, svuint64_t,
+ z0 = svmul_x (svptrue_b64 (), svdup_u64 (4), z0),
+ z0 = svmul_x (svptrue_b64 (), svdup_u64 (4), z0))
+
+/*
+** mul_4dupop2_u64_x_tied1:
+** lsl z0\.d, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_u64_x_tied1, svuint64_t,
+ z0 = svmul_x (p0, z0, svdup_u64 (4)),
+ z0 = svmul_x (p0, z0, svdup_u64 (4)))
+
+/*
+** mul_4nop2_u64_x_tied1:
+** lsl z0\.d, z0\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_u64_x_tied1, svuint64_t,
+ z0 = svmul_n_u64_x (p0, z0, 4),
+ z0 = svmul_x (p0, z0, 4))
+
+/*
+** mul_maxpownop2_u64_x_tied1:
+** lsl z0\.d, z0\.d, #63
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_u64_x_tied1, svuint64_t,
+ z0 = svmul_n_u64_x (p0, z0, MAXPOW),
+ z0 = svmul_x (p0, z0, MAXPOW))
+
+/*
+** mul_1_u64_x_tied1:
+** ret
+*/
+TEST_UNIFORM_Z (mul_1_u64_x_tied1, svuint64_t,
+ z0 = svmul_n_u64_x (p0, z0, 1),
+ z0 = svmul_x (p0, z0, 1))
+
/*
** mul_2_u64_x_tied1:
-** mul z0\.d, z0\.d, #2
+** add z0\.d, z0\.d, z0\.d
** ret
*/
TEST_UNIFORM_Z (mul_2_u64_x_tied1, svuint64_t,
z0 = svmul_x (p0, z0, 2))
/*
-** mul_2_u64_x_untied:
+** mul_3_u64_x_tied1:
+** mul z0\.d, z0\.d, #3
+** ret
+*/
+TEST_UNIFORM_Z (mul_3_u64_x_tied1, svuint64_t,
+ z0 = svmul_n_u64_x (p0, z0, 3),
+ z0 = svmul_x (p0, z0, 3))
+
+/*
+** mul_4dupop2_u64_x_untied:
+** lsl z0\.d, z1\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_u64_x_untied, svuint64_t,
+ z0 = svmul_x (p0, z1, svdup_u64 (4)),
+ z0 = svmul_x (p0, z1, svdup_u64 (4)))
+
+/*
+** mul_4nop2_u64_x_untied:
+** lsl z0\.d, z1\.d, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_u64_x_untied, svuint64_t,
+ z0 = svmul_n_u64_x (p0, z1, 4),
+ z0 = svmul_x (p0, z1, 4))
+
+/*
+** mul_maxpownop2_u64_x_untied:
+** lsl z0\.d, z1\.d, #63
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_u64_x_untied, svuint64_t,
+ z0 = svmul_n_u64_x (p0, z1, MAXPOW),
+ z0 = svmul_x (p0, z1, MAXPOW))
+
+/*
+** mul_3_u64_x_untied:
** movprfx z0, z1
-** mul z0\.d, z0\.d, #2
+** mul z0\.d, z0\.d, #3
** ret
*/
-TEST_UNIFORM_Z (mul_2_u64_x_untied, svuint64_t,
- z0 = svmul_n_u64_x (p0, z1, 2),
- z0 = svmul_x (p0, z1, 2))
+TEST_UNIFORM_Z (mul_3_u64_x_untied, svuint64_t,
+ z0 = svmul_n_u64_x (p0, z1, 3),
+ z0 = svmul_x (p0, z1, 3))
/*
** mul_127_u64_x:
/*
** mul_128_u64_x:
-** mov (z[0-9]+\.d), #128
-** mul z0\.d, p0/m, z0\.d, \1
+** lsl z0\.d, z0\.d, #7
** ret
*/
TEST_UNIFORM_Z (mul_128_u64_x, svuint64_t,
#include "test_sve_acle.h"
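+/* The largest power of two representable in uint8_t.  */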
+#define MAXPOW 1<<7
+
/*
** mul_u8_m_tied1:
** mul z0\.b, p0/m, z0\.b, z1\.b
z0 = svmul_m (p0, z1, x0))
/*
-** mul_2_u8_m_tied1:
-** mov (z[0-9]+\.b), #2
+** mul_4dupop1_u8_m_tied2:
+** mov (z[0-9]+)\.b, #4
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, \1
+** mul z0\.b, p0/m, z0\.b, \2\.b
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1_u8_m_tied2, svuint8_t,
+ z0 = svmul_m (p0, svdup_u8 (4), z0),
+ z0 = svmul_m (p0, svdup_u8 (4), z0))
+
+/*
+** mul_4dupop1ptrue_u8_m_tied2:
+** lsl z0\.b, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1ptrue_u8_m_tied2, svuint8_t,
+ z0 = svmul_m (svptrue_b8 (), svdup_u8 (4), z0),
+ z0 = svmul_m (svptrue_b8 (), svdup_u8 (4), z0))
+
+/*
+** mul_4dupop2_u8_m_tied1:
+** lsl z0\.b, p0/m, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_u8_m_tied1, svuint8_t,
+ z0 = svmul_m (p0, z0, svdup_u8 (4)),
+ z0 = svmul_m (p0, z0, svdup_u8 (4)))
+
+/*
+** mul_4nop2_u8_m_tied1:
+** lsl z0\.b, p0/m, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_u8_m_tied1, svuint8_t,
+ z0 = svmul_n_u8_m (p0, z0, 4),
+ z0 = svmul_m (p0, z0, 4))
+
+/*
+** mul_maxpownop2_u8_m_tied1:
+** lsl z0\.b, p0/m, z0\.b, #7
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_u8_m_tied1, svuint8_t,
+ z0 = svmul_n_u8_m (p0, z0, MAXPOW),
+ z0 = svmul_m (p0, z0, MAXPOW))
+
+/*
+** mul_1_u8_m_tied1:
+** sel z0\.b, p0, z0\.b, z0\.b
+** ret
+*/
+TEST_UNIFORM_Z (mul_1_u8_m_tied1, svuint8_t,
+ z0 = svmul_n_u8_m (p0, z0, 1),
+ z0 = svmul_m (p0, z0, 1))
+
+/*
+** mul_3_u8_m_tied1:
+** mov (z[0-9]+\.b), #3
** mul z0\.b, p0/m, z0\.b, \1
** ret
*/
-TEST_UNIFORM_Z (mul_2_u8_m_tied1, svuint8_t,
- z0 = svmul_n_u8_m (p0, z0, 2),
- z0 = svmul_m (p0, z0, 2))
+TEST_UNIFORM_Z (mul_3_u8_m_tied1, svuint8_t,
+ z0 = svmul_n_u8_m (p0, z0, 3),
+ z0 = svmul_m (p0, z0, 3))
+
+/*
+** mul_4dupop2_u8_m_untied:
+** movprfx z0, z1
+** lsl z0\.b, p0/m, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_u8_m_untied, svuint8_t,
+ z0 = svmul_m (p0, z1, svdup_u8 (4)),
+ z0 = svmul_m (p0, z1, svdup_u8 (4)))
/*
-** mul_2_u8_m_untied:
-** mov (z[0-9]+\.b), #2
+** mul_4nop2_u8_m_untied:
+** movprfx z0, z1
+** lsl z0\.b, p0/m, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_u8_m_untied, svuint8_t,
+ z0 = svmul_n_u8_m (p0, z1, 4),
+ z0 = svmul_m (p0, z1, 4))
+
+/*
+** mul_maxpownop2_u8_m_untied:
+** movprfx z0, z1
+** lsl z0\.b, p0/m, z0\.b, #7
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_u8_m_untied, svuint8_t,
+ z0 = svmul_n_u8_m (p0, z1, MAXPOW),
+ z0 = svmul_m (p0, z1, MAXPOW))
+
+/*
+** mul_3_u8_m_untied:
+** mov (z[0-9]+\.b), #3
** movprfx z0, z1
** mul z0\.b, p0/m, z0\.b, \1
** ret
*/
-TEST_UNIFORM_Z (mul_2_u8_m_untied, svuint8_t,
- z0 = svmul_n_u8_m (p0, z1, 2),
- z0 = svmul_m (p0, z1, 2))
+TEST_UNIFORM_Z (mul_3_u8_m_untied, svuint8_t,
+ z0 = svmul_n_u8_m (p0, z1, 3),
+ z0 = svmul_m (p0, z1, 3))
/*
** mul_m1_u8_m:
-** mov (z[0-9]+\.b), #-1
-** mul z0\.b, p0/m, z0\.b, \1
+** mov (z[0-9]+)\.b, #-1
+** mul z0\.b, p0/m, z0\.b, \1\.b
** ret
*/
TEST_UNIFORM_Z (mul_m1_u8_m, svuint8_t,
z0 = svmul_z (p0, z1, x0))
/*
-** mul_2_u8_z_tied1:
-** mov (z[0-9]+\.b), #2
+** mul_4dupop1_u8_z_tied2:
+** movprfx z0\.b, p0/z, z0\.b
+** lsl z0\.b, p0/m, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1_u8_z_tied2, svuint8_t,
+ z0 = svmul_z (p0, svdup_u8 (4), z0),
+ z0 = svmul_z (p0, svdup_u8 (4), z0))
+
+/*
+** mul_4dupop1ptrue_u8_z_tied2:
+** lsl z0\.b, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1ptrue_u8_z_tied2, svuint8_t,
+ z0 = svmul_z (svptrue_b8 (), svdup_u8 (4), z0),
+ z0 = svmul_z (svptrue_b8 (), svdup_u8 (4), z0))
+
+/*
+** mul_4dupop2_u8_z_tied1:
+** movprfx z0\.b, p0/z, z0\.b
+** lsl z0\.b, p0/m, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_u8_z_tied1, svuint8_t,
+ z0 = svmul_z (p0, z0, svdup_u8 (4)),
+ z0 = svmul_z (p0, z0, svdup_u8 (4)))
+
+/*
+** mul_4nop2_u8_z_tied1:
+** movprfx z0\.b, p0/z, z0\.b
+** lsl z0\.b, p0/m, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_u8_z_tied1, svuint8_t,
+ z0 = svmul_n_u8_z (p0, z0, 4),
+ z0 = svmul_z (p0, z0, 4))
+
+/*
+** mul_maxpownop2_u8_z_tied1:
+** movprfx z0\.b, p0/z, z0\.b
+** lsl z0\.b, p0/m, z0\.b, #7
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_u8_z_tied1, svuint8_t,
+ z0 = svmul_n_u8_z (p0, z0, MAXPOW),
+ z0 = svmul_z (p0, z0, MAXPOW))
+
+/*
+** mul_1_u8_z_tied1:
+** mov (z[0-9]+\.b), #1
+** movprfx z0\.b, p0/z, z0\.b
+** mul z0\.b, p0/m, z0\.b, \1
+** ret
+*/
+TEST_UNIFORM_Z (mul_1_u8_z_tied1, svuint8_t,
+ z0 = svmul_n_u8_z (p0, z0, 1),
+ z0 = svmul_z (p0, z0, 1))
+
+/*
+** mul_3_u8_z_tied1:
+** mov (z[0-9]+\.b), #3
** movprfx z0\.b, p0/z, z0\.b
** mul z0\.b, p0/m, z0\.b, \1
** ret
*/
-TEST_UNIFORM_Z (mul_2_u8_z_tied1, svuint8_t,
- z0 = svmul_n_u8_z (p0, z0, 2),
- z0 = svmul_z (p0, z0, 2))
+TEST_UNIFORM_Z (mul_3_u8_z_tied1, svuint8_t,
+ z0 = svmul_n_u8_z (p0, z0, 3),
+ z0 = svmul_z (p0, z0, 3))
+
+/*
+** mul_4dupop2_u8_z_untied:
+** movprfx z0\.b, p0/z, z1\.b
+** lsl z0\.b, p0/m, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_u8_z_untied, svuint8_t,
+ z0 = svmul_z (p0, z1, svdup_u8 (4)),
+ z0 = svmul_z (p0, z1, svdup_u8 (4)))
/*
-** mul_2_u8_z_untied:
-** mov (z[0-9]+\.b), #2
+** mul_4nop2_u8_z_untied:
+** movprfx z0\.b, p0/z, z1\.b
+** lsl z0\.b, p0/m, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_u8_z_untied, svuint8_t,
+ z0 = svmul_n_u8_z (p0, z1, 4),
+ z0 = svmul_z (p0, z1, 4))
+
+/*
+** mul_maxpownop2_u8_z_untied:
+** movprfx z0\.b, p0/z, z1\.b
+** lsl z0\.b, p0/m, z0\.b, #7
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_u8_z_untied, svuint8_t,
+ z0 = svmul_n_u8_z (p0, z1, MAXPOW),
+ z0 = svmul_z (p0, z1, MAXPOW))
+
+/*
+** mul_3_u8_z_untied:
+** mov (z[0-9]+\.b), #3
** (
** movprfx z0\.b, p0/z, z1\.b
** mul z0\.b, p0/m, z0\.b, \1
** )
** ret
*/
-TEST_UNIFORM_Z (mul_2_u8_z_untied, svuint8_t,
- z0 = svmul_n_u8_z (p0, z1, 2),
- z0 = svmul_z (p0, z1, 2))
+TEST_UNIFORM_Z (mul_3_u8_z_untied, svuint8_t,
+ z0 = svmul_n_u8_z (p0, z1, 3),
+ z0 = svmul_z (p0, z1, 3))
/*
** mul_u8_x_tied1:
z0 = svmul_x (p0, z1, x0))
/*
-** mul_2_u8_x_tied1:
-** mul z0\.b, z0\.b, #2
+** mul_4dupop1_u8_x_tied2:
+** lsl z0\.b, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1_u8_x_tied2, svuint8_t,
+ z0 = svmul_x (p0, svdup_u8 (4), z0),
+ z0 = svmul_x (p0, svdup_u8 (4), z0))
+
+/*
+** mul_4dupop1ptrue_u8_x_tied2:
+** lsl z0\.b, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop1ptrue_u8_x_tied2, svuint8_t,
+ z0 = svmul_x (svptrue_b8 (), svdup_u8 (4), z0),
+ z0 = svmul_x (svptrue_b8 (), svdup_u8 (4), z0))
+
+/*
+** mul_4dupop2_u8_x_tied1:
+** lsl z0\.b, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_u8_x_tied1, svuint8_t,
+ z0 = svmul_x (p0, z0, svdup_u8 (4)),
+ z0 = svmul_x (p0, z0, svdup_u8 (4)))
+
+/*
+** mul_4nop2_u8_x_tied1:
+** lsl z0\.b, z0\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_u8_x_tied1, svuint8_t,
+ z0 = svmul_n_u8_x (p0, z0, 4),
+ z0 = svmul_x (p0, z0, 4))
+
+/*
+** mul_maxpownop2_u8_x_tied1:
+** lsl z0\.b, z0\.b, #7
+** ret
+*/
+TEST_UNIFORM_Z (mul_maxpownop2_u8_x_tied1, svuint8_t,
+ z0 = svmul_n_u8_x (p0, z0, MAXPOW),
+ z0 = svmul_x (p0, z0, MAXPOW))
+
+/*
+** mul_1_u8_x_tied1:
+** ret
+*/
+TEST_UNIFORM_Z (mul_1_u8_x_tied1, svuint8_t,
+ z0 = svmul_n_u8_x (p0, z0, 1),
+ z0 = svmul_x (p0, z0, 1))
+
+/*
+** mul_3_u8_x_tied1:
+** mul z0\.b, z0\.b, #3
+** ret
+*/
+TEST_UNIFORM_Z (mul_3_u8_x_tied1, svuint8_t,
+ z0 = svmul_n_u8_x (p0, z0, 3),
+ z0 = svmul_x (p0, z0, 3))
+
+/*
+** mul_4dupop2_u8_x_untied:
+** lsl z0\.b, z1\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4dupop2_u8_x_untied, svuint8_t,
+ z0 = svmul_x (p0, z1, svdup_u8 (4)),
+ z0 = svmul_x (p0, z1, svdup_u8 (4)))
+
+/*
+** mul_4nop2_u8_x_untied:
+** lsl z0\.b, z1\.b, #2
+** ret
+*/
+TEST_UNIFORM_Z (mul_4nop2_u8_x_untied, svuint8_t,
+ z0 = svmul_n_u8_x (p0, z1, 4),
+ z0 = svmul_x (p0, z1, 4))
+
+/*
+** mul_maxpownop2_u8_x_untied:
+** lsl z0\.b, z1\.b, #7
** ret
*/
-TEST_UNIFORM_Z (mul_2_u8_x_tied1, svuint8_t,
- z0 = svmul_n_u8_x (p0, z0, 2),
- z0 = svmul_x (p0, z0, 2))
+TEST_UNIFORM_Z (mul_maxpownop2_u8_x_untied, svuint8_t,
+ z0 = svmul_n_u8_x (p0, z1, MAXPOW),
+ z0 = svmul_x (p0, z1, MAXPOW))
/*
-** mul_2_u8_x_untied:
+** mul_3_u8_x_untied:
** movprfx z0, z1
-** mul z0\.b, z0\.b, #2
+** mul z0\.b, z0\.b, #3
** ret
*/
-TEST_UNIFORM_Z (mul_2_u8_x_untied, svuint8_t,
- z0 = svmul_n_u8_x (p0, z1, 2),
- z0 = svmul_x (p0, z1, 2))
+TEST_UNIFORM_Z (mul_3_u8_x_untied, svuint8_t,
+ z0 = svmul_n_u8_x (p0, z1, 3),
+ z0 = svmul_x (p0, z1, 3))
/*
** mul_127_u8_x:
/*
** mul_128_u8_x:
-** mul z0\.b, z0\.b, #-128
+** lsl z0\.b, z0\.b, #7
** ret
*/
TEST_UNIFORM_Z (mul_128_u8_x, svuint8_t,
/*
** mul_m128_u8_x:
-** mul z0\.b, z0\.b, #-128
+** lsl z0\.b, z0\.b, #7
** ret
*/
TEST_UNIFORM_Z (mul_m128_u8_x, svuint8_t,
--- /dev/null
+/* { dg-do run { target aarch64_sve128_hw } } */
+/* { dg-options "-O2 -msve-vector-bits=128" } */
+
+#include <arm_sve.h>
+#include <stdint.h>
+
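+/* Fixed-length (128-bit) versions of the sizeless SVE ACLE types.  */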
+typedef svbool_t pred __attribute__((arm_sve_vector_bits(128)));
+typedef svfloat16_t svfloat16_ __attribute__((arm_sve_vector_bits(128)));
+typedef svfloat32_t svfloat32_ __attribute__((arm_sve_vector_bits(128)));
+typedef svfloat64_t svfloat64_ __attribute__((arm_sve_vector_bits(128)));
+typedef svint32_t svint32_ __attribute__((arm_sve_vector_bits(128)));
+typedef svint64_t svint64_ __attribute__((arm_sve_vector_bits(128)));
+typedef svuint32_t svuint32_ __attribute__((arm_sve_vector_bits(128)));
+typedef svuint64_t svuint64_ __attribute__((arm_sve_vector_bits(128)));
+
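+/* With predication mode P, multiply dup vectors of OP1 and OP2 and compare
+   the result against a dup vector of the scalar product, for both the
+   vector and the _n (scalar second operand) overloads; abort on any
+   mismatch.  */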
+#define F(T, TS, P, OP1, OP2) \
+{ \
+ T##_t op1 = (T##_t) OP1; \
+ T##_t op2 = (T##_t) OP2; \
+ sv##T##_ res = svmul_##P (pg, svdup_##TS (op1), svdup_##TS (op2)); \
+ sv##T##_ exp = svdup_##TS (op1 * op2); \
+ if (svptest_any (pg, svcmpne (pg, exp, res))) \
+ __builtin_abort (); \
+ \
+ sv##T##_ res_n = svmul_##P (pg, svdup_##TS (op1), op2); \
+ if (svptest_any (pg, svcmpne (pg, exp, res_n))) \
+ __builtin_abort (); \
+}
+
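+/* Check all three predication modes (_m, _z, _x) on one value pair per
+   element type.  */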
+#define TEST_TYPES_1(T, TS) \
+ F (T, TS, m, 79, 16) \
+ F (T, TS, z, 79, 16) \
+ F (T, TS, x, 79, 16)
+
+#define TEST_TYPES \
+ TEST_TYPES_1 (float16, f16) \
+ TEST_TYPES_1 (float32, f32) \
+ TEST_TYPES_1 (float64, f64) \
+ TEST_TYPES_1 (int32, s32) \
+ TEST_TYPES_1 (int64, s64) \
+ TEST_TYPES_1 (uint32, u32) \
+ TEST_TYPES_1 (uint64, u64)
+
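+/* Signed boundary cases, including INT_MIN/INT_MAX operands and products
+   that overflow the element type.  */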
+#define TEST_VALUES_S_1(B, OP1, OP2) \
+ F (int##B, s##B, x, OP1, OP2)
+
+#define TEST_VALUES_S \
+ TEST_VALUES_S_1 (32, INT32_MIN, INT32_MIN) \
+ TEST_VALUES_S_1 (64, INT64_MIN, INT64_MIN) \
+ TEST_VALUES_S_1 (32, 4, 4) \
+ TEST_VALUES_S_1 (32, -7, 4) \
+ TEST_VALUES_S_1 (32, 4, -7) \
+ TEST_VALUES_S_1 (64, 4, 4) \
+ TEST_VALUES_S_1 (64, -7, 4) \
+ TEST_VALUES_S_1 (64, 4, -7) \
+ TEST_VALUES_S_1 (32, INT32_MAX, (1 << 30)) \
+ TEST_VALUES_S_1 (32, (1 << 30), INT32_MAX) \
+ TEST_VALUES_S_1 (64, INT64_MAX, (1ULL << 62)) \
+ TEST_VALUES_S_1 (64, (1ULL << 62), INT64_MAX) \
+ TEST_VALUES_S_1 (32, INT32_MIN, (1 << 30)) \
+ TEST_VALUES_S_1 (64, INT64_MIN, (1ULL << 62)) \
+ TEST_VALUES_S_1 (32, INT32_MAX, 1) \
+ TEST_VALUES_S_1 (64, INT64_MAX, 1) \
+ TEST_VALUES_S_1 (32, 1, INT32_MAX) \
+ TEST_VALUES_S_1 (64, 1, INT64_MAX) \
+ TEST_VALUES_S_1 (32, INT32_MIN, 16) \
+ TEST_VALUES_S_1 (64, INT64_MIN, 16) \
+ TEST_VALUES_S_1 (32, INT32_MAX, -5) \
+ TEST_VALUES_S_1 (64, INT64_MAX, -5) \
+ TEST_VALUES_S_1 (32, INT32_MIN, -4) \
+ TEST_VALUES_S_1 (64, INT64_MIN, -4)
+
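+/* Unsigned boundary cases, including products that wrap modulo 2^B.  */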
+#define TEST_VALUES_U_1(B, OP1, OP2) \
+ F (uint##B, u##B, x, OP1, OP2)
+
+#define TEST_VALUES_U \
+ TEST_VALUES_U_1 (32, UINT32_MAX, UINT32_MAX) \
+ TEST_VALUES_U_1 (64, UINT64_MAX, UINT64_MAX) \
+ TEST_VALUES_U_1 (32, UINT32_MAX, (1U << 31)) \
+ TEST_VALUES_U_1 (64, UINT64_MAX, (1ULL << 63)) \
+ TEST_VALUES_U_1 (32, 7, 4) \
+ TEST_VALUES_U_1 (32, 4, 7) \
+ TEST_VALUES_U_1 (64, 7, 4) \
+ TEST_VALUES_U_1 (64, 4, 7) \
+ TEST_VALUES_U_1 (32, 7, 3) \
+ TEST_VALUES_U_1 (64, 7, 3) \
+ TEST_VALUES_U_1 (32, 11, 1) \
+ TEST_VALUES_U_1 (64, 11, 1)
+
+#define TEST_VALUES \
+ TEST_VALUES_S \
+ TEST_VALUES_U
+
+int
+main (void)
+{
+ const pred pg = svptrue_b8 ();
+ TEST_TYPES
+ TEST_VALUES
+ return 0;
+}