if (auto *res = f.fold_const_binary (TRUNC_DIV_EXPR))
return res;
- /* If the dividend is all zeros, fold to zero vector. */
+ /* If the divisor is all ones, fold to dividend. */
tree op1 = gimple_call_arg (f.call, 1);
- if (integer_zerop (op1))
- return gimple_build_assign (f.lhs, op1);
-
- /* If the divisor is all zeros, fold to zero vector. */
- tree pg = gimple_call_arg (f.call, 0);
tree op2 = gimple_call_arg (f.call, 2);
- if (integer_zerop (op2)
- && (f.pred != PRED_m
- || is_ptrue (pg, f.type_suffix (0).element_bytes)))
- return gimple_build_assign (f.lhs, build_zero_cst (TREE_TYPE (f.lhs)));
+ if (integer_onep (op2))
+ return f.fold_active_lanes_to (op1);
+
+ /* If one of the operands is all zeros, fold to zero vector.  This is
+    valid even when only the divisor is zero, since SVE integer division
+    by zero yields zero rather than trapping. */
+ if (integer_zerop (op1) || integer_zerop (op2))
+ return f.fold_active_lanes_to (build_zero_cst (TREE_TYPE (f.lhs)));
/* If the divisor is a uniform power of 2, fold to a shift
instruction. */
if (auto *res = f.fold_const_binary (MULT_EXPR))
return res;
- /* If one of the operands is all zeros, fold to zero vector. */
+ /* If one of the operands is all ones, fold to the other operand. */
tree op1 = gimple_call_arg (f.call, 1);
- if (integer_zerop (op1))
- return gimple_build_assign (f.lhs, op1);
-
- tree pg = gimple_call_arg (f.call, 0);
tree op2 = gimple_call_arg (f.call, 2);
- if (integer_zerop (op2)
- && (f.pred != PRED_m
- || is_ptrue (pg, f.type_suffix (0).element_bytes)))
- return gimple_build_assign (f.lhs, build_zero_cst (TREE_TYPE (f.lhs)));
+ if (integer_onep (op1))
+ return f.fold_active_lanes_to (op2);
+ if (integer_onep (op2))
+ return f.fold_active_lanes_to (op1);
+
+ /* If one of the operands is all zeros, fold to zero vector. */
+ if (integer_zerop (op1) || integer_zerop (op2))
+ return f.fold_active_lanes_to (build_zero_cst (TREE_TYPE (f.lhs)));
/* If one of the operands is a uniform power of 2, fold to a left shift
by immediate. */
+ tree pg = gimple_call_arg (f.call, 0);
tree op1_cst = uniform_integer_cst_p (op1);
tree op2_cst = uniform_integer_cst_p (op2);
tree shift_op1, shift_op2;
else
return NULL;
- if (integer_onep (shift_op2))
- return NULL;
-
shift_op2 = wide_int_to_tree (unsigned_type_for (TREE_TYPE (shift_op2)),
tree_log2 (shift_op2));
function_instance instance ("svlsl", functions::svlsl,
return NULL;
}
+/* Fold the active lanes to X and set the inactive lanes according to the
+ predication. Return the new statement. */
+gimple *
+gimple_folder::fold_active_lanes_to (tree x)
+{
+ /* If predication is _x or the predicate is ptrue, fold to X. */
+ if (pred == PRED_x
+ || is_ptrue (gimple_call_arg (call, 0), type_suffix (0).element_bytes))
+ return gimple_build_assign (lhs, x);
+
+/* If the predication is _z or _m, calculate a vector that supplies the
+ values of inactive lanes (the first vector argument for _m and a zero
+ vector for _z). */
+ tree vec_inactive;
+ if (pred == PRED_z)
+ vec_inactive = build_zero_cst (TREE_TYPE (lhs));
+ else
+ vec_inactive = gimple_call_arg (call, 1);
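+ /* If X already matches the values of the inactive lanes, the predicate
+    is irrelevant and the call folds to a plain assignment of X. */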
+ if (operand_equal_p (x, vec_inactive, 0))
+ return gimple_build_assign (lhs, x);
+
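+ /* Otherwise select X for the active lanes and VEC_INACTIVE for the
+    inactive ones.  For example, svmul_z (pg, x, svdup_s16 (1)) becomes
+    VEC_COND_EXPR <pg, x, { 0, ... }>, which is emitted as a SEL. */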
+ gimple_seq stmts = NULL;
+ tree pred = convert_pred (stmts, vector_type (0), 0);
+ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+ return gimple_build_assign (lhs, VEC_COND_EXPR, pred, x, vec_inactive);
+}
+
/* Try to fold the call. Return the new statement on success and null
on failure. */
gimple *
gimple *fold_to_ptrue ();
gimple *fold_to_vl_pred (unsigned int);
gimple *fold_const_binary (enum tree_code);
+ gimple *fold_active_lanes_to (tree);
gimple *fold ();
/*
** div_1_s32_m_tied1:
-** sel z0\.s, p0, z0\.s, z0\.s
** ret
*/
TEST_UNIFORM_Z (div_1_s32_m_tied1, svint32_t,
/*
** div_1_s32_m_untied:
-** sel z0\.s, p0, z1\.s, z1\.s
+** mov z0\.d, z1\.d
** ret
*/
TEST_UNIFORM_Z (div_1_s32_m_untied, svint32_t,
/*
** div_1_s32_z_tied1:
-** mov (z[0-9]+\.s), #1
-** movprfx z0\.s, p0/z, z0\.s
-** sdiv z0\.s, p0/m, z0\.s, \1
+** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+** sel z0\.s, p0, z0\.s, z\1.s
** ret
*/
TEST_UNIFORM_Z (div_1_s32_z_tied1, svint32_t,
/*
** div_1_s32_z_untied:
-** mov z0\.s, #1
-** movprfx z0\.s, p0/z, z0\.s
-** sdivr z0\.s, p0/m, z0\.s, z1\.s
+** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+** sel z0\.s, p0, z1\.s, z\1.s
** ret
*/
TEST_UNIFORM_Z (div_1_s32_z_untied, svint32_t,
/*
** div_1_s64_m_tied1:
-** sel z0\.d, p0, z0\.d, z0\.d
** ret
*/
TEST_UNIFORM_Z (div_1_s64_m_tied1, svint64_t,
/*
** div_1_s64_m_untied:
-** sel z0\.d, p0, z1\.d, z1\.d
+** mov z0\.d, z1\.d
** ret
*/
TEST_UNIFORM_Z (div_1_s64_m_untied, svint64_t,
/*
** div_1_s64_z_tied1:
-** mov (z[0-9]+\.d), #1
-** movprfx z0\.d, p0/z, z0\.d
-** sdiv z0\.d, p0/m, z0\.d, \1
+** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+** sel z0\.d, p0, z0\.d, z\1.d
** ret
*/
TEST_UNIFORM_Z (div_1_s64_z_tied1, svint64_t,
/*
** div_1_s64_z_untied:
-** mov z0\.d, #1
-** movprfx z0\.d, p0/z, z0\.d
-** sdivr z0\.d, p0/m, z0\.d, z1\.d
+** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+** sel z0\.d, p0, z1\.d, z\1.d
** ret
*/
TEST_UNIFORM_Z (div_1_s64_z_untied, svint64_t,
/*
** div_1_u32_m_tied1:
-** sel z0\.s, p0, z0\.s, z0\.s
** ret
*/
TEST_UNIFORM_Z (div_1_u32_m_tied1, svuint32_t,
/*
** div_1_u32_m_untied:
-** sel z0\.s, p0, z1\.s, z1\.s
+** mov z0\.d, z1\.d
** ret
*/
TEST_UNIFORM_Z (div_1_u32_m_untied, svuint32_t,
/*
** div_1_u32_z_tied1:
-** mov (z[0-9]+\.s), #1
-** movprfx z0\.s, p0/z, z0\.s
-** udiv z0\.s, p0/m, z0\.s, \1
+** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+** sel z0\.s, p0, z0\.s, z\1.s
** ret
*/
TEST_UNIFORM_Z (div_1_u32_z_tied1, svuint32_t,
/*
** div_1_u32_z_untied:
-** mov z0\.s, #1
-** movprfx z0\.s, p0/z, z0\.s
-** udivr z0\.s, p0/m, z0\.s, z1\.s
+** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+** sel z0\.s, p0, z1\.s, z\1.s
** ret
*/
TEST_UNIFORM_Z (div_1_u32_z_untied, svuint32_t,
/*
** div_1_u64_m_tied1:
-** sel z0\.d, p0, z0\.d, z0\.d
** ret
*/
TEST_UNIFORM_Z (div_1_u64_m_tied1, svuint64_t,
/*
** div_1_u64_m_untied:
-** sel z0\.d, p0, z1\.d, z1\.d
+** mov z0\.d, z1\.d
** ret
*/
TEST_UNIFORM_Z (div_1_u64_m_untied, svuint64_t,
/*
** div_1_u64_z_tied1:
-** mov (z[0-9]+\.d), #1
-** movprfx z0\.d, p0/z, z0\.d
-** udiv z0\.d, p0/m, z0\.d, \1
+** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+** sel z0\.d, p0, z0\.d, z\1.d
** ret
*/
TEST_UNIFORM_Z (div_1_u64_z_tied1, svuint64_t,
/*
** div_1_u64_z_untied:
-** mov z0\.d, #1
-** movprfx z0\.d, p0/z, z0\.d
-** udivr z0\.d, p0/m, z0\.d, z1\.d
+** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+** sel z0\.d, p0, z1\.d, z\1.d
** ret
*/
TEST_UNIFORM_Z (div_1_u64_z_untied, svuint64_t,
/*
** mul_1_s16_m_tied1:
-** sel z0\.h, p0, z0\.h, z0\.h
** ret
*/
TEST_UNIFORM_Z (mul_1_s16_m_tied1, svint16_t,
z0 = svmul_n_s16_m (p0, z0, 1),
z0 = svmul_m (p0, z0, 1))
+/*
+** mul_1op1_s16_m_tied2:
+** mov (z[0-9]+\.h), #1
+** sel z0\.h, p0, z0\.h, \1
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s16_m_tied2, svint16_t,
+ z0 = svmul_s16_m (p0, svdup_s16 (1), z0),
+ z0 = svmul_m (p0, svdup_s16 (1), z0))
+
/*
** mul_3_s16_m_tied1:
** mov (z[0-9]+\.h), #3
/*
** mul_1_s16_z_tied1:
-** mov z31.h, #1
-** movprfx z0.h, p0/z, z0.h
-** mul z0.h, p0/m, z0.h, z31.h
+** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+** sel z0\.h, p0, z0\.h, z\1.h
** ret
*/
TEST_UNIFORM_Z (mul_1_s16_z_tied1, svint16_t,
z0 = svmul_n_s16_z (p0, z0, 1),
z0 = svmul_z (p0, z0, 1))
+/*
+** mul_1op1_s16_z_tied2:
+** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+** sel z0\.h, p0, z0\.h, z\1.h
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s16_z_tied2, svint16_t,
+ z0 = svmul_s16_z (p0, svdup_s16 (1), z0),
+ z0 = svmul_z (p0, svdup_s16 (1), z0))
+
/*
** mul_3_s16_z_tied1:
** mov (z[0-9]+\.h), #3
z0 = svmul_n_s16_x (p0, z0, 1),
z0 = svmul_x (p0, z0, 1))
+/*
+** mul_1op1_s16_x_tied2:
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s16_x_tied2, svint16_t,
+ z0 = svmul_s16_x (p0, svdup_s16 (1), z0),
+ z0 = svmul_x (p0, svdup_s16 (1), z0))
+
+/*
+** mul_1op1_s16_x_untied:
+** mov z0\.d, z1\.d
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s16_x_untied, svint16_t,
+ z0 = svmul_s16_x (p0, svdup_s16 (1), z1),
+ z0 = svmul_x (p0, svdup_s16 (1), z1))
+
/*
** mul_3_s16_x_tied1:
** mul z0\.h, z0\.h, #3
/*
** mul_1_s32_m_tied1:
-** sel z0\.s, p0, z0\.s, z0\.s
** ret
*/
TEST_UNIFORM_Z (mul_1_s32_m_tied1, svint32_t,
z0 = svmul_n_s32_m (p0, z0, 1),
z0 = svmul_m (p0, z0, 1))
+/*
+** mul_1op1_s32_m_tied2:
+** mov (z[0-9]+\.s), #1
+** sel z0\.s, p0, z0\.s, \1
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s32_m_tied2, svint32_t,
+ z0 = svmul_s32_m (p0, svdup_s32 (1), z0),
+ z0 = svmul_m (p0, svdup_s32 (1), z0))
+
/*
** mul_3_s32_m_tied1:
** mov (z[0-9]+\.s), #3
/*
** mul_1_s32_z_tied1:
-** mov z31.s, #1
-** movprfx z0.s, p0/z, z0.s
-** mul z0.s, p0/m, z0.s, z31.s
+** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+** sel z0\.s, p0, z0\.s, z\1.s
** ret
*/
TEST_UNIFORM_Z (mul_1_s32_z_tied1, svint32_t,
z0 = svmul_n_s32_z (p0, z0, 1),
z0 = svmul_z (p0, z0, 1))
+/*
+** mul_1op1_s32_z_tied2:
+** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+** sel z0\.s, p0, z0\.s, z\1.s
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s32_z_tied2, svint32_t,
+ z0 = svmul_s32_z (p0, svdup_s32 (1), z0),
+ z0 = svmul_z (p0, svdup_s32 (1), z0))
+
/*
** mul_3_s32_z_tied1:
** mov (z[0-9]+\.s), #3
z0 = svmul_n_s32_x (p0, z0, 1),
z0 = svmul_x (p0, z0, 1))
+/*
+** mul_1op1_s32_x_tied2:
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s32_x_tied2, svint32_t,
+ z0 = svmul_s32_x (p0, svdup_s32 (1), z0),
+ z0 = svmul_x (p0, svdup_s32 (1), z0))
+
+/*
+** mul_1op1_s32_x_untied:
+** mov z0\.d, z1\.d
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s32_x_untied, svint32_t,
+ z0 = svmul_s32_x (p0, svdup_s32 (1), z1),
+ z0 = svmul_x (p0, svdup_s32 (1), z1))
+
/*
** mul_3_s32_x_tied1:
** mul z0\.s, z0\.s, #3
/*
** mul_1_s64_m_tied1:
-** sel z0\.d, p0, z0\.d, z0\.d
** ret
*/
TEST_UNIFORM_Z (mul_1_s64_m_tied1, svint64_t,
z0 = svmul_n_s64_m (p0, z0, 1),
z0 = svmul_m (p0, z0, 1))
+/*
+** mul_1op1_s64_m_tied2:
+** mov (z[0-9]+\.d), #1
+** sel z0\.d, p0, z0\.d, \1
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s64_m_tied2, svint64_t,
+ z0 = svmul_s64_m (p0, svdup_s64 (1), z0),
+ z0 = svmul_m (p0, svdup_s64 (1), z0))
+
/*
** mul_2_s64_m_tied1:
** lsl z0\.d, p0/m, z0\.d, #1
/*
** mul_1_s64_z_tied1:
-** mov z31.d, #1
-** movprfx z0.d, p0/z, z0.d
-** mul z0.d, p0/m, z0.d, z31.d
+** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+** sel z0\.d, p0, z0\.d, z\1.d
** ret
*/
TEST_UNIFORM_Z (mul_1_s64_z_tied1, svint64_t,
z0 = svmul_n_s64_z (p0, z0, 1),
z0 = svmul_z (p0, z0, 1))
+/*
+** mul_1op1_s64_z_tied2:
+** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+** sel z0\.d, p0, z0\.d, z\1.d
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s64_z_tied2, svint64_t,
+ z0 = svmul_s64_z (p0, svdup_s64 (1), z0),
+ z0 = svmul_z (p0, svdup_s64 (1), z0))
+
/*
** mul_2_s64_z_tied1:
** movprfx z0.d, p0/z, z0.d
z0 = svmul_n_s64_x (p0, z0, 1),
z0 = svmul_x (p0, z0, 1))
+/*
+** mul_1op1_s64_x_tied2:
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s64_x_tied2, svint64_t,
+ z0 = svmul_s64_x (p0, svdup_s64 (1), z0),
+ z0 = svmul_x (p0, svdup_s64 (1), z0))
+
+/*
+** mul_1op1_s64_x_untied:
+** mov z0\.d, z1\.d
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s64_x_untied, svint64_t,
+ z0 = svmul_s64_x (p0, svdup_s64 (1), z1),
+ z0 = svmul_x (p0, svdup_s64 (1), z1))
+
/*
** mul_2_s64_x_tied1:
** add z0\.d, z0\.d, z0\.d
/*
** mul_1_s8_m_tied1:
-** sel z0\.b, p0, z0\.b, z0\.b
** ret
*/
TEST_UNIFORM_Z (mul_1_s8_m_tied1, svint8_t,
z0 = svmul_n_s8_m (p0, z0, 1),
z0 = svmul_m (p0, z0, 1))
+/*
+** mul_1op1_s8_m_tied2:
+** mov (z[0-9]+)\.b, #1
+** sel z0\.b, p0, z0\.b, \1\.b
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s8_m_tied2, svint8_t,
+ z0 = svmul_s8_m (p0, svdup_s8 (1), z0),
+ z0 = svmul_m (p0, svdup_s8 (1), z0))
+
/*
** mul_3_s8_m_tied1:
** mov (z[0-9]+\.b), #3
/*
** mul_1_s8_z_tied1:
-** mov z31.b, #1
-** movprfx z0.b, p0/z, z0.b
-** mul z0.b, p0/m, z0.b, z31.b
+** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+** sel z0\.b, p0, z0\.b, z\1.b
** ret
*/
TEST_UNIFORM_Z (mul_1_s8_z_tied1, svint8_t,
z0 = svmul_n_s8_z (p0, z0, 1),
z0 = svmul_z (p0, z0, 1))
+/*
+** mul_1op1_s8_z_tied2:
+** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+** sel z0\.b, p0, z0\.b, z\1.b
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s8_z_tied2, svint8_t,
+ z0 = svmul_s8_z (p0, svdup_s8 (1), z0),
+ z0 = svmul_z (p0, svdup_s8 (1), z0))
+
/*
** mul_3_s8_z_tied1:
** mov (z[0-9]+\.b), #3
z0 = svmul_n_s8_x (p0, z0, 1),
z0 = svmul_x (p0, z0, 1))
+/*
+** mul_1op1_s8_x_tied2:
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s8_x_tied2, svint8_t,
+ z0 = svmul_s8_x (p0, svdup_s8 (1), z0),
+ z0 = svmul_x (p0, svdup_s8 (1), z0))
+
+/*
+** mul_1op1_s8_x_untied:
+** mov z0\.d, z1\.d
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s8_x_untied, svint8_t,
+ z0 = svmul_s8_x (p0, svdup_s8 (1), z1),
+ z0 = svmul_x (p0, svdup_s8 (1), z1))
+
/*
** mul_3_s8_x_tied1:
** mul z0\.b, z0\.b, #3
/*
** mul_1_u16_m_tied1:
-** sel z0\.h, p0, z0\.h, z0\.h
** ret
*/
TEST_UNIFORM_Z (mul_1_u16_m_tied1, svuint16_t,
z0 = svmul_n_u16_m (p0, z0, 1),
z0 = svmul_m (p0, z0, 1))
+/*
+** mul_1op1_u16_m_tied2:
+** mov (z[0-9]+\.h), #1
+** sel z0\.h, p0, z0\.h, \1
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u16_m_tied2, svuint16_t,
+ z0 = svmul_u16_m (p0, svdup_u16 (1), z0),
+ z0 = svmul_m (p0, svdup_u16 (1), z0))
+
/*
** mul_3_u16_m_tied1:
** mov (z[0-9]+\.h), #3
/*
** mul_1_u16_z_tied1:
-** mov z31.h, #1
-** movprfx z0.h, p0/z, z0.h
-** mul z0.h, p0/m, z0.h, z31.h
+** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+** sel z0\.h, p0, z0\.h, z\1.h
** ret
*/
TEST_UNIFORM_Z (mul_1_u16_z_tied1, svuint16_t,
z0 = svmul_n_u16_z (p0, z0, 1),
z0 = svmul_z (p0, z0, 1))
+/*
+** mul_1op1_u16_z_tied2:
+** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+** sel z0\.h, p0, z0\.h, z\1.h
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u16_z_tied2, svuint16_t,
+ z0 = svmul_u16_z (p0, svdup_u16 (1), z0),
+ z0 = svmul_z (p0, svdup_u16 (1), z0))
+
/*
** mul_3_u16_z_tied1:
** mov (z[0-9]+\.h), #3
z0 = svmul_n_u16_x (p0, z0, 1),
z0 = svmul_x (p0, z0, 1))
+/*
+** mul_1op1_u16_x_tied2:
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u16_x_tied2, svuint16_t,
+ z0 = svmul_u16_x (p0, svdup_u16 (1), z0),
+ z0 = svmul_x (p0, svdup_u16 (1), z0))
+
+/*
+** mul_1op1_u16_x_untied:
+** mov z0\.d, z1\.d
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u16_x_untied, svuint16_t,
+ z0 = svmul_u16_x (p0, svdup_u16 (1), z1),
+ z0 = svmul_x (p0, svdup_u16 (1), z1))
+
/*
** mul_3_u16_x_tied1:
** mul z0\.h, z0\.h, #3
/*
** mul_1_u32_m_tied1:
-** sel z0\.s, p0, z0\.s, z0\.s
** ret
*/
TEST_UNIFORM_Z (mul_1_u32_m_tied1, svuint32_t,
z0 = svmul_n_u32_m (p0, z0, 1),
z0 = svmul_m (p0, z0, 1))
+/*
+** mul_1op1_u32_m_tied2:
+** mov (z[0-9]+\.s), #1
+** sel z0\.s, p0, z0\.s, \1
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u32_m_tied2, svuint32_t,
+ z0 = svmul_u32_m (p0, svdup_u32 (1), z0),
+ z0 = svmul_m (p0, svdup_u32 (1), z0))
+
/*
** mul_3_u32_m_tied1:
** mov (z[0-9]+\.s), #3
/*
** mul_1_u32_z_tied1:
-** mov z31.s, #1
-** movprfx z0.s, p0/z, z0.s
-** mul z0.s, p0/m, z0.s, z31.s
+** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+** sel z0\.s, p0, z0\.s, z\1.s
** ret
*/
TEST_UNIFORM_Z (mul_1_u32_z_tied1, svuint32_t,
z0 = svmul_n_u32_z (p0, z0, 1),
z0 = svmul_z (p0, z0, 1))
+/*
+** mul_1op1_u32_z_tied2:
+** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+** sel z0\.s, p0, z0\.s, z\1.s
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u32_z_tied2, svuint32_t,
+ z0 = svmul_u32_z (p0, svdup_u32 (1), z0),
+ z0 = svmul_z (p0, svdup_u32 (1), z0))
+
/*
** mul_3_u32_z_tied1:
** mov (z[0-9]+\.s), #3
z0 = svmul_n_u32_x (p0, z0, 1),
z0 = svmul_x (p0, z0, 1))
+/*
+** mul_1op1_u32_x_tied2:
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u32_x_tied2, svuint32_t,
+ z0 = svmul_u32_x (p0, svdup_u32 (1), z0),
+ z0 = svmul_x (p0, svdup_u32 (1), z0))
+
+/*
+** mul_1op1_u32_x_untied:
+** mov z0\.d, z1\.d
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u32_x_untied, svuint32_t,
+ z0 = svmul_u32_x (p0, svdup_u32 (1), z1),
+ z0 = svmul_x (p0, svdup_u32 (1), z1))
+
/*
** mul_3_u32_x_tied1:
** mul z0\.s, z0\.s, #3
/*
** mul_1_u64_m_tied1:
-** sel z0\.d, p0, z0\.d, z0\.d
** ret
*/
TEST_UNIFORM_Z (mul_1_u64_m_tied1, svuint64_t,
z0 = svmul_n_u64_m (p0, z0, 1),
z0 = svmul_m (p0, z0, 1))
+/*
+** mul_1op1_u64_m_tied2:
+** mov (z[0-9]+\.d), #1
+** sel z0\.d, p0, z0\.d, \1
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u64_m_tied2, svuint64_t,
+ z0 = svmul_u64_m (p0, svdup_u64 (1), z0),
+ z0 = svmul_m (p0, svdup_u64 (1), z0))
+
/*
** mul_2_u64_m_tied1:
** lsl z0\.d, p0/m, z0\.d, #1
/*
** mul_1_u64_z_tied1:
-** mov z31.d, #1
-** movprfx z0.d, p0/z, z0.d
-** mul z0.d, p0/m, z0.d, z31.d
+** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+** sel z0\.d, p0, z0\.d, z\1.d
** ret
*/
TEST_UNIFORM_Z (mul_1_u64_z_tied1, svuint64_t,
z0 = svmul_n_u64_z (p0, z0, 1),
z0 = svmul_z (p0, z0, 1))
+/*
+** mul_1op1_u64_z_tied2:
+** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+** sel z0\.d, p0, z0\.d, z\1.d
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u64_z_tied2, svuint64_t,
+ z0 = svmul_u64_z (p0, svdup_u64 (1), z0),
+ z0 = svmul_z (p0, svdup_u64 (1), z0))
+
/*
** mul_2_u64_z_tied1:
** movprfx z0.d, p0/z, z0.d
z0 = svmul_n_u64_x (p0, z0, 1),
z0 = svmul_x (p0, z0, 1))
+/*
+** mul_1op1_u64_x_tied2:
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u64_x_tied2, svuint64_t,
+ z0 = svmul_u64_x (p0, svdup_u64 (1), z0),
+ z0 = svmul_x (p0, svdup_u64 (1), z0))
+
+/*
+** mul_1op1_u64_x_untied:
+** mov z0\.d, z1\.d
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u64_x_untied, svuint64_t,
+ z0 = svmul_u64_x (p0, svdup_u64 (1), z1),
+ z0 = svmul_x (p0, svdup_u64 (1), z1))
+
/*
** mul_2_u64_x_tied1:
** add z0\.d, z0\.d, z0\.d
/*
** mul_1_u8_m_tied1:
-** sel z0\.b, p0, z0\.b, z0\.b
** ret
*/
TEST_UNIFORM_Z (mul_1_u8_m_tied1, svuint8_t,
z0 = svmul_n_u8_m (p0, z0, 1),
z0 = svmul_m (p0, z0, 1))
+/*
+** mul_1op1_u8_m_tied2:
+** mov (z[0-9]+)\.b, #1
+** sel z0\.b, p0, z0\.b, \1\.b
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u8_m_tied2, svuint8_t,
+ z0 = svmul_u8_m (p0, svdup_u8 (1), z0),
+ z0 = svmul_m (p0, svdup_u8 (1), z0))
+
/*
** mul_3_u8_m_tied1:
** mov (z[0-9]+\.b), #3
/*
** mul_1_u8_z_tied1:
-** mov z31.b, #1
-** movprfx z0.b, p0/z, z0.b
-** mul z0.b, p0/m, z0.b, z31.b
+** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+** sel z0\.b, p0, z0\.b, z\1.b
** ret
*/
TEST_UNIFORM_Z (mul_1_u8_z_tied1, svuint8_t,
z0 = svmul_n_u8_z (p0, z0, 1),
z0 = svmul_z (p0, z0, 1))
+/*
+** mul_1op1_u8_z_tied2:
+** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+** sel z0\.b, p0, z0\.b, z\1\.b
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u8_z_tied2, svuint8_t,
+ z0 = svmul_u8_z (p0, svdup_u8 (1), z0),
+ z0 = svmul_z (p0, svdup_u8 (1), z0))
+
/*
** mul_3_u8_z_tied1:
** mov (z[0-9]+\.b), #3
z0 = svmul_n_u8_x (p0, z0, 1),
z0 = svmul_x (p0, z0, 1))
+/*
+** mul_1op1_u8_x_tied2:
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u8_x_tied2, svuint8_t,
+ z0 = svmul_u8_x (p0, svdup_u8 (1), z0),
+ z0 = svmul_x (p0, svdup_u8 (1), z0))
+
+/*
+** mul_1op1_u8_x_untied:
+** mov z0\.d, z1\.d
+** ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u8_x_untied, svuint8_t,
+ z0 = svmul_u8_x (p0, svdup_u8 (1), z1),
+ z0 = svmul_x (p0, svdup_u8 (1), z1))
+
/*
** mul_3_u8_x_tied1:
** mul z0\.b, z0\.b, #3
/*
** s64_m_pg_op2:
-** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
-** sdiv (z[0-9]\.d), p[0-7]/m, \2, z\1\.d
+** mov z0\.d, p0/m, #0
** ret
*/
svint64_t s64_m_pg_op2 (svbool_t pg, svint64_t op1)
/*
** s64_n_m_pg_op2:
-** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
-** sdiv (z[0-9]+\.d), p[0-7]/m, \2, z\1\.d
+** mov z0\.d, p0/m, #0
** ret
*/
svint64_t s64_n_m_pg_op2 (svbool_t pg, svint64_t op1)
/*
** u64_m_pg_op2:
-** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
-** udiv (z[0-9]+\.d), p[0-7]/m, \2, z\1\.d
+** mov z0\.d, p0/m, #0
** ret
*/
svuint64_t u64_m_pg_op2 (svbool_t pg, svuint64_t op1)
/*
** u64_n_m_pg_op2:
-** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
-** udiv (z[0-9]+\.d), p[0-7]/m, \2, z\1\.d
+** mov z0\.d, p0/m, #0
** ret
*/
svuint64_t u64_n_m_pg_op2 (svbool_t pg, svuint64_t op1)
typedef svfloat64_t svfloat64_ __attribute__((arm_sve_vector_bits(128)));
typedef svint32_t svint32_ __attribute__((arm_sve_vector_bits(128)));
typedef svint64_t svint64_ __attribute__((arm_sve_vector_bits(128)));
+typedef svuint8_t svuint8_ __attribute__((arm_sve_vector_bits(128)));
+typedef svuint16_t svuint16_ __attribute__((arm_sve_vector_bits(128)));
typedef svuint32_t svuint32_ __attribute__((arm_sve_vector_bits(128)));
typedef svuint64_t svuint64_ __attribute__((arm_sve_vector_bits(128)));
TEST_VALUES_U_1 (64, 4, 7) \
TEST_VALUES_U_1 (32, 7, 3) \
TEST_VALUES_U_1 (64, 7, 3) \
+ TEST_VALUES_U_1 (8, 1, 11) \
+ TEST_VALUES_U_1 (16, 1, UINT16_MAX) \
+ TEST_VALUES_U_1 (32, 1, 0) \
+ TEST_VALUES_U_1 (64, 1, (1ULL << 63)) \
TEST_VALUES_U_1 (32, 11, 1) \
TEST_VALUES_U_1 (64, 11, 1)