if (integer_zerop (op1) || integer_zerop (op2))
return f.fold_active_lanes_to (build_zero_cst (TREE_TYPE (f.lhs)));
+ /* If the divisor is all integer -1 and the type is signed, fold to svneg. */
+ tree pg = gimple_call_arg (f.call, 0);
+ if (!f.type_suffix (0).unsigned_p && integer_minus_onep (op2))
+ {
+ function_instance instance ("svneg", functions::svneg,
+ shapes::unary, MODE_none,
+ f.type_suffix_ids, GROUP_none, f.pred);
+ gcall *call = f.redirect_call (instance);
+ unsigned offset_index = 0;
+ if (f.pred == PRED_m)
+ {
+ offset_index = 1;
+ gimple_call_set_arg (call, 0, op1);
+ }
+ else
+ gimple_set_num_ops (call, 5);
+ gimple_call_set_arg (call, offset_index, pg);
+ gimple_call_set_arg (call, offset_index + 1, op1);
+ return call;
+ }
+
/* If the divisor is a uniform power of 2, fold to a shift
instruction. */
tree op2_cst = uniform_integer_cst_p (op2);
if (integer_zerop (op1) || integer_zerop (op2))
return f.fold_active_lanes_to (build_zero_cst (TREE_TYPE (f.lhs)));
+ /* If one of the operands is all integer -1 and the type is signed, fold to svneg. */
+ tree pg = gimple_call_arg (f.call, 0);
+ tree negated_op = NULL;
+ if (integer_minus_onep (op2))
+ negated_op = op1;
+ else if (integer_minus_onep (op1))
+ negated_op = op2;
+ if (!f.type_suffix (0).unsigned_p && negated_op)
+ {
+ function_instance instance ("svneg", functions::svneg,
+ shapes::unary, MODE_none,
+ f.type_suffix_ids, GROUP_none, f.pred);
+ gcall *call = f.redirect_call (instance);
+ unsigned offset_index = 0;
+ if (f.pred == PRED_m)
+ {
+ offset_index = 1;
+ gimple_call_set_arg (call, 0, op1);
+ }
+ else
+ gimple_set_num_ops (call, 5);
+ gimple_call_set_arg (call, offset_index, pg);
+ gimple_call_set_arg (call, offset_index + 1, negated_op);
+ return call;
+ }
+
/* If one of the operands is a uniform power of 2, fold to a left shift
by immediate. */
- tree pg = gimple_call_arg (f.call, 0);
tree op1_cst = uniform_integer_cst_p (op1);
tree op2_cst = uniform_integer_cst_p (op2);
- tree shift_op1, shift_op2;
+ tree shift_op1, shift_op2 = NULL;
if (op1_cst && integer_pow2p (op1_cst)
&& (f.pred != PRED_m
|| is_ptrue (pg, f.type_suffix (0).element_bytes)))
else
return NULL;
- shift_op2 = wide_int_to_tree (unsigned_type_for (TREE_TYPE (shift_op2)),
- tree_log2 (shift_op2));
- function_instance instance ("svlsl", functions::svlsl,
- shapes::binary_uint_opt_n, MODE_n,
- f.type_suffix_ids, GROUP_none, f.pred);
- gcall *call = f.redirect_call (instance);
- gimple_call_set_arg (call, 1, shift_op1);
- gimple_call_set_arg (call, 2, shift_op2);
- return call;
+ if (shift_op2)
+ {
+ shift_op2 = wide_int_to_tree (unsigned_type_for (TREE_TYPE (shift_op2)),
+ tree_log2 (shift_op2));
+ function_instance instance ("svlsl", functions::svlsl,
+ shapes::binary_uint_opt_n, MODE_n,
+ f.type_suffix_ids, GROUP_none, f.pred);
+ gcall *call = f.redirect_call (instance);
+ gimple_call_set_arg (call, 1, shift_op1);
+ gimple_call_set_arg (call, 2, shift_op2);
+ return call;
+ }
+
+ return NULL;
}
};
z0 = svdiv_n_s32_m (p0, z1, x0),
z0 = svdiv_m (p0, z1, x0))
+/*
+** div_m1_s32_m_tied1:
+** neg z0\.s, p0/m, z0\.s
+** ret
+*/
+TEST_UNIFORM_Z (div_m1_s32_m_tied1, svint32_t,
+ z0 = svdiv_n_s32_m (p0, z0, -1),
+ z0 = svdiv_m (p0, z0, -1))
+
/*
** div_1_s32_m_tied1:
** ret
z0 = svdiv_n_s32_m (p0, z0, 1),
z0 = svdiv_m (p0, z0, 1))
+/*
+** div_m1_s32_m_untied:
+** movprfx z0, z1
+** neg z0\.s, p0/m, z1\.s
+** ret
+*/
+TEST_UNIFORM_Z (div_m1_s32_m_untied, svint32_t,
+ z0 = svdiv_n_s32_m (p0, z1, -1),
+ z0 = svdiv_m (p0, z1, -1))
+
/*
** div_1_s32_m_untied:
** mov z0\.d, z1\.d
z0 = svdiv_n_s32_z (p0, z1, x0),
z0 = svdiv_z (p0, z1, x0))
+/*
+** div_m1_s32_z_tied1:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0\.s, p0/z, \1\.s
+** neg z0\.s, p0/m, \1\.s
+** ret
+*/
+TEST_UNIFORM_Z (div_m1_s32_z_tied1, svint32_t,
+ z0 = svdiv_n_s32_z (p0, z0, -1),
+ z0 = svdiv_z (p0, z0, -1))
+
/*
** div_1_s32_z_tied1:
** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
z0 = svdiv_n_s32_z (p0, z0, 1),
z0 = svdiv_z (p0, z0, 1))
+/*
+** div_m1_s32_z_untied:
+** movprfx z0\.s, p0/z, z1\.s
+** neg z0\.s, p0/m, z1\.s
+** ret
+*/
+TEST_UNIFORM_Z (div_m1_s32_z_untied, svint32_t,
+ z0 = svdiv_n_s32_z (p0, z1, -1),
+ z0 = svdiv_z (p0, z1, -1))
+
/*
** div_1_s32_z_untied:
** movi? [vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
z0 = svdiv_n_s32_x (p0, z1, x0),
z0 = svdiv_x (p0, z1, x0))
+/*
+** div_m1_s32_x_tied1:
+** neg z0\.s, p0/m, z0\.s
+** ret
+*/
+TEST_UNIFORM_Z (div_m1_s32_x_tied1, svint32_t,
+ z0 = svdiv_n_s32_x (p0, z0, -1),
+ z0 = svdiv_x (p0, z0, -1))
+
/*
** div_1_s32_x_tied1:
** ret
z0 = svdiv_n_s32_x (p0, z0, 1),
z0 = svdiv_x (p0, z0, 1))
+/*
+** div_m1_s32_x_untied:
+** movprfx z0, z1
+** neg z0\.s, p0/m, z1\.s
+** ret
+*/
+TEST_UNIFORM_Z (div_m1_s32_x_untied, svint32_t,
+ z0 = svdiv_n_s32_x (p0, z1, -1),
+ z0 = svdiv_x (p0, z1, -1))
+
/*
** div_1_s32_x_untied:
** mov z0\.d, z1\.d
/*
** mul_m1_s16_m:
-** mov (z[0-9]+)\.b, #-1
-** mul z0\.h, p0/m, z0\.h, \1\.h
+** neg z0\.h, p0/m, z0\.h
** ret
*/
TEST_UNIFORM_Z (mul_m1_s16_m, svint16_t,
/*
** mul_m1_s16_x:
-** mul z0\.h, z0\.h, #-1
+** neg z0\.h, p0/m, z0\.h
** ret
*/
TEST_UNIFORM_Z (mul_m1_s16_x, svint16_t,
/*
** mul_m1_s32_m:
-** mov (z[0-9]+)\.b, #-1
-** mul z0\.s, p0/m, z0\.s, \1\.s
+** neg z0\.s, p0/m, z0\.s
** ret
*/
TEST_UNIFORM_Z (mul_m1_s32_m, svint32_t,
z0 = svmul_n_s32_m (p0, z0, -1),
z0 = svmul_m (p0, z0, -1))
+/*
+** mul_m1r_s32_m:
+** mov z0\.b, #-1
+** neg z0\.s, p0/m, z1\.s
+** ret
+*/
+TEST_UNIFORM_Z (mul_m1r_s32_m, svint32_t,
+ z0 = svmul_s32_m (p0, svdup_s32 (-1), z1),
+ z0 = svmul_m (p0, svdup_s32 (-1), z1))
+
/*
** mul_s32_z_tied1:
** movprfx z0\.s, p0/z, z0\.s
/*
** mul_m1_s32_x:
-** mul z0\.s, z0\.s, #-1
+** neg z0\.s, p0/m, z0\.s
** ret
*/
TEST_UNIFORM_Z (mul_m1_s32_x, svint32_t,
z0 = svmul_n_s32_x (p0, z0, -1),
z0 = svmul_x (p0, z0, -1))
+/*
+** mul_m1r_s32_x:
+** movprfx z0, z1
+** neg z0\.s, p0/m, z1\.s
+** ret
+*/
+TEST_UNIFORM_Z (mul_m1r_s32_x, svint32_t,
+ z0 = svmul_s32_x (p0, svdup_s32 (-1), z1),
+ z0 = svmul_x (p0, svdup_s32 (-1), z1))
+
+/*
+** mul_m1_s32_z:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0\.s, p0/z, \1\.s
+** neg z0\.s, p0/m, \1\.s
+** ret
+*/
+TEST_UNIFORM_Z (mul_m1_s32_z, svint32_t,
+ z0 = svmul_n_s32_z (p0, z0, -1),
+ z0 = svmul_z (p0, z0, -1))
+
+/*
+** mul_m1r_s32_z:
+** movprfx z0\.s, p0/z, z1\.s
+** neg z0\.s, p0/m, z1\.s
+** ret
+*/
+TEST_UNIFORM_Z (mul_m1r_s32_z, svint32_t,
+ z0 = svmul_s32_z (p0, svdup_s32 (-1), z1),
+ z0 = svmul_z (p0, svdup_s32 (-1), z1))
+
/*
** mul_m127_s32_x:
** mul z0\.s, z0\.s, #-127
/*
** mul_m1_s64_m:
-** mov (z[0-9]+)\.b, #-1
-** mul z0\.d, p0/m, z0\.d, \1\.d
+** neg z0\.d, p0/m, z0\.d
** ret
*/
TEST_UNIFORM_Z (mul_m1_s64_m, svint64_t,
/*
** mul_m1_s64_x:
-** mul z0\.d, z0\.d, #-1
+** neg z0\.d, p0/m, z0\.d
** ret
*/
TEST_UNIFORM_Z (mul_m1_s64_x, svint64_t,
/*
** mul_m1_s8_m:
-** mov (z[0-9]+)\.b, #-1
-** mul z0\.b, p0/m, z0\.b, \1\.b
+** neg z0\.b, p0/m, z0\.b
** ret
*/
TEST_UNIFORM_Z (mul_m1_s8_m, svint8_t,
/*
** mul_255_s8_x:
-** mul z0\.b, z0\.b, #-1
+** neg z0\.b, p0/m, z0\.b
** ret
*/
TEST_UNIFORM_Z (mul_255_s8_x, svint8_t,
/*
** mul_m1_s8_x:
-** mul z0\.b, z0\.b, #-1
+** neg z0\.b, p0/m, z0\.b
** ret
*/
TEST_UNIFORM_Z (mul_m1_s8_x, svint8_t,
TEST_TYPES_1 (uint64, u64)
#define TEST_VALUES_S_1(B, OP1, OP2) \
- F (int##B, s##B, x, OP1, OP2)
+ F (int##B, s##B, x, OP1, OP2) \
+ F (int##B, s##B, z, OP1, OP2) \
+ F (int##B, s##B, m, OP1, OP2)
#define TEST_VALUES_S \
TEST_VALUES_S_1 (32, INT32_MIN, INT32_MIN) \
TEST_VALUES_S_1 (32, INT32_MAX, -5) \
TEST_VALUES_S_1 (64, INT64_MAX, -5) \
TEST_VALUES_S_1 (32, INT32_MIN, -4) \
- TEST_VALUES_S_1 (64, INT64_MIN, -4)
+ TEST_VALUES_S_1 (64, INT64_MIN, -4) \
+ TEST_VALUES_S_1 (32, INT32_MAX, -1) \
+ TEST_VALUES_S_1 (32, -7, -1) \
+ TEST_VALUES_S_1 (64, INT64_MIN, -1) \
+ TEST_VALUES_S_1 (64, 16, -1)
#define TEST_VALUES_U_1(B, OP1, OP2) \
F (uint##B, u##B, x, OP1, OP2)
TEST_TYPES_1 (uint64, u64)
#define TEST_VALUES_S_1(B, OP1, OP2) \
- F (int##B, s##B, x, OP1, OP2)
+ F (int##B, s##B, x, OP1, OP2) \
+ F (int##B, s##B, m, OP1, OP2) \
+ F (int##B, s##B, z, OP1, OP2)
#define TEST_VALUES_S \
TEST_VALUES_S_1 (32, INT32_MIN, INT32_MIN) \
TEST_VALUES_S_1 (32, INT32_MAX, -5) \
TEST_VALUES_S_1 (64, INT64_MAX, -5) \
TEST_VALUES_S_1 (32, INT32_MIN, -4) \
- TEST_VALUES_S_1 (64, INT64_MIN, -4)
+ TEST_VALUES_S_1 (64, INT64_MIN, -4) \
+ TEST_VALUES_S_1 (32, INT32_MAX, -1) \
+ TEST_VALUES_S_1 (32, -7, -1) \
+ TEST_VALUES_S_1 (64, INT64_MIN, -1) \
+ TEST_VALUES_S_1 (64, 16, -1)
#define TEST_VALUES_U_1(B, OP1, OP2) \
F (uint##B, u##B, x, OP1, OP2)