if (auto *res = f.fold_const_binary (TRUNC_DIV_EXPR))
return res;
- /* If the divisor is a uniform power of 2, fold to a shift
- instruction. */
+ /* If the dividend is all zeros, fold to zero vector: active lanes
+ divide zero by op2, and inactive lanes are zero (_z), don't-care
+ (_x), or copied from the zero dividend (_m). */
+ tree op1 = gimple_call_arg (f.call, 1);
+ if (integer_zerop (op1))
+ return gimple_build_assign (f.lhs, op1);
+
+ /* If the divisor is all zeros, fold to zero vector: SVE division
+ returns zero for division by zero. With _m predication, inactive
+ lanes take their value from op1, so this is only safe when the
+ predicate is known to be all-true. */
+ tree pg = gimple_call_arg (f.call, 0);
tree op2 = gimple_call_arg (f.call, 2);
- tree divisor_cst = uniform_integer_cst_p (op2);
+ if (integer_zerop (op2)
+ && (f.pred != PRED_m
+ || is_ptrue (pg, f.type_suffix (0).element_bytes)))
+ return gimple_build_assign (f.lhs, build_zero_cst (TREE_TYPE (f.lhs)));
- if (!divisor_cst || !integer_pow2p (divisor_cst))
+ /* If the divisor is a uniform power of 2, fold to a shift
+ instruction. */
+ tree op2_cst = uniform_integer_cst_p (op2);
+ if (!op2_cst || !integer_pow2p (op2_cst))
return NULL;
tree new_divisor;
gcall *call;
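+ /* Unsigned division by a power of 2 is a logical shift right
+ (svlsr); signed division maps to svasrd, which rounds towards
+ zero, as TRUNC_DIV_EXPR requires. */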
- if (f.type_suffix (0).unsigned_p && tree_to_uhwi (divisor_cst) != 1)
+ if (f.type_suffix (0).unsigned_p && tree_to_uhwi (op2_cst) != 1)
{
function_instance instance ("svlsr", functions::svlsr,
shapes::binary_uint_opt_n, MODE_n,
f.type_suffix_ids, GROUP_none, f.pred);
call = f.redirect_call (instance);
- tree d = INTEGRAL_TYPE_P (TREE_TYPE (op2)) ? op2 : divisor_cst;
+ tree d = INTEGRAL_TYPE_P (TREE_TYPE (op2)) ? op2 : op2_cst;
new_divisor = wide_int_to_tree (TREE_TYPE (d), tree_log2 (d));
}
else
{
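+ /* A divisor of 1 would need a shift amount of 0, which svasrd
+ cannot encode, and a negative divisor is not a simple arithmetic
+ shift, so leave those cases alone. */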
- if (tree_int_cst_sign_bit (divisor_cst)
- || tree_to_shwi (divisor_cst) == 1)
+ if (tree_int_cst_sign_bit (op2_cst)
+ || tree_to_shwi (op2_cst) == 1)
return NULL;
function_instance instance ("svasrd", functions::svasrd,
f.type_suffix_ids, GROUP_none, f.pred);
call = f.redirect_call (instance);
new_divisor = wide_int_to_tree (scalar_types[VECTOR_TYPE_svuint64_t],
- tree_log2 (divisor_cst));
+ tree_log2 (op2_cst));
}
gimple_call_set_arg (call, 2, new_divisor);
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" } } */
+/* { dg-options "-O2" } */
+
+#include "arm_sve.h"
+
+/*
+** s64_x_pg_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_x_pg_op1 (svbool_t pg, svint64_t op2)
+{
+ return svdiv_x (pg, svdup_s64 (0), op2);
+}
+
+/*
+** s64_z_pg_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_z_pg_op1 (svbool_t pg, svint64_t op2)
+{
+ return svdiv_z (pg, svdup_s64 (0), op2);
+}
+
+/*
+** s64_m_pg_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_m_pg_op1 (svbool_t pg, svint64_t op2)
+{
+ return svdiv_m (pg, svdup_s64 (0), op2);
+}
+
+/*
+** s64_x_ptrue_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_x_ptrue_op1 (svint64_t op2)
+{
+ return svdiv_x (svptrue_b64 (), svdup_s64 (0), op2);
+}
+
+/*
+** s64_z_ptrue_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_z_ptrue_op1 (svint64_t op2)
+{
+ return svdiv_z (svptrue_b64 (), svdup_s64 (0), op2);
+}
+
+/*
+** s64_m_ptrue_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_m_ptrue_op1 (svint64_t op2)
+{
+ return svdiv_m (svptrue_b64 (), svdup_s64 (0), op2);
+}
+
+/*
+** s64_x_pg_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_x_pg_op2 (svbool_t pg, svint64_t op1)
+{
+ return svdiv_x (pg, op1, svdup_s64 (0));
+}
+
+/*
+** s64_z_pg_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_z_pg_op2 (svbool_t pg, svint64_t op1)
+{
+ return svdiv_z (pg, op1, svdup_s64 (0));
+}
+
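+/* Not folded: with _m and a non-constant predicate, the inactive
+   lanes must keep op1.  */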
+/*
+** s64_m_pg_op2:
+** mov (z[0-9]+)\.b, #0
+** sdiv (z[0-9]+\.d), p[0-7]/m, \2, \1\.d
+** ret
+*/
+svint64_t s64_m_pg_op2 (svbool_t pg, svint64_t op1)
+{
+ return svdiv_m (pg, op1, svdup_s64 (0));
+}
+
+/*
+** s64_x_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_x_ptrue_op2 (svint64_t op1)
+{
+ return svdiv_x (svptrue_b64 (), op1, svdup_s64 (0));
+}
+
+/*
+** s64_z_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_z_ptrue_op2 (svint64_t op1)
+{
+ return svdiv_z (svptrue_b64 (), op1, svdup_s64 (0));
+}
+
+/*
+** s64_m_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_m_ptrue_op2 (svint64_t op1)
+{
+ return svdiv_m (svptrue_b64 (), op1, svdup_s64 (0));
+}
+
+/*
+** s64_n_x_pg_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_n_x_pg_op2 (svbool_t pg, svint64_t op1)
+{
+ return svdiv_n_s64_x (pg, op1, 0);
+}
+
+/*
+** s64_n_z_pg_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_n_z_pg_op2 (svbool_t pg, svint64_t op1)
+{
+ return svdiv_n_s64_z (pg, op1, 0);
+}
+
+/*
+** s64_n_m_pg_op2:
+** mov (z[0-9]+)\.b, #0
+** sdiv (z[0-9]+\.d), p[0-7]/m, \2, \1\.d
+** ret
+*/
+svint64_t s64_n_m_pg_op2 (svbool_t pg, svint64_t op1)
+{
+ return svdiv_n_s64_m (pg, op1, 0);
+}
+
+/*
+** s64_n_x_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_n_x_ptrue_op2 (svint64_t op1)
+{
+ return svdiv_n_s64_x (svptrue_b64 (), op1, 0);
+}
+
+/*
+** s64_n_z_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_n_z_ptrue_op2 (svint64_t op1)
+{
+ return svdiv_n_s64_z (svptrue_b64 (), op1, 0);
+}
+
+/*
+** s64_n_m_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_n_m_ptrue_op2 (svint64_t op1)
+{
+ return svdiv_n_s64_m (svptrue_b64 (), op1, 0);
+}
+
+/*
+** u64_x_pg_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_x_pg_op1 (svbool_t pg, svuint64_t op2)
+{
+ return svdiv_x (pg, svdup_u64 (0), op2);
+}
+
+/*
+** u64_z_pg_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_z_pg_op1 (svbool_t pg, svuint64_t op2)
+{
+ return svdiv_z (pg, svdup_u64 (0), op2);
+}
+
+/*
+** u64_m_pg_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_m_pg_op1 (svbool_t pg, svuint64_t op2)
+{
+ return svdiv_m (pg, svdup_u64 (0), op2);
+}
+
+/*
+** u64_x_ptrue_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_x_ptrue_op1 (svuint64_t op2)
+{
+ return svdiv_x (svptrue_b64 (), svdup_u64 (0), op2);
+}
+
+/*
+** u64_z_ptrue_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_z_ptrue_op1 (svuint64_t op2)
+{
+ return svdiv_z (svptrue_b64 (), svdup_u64 (0), op2);
+}
+
+/*
+** u64_m_ptrue_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_m_ptrue_op1 (svuint64_t op2)
+{
+ return svdiv_m (svptrue_b64 (), svdup_u64 (0), op2);
+}
+
+/*
+** u64_x_pg_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_x_pg_op2 (svbool_t pg, svuint64_t op1)
+{
+ return svdiv_x (pg, op1, svdup_u64 (0));
+}
+
+/*
+** u64_z_pg_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_z_pg_op2 (svbool_t pg, svuint64_t op1)
+{
+ return svdiv_z (pg, op1, svdup_u64 (0));
+}
+
+/*
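+/* Likewise not folded for unsigned division.  */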
+** u64_m_pg_op2:
+** mov (z[0-9]+)\.b, #0
+** udiv (z[0-9]+\.d), p[0-7]/m, \2, \1\.d
+** ret
+*/
+svuint64_t u64_m_pg_op2 (svbool_t pg, svuint64_t op1)
+{
+ return svdiv_m (pg, op1, svdup_u64 (0));
+}
+
+/*
+** u64_x_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_x_ptrue_op2 (svuint64_t op1)
+{
+ return svdiv_x (svptrue_b64 (), op1, svdup_u64 (0));
+}
+
+/*
+** u64_z_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_z_ptrue_op2 (svuint64_t op1)
+{
+ return svdiv_z (svptrue_b64 (), op1, svdup_u64 (0));
+}
+
+/*
+** u64_m_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_m_ptrue_op2 (svuint64_t op1)
+{
+ return svdiv_m (svptrue_b64 (), op1, svdup_u64 (0));
+}
+
+/*
+** u64_n_x_pg_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_n_x_pg_op2 (svbool_t pg, svuint64_t op1)
+{
+ return svdiv_n_u64_x (pg, op1, 0);
+}
+
+/*
+** u64_n_z_pg_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_n_z_pg_op2 (svbool_t pg, svuint64_t op1)
+{
+ return svdiv_n_u64_z (pg, op1, 0);
+}
+
+/*
+** u64_n_m_pg_op2:
+** mov (z[0-9]+)\.b, #0
+** udiv (z[0-9]+\.d), p[0-7]/m, \2, \1\.d
+** ret
+*/
+svuint64_t u64_n_m_pg_op2 (svbool_t pg, svuint64_t op1)
+{
+ return svdiv_n_u64_m (pg, op1, 0);
+}
+
+/*
+** u64_n_x_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_n_x_ptrue_op2 (svuint64_t op1)
+{
+ return svdiv_n_u64_x (svptrue_b64 (), op1, 0);
+}
+
+/*
+** u64_n_z_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_n_z_ptrue_op2 (svuint64_t op1)
+{
+ return svdiv_n_u64_z (svptrue_b64 (), op1, 0);
+}
+
+/*
+** u64_n_m_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_n_m_ptrue_op2 (svuint64_t op1)
+{
+ return svdiv_n_u64_m (svptrue_b64 (), op1, 0);
+}
+