implementation approaches itself.
@end deftypefn
+@deftypefn {Target Hook} bool TARGET_VECTORIZE_CAN_SPECIAL_DIV_BY_CONST (enum @var{tree_code}, tree @var{vectype}, wide_int @var{constant}, rtx *@var{output}, rtx @var{in0}, rtx @var{in1})
+This hook is used to test whether the target has a special method of
+division of vectors of type @var{vectype} using the value @var{constant},
+and producing a vector of type @var{vectype}. The division
+will then not be decomposed by the vectorizer and kept as a div.
+
+When the hook is being used to test whether the target supports a special
+divide, @var{in0}, @var{in1}, and @var{output} are all null. When the hook
+is being used to emit a division, @var{in0} and @var{in1} are the source
+vectors of type @var{vectype} and @var{output} is the destination vector of
+type @var{vectype}.
+
+Return true if the operation is possible, emitting instructions for it
+if rtxes are provided and updating @var{output}.
+@end deftypefn
+
@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION (unsigned @var{code}, tree @var{vec_type_out}, tree @var{vec_type_in})
This hook should return the decl of a function that implements the
vectorized variant of the function with the @code{combined_fn} code
@hook TARGET_VECTORIZE_VEC_PERM_CONST
+@hook TARGET_VECTORIZE_CAN_SPECIAL_DIV_BY_CONST
+
@hook TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
@hook TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
TRUNC_DIV_EXPR. */
size = expand_binop (Pmode, add_optab, size, alignm1_rtx,
NULL_RTX, 1, OPTAB_LIB_WIDEN);
- size = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, size, align_rtx,
+ size = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, NULL, NULL, size, align_rtx,
NULL_RTX, 1);
size = expand_mult (Pmode, size, align_rtx, NULL_RTX, 1);
gen_int_mode (required_align / BITS_PER_UNIT - 1,
Pmode),
NULL_RTX, 1, OPTAB_LIB_WIDEN);
- target = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, target,
+ target = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, NULL, NULL, target,
gen_int_mode (required_align / BITS_PER_UNIT,
Pmode),
NULL_RTX, 1);
rtx
expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
- rtx op0, rtx op1, rtx target, int unsignedp,
- enum optab_methods methods)
+ tree treeop0, tree treeop1, rtx op0, rtx op1, rtx target,
+ int unsignedp, enum optab_methods methods)
{
machine_mode compute_mode;
rtx tquotient;
last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
+ /* Check if the target has specific expansions for the division. */
+ tree cst;
+ if (treeop0
+ && treeop1
+ && (cst = uniform_integer_cst_p (treeop1))
+ && targetm.vectorize.can_special_div_by_const (code, TREE_TYPE (treeop0),
+ wi::to_wide (cst),
+ &target, op0, op1))
+ return target;
+
+
/* Now convert to the best mode to use. */
if (compute_mode != mode)
{
|| (optab_handler (sdivmod_optab, int_mode)
!= CODE_FOR_nothing)))
quotient = expand_divmod (0, TRUNC_DIV_EXPR,
- int_mode, op0,
- gen_int_mode (abs_d,
+ int_mode, treeop0, treeop1,
+ op0, gen_int_mode (abs_d,
int_mode),
NULL_RTX, 0);
else
size - 1, NULL_RTX, 0);
t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
NULL_RTX);
- t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
- NULL_RTX, 0);
+ t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, treeop0,
+ treeop1, t3, op1, NULL_RTX, 0);
if (t4)
{
rtx t5;
extern rtx maybe_expand_shift (enum tree_code, machine_mode, rtx, int, rtx,
int);
#ifdef GCC_OPTABS_H
-extern rtx expand_divmod (int, enum tree_code, machine_mode, rtx, rtx,
- rtx, int, enum optab_methods = OPTAB_LIB_WIDEN);
+extern rtx expand_divmod (int, enum tree_code, machine_mode, tree, tree,
+ rtx, rtx, rtx, int,
+ enum optab_methods = OPTAB_LIB_WIDEN);
#endif
#endif
return expand_divmod (0,
FLOAT_MODE_P (GET_MODE (value))
? RDIV_EXPR : TRUNC_DIV_EXPR,
- GET_MODE (value), op1, op2, target, 0);
+ GET_MODE (value), NULL, NULL, op1, op2,
+ target, 0);
case MOD:
- return expand_divmod (1, TRUNC_MOD_EXPR, GET_MODE (value), op1, op2,
- target, 0);
+ return expand_divmod (1, TRUNC_MOD_EXPR, GET_MODE (value), NULL, NULL,
+ op1, op2, target, 0);
case UDIV:
- return expand_divmod (0, TRUNC_DIV_EXPR, GET_MODE (value), op1, op2,
- target, 1);
+ return expand_divmod (0, TRUNC_DIV_EXPR, GET_MODE (value), NULL, NULL,
+ op1, op2, target, 1);
case UMOD:
- return expand_divmod (1, TRUNC_MOD_EXPR, GET_MODE (value), op1, op2,
- target, 1);
+ return expand_divmod (1, TRUNC_MOD_EXPR, GET_MODE (value), NULL, NULL,
+ op1, op2, target, 1);
case ASHIFTRT:
return expand_simple_binop (GET_MODE (value), code, op1, op2,
target, 0, OPTAB_LIB_WIDEN);
bool speed_p = optimize_insn_for_speed_p ();
do_pending_stack_adjust ();
start_sequence ();
- rtx uns_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 1);
+ rtx uns_ret = expand_divmod (mod_p, code, mode, treeop0, treeop1,
+ op0, op1, target, 1);
rtx_insn *uns_insns = get_insns ();
end_sequence ();
start_sequence ();
- rtx sgn_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 0);
+ rtx sgn_ret = expand_divmod (mod_p, code, mode, treeop0, treeop1,
+ op0, op1, target, 0);
rtx_insn *sgn_insns = get_insns ();
end_sequence ();
unsigned uns_cost = seq_cost (uns_insns, speed_p);
emit_insn (sgn_insns);
return sgn_ret;
}
- return expand_divmod (mod_p, code, mode, op0, op1, target, unsignedp);
+ return expand_divmod (mod_p, code, mode, treeop0, treeop1,
+ op0, op1, target, unsignedp);
}
rtx
return NULL_RTX;
}
}
- rtx remainder = expand_divmod (1, TRUNC_MOD_EXPR, word_mode, sum,
- gen_int_mode (INTVAL (op1), word_mode),
+ rtx remainder = expand_divmod (1, TRUNC_MOD_EXPR, word_mode, NULL, NULL,
+ sum, gen_int_mode (INTVAL (op1),
+ word_mode),
NULL_RTX, 1, OPTAB_DIRECT);
if (remainder == NULL_RTX)
return NULL_RTX;
if (op11 != const1_rtx)
{
- rtx rem2 = expand_divmod (1, TRUNC_MOD_EXPR, mode, quot1, op11,
- NULL_RTX, unsignedp, OPTAB_DIRECT);
+ rtx rem2 = expand_divmod (1, TRUNC_MOD_EXPR, mode, NULL, NULL, quot1,
+ op11, NULL_RTX, unsignedp, OPTAB_DIRECT);
if (rem2 == NULL_RTX)
return NULL_RTX;
if (rem2 == NULL_RTX)
return NULL_RTX;
- rtx quot2 = expand_divmod (0, TRUNC_DIV_EXPR, mode, quot1, op11,
- NULL_RTX, unsignedp, OPTAB_DIRECT);
+ rtx quot2 = expand_divmod (0, TRUNC_DIV_EXPR, mode, NULL, NULL, quot1,
+ op11, NULL_RTX, unsignedp, OPTAB_DIRECT);
if (quot2 == NULL_RTX)
return NULL_RTX;
const vec_perm_indices &sel),
NULL)
+DEFHOOK
+(can_special_div_by_const,
+ "This hook is used to test whether the target has a special method of\n\
+division of vectors of type @var{vectype} using the value @var{constant},\n\
+and producing a vector of type @var{vectype}. The division\n\
+will then not be decomposed by the vectorizer and kept as a div.\n\
+\n\
+When the hook is being used to test whether the target supports a special\n\
+divide, @var{in0}, @var{in1}, and @var{output} are all null. When the hook\n\
+is being used to emit a division, @var{in0} and @var{in1} are the source\n\
+vectors of type @var{vectype} and @var{output} is the destination vector of\n\
+type @var{vectype}.\n\
+\n\
+Return true if the operation is possible, emitting instructions for it\n\
+if rtxes are provided and updating @var{output}.",
+ bool, (enum tree_code, tree vectype, wide_int constant, rtx *output,
+  rtx in0, rtx in1),
+ default_can_special_div_by_const)
+
/* Return true if the target supports misaligned store/load of a
specific factor denoted in the third parameter. The last parameter
is true if the access is defined in a packed struct. */
#include "insn-codes.h"
#include "tm.h"
#include "hard-reg-set.h"
+#include "tree-core.h"
#if CHECKING_P
return HAVE_conditional_execution;
}
+/* Default implementation of TARGET_VECTORIZE_CAN_SPECIAL_DIV_BY_CONST.
+   By default no division by a constant is handled specially by the
+   target, so return false without emitting any instructions.  */
+bool
+default_can_special_div_by_const (enum tree_code, tree, wide_int, rtx *, rtx,
+                                  rtx)
+{
+  return false;
+}
+
/* By default we assume that c99 functions are present at the runtime,
but sincos is not. */
bool
extern rtx default_addr_space_convert (rtx, tree, tree);
extern unsigned int default_case_values_threshold (void);
extern bool default_have_conditional_execution (void);
+extern bool default_can_special_div_by_const (enum tree_code, tree, wide_int,
+ rtx *, rtx, rtx);
extern bool default_libc_has_function (enum function_class, tree);
extern bool default_libc_has_fast_function (int fcode);
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include <stdint.h>
+#include "tree-vect.h"
+
+#define N 50
+#define TYPE uint8_t
+
+/* Reference version: optimize("O1") keeps this loop scalar, producing
+   the expected results that main () compares against.  */
+__attribute__((noipa, noinline, optimize("O1")))
+void fun1(TYPE* restrict pixel, TYPE level, int n)
+{
+  for (int i = 0; i < n; i+=1)
+    pixel[i] = (pixel[i] * level) / 0xff;
+}
+
+/* Vectorized version: optimize("O3") enables the vectorizer; the
+   dg-final check below verifies the divmod pattern is NOT used,
+   i.e. the target's special division expansion handled it.  */
+__attribute__((noipa, noinline, optimize("O3")))
+void fun2(TYPE* restrict pixel, TYPE level, int n)
+{
+  for (int i = 0; i < n; i+=1)
+    pixel[i] = (pixel[i] * level) / 0xff;
+}
+
+#include "vect-div-bitmask.h"
+
+/* { dg-final { scan-tree-dump-not "vect_recog_divmod_pattern: detected" "vect" { target aarch64*-*-* } } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include <stdint.h>
+#include "tree-vect.h"
+
+#define N 50
+#define TYPE uint16_t
+
+/* Reference version: optimize("O1") keeps this loop scalar, producing
+   the expected results that main () compares against.  */
+__attribute__((noipa, noinline, optimize("O1")))
+void fun1(TYPE* restrict pixel, TYPE level, int n)
+{
+  for (int i = 0; i < n; i+=1)
+    pixel[i] = (pixel[i] * level) / 0xffffU;
+}
+
+/* Vectorized version: optimize("O3") enables the vectorizer; the
+   dg-final check below verifies the divmod pattern is NOT used,
+   i.e. the target's special division expansion handled it.  */
+__attribute__((noipa, noinline, optimize("O3")))
+void fun2(TYPE* restrict pixel, TYPE level, int n)
+{
+  for (int i = 0; i < n; i+=1)
+    pixel[i] = (pixel[i] * level) / 0xffffU;
+}
+
+#include "vect-div-bitmask.h"
+
+/* { dg-final { scan-tree-dump-not "vect_recog_divmod_pattern: detected" "vect" { target aarch64*-*-* } } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-fno-vect-cost-model" { target aarch64*-*-* } } */
+
+#include <stdint.h>
+#include "tree-vect.h"
+
+#define N 50
+#define TYPE uint32_t
+
+/* Reference version: optimize("O1") keeps this loop scalar, producing
+   the expected results that main () compares against.  The widening
+   multiply goes through uint64_t so the product does not wrap.  */
+__attribute__((noipa, noinline, optimize("O1")))
+void fun1(TYPE* restrict pixel, TYPE level, int n)
+{
+  for (int i = 0; i < n; i+=1)
+    pixel[i] = (pixel[i] * (uint64_t)level) / 0xffffffffUL;
+}
+
+/* Vectorized version: optimize("O3") enables the vectorizer; the
+   dg-final check below verifies the divmod pattern is NOT used,
+   i.e. the target's special division expansion handled it.  */
+__attribute__((noipa, noinline, optimize("O3")))
+void fun2(TYPE* restrict pixel, TYPE level, int n)
+{
+  for (int i = 0; i < n; i+=1)
+    pixel[i] = (pixel[i] * (uint64_t)level) / 0xffffffffUL;
+}
+
+#include "vect-div-bitmask.h"
+
+/* { dg-final { scan-tree-dump-not "vect_recog_divmod_pattern: detected" "vect" { target aarch64*-*-* } } } */
--- /dev/null
+#include <stdio.h>
+
+#ifndef N
+#define N 65
+#endif
+
+#ifndef TYPE
+#define TYPE uint32_t
+#endif
+
+#ifndef DEBUG
+#define DEBUG 0
+#endif
+
+#define BASE ((TYPE) -1 < 0 ? -126 : 4)
+
+/* Run the scalar (fun1) and vectorized (fun2) versions over identical
+   inputs and abort on any mismatch, proving the target's special
+   division expansion computes the same results as the scalar code.  */
+int main ()
+{
+  TYPE a[N];
+  TYPE b[N];
+
+  /* Fill both arrays with the same deterministic values.  BASE keeps
+     signed TYPEs in range; the stride of 13 varies the bit patterns.  */
+  for (int i = 0; i < N; ++i)
+    {
+      a[i] = BASE + i * 13;
+      b[i] = BASE + i * 13;
+      if (DEBUG)
+        printf ("%d: 0x%x\n", i, a[i]);
+    }
+
+  fun1 (a, N / 2, N);
+  fun2 (b, N / 2, N);
+
+  /* Scalar and vector results must agree element-wise.  */
+  for (int i = 0; i < N; ++i)
+    {
+      if (DEBUG)
+        printf ("%d = 0x%x == 0x%x\n", i, a[i], b[i]);
+
+      if (a[i] != b[i])
+        __builtin_abort ();
+    }
+  return 0;
+}
+
tree rhs2 = gimple_assign_rhs2 (assign);
tree ret;
+ /* Check if the target was going to handle it through the special
+ division callback hook. */
+ tree cst = uniform_integer_cst_p (rhs2);
+ if (cst &&
+ targetm.vectorize.can_special_div_by_const (code, type,
+ wi::to_wide (cst),
+ NULL,
+ NULL_RTX, NULL_RTX))
+ return NULL_TREE;
+
+
if (!optimize
|| !VECTOR_INTEGER_TYPE_P (type)
|| TREE_CODE (rhs2) != VECTOR_CST
gimple *pattern_stmt, *def_stmt;
enum tree_code rhs_code;
optab optab;
- tree q;
+ tree q, cst;
int dummy_int, prec;
if (!is_gimple_assign (last_stmt))
return pattern_stmt;
}
+ else if ((cst = uniform_integer_cst_p (oprnd1))
+ && targetm.vectorize.can_special_div_by_const (rhs_code, vectype,
+ wi::to_wide (cst),
+ NULL, NULL_RTX,
+ NULL_RTX))
+ {
+ return NULL;
+ }
if (prec > HOST_BITS_PER_WIDE_INT
|| integer_zerop (oprnd1))
}
target_support_p = (optab_handler (optab, vec_mode)
!= CODE_FOR_nothing);
+ tree cst;
+ if (!target_support_p
+ && op1
+ && (cst = uniform_integer_cst_p (op1)))
+ target_support_p
+ = targetm.vectorize.can_special_div_by_const (code, vectype,
+ wi::to_wide (cst),
+ NULL, NULL_RTX,
+ NULL_RTX);
}
bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);