#include "rtl-iter.h"
#include "gimple-range.h"
+/* For lang_hooks.types.type_for_mode. */
+#include "langhooks.h"
+
/* The names of each internal function, indexed by function number. */
const char *const internal_fn_name_array[] = {
#define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) #CODE,
emit_insn (pattern);
}
+/* Expand the IFN_DEFERRED_INIT function:
+ LHS = DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA);
+
+ if IS_VLA is false, the LHS is the DECL itself,
+ if IS_VLA is true, the LHS is a MEM_REF whose address is the pointer
+ to this DECL.
+
+ Initialize the LHS with zero/pattern according to its second argument
+ INIT_TYPE:
+ if INIT_TYPE is AUTO_INIT_ZERO, use zeroes to initialize;
+ if INIT_TYPE is AUTO_INIT_PATTERN, use 0xFE byte-repeatable pattern
+ to initialize;
+ The LHS variable is initialized including paddings.
+ The reasons to choose 0xFE for pattern initialization are:
+ 1. It is a non-canonical virtual address on x86_64, and at the
+ high end of the i386 kernel address space.
+ 2. It is a very large float value (-1.694739530317379e+38).
+ 3. It is also an unusual number for integers. */
+#define INIT_PATTERN_VALUE 0xFE
+static void
+expand_DEFERRED_INIT (internal_fn, gcall *stmt)
+{
+ tree lhs = gimple_call_lhs (stmt);
+ tree var_size = gimple_call_arg (stmt, 0);
+ enum auto_init_type init_type
+ = (enum auto_init_type) TREE_INT_CST_LOW (gimple_call_arg (stmt, 1));
+ bool reg_lhs = true;
+
+ tree var_type = TREE_TYPE (lhs);
+ gcc_assert (init_type > AUTO_INIT_UNINITIALIZED);
+
+ if (TREE_CODE (lhs) == SSA_NAME)
+ reg_lhs = true;
+ else
+ {
+ rtx tem = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+ reg_lhs = !MEM_P (tem);
+ }
+
+ if (!reg_lhs)
+ {
+ /* If this is a VLA or the variable is not in register,
+ expand to a memset to initialize it. */
+ mark_addressable (lhs);
+ tree var_addr = build_fold_addr_expr (lhs);
+
+ tree value = (init_type == AUTO_INIT_PATTERN) ?
+ build_int_cst (integer_type_node,
+ INIT_PATTERN_VALUE) :
+ integer_zero_node;
+ tree m_call = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMSET),
+ 3, var_addr, value, var_size);
+ /* Expand this memset call. */
+ expand_builtin_memset (m_call, NULL_RTX, TYPE_MODE (var_type));
+ }
+ else
+ {
+ /* If this variable is in a register, use expand_assignment might
+ generate better code. */
+ tree init = build_zero_cst (var_type);
+ unsigned HOST_WIDE_INT total_bytes
+ = tree_to_uhwi (TYPE_SIZE_UNIT (var_type));
+
+ if (init_type == AUTO_INIT_PATTERN)
+ {
+ unsigned char *buf = (unsigned char *) xmalloc (total_bytes);
+ memset (buf, INIT_PATTERN_VALUE, total_bytes);
+ if (can_native_interpret_type_p (var_type))
+ init = native_interpret_expr (var_type, buf, total_bytes);
+ else
+ {
+ tree itype = build_nonstandard_integer_type
+ (total_bytes * BITS_PER_UNIT, 1);
+ wide_int w = wi::from_buffer (buf, total_bytes);
+ init = build1 (VIEW_CONVERT_EXPR, var_type,
+ wide_int_to_tree (itype, w));
+ }
+ }
+
+ expand_assignment (lhs, init, false);
+ }
+}
+
/* The size of an OpenACC compute dimension. */
static void
case IFN_FNMS:
case IFN_AVG_FLOOR:
case IFN_AVG_CEIL:
+ case IFN_MULH:
case IFN_MULHS:
case IFN_MULHRS:
case IFN_FMIN:
bool
vectorized_internal_fn_supported_p (internal_fn ifn, tree type)
{
+ if (VECTOR_MODE_P (TYPE_MODE (type)))
+ return direct_internal_fn_supported_p (ifn, type, OPTIMIZE_FOR_SPEED);
+
scalar_mode smode;
- if (!VECTOR_TYPE_P (type) && is_a <scalar_mode> (TYPE_MODE (type), &smode))
+ if (!is_a <scalar_mode> (TYPE_MODE (type), &smode))
+ return false;
+
+ machine_mode vmode = targetm.vectorize.preferred_simd_mode (smode);
+ if (VECTOR_MODE_P (vmode))
{
- machine_mode vmode = targetm.vectorize.preferred_simd_mode (smode);
- if (VECTOR_MODE_P (vmode))
- type = build_vector_type_for_mode (type, vmode);
+ tree vectype = build_vector_type_for_mode (type, vmode);
+ if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
+ return true;
}
- return (VECTOR_MODE_P (TYPE_MODE (type))
- && direct_internal_fn_supported_p (ifn, type, OPTIMIZE_FOR_SPEED));
+ auto_vector_modes vector_modes;
+ targetm.vectorize.autovectorize_vector_modes (&vector_modes, true);
+ for (machine_mode base_mode : vector_modes)
+ if (related_vector_mode (base_mode, smode).exists (&vmode))
+ {
+ tree vectype = build_vector_type_for_mode (type, vmode);
+ if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
+ return true;
+ }
+
+ return false;
}
void