This patch would like to add the middle-end presentation for the
saturation truncation. Aka set the result of truncated value to
the max value when overflow. It will take the pattern similar
as below.
Form 1:
#define DEF_SAT_U_TRUC_FMT_1(WT, NT) \
NT __attribute__((noinline)) \
sat_u_truc_##T##_fmt_1 (WT x) \
{ \
bool overflow = x > (WT)(NT)(-1); \
return ((NT)x) | (NT)-overflow; \
}
For example, truncated uint16_t to uint8_t, we have
* SAT_TRUNC (254) => 254
* SAT_TRUNC (255) => 255
* SAT_TRUNC (256) => 255
* SAT_TRUNC (65536) => 255
Given below SAT_TRUNC from uint64_t to uint32_t.
DEF_SAT_U_TRUC_FMT_1 (uint64_t, uint32_t)
Before this patch:
__attribute__((noinline))
uint32_t sat_u_truc_T_fmt_1 (uint64_t x)
{
_Bool overflow;
unsigned int _1;
unsigned int _2;
unsigned int _3;
uint32_t _6;
;; basic block 2, loop depth 0
;; pred: ENTRY
overflow_5 = x_4(D) >
4294967295;
_1 = (unsigned int) x_4(D);
_2 = (unsigned int) overflow_5;
_3 = -_2;
_6 = _1 | _3;
return _6;
;; succ: EXIT
}
After this patch:
__attribute__((noinline))
uint32_t sat_u_truc_T_fmt_1 (uint64_t x)
{
uint32_t _6;
;; basic block 2, loop depth 0
;; pred: ENTRY
_6 = .SAT_TRUNC (x_4(D)); [tail call]
return _6;
;; succ: EXIT
}
The below tests are passed for this patch:
*. The rv64gcv fully regression tests.
*. The rv64gcv build with glibc.
*. The x86 bootstrap tests.
*. The x86 fully regression tests.
gcc/ChangeLog:
* internal-fn.def (SAT_TRUNC): Add new signed IFN sat_trunc as
unary_convert.
* match.pd: Add new matching pattern for unsigned int sat_trunc.
* optabs.def (OPTAB_CL): Add unsigned and signed optab.
* tree-ssa-math-opts.cc (gimple_unsigend_integer_sat_trunc): Add
new decl for the matching pattern generated func.
(match_unsigned_saturation_trunc): Add new func impl to match
the .SAT_TRUNC.
(math_opts_dom_walker::after_dom_children): Add .SAT_TRUNC match
function under BIT_IOR_EXPR case.
Signed-off-by: Pan Li <pan2.li@intel.com>
DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary)
DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_SUB, ECF_CONST, first, sssub, ussub, binary)
+DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_TRUNC, ECF_CONST, first, sstrunc, ustrunc, unary_convert)
+
DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary)
(if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
&& types_match (type, @0, @1))))
+/* Unsigned saturation truncate, case 1 (), sizeof (WT) > sizeof (NT).
+ SAT_U_TRUNC = (NT)x | (NT)(-(X > (WT)(NT)(-1))). */
+(match (unsigned_integer_sat_trunc @0)
+ (bit_ior:c (negate (convert (gt @0 INTEGER_CST@1)))
+ (convert @0))
+ (with {
+ unsigned itype_precision = TYPE_PRECISION (TREE_TYPE (@0));
+ unsigned otype_precision = TYPE_PRECISION (type);
+ wide_int trunc_max = wi::mask (itype_precision / 2, false, itype_precision);
+ wide_int int_cst = wi::to_wide (@1, itype_precision);
+ }
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+ && TYPE_UNSIGNED (TREE_TYPE (@0))
+ && otype_precision < itype_precision
+ && wi::eq_p (trunc_max, int_cst)))))
+
/* x > y && x != XXX_MIN --> x > y
x > y && x == XXX_MIN --> false . */
(for eqne (eq ne)
OPTAB_CL(satfract_optab, "satfract$b$Q$a2", SAT_FRACT, "satfract", gen_satfract_conv_libfunc)
OPTAB_CL(satfractuns_optab, "satfractuns$I$b$Q$a2", UNSIGNED_SAT_FRACT, "satfractuns", gen_satfractuns_conv_libfunc)
+OPTAB_CL(ustrunc_optab, "ustrunc$b$a2", US_TRUNCATE, "ustrunc", NULL)
+OPTAB_CL(sstrunc_optab, "sstrunc$b$a2", SS_TRUNCATE, "sstrunc", NULL)
+
OPTAB_CD(sfixtrunc_optab, "fix_trunc$F$b$I$a2")
OPTAB_CD(ufixtrunc_optab, "fixuns_trunc$F$b$I$a2")
extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
+extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
static void
build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
ops[0], ops[1]);
}
+/*
+ * Try to match saturation unsigned sub.
+ * uint16_t x_4(D);
+ * uint8_t _6;
+ * overflow_5 = x_4(D) > 255;
+ * _1 = (unsigned char) x_4(D);
+ * _2 = (unsigned char) overflow_5;
+ * _3 = -_2;
+ * _6 = _1 | _3;
+ * =>
+ * _6 = .SAT_TRUNC (x_4(D));
+ * */
+static void
+match_unsigned_saturation_trunc (gimple_stmt_iterator *gsi, gassign *stmt)
+{
+ tree ops[1];
+ tree lhs = gimple_assign_lhs (stmt);
+ tree type = TREE_TYPE (lhs);
+
+ if (gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)
+ && direct_internal_fn_supported_p (IFN_SAT_TRUNC,
+ tree_pair (type, TREE_TYPE (ops[0])),
+ OPTIMIZE_FOR_BOTH))
+ {
+ gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[0]);
+ gimple_call_set_lhs (call, lhs);
+ gsi_replace (gsi, call, /* update_eh_info */ true);
+ }
+}
+
/* Recognize for unsigned x
x = y - z;
if (x > y)
case BIT_IOR_EXPR:
match_unsigned_saturation_add (&gsi, as_a<gassign *> (stmt));
+ match_unsigned_saturation_trunc (&gsi, as_a<gassign *> (stmt));
/* fall-through */
case BIT_XOR_EXPR:
match_uaddc_usubc (&gsi, stmt, code);