This patch adds support for form 7 of the unsigned integer SAT_ADD,
as shown in the example below.
#define DEF_SAT_U_ADD_FMT_7(WT, T) \
T __attribute__((noinline)) \
sat_u_add_##WT##_##T##_fmt_7(T x, T y) \
{ \
T max = -1; \
WT val = (WT)x + (WT)y; \
return val > max ? max : (T)val; \
}
DEF_SAT_U_ADD_FMT_7(uint64_t, uint32_t)
Building with -O3 and -fdump-tree-optimized produces the following dumps.
Before this patch:
5 │ __attribute__((noinline))
6 │ uint32_t sat_u_add_uint64_t_uint32_t_fmt_7 (uint32_t x, uint32_t y)
7 │ {
8 │ uint64_t val;
9 │ long unsigned int _1;
10 │ long unsigned int _2;
11 │ uint32_t _3;
12 │ uint32_t _7;
13 │
14 │ <bb 2> [local count: 1073741824]:
15 │ _1 = (long unsigned int) x_4(D);
16 │ _2 = (long unsigned int) y_5(D);
17 │ val_6 = _1 + _2;
18 │ if (val_6 <= 4294967295)
19 │ goto <bb 3>; [65.00%]
20 │ else
21 │ goto <bb 4>; [35.00%]
22 │
23 │ <bb 3> [local count: 697932184]:
24 │ _7 = x_4(D) + y_5(D);
25 │
26 │ <bb 4> [local count: 1073741824]:
27 │ # _3 = PHI <4294967295(2), _7(3)>
28 │ return _3;
29 │
30 │ }
After this patch:
4 │ __attribute__((noinline))
5 │ uint32_t sat_u_add_uint64_t_uint32_t_fmt_7 (uint32_t x, uint32_t y)
6 │ {
7 │ uint32_t _3;
8 │
9 │ <bb 2> [local count: 1073741824]:
10 │ _3 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
11 │ return _3;
12 │
13 │ }
This change also affects vector modes.
The following test suites passed for this patch.
* The rv64gcv full regression test.
* The x86 bootstrap test.
* The x86 full regression test.
gcc/ChangeLog:
* match.pd: Add form 7 matching pattern for unsigned integer
SAT_ADD.
Signed-off-by: Pan Li <pan2.li@intel.com>
SAT_U_ADD = IMAGPART (SUM) != 0 ? -1 : REALPART (SUM) */
(cond^ (ne (imagpart (IFN_ADD_OVERFLOW@2 @0 INTEGER_CST@1)) integer_zerop)
integer_minus_onep (realpart @2))
- (if (types_match (type, @0) && int_fits_type_p (@1, type)))))
+ (if (types_match (type, @0) && int_fits_type_p (@1, type))))
+ (match (unsigned_integer_sat_add @0 @1)
+ /* Form 7: saturating add computed through a wider type WT.
+ WIDEN_SUM = (WT)X + (WT)Y
+ SAT_U_ADD = WIDEN_SUM > MAX ? MAX : (NT)WIDEN_SUM
+ The pattern below matches the inverted comparison, i.e.
+ WIDEN_SUM <= MAX ? X + Y : -1, where X + Y is done in NT.
+ MAX is the NT all-ones value expressed in WT precision (the low
+ TYPE_PRECISION (NT) bits set), and WT must be strictly wider than NT.  */
+ (cond^ (le (plus (convert@2 @0) (convert@3 @1)) INTEGER_CST@4)
+ (plus:c @0 @1) integer_minus_onep)
+ (if (types_match (type, @0, @1) && types_match (@2, @3))
+ (with
+ {
+ unsigned precision = TYPE_PRECISION (type);
+ unsigned widen_precision = TYPE_PRECISION (TREE_TYPE (@2));
+ wide_int max = wi::mask (precision, false, widen_precision);
+ wide_int c4 = wi::to_wide (@4);
+ }
+ (if (wi::eq_p (c4, max) && widen_precision > precision))))))
/* Saturation sub for unsigned integer. */
(if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type))