From: Andrew Pinski
Date: Sun, 7 Sep 2025 04:49:18 +0000 (-0700)
Subject: forwprop: Handle memcpy for copy prop [PR121418, PR121417]
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=597b50abb0d2fc1f123a99e1b4beb89ad6259d9e;p=thirdparty%2Fgcc.git

forwprop: Handle memcpy for copy prop [PR121418, PR121417]

It turns out to be easy to add support for memcpy copy prop once the
memcpy has been changed into a `MEM` copy.  Instead of rejecting it
outright, we need to figure out that `a` and `MEM[&a]` are equivalent
in terms of address and size, and then create a VIEW_CONVERT_EXPR from
the original src to the new type.

Note this also allows `a.b` and `a` to be considered equivalent if b
is the only field (PR 121751).

Changes since v1:
* v2: Move the check for IMAG/REAL and BIT_FIELD_REF earlier.  Add a
  wrapping function around get_inner_reference and use that instead
  of get_addr_base_and_unit_offset.

Bootstrapped and tested on x86_64-linux-gnu.

	PR tree-optimization/121751
	PR tree-optimization/121418
	PR tree-optimization/121417

gcc/ChangeLog:

	* tree-ssa-forwprop.cc (split_core_and_offset_size): New function.
	(optimize_agr_copyprop_1): Allow for the same address but
	different type accesses via a VCE.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/copy-prop-aggregate-1.c: New test.
	* gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-1.c: New test.
	* gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-2.c: New test.

Signed-off-by: Andrew Pinski
---

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-1.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-1.c
new file mode 100644
index 00000000000..1094c4d768b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-1.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-forwprop1-details -fdump-tree-optimized" } */
+/* PR tree-optimization/121751 */
+
+
+struct s1
+{
+  int t[1024];
+};
+
+struct s2 {
+  struct s1 t;
+};
+
+struct s3
+{
+  struct s2 t;
+};
+
+void g(struct s3*);
+
+void f(struct s1 s)
+{
+  struct s2 removeme;
+  removeme.t = s;
+  struct s3 t1;
+  t1.t = removeme;
+  g(&t1);
+}
+
+
+/* { dg-final { scan-tree-dump-times "after previous" 1 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-not "removeme " "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-1.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-1.c
new file mode 100644
index 00000000000..5faf6d0bf9b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-forwprop1-details -fdump-tree-optimized" } */
+/* PR tree-optimization/121418 */
+
+struct s1
+{
+  unsigned char t[1024];
+};
+
+struct s1 f(struct s1 a)
+{
+  struct s1 removeme1 = a;
+  __builtin_memcpy (&removeme1, &a, sizeof(struct s1));
+  return removeme1;
+}
+
+/* { dg-final { scan-tree-dump-times "after previous" 1 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-not "removeme1 " "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-2.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-2.c
new file mode 100644
index 00000000000..b1ba30d0aba
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-forwprop1-details -fdump-tree-optimized" } */
+/* PR tree-optimization/121417 */
+
+struct s1
+{
+  unsigned char t[1024];
+};
+
+struct s1 f(struct s1 a)
+{
+  struct s1 removeme1 = a;
+  struct s1 removeme2;
+  __builtin_memcpy (&removeme2, &removeme1, sizeof(struct s1));
+  return removeme2;
+}
+
+/* { dg-final { scan-tree-dump-times "after previous" 2 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-not "removeme1 " "optimized" } } */
+/* { dg-final { scan-tree-dump-not "removeme2 " "optimized" } } */
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index 9c6f4b355d6..1eacff01587 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -1418,6 +1418,46 @@ optimize_aggr_zeroprop (gimple_stmt_iterator *gsip, bool full_walk)
   return changed;
 }
 
+/* Return a pointer to the base of the object of the
+   reference EXPR and extract the information about
+   the access, storing it to PBYTESIZE, PBYTEPOS,
+   POFFSET and PREVERSEP.
+   If the access is not byte sized or the position is
+   not byte aligned, return NULL_TREE.  */
+static tree
+split_core_and_offset_size (tree expr,
+			    poly_int64 *pbytesize, poly_int64 *pbytepos,
+			    tree *poffset, int *preversep)
+{
+  tree core;
+  machine_mode mode;
+  int unsignedp, volatilep;
+  poly_int64 bitsize;
+  poly_int64 bitpos;
+  location_t loc = EXPR_LOCATION (expr);
+
+  core = get_inner_reference (expr, &bitsize, &bitpos,
+			      poffset, &mode, &unsignedp, preversep,
+			      &volatilep);
+  if (!multiple_p (bitsize, BITS_PER_UNIT, pbytesize))
+    return NULL_TREE;
+  if (!multiple_p (bitpos, BITS_PER_UNIT, pbytepos))
+    return NULL_TREE;
+  /* If we are left with MEM[a + CST], strip that, add CST to
+     pbytepos and return a.  */
+  if (TREE_CODE (core) == MEM_REF)
+    {
+      poly_offset_int tem;
+      tem = wi::to_poly_offset (TREE_OPERAND (core, 1));
+      tem += *pbytepos;
+      if (tem.to_shwi (pbytepos))
+	return TREE_OPERAND (core, 0);
+    }
+  core = build_fold_addr_expr_loc (loc, core);
+  STRIP_NOPS (core);
+  return core;
+}
+
 /* Helper function for optimize_agr_copyprop.
    For aggregate copies in USE_STMT, see if DEST is on the lhs
    of USE_STMT and replace it with SRC. */
@@ -1434,8 +1474,66 @@ optimize_agr_copyprop_1 (gimple *stmt, gimple *use_stmt,
   /* If the new store is `src2 = src2;` skip over it. */
   if (operand_equal_p (src2, dest2, 0))
     return false;
+  /* If the second src is not exactly the same as dest,
+     try to handle it separately; see if it is address/size equivalent.
+     This treats `a`, `a.b` and `MEM(&a)`, which all have the
+     same size and offset, as address/size equivalent.
+     This allows copy prop over a memcpy and also over a copy
+     where one field is the same size as the whole struct. */
   if (!operand_equal_p (dest, src2, 0))
-    return false;
+    {
+      /* A VCE can't be used with imag/real or BFR so reject them early. */
+      if (TREE_CODE (src) == IMAGPART_EXPR
+	  || TREE_CODE (src) == REALPART_EXPR
+	  || TREE_CODE (src) == BIT_FIELD_REF)
+	return false;
+      tree core1, core2;
+      poly_int64 bytepos1, bytepos2;
+      poly_int64 bytesize1, bytesize2;
+      tree toffset1, toffset2;
+      int reversep1 = 0;
+      int reversep2 = 0;
+      poly_int64 diff = 0;
+      core1 = split_core_and_offset_size (dest, &bytesize1, &bytepos1,
+					  &toffset1, &reversep1);
+      core2 = split_core_and_offset_size (src2, &bytesize2, &bytepos2,
+					  &toffset2, &reversep2);
+      if (!core1 || !core2)
+	return false;
+      if (reversep1 != reversep2)
+	return false;
+      /* The sizes of the 2 accesses need to be the same.  */
+      if (!known_eq (bytesize1, bytesize2))
+	return false;
+      if (!operand_equal_p (core1, core2, 0))
+	return false;
+
+      if (toffset1 && toffset2)
+	{
+	  tree type = TREE_TYPE (toffset1);
+	  if (type != TREE_TYPE (toffset2))
+	    toffset2 = fold_convert (type, toffset2);
+
+	  tree tdiff = fold_build2 (MINUS_EXPR, type, toffset1, toffset2);
+	  if (!cst_and_fits_in_hwi (tdiff))
+	    return false;
+
+	  diff = int_cst_value (tdiff);
+	}
+      else if (toffset1 || toffset2)
+	{
+	  /* If only one of the offsets is non-constant, the difference
+	     cannot be a constant. */
+	  return false;
+	}
+      diff += bytepos1 - bytepos2;
+      /* The offset between the 2 accesses needs to be 0. */
+      if (!known_eq (diff, 0))
+	return false;
+      src = fold_build1_loc (gimple_location (use_stmt),
+			     VIEW_CONVERT_EXPR,
+			     TREE_TYPE (src2), src);
+    }
   /* For 2 memory refences and using a temporary to do the copy,
      don't remove the temporary as the 2 memory references might overlap.
      Note t does not need to be decl as it could be field.
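
As a rough illustration of what this enables, here is a hand-written sketch of how
copy-prop-aggregate-memcpy-1.c looks going into forwprop1 (the GIMPLE syntax and
the temporary names are approximate, not taken from an actual dump):

	removeme1 = a;
	MEM[(char * {ref-all})&removeme1] = MEM[(char * {ref-all})&a];   /* folded memcpy */
	D.1234 = removeme1;
	return D.1234;

Before the patch the last copy could not be propagated because its source,
removeme1, is not operand_equal_p to MEM[&removeme1], the destination of the
folded memcpy.  With split_core_and_offset_size the two references are recognized
as covering the same bytes of the same object, so the copy can be rewritten as
roughly

	D.1234 = VIEW_CONVERT_EXPR<struct s1>(MEM[(char * {ref-all})&a]);

after which removeme1 is dead and later passes remove it, which is what the
scan for "removeme1 " in the testcase verifies.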