--- /dev/null
+/* Late RTL pass to fold memory offsets.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "tree.h"
+#include "expr.h"
+#include "backend.h"
+#include "regs.h"
+#include "target.h"
+#include "memmodel.h"
+#include "emit-rtl.h"
+#include "insn-config.h"
+#include "recog.h"
+#include "predict.h"
+#include "df.h"
+#include "tree-pass.h"
+#include "cfgrtl.h"
+
+/* This pass tries to optimize memory offset calculations by moving constants
+ from add instructions to the memory instructions (loads / stores).
+ For example, it can transform code like this:
+
+ add t4, sp, 16
+ add t2, a6, t4
+ shl t3, t2, 1
+ ld a2, 0(t3)
+ add a2, 1
+ sd a2, 8(t2)
+
+ into the following (one instruction fewer):
+
+ add t2, a6, sp
+ shl t3, t2, 1
+ ld a2, 32(t3)
+ add a2, 1
+ sd a2, 24(t2)
+
+ Although the previous passes try to emit efficient offset calculations,
+ this pass is still beneficial because:
+
+ - The mechanisms that optimize memory offsets usually work with specific
+ patterns or have other limitations. This pass is designed to fold
+ offsets through complex calculations that affect multiple memory
+ operations and that partially overlap with one another.
+
+ - There are cases where add instructions are introduced in late rtl passes
+ and the rest of the pipeline cannot eliminate them. Arrays and structs
+ allocated on the stack can result in unwanted add instructions that
+ cannot be eliminated easily.
+
+ This pass works at the basic block level and consists of 4 phases:
+
+ - Phase 1 (Analysis): Find "foldable" instructions.
+ Foldable instructions are those through which we know how to propagate
+ a constant addition (add, shift, move, ...) and whose uses are all
+ other foldable instructions. In this phase a DFS traversal of the
+ definition tree is performed and foldable instructions are marked in
+ a bitmap. The add immediate instructions that are reachable in this
+ DFS are candidates for folding, since all the intermediate calculations
+ affected by them are also foldable.
+
+ - Phase 2 (Validity): Traverse and calculate the offsets that would result
+ from folding the add immediate instructions. Check whether the
+ calculated offsets result in a valid instruction for the target.
+
+ - Phase 3 (Commit offsets): Traverse again. It is now known which folds
+ are valid so at this point change the offsets in the memory instructions.
+
+ - Phase 4 (Commit instruction deletions): Scan all instructions and delete
+ or simplify (reduce to move) all add immediate instructions that were
+ folded.
+
+ This pass should run before hard register propagation because it creates
+ register moves that we expect to be eliminated. */
+
+namespace {
+
+const pass_data pass_data_fold_mem =
+{
+ RTL_PASS, /* type */
+ "fold_mem_offsets", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_df_finish, /* todo_flags_finish */
+};
+
+class pass_fold_mem_offsets : public rtl_opt_pass
+{
+public:
+ pass_fold_mem_offsets (gcc::context *ctxt)
+ : rtl_opt_pass (pass_data_fold_mem, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *)
+ {
+ return flag_fold_mem_offsets && optimize >= 2;
+ }
+
+ virtual unsigned int execute (function *);
+}; // class pass_fold_mem_offsets
+
+/* Class that holds in FOLD_INSNS the instructions whose folding would
+ increase the offset of a memory instruction by ADDED_OFFSET. */
+class fold_mem_info {
+public:
+ auto_bitmap fold_insns;
+ HOST_WIDE_INT added_offset;
+};
+
+typedef hash_map<rtx_insn *, fold_mem_info *> fold_info_map;
+
+/* Tracks which instructions can be reached through instructions that can
+ propagate offsets for folding. */
+static bitmap_head can_fold_insns;
+
+/* Marks instructions that are currently eligible for folding. */
+static bitmap_head candidate_fold_insns;
+
+/* Tracks instructions that cannot be folded because it turned out that
+ folding will result in creating an invalid memory instruction.
+ An instruction can be in both CANDIDATE_FOLD_INSNS and CANNOT_FOLD_INSNS
+ at the same time, in which case it is not legal to fold. */
+static bitmap_head cannot_fold_insns;
+
+/* The number of instructions that were simplified or eliminated. */
+static int stats_fold_count;
+
+/* Get the single reaching definition of REG as used in INSN, where the
+ definition must be inside the same BB as INSN.
+ Return the definition insn or NULL if there's no definition that
+ satisfies the criteria. */
+static rtx_insn*
+get_single_def_in_bb (rtx_insn *insn, rtx reg)
+{
+ df_ref use;
+ struct df_link *ref_chain, *ref_link;
+
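+ /* Locate the use of REG in INSN; punt if any use of INSN occurs
+ inside a SUBREG, since the pass only reasons about full registers. */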
+ FOR_EACH_INSN_USE (use, insn)
+ {
+ if (GET_CODE (DF_REF_REG (use)) == SUBREG)
+ return NULL;
+ if (REGNO (DF_REF_REG (use)) == REGNO (reg))
+ break;
+ }
+
+ if (!use)
+ return NULL;
+
+ ref_chain = DF_REF_CHAIN (use);
+
+ if (!ref_chain)
+ return NULL;
+
+ for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
+ {
+ /* Problem getting some definition for this instruction. */
+ if (ref_link->ref == NULL)
+ return NULL;
+ if (DF_REF_INSN_INFO (ref_link->ref) == NULL)
+ return NULL;
+ if (global_regs[REGNO (reg)]
+ && !set_of (reg, DF_REF_INSN (ref_link->ref)))
+ return NULL;
+ }
+
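+ /* Require a single reaching definition of REG. */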
+ if (ref_chain->next)
+ return NULL;
+
+ rtx_insn *def = DF_REF_INSN (ref_chain->ref);
+
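+ /* The definition must be in the same basic block as INSN and must
+ appear before it; DF LUIDs increase in instruction order within
+ a block. */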
+ if (BLOCK_FOR_INSN (def) != BLOCK_FOR_INSN (insn))
+ return NULL;
+
+ if (DF_INSN_LUID (def) > DF_INSN_LUID (insn))
+ return NULL;
+
+ return def;
+}
+
+/* Get all uses of REG, which is set in INSN. Return the use list or NULL
+ if a use is missing / irregular. If SUCCESS is not NULL then set it to
+ false if there are missing / irregular uses and to true otherwise. */
+static struct df_link*
+get_uses (rtx_insn *insn, rtx reg, bool *success)
+{
+ df_ref def;
+ struct df_link *ref_chain, *ref_link;
+
+ if (success)
+ *success = false;
+
+ FOR_EACH_INSN_DEF (def, insn)
+ if (REGNO (DF_REF_REG (def)) == REGNO (reg))
+ break;
+
+ if (!def)
+ return NULL;
+
+ ref_chain = DF_REF_CHAIN (def);
+
+ for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
+ {
+ /* Problem getting a use for this instruction. */
+ if (ref_link->ref == NULL)
+ return NULL;
+ if (DF_REF_CLASS (ref_link->ref) != DF_REF_REGULAR)
+ return NULL;
+ /* We do not handle REG_EQUIV/REG_EQ notes for now. */
+ if (DF_REF_FLAGS (ref_link->ref) & DF_REF_IN_NOTE)
+ return NULL;
+ }
+
+ if (success)
+ *success = true;
+
+ return ref_chain;
+}
+
+static HOST_WIDE_INT
+fold_offsets (rtx_insn *insn, rtx reg, bool analyze, bitmap foldable_insns);
+
+/* Helper function for fold_offsets.
+
+ If DO_RECURSION is false and ANALYZE is true this function returns true iff
+ it understands the structure of INSN and knows how to propagate constants
+ through it. In this case OFFSET_OUT and FOLDABLE_INSNS are unused.
+
+ If DO_RECURSION is true then it also calls fold_offsets for each recognized
+ part of INSN with the appropriate arguments.
+
+ If DO_RECURSION is true and ANALYZE is false then the offset that would
+ result from folding is computed and returned through the pointer
+ OFFSET_OUT. The instructions that can be folded are recorded in
+ FOLDABLE_INSNS. */
+static bool
+fold_offsets_1 (rtx_insn *insn, bool analyze, bool do_recursion,
+ HOST_WIDE_INT *offset_out, bitmap foldable_insns)
+{
+ /* Doesn't make sense if both DO_RECURSION and ANALYZE are false. */
+ gcc_checking_assert (do_recursion || analyze);
+ gcc_checking_assert (GET_CODE (PATTERN (insn)) == SET);
+
+ rtx src = SET_SRC (PATTERN (insn));
+ HOST_WIDE_INT offset = 0;
+
+ switch (GET_CODE (src))
+ {
+ case PLUS:
+ {
+ /* Propagate through add. */
+ rtx arg1 = XEXP (src, 0);
+ rtx arg2 = XEXP (src, 1);
+
+ if (REG_P (arg1))
+ {
+ if (do_recursion)
+ offset += fold_offsets (insn, arg1, analyze, foldable_insns);
+ }
+ else if (GET_CODE (arg1) == ASHIFT
+ && REG_P (XEXP (arg1, 0))
+ && CONST_INT_P (XEXP (arg1, 1)))
+ {
+ /* Handle R1 = (R2 << C) + ... */
+ if (do_recursion)
+ {
+ HOST_WIDE_INT scale
+ = (HOST_WIDE_INT_1U << INTVAL (XEXP (arg1, 1)));
+ offset += scale * fold_offsets (insn, XEXP (arg1, 0), analyze,
+ foldable_insns);
+ }
+ }
+ else if (GET_CODE (arg1) == PLUS
+ && REG_P (XEXP (arg1, 0))
+ && REG_P (XEXP (arg1, 1)))
+ {
+ /* Handle R1 = (R2 + R3) + ... */
+ if (do_recursion)
+ {
+ offset += fold_offsets (insn, XEXP (arg1, 0), analyze,
+ foldable_insns);
+ offset += fold_offsets (insn, XEXP (arg1, 1), analyze,
+ foldable_insns);
+ }
+ }
+ else if (GET_CODE (arg1) == PLUS
+ && GET_CODE (XEXP (arg1, 0)) == ASHIFT
+ && REG_P (XEXP (XEXP (arg1, 0), 0))
+ && CONST_INT_P (XEXP (XEXP (arg1, 0), 1))
+ && REG_P (XEXP (arg1, 1)))
+ {
+ /* Handle R1 = ((R2 << C) + R3) + ... */
+ if (do_recursion)
+ {
+ HOST_WIDE_INT scale
+ = (HOST_WIDE_INT_1U << INTVAL (XEXP (XEXP (arg1, 0), 1)));
+ offset += scale * fold_offsets (insn, XEXP (XEXP (arg1, 0), 0),
+ analyze, foldable_insns);
+ offset += fold_offsets (insn, XEXP (arg1, 1), analyze,
+ foldable_insns);
+ }
+ }
+ else
+ return false;
+
+ if (REG_P (arg2))
+ {
+ if (do_recursion)
+ offset += fold_offsets (insn, arg2, analyze, foldable_insns);
+ }
+ else if (CONST_INT_P (arg2))
+ {
+ if (REG_P (arg1))
+ {
+ offset += INTVAL (arg2);
+ /* This is an R1 = R2 + C instruction, a candidate for folding. */
+ if (!analyze)
+ bitmap_set_bit (foldable_insns, INSN_UID (insn));
+ }
+ }
+ else
+ return false;
+
+ /* Pattern recognized for folding. */
+ break;
+ }
+ case MINUS:
+ {
+ /* Propagate through minus. */
+ rtx arg1 = XEXP (src, 0);
+ rtx arg2 = XEXP (src, 1);
+
+ if (REG_P (arg1))
+ {
+ if (do_recursion)
+ offset += fold_offsets (insn, arg1, analyze, foldable_insns);
+ }
+ else
+ return false;
+
+ if (REG_P (arg2))
+ {
+ if (do_recursion)
+ offset -= fold_offsets (insn, arg2, analyze, foldable_insns);
+ }
+ else if (CONST_INT_P (arg2))
+ {
+ if (REG_P (arg1))
+ {
+ offset -= INTVAL (arg2);
+ /* This is an R1 = R2 - C instruction, a candidate for folding. */
+ if (!analyze)
+ bitmap_set_bit (foldable_insns, INSN_UID (insn));
+ }
+ }
+ else
+ return false;
+
+ /* Pattern recognized for folding. */
+ break;
+ }
+ case NEG:
+ {
+ /* Propagate through negation. */
+ rtx arg1 = XEXP (src, 0);
+ if (REG_P (arg1))
+ {
+ if (do_recursion)
+ offset = -fold_offsets (insn, arg1, analyze, foldable_insns);
+ }
+ else
+ return false;
+
+ /* Pattern recognized for folding. */
+ break;
+ }
+ case MULT:
+ {
+ /* Propagate through multiply by constant. */
+ rtx arg1 = XEXP (src, 0);
+ rtx arg2 = XEXP (src, 1);
+
+ if (REG_P (arg1) && CONST_INT_P (arg2))
+ {
+ if (do_recursion)
+ {
+ HOST_WIDE_INT scale = INTVAL (arg2);
+ offset = scale * fold_offsets (insn, arg1, analyze,
+ foldable_insns);
+ }
+ }
+ else
+ return false;
+
+ /* Pattern recognized for folding. */
+ break;
+ }
+ case ASHIFT:
+ {
+ /* Propagate through shift left by constant. */
+ rtx arg1 = XEXP (src, 0);
+ rtx arg2 = XEXP (src, 1);
+
+ if (REG_P (arg1) && CONST_INT_P (arg2))
+ {
+ if (do_recursion)
+ {
+ HOST_WIDE_INT scale = (HOST_WIDE_INT_1U << INTVAL (arg2));
+ offset = scale * fold_offsets (insn, arg1, analyze,
+ foldable_insns);
+ }
+ }
+ else
+ return false;
+
+ /* Pattern recognized for folding. */
+ break;
+ }
+ case REG:
+ {
+ /* Propagate through register move. */
+ if (do_recursion)
+ offset = fold_offsets (insn, src, analyze, foldable_insns);
+
+ /* Pattern recognized for folding. */
+ break;
+ }
+ case CONST_INT:
+ {
+ offset = INTVAL (src);
+ /* R1 = C is a candidate for folding. */
+ if (!analyze)
+ bitmap_set_bit (foldable_insns, INSN_UID (insn));
+
+ /* Pattern recognized for folding. */
+ break;
+ }
+ default:
+ /* Cannot recognize. */
+ return false;
+ }
+
+ if (do_recursion && !analyze)
+ *offset_out = offset;
+
+ return true;
+}
+
+/* Compute the offset that would have to be added to all uses of REG if
+ the instructions recorded in FOLDABLE_INSNS were to be eliminated.
+
+ If ANALYZE is true then mark in CAN_FOLD_INSNS which instructions
+ transitively affect only other instructions found in CAN_FOLD_INSNS.
+ If ANALYZE is false then compute the offset required for folding and
+ record the instructions that would have to be eliminated in
+ FOLDABLE_INSNS. */
+static HOST_WIDE_INT
+fold_offsets (rtx_insn *insn, rtx reg, bool analyze, bitmap foldable_insns)
+{
+ rtx_insn *def = get_single_def_in_bb (insn, reg);
+
+ if (!def || GET_CODE (PATTERN (def)) != SET)
+ return 0;
+
+ rtx dest = SET_DEST (PATTERN (def));
+
+ if (!REG_P (dest))
+ return 0;
+
+ /* We can only affect the values of GPR registers. */
+ unsigned int dest_regno = REGNO (dest);
+ if (fixed_regs[dest_regno]
+ || !TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], dest_regno))
+ return 0;
+
+ if (analyze)
+ {
+ /* Check if we know how to handle DEF. */
+ if (!fold_offsets_1 (def, true, false, NULL, NULL))
+ return 0;
+
+ /* We only fold through instructions that are transitively used as
+ memory addresses and do not have other uses. Use the same logic
+ from offset calculation to visit instructions that can propagate
+ offsets and keep track of them in CAN_FOLD_INSNS. */
+ bool success;
+ struct df_link *uses = get_uses (def, dest, &success), *ref_link;
+
+ if (!success)
+ return 0;
+
+ for (ref_link = uses; ref_link; ref_link = ref_link->next)
+ {
+ rtx_insn *use = DF_REF_INSN (ref_link->ref);
+
+ if (DEBUG_INSN_P (use))
+ continue;
+
+ /* Punt if the use is anything more complicated than a set
+ (clobber, use, etc). */
+ if (!NONJUMP_INSN_P (use) || GET_CODE (PATTERN (use)) != SET)
+ return 0;
+
+ /* This use affects instructions outside of CAN_FOLD_INSNS. */
+ if (!bitmap_bit_p (&can_fold_insns, INSN_UID (use)))
+ return 0;
+
+ rtx use_set = PATTERN (use);
+
+ /* Special case: A foldable memory store is not foldable if it
+ mentions DEST outside of the address calculation. */
+ if (use_set && MEM_P (SET_DEST (use_set))
+ && reg_mentioned_p (dest, SET_SRC (use_set)))
+ return 0;
+ }
+
+ bitmap_set_bit (&can_fold_insns, INSN_UID (def));
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Instruction marked for propagation: ");
+ print_rtl_single (dump_file, def);
+ }
+ }
+ else
+ {
+ /* We cannot propagate through this instruction. */
+ if (!bitmap_bit_p (&can_fold_insns, INSN_UID (def)))
+ return 0;
+ }
+
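+ /* Recurse through DEF's operands and accumulate the offset that
+ folding would add to uses of REG. */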
+ HOST_WIDE_INT offset = 0;
+ bool recognized = fold_offsets_1 (def, analyze, true, &offset,
+ foldable_insns);
+
+ if (!recognized)
+ return 0;
+
+ return offset;
+}
+
+/* Test if INSN is a memory load / store that can have an offset folded
+ into it. Return true iff INSN is such an instruction, and return through
+ MEM_OUT, REG_OUT and OFFSET_OUT the MEM RTX, the register used as the
+ base address and the constant offset respectively.
+ Any of the out pointers may be NULL, in which case it is ignored. */
+static bool
+get_fold_mem_root (rtx_insn *insn, rtx *mem_out, rtx *reg_out,
+ HOST_WIDE_INT *offset_out)
+{
+ rtx set = single_set (insn);
+ rtx mem = NULL_RTX;
+
+ if (set != NULL_RTX)
+ {
+ rtx src = SET_SRC (set);
+ rtx dest = SET_DEST (set);
+
+ /* Don't fold when we have unspec / volatile. */
+ if (GET_CODE (src) == UNSPEC
+ || GET_CODE (src) == UNSPEC_VOLATILE
+ || GET_CODE (dest) == UNSPEC
+ || GET_CODE (dest) == UNSPEC_VOLATILE)
+ return false;
+
+ if (MEM_P (src))
+ mem = src;
+ else if (MEM_P (dest))
+ mem = dest;
+ else if ((GET_CODE (src) == SIGN_EXTEND
+ || GET_CODE (src) == ZERO_EXTEND)
+ && MEM_P (XEXP (src, 0)))
+ mem = XEXP (src, 0);
+ }
+
+ if (mem == NULL_RTX)
+ return false;
+
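+ /* Only addresses of the form (reg) or (plus (reg) (const_int))
+ can be handled. */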
+ rtx mem_addr = XEXP (mem, 0);
+ rtx reg;
+ HOST_WIDE_INT offset;
+
+ if (REG_P (mem_addr))
+ {
+ reg = mem_addr;
+ offset = 0;
+ }
+ else if (GET_CODE (mem_addr) == PLUS
+ && REG_P (XEXP (mem_addr, 0))
+ && CONST_INT_P (XEXP (mem_addr, 1)))
+ {
+ reg = XEXP (mem_addr, 0);
+ offset = INTVAL (XEXP (mem_addr, 1));
+ }
+ else
+ return false;
+
+ if (mem_out)
+ *mem_out = mem;
+ if (reg_out)
+ *reg_out = reg;
+ if (offset_out)
+ *offset_out = offset;
+
+ return true;
+}
+
+/* If INSN is a root memory instruction then do a DFS traversal on its
+ definitions and find folding candidates. */
+static void
+do_analysis (rtx_insn *insn)
+{
+ rtx reg;
+ if (!get_fold_mem_root (insn, NULL, &reg, NULL))
+ return;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Starting analysis from root: ");
+ print_rtl_single (dump_file, insn);
+ }
+
+ /* Analyse folding opportunities for this memory instruction. */
+ bitmap_set_bit (&can_fold_insns, INSN_UID (insn));
+ fold_offsets (insn, reg, true, NULL);
+}
+
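+/* If INSN is a root memory instruction then compute the offset that folding
+ would add to it and record in FOLD_INFO the instructions that would have
+ to be folded away for that offset to be correct. */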
+static void
+do_fold_info_calculation (rtx_insn *insn, fold_info_map *fold_info)
+{
+ rtx mem, reg;
+ HOST_WIDE_INT cur_offset;
+ if (!get_fold_mem_root (insn, &mem, &reg, &cur_offset))
+ return;
+
+ fold_mem_info *info = new fold_mem_info;
+ info->added_offset = fold_offsets (insn, reg, false, info->fold_insns);
+
+ fold_info->put (insn, info);
+}
+
+/* If INSN is a root memory instruction then compute a potentially new offset
+ for it and test if the resulting instruction is valid. */
+static void
+do_check_validity (rtx_insn *insn, fold_mem_info *info)
+{
+ rtx mem, reg;
+ HOST_WIDE_INT cur_offset;
+ if (!get_fold_mem_root (insn, &mem, &reg, &cur_offset))
+ return;
+
+ HOST_WIDE_INT new_offset = cur_offset + info->added_offset;
+
+ /* Test if it is valid to change MEM's address offset to NEW_OFFSET. */
+ int icode = INSN_CODE (insn);
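+ /* Invalidate the cached code so that insn_invalid_p re-runs
+ recognition on the modified pattern. */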
+ INSN_CODE (insn) = -1;
+ rtx mem_addr = XEXP (mem, 0);
+ machine_mode mode = GET_MODE (mem_addr);
+ if (new_offset != 0)
+ XEXP (mem, 0) = gen_rtx_PLUS (mode, reg, gen_int_mode (new_offset, mode));
+ else
+ XEXP (mem, 0) = reg;
+
+ bool illegal = insn_invalid_p (insn, false)
+ || !memory_address_addr_space_p (mode, XEXP (mem, 0),
+ MEM_ADDR_SPACE (mem));
+
+ /* Restore the instruction. */
+ XEXP (mem, 0) = mem_addr;
+ INSN_CODE (insn) = icode;
+
+ if (illegal)
+ bitmap_ior_into (&cannot_fold_insns, info->fold_insns);
+ else
+ bitmap_ior_into (&candidate_fold_insns, info->fold_insns);
+}
+
+static bool
+compute_validity_closure (fold_info_map *fold_info)
+{
+ /* Let's say we have an arbitrary chain of foldable instructions xN = xN + C
+ and memory operations rN that use xN as shown below. If folding x1 in r1
+ turns out to be invalid for whatever reason then it's also invalid to fold
+ any of the other xN into any rN. That means that we need the transitive
+ closure of validity to determine whether we can fold an xN instruction.
+
+ +--------------+ +-------------------+ +-------------------+
+ | r1 = mem[x1] | | r2 = mem[x1 + x2] | | r3 = mem[x2 + x3] | ...
+ +--------------+ +-------------------+ +-------------------+
+ ^ ^ ^ ^ ^
+ | / | / | ...
+ | / | / |
+ +-------------+ / +-------------+ / +-------------+
+ | x1 = x1 + 1 |-----+ | x2 = x2 + 1 |-----+ | x3 = x3 + 1 |--- ...
+ +-------------+ +-------------+ +-------------+
+ ^ ^ ^
+ | | |
+ ... ... ...
+ */
+
+ /* Three iterations should be enough for most cases; allow up to five
+ when -fexpensive-optimizations is used. */
+ int max_iters = 3 + 2 * flag_expensive_optimizations;
+ for (int pass = 0; pass < max_iters; pass++)
+ {
+ bool made_changes = false;
+ for (fold_info_map::iterator iter = fold_info->begin ();
+ iter != fold_info->end (); ++iter)
+ {
+ fold_mem_info *info = (*iter).second;
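+ /* If any instruction in this root's fold set is already known to
+ be unfoldable then the whole set becomes unfoldable. */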
+ if (bitmap_intersect_p (&cannot_fold_insns, info->fold_insns))
+ made_changes |= bitmap_ior_into (&cannot_fold_insns,
+ info->fold_insns);
+ }
+
+ if (!made_changes)
+ return true;
+ }
+
+ return false;
+}
+
+/* If INSN is a root memory instruction that was affected by any folding
+ then update its offset as necessary. */
+static void
+do_commit_offset (rtx_insn *insn, fold_mem_info *info)
+{
+ rtx mem, reg;
+ HOST_WIDE_INT cur_offset;
+ if (!get_fold_mem_root (insn, &mem, &reg, &cur_offset))
+ return;
+
+ HOST_WIDE_INT new_offset = cur_offset + info->added_offset;
+
+ if (new_offset == cur_offset)
+ return;
+
+ gcc_assert (!bitmap_empty_p (info->fold_insns));
+
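+ /* Leave INSN unchanged if any instruction that it depends on was
+ found invalid to fold. */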
+ if (bitmap_intersect_p (&cannot_fold_insns, info->fold_insns))
+ return;
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "Memory offset changed from "
+ HOST_WIDE_INT_PRINT_DEC " to " HOST_WIDE_INT_PRINT_DEC
+ " for instruction:\n", cur_offset, new_offset);
+ print_rtl_single (dump_file, insn);
+ }
+
+ machine_mode mode = GET_MODE (XEXP (mem, 0));
+ if (new_offset != 0)
+ XEXP (mem, 0) = gen_rtx_PLUS (mode, reg, gen_int_mode (new_offset, mode));
+ else
+ XEXP (mem, 0) = reg;
+ INSN_CODE (insn) = recog (PATTERN (insn), insn, 0);
+ df_insn_rescan (insn);
+}
+
+/* If INSN is a move / add instruction that was folded then replace its
+ constant part with zero. */
+static void
+do_commit_insn (rtx_insn *insn)
+{
+ if (bitmap_bit_p (&candidate_fold_insns, INSN_UID (insn))
+ && !bitmap_bit_p (&cannot_fold_insns, INSN_UID (insn)))
+ {
+ if (dump_file)
+ {
+ fprintf (dump_file, "Instruction folded:");
+ print_rtl_single (dump_file, insn);
+ }
+
+ stats_fold_count++;
+
+ rtx set = single_set (insn);
+ rtx dest = SET_DEST (set);
+ rtx src = SET_SRC (set);
+
+ /* Emit a move and let subsequent passes eliminate it if possible. */
+ if (GET_CODE (src) == CONST_INT)
+ {
+ /* INSN is R1 = C.
+ Replace it with R1 = 0 because C was folded. */
+ rtx mov_rtx
+ = gen_move_insn (dest, gen_int_mode (0, GET_MODE (dest)));
+ df_insn_rescan (emit_insn_after (mov_rtx, insn));
+ }
+ else
+ {
+ /* INSN is R1 = R2 + C.
+ Replace it with R1 = R2 because C was folded. */
+ rtx arg1 = XEXP (src, 0);
+
+ /* If DEST == ARG1 then the move is a no-op. */
+ if (REGNO (dest) != REGNO (arg1))
+ {
+ gcc_checking_assert (GET_MODE (dest) == GET_MODE (arg1));
+ rtx mov_rtx = gen_move_insn (dest, arg1);
+ df_insn_rescan (emit_insn_after (mov_rtx, insn));
+ }
+ }
+
+ /* Delete the original move / add instruction. */
+ delete_insn (insn);
+ }
+}
+
+unsigned int
+pass_fold_mem_offsets::execute (function *fn)
+{
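+ /* The analysis relies on UD chains to find the single reaching
+ definition of a use and on DU chains to find all uses of a
+ definition. */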
+ df_set_flags (DF_EQ_NOTES + DF_RD_PRUNE_DEAD_DEFS + DF_DEFER_INSN_RESCAN);
+ df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN);
+ df_analyze ();
+
+ bitmap_initialize (&can_fold_insns, NULL);
+ bitmap_initialize (&candidate_fold_insns, NULL);
+ bitmap_initialize (&cannot_fold_insns, NULL);
+
+ stats_fold_count = 0;
+
+ basic_block bb;
+ rtx_insn *insn;
+ FOR_ALL_BB_FN (bb, fn)
+ {
+ /* There is a conflict between this pass and RISC-V's shorten-memrefs
+ pass. For now disable folding when optimizing for size, because
+ otherwise this cancels the effects of shorten-memrefs. */
+ if (optimize_bb_for_size_p (bb))
+ continue;
+
+ fold_info_map fold_info;
+
+ bitmap_clear (&can_fold_insns);
+ bitmap_clear (&candidate_fold_insns);
+ bitmap_clear (&cannot_fold_insns);
+
+ FOR_BB_INSNS (bb, insn)
+ do_analysis (insn);
+
+ FOR_BB_INSNS (bb, insn)
+ do_fold_info_calculation (insn, &fold_info);
+
+ FOR_BB_INSNS (bb, insn)
+ if (fold_mem_info **info = fold_info.get (insn))
+ do_check_validity (insn, *info);
+
+ if (compute_validity_closure (&fold_info))
+ {
+ FOR_BB_INSNS (bb, insn)
+ if (fold_mem_info **info = fold_info.get (insn))
+ do_commit_offset (insn, *info);
+
+ FOR_BB_INSNS (bb, insn)
+ do_commit_insn (insn);
+ }
+
+ for (fold_info_map::iterator iter = fold_info.begin ();
+ iter != fold_info.end (); ++iter)
+ delete (*iter).second;
+ }
+
+ statistics_counter_event (cfun, "Number of folded instructions",
+ stats_fold_count);
+
+ bitmap_release (&can_fold_insns);
+ bitmap_release (&candidate_fold_insns);
+ bitmap_release (&cannot_fold_insns);
+
+ return 0;
+}
+
+} // anon namespace
+
+rtl_opt_pass *
+make_pass_fold_mem_offsets (gcc::context *ctxt)
+{
+ return new pass_fold_mem_offsets (ctxt);
+}