]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
Allow mode-switching to introduce internal loops [PR113220]
authorRichard Sandiford <richard.sandiford@arm.com>
Wed, 21 Feb 2024 11:12:26 +0000 (11:12 +0000)
committerRichard Sandiford <richard.sandiford@arm.com>
Wed, 21 Feb 2024 11:12:26 +0000 (11:12 +0000)
In this PR, the SME mode-switching code needs to insert a stack-probe
loop for an alloca.  This patch allows the target to do that.

There are two parts to it: allowing loops for insertions in blocks,
and allowing them for insertions on edges.  The former can be handled
entirely within mode-switching itself, by recording which blocks have
had new branches inserted.  The latter requires an extension to
commit_one_edge_insertion.

I think the extension to commit_one_edge_insertion makes logical sense,
since it already explicitly allows internal loops during RTL expansion.
The single-block find_sub_basic_blocks is a relatively recent addition,
so wouldn't have been available when the code was originally written.

The patch also has a small and obvious fix to make the aarch64 emit
hook cope with labels.

I've added specific -fstack-clash-protection versions of all
aarch64-sme.exp tests that previously failed because of this bug.
I've also added -fno-stack-clash-protection to the original versions
of these tests if they contain scans that assume no protection.

gcc/
PR target/113220
* cfgrtl.cc (commit_one_edge_insertion): Handle sequences that
contain jumps even if called after initial RTL expansion.
* mode-switching.cc: Include cfgbuild.h.
(optimize_mode_switching): Allow the sequence returned by the
emit hook to contain internal jumps.  Record which blocks
contain such jumps and split the blocks at the end.
* config/aarch64/aarch64.cc (aarch64_mode_emit): Check for
non-debug insns when scanning the sequence.

gcc/testsuite/
PR target/113220
* gcc.target/aarch64/sme/call_sm_switch_5.c: Add
-fno-stack-clash-protection.
* gcc.target/aarch64/sme/call_sm_switch_5_scp.c: New test.
* gcc.target/aarch64/sme/sibcall_6_scp.c: New test.
* gcc.target/aarch64/sme/za_state_4.c: Add
-fno-stack-clash-protection.
* gcc.target/aarch64/sme/za_state_4_scp.c: New test.
* gcc.target/aarch64/sme/za_state_5.c: Add
-fno-stack-clash-protection.
* gcc.target/aarch64/sme/za_state_5_scp.c: New test.

gcc/cfgrtl.cc
gcc/config/aarch64/aarch64.cc
gcc/mode-switching.cc
gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c
gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5_scp.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sme/sibcall_6_scp.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c
gcc/testsuite/gcc.target/aarch64/sme/za_state_4_scp.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sme/za_state_5.c
gcc/testsuite/gcc.target/aarch64/sme/za_state_5_scp.c [new file with mode: 0644]

index 15259c5e98478a97c343591d6f0370f2df5a262f..304c429c99be23354c298d5aa80897534b7d62d9 100644 (file)
@@ -2018,6 +2018,21 @@ commit_one_edge_insertion (edge e)
   insns = e->insns.r;
   e->insns.r = NULL;
 
+  /* Allow the sequence to contain internal jumps, such as a memcpy loop
+     or an allocation loop.  If such a sequence is emitted during RTL
+     expansion, we'll create the appropriate basic blocks later,
+     at the end of the pass.  But if such a sequence is emitted after
+     initial expansion, we'll need to find the subblocks ourselves.  */
+  bool contains_jump = false;
+  if (!currently_expanding_to_rtl)
+    for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
+      if (JUMP_P (insn))
+       {
+         rebuild_jump_labels_chain (insns);
+         contains_jump = true;
+         break;
+       }
+
   /* Figure out where to put these insns.  If the destination has
      one predecessor, insert there.  Except for the exit block.  */
   if (single_pred_p (e->dest) && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
@@ -2112,13 +2127,13 @@ commit_one_edge_insertion (edge e)
        delete_insn (before);
     }
   else
-    /* Some builtin expanders, such as those for memset and memcpy,
-       may generate loops and conditionals, and those may get emitted
-       into edges.  That's ok while expanding to rtl, basic block
-       boundaries will be identified and split afterwards.  ???  Need
-       we check whether the destination labels of any inserted jumps
-       are also part of the inserted sequence?  */
+    /* Sequences inserted after RTL expansion are expected to be SESE,
+       with only internal branches allowed.  If the sequence jumps outside
+       itself then we do not know how to add the associated edges here.  */
     gcc_assert (!JUMP_P (last) || currently_expanding_to_rtl);
+
+  if (contains_jump)
+    find_sub_basic_blocks (bb);
 }
 
 /* Update the CFG for all queued instructions.  */
index 76f1230d81a4dc397746fa37afae0a99a5505dc0..104f7e1831ef1f99bf963f40876fafaf77fd132e 100644 (file)
@@ -29513,6 +29513,8 @@ aarch64_mode_emit (int entity, int mode, int prev_mode, HARD_REG_SET live)
   HARD_REG_SET clobbers = {};
   for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
     {
+      if (!NONDEBUG_INSN_P (insn))
+       continue;
       vec_rtx_properties properties;
       properties.add_insn (insn, false);
       for (rtx_obj_reference ref : properties.refs ())
index 01754e9eed41bc859b11fdee57ad1e19e78ae3f9..583929184ce70f0393da404633e23a0971ebd4b6 100644 (file)
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "lcm.h"
 #include "cfgcleanup.h"
 #include "tree-pass.h"
+#include "cfgbuild.h"
 
 /* We want target macros for the mode switching code to be able to refer
    to instruction attribute values.  */
@@ -1105,6 +1106,8 @@ optimize_mode_switching (void)
   edge_list = pre_edge_lcm_avs (n_entities * max_num_modes, transp, comp, antic,
                                kill, avin, avout, &insert, &del);
 
+  auto_sbitmap jumping_blocks (last_basic_block_for_fn (cfun));
+  bitmap_clear (jumping_blocks);
   for (j = n_entities - 1; j >= 0; j--)
     {
       int no_mode = num_modes[entity_map[j]];
@@ -1215,6 +1218,13 @@ optimize_mode_switching (void)
                  /* Insert MODE_SET only if it is nonempty.  */
                  if (mode_set != NULL_RTX)
                    {
+                     for (auto insn = mode_set; insn; insn = NEXT_INSN (insn))
+                       if (JUMP_P (insn))
+                         {
+                           rebuild_jump_labels_chain (mode_set);
+                           bitmap_set_bit (jumping_blocks, bb->index);
+                           break;
+                         }
                      emitted = true;
                      if (NOTE_INSN_BASIC_BLOCK_P (ptr->insn_ptr))
                        /* We need to emit the insns in a FIFO-like manner,
@@ -1252,6 +1262,11 @@ optimize_mode_switching (void)
   sbitmap_vector_free (avin);
   sbitmap_vector_free (avout);
 
+  gcc_assert (SBITMAP_SIZE ((sbitmap) jumping_blocks)
+             == (unsigned int) last_basic_block_for_fn (cfun));
+  if (!bitmap_empty_p (jumping_blocks))
+    find_many_sub_basic_blocks (jumping_blocks);
+
   if (need_commit)
     commit_edge_insertions ();
 
index 6238ab80da2ffb974c26945ff14ae6cea25658ea..d31b6b91f1f02bb30a9d9925460f9590b103d8e0 100644 (file)
@@ -1,4 +1,4 @@
-// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" }
+// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables -fno-stack-clash-protection" }
 // { dg-final { check-function-bodies "**" "" } }
 
 #include <arm_sve.h>
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5_scp.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5_scp.c
new file mode 100644 (file)
index 0000000..09c6446
--- /dev/null
@@ -0,0 +1,3 @@
+// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" }
+
+#include "call_sm_switch_5.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_6_scp.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_6_scp.c
new file mode 100644 (file)
index 0000000..4f2e574
--- /dev/null
@@ -0,0 +1,3 @@
+/* { dg-options "-O2 -fstack-clash-protection" } */
+
+#include "sibcall_6.c"
index cec0abf0ea990632f9535914156c25b0037edca6..f27470bb8a0066e5c6df4afafdb379b8c3784a52 100644 (file)
@@ -1,4 +1,4 @@
-// { dg-options "-O -fno-optimize-sibling-calls" }
+// { dg-options "-O -fno-optimize-sibling-calls -fno-stack-clash-protection" }
 // { dg-final { check-function-bodies "**" "" } }
 
 void private_za();
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_4_scp.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_4_scp.c
new file mode 100644 (file)
index 0000000..1620f88
--- /dev/null
@@ -0,0 +1,3 @@
+// { dg-options "-O -fno-optimize-sibling-calls -fstack-clash-protection" }
+
+#include "za_state_4.c"
index d54840d3d774e3dddf37f148e074681fa6a972b8..52c96b3c32c7d5d18c4db4e93913543ff4382acc 100644 (file)
@@ -1,4 +1,4 @@
-// { dg-options "-O2 -fno-optimize-sibling-calls" }
+// { dg-options "-O2 -fno-optimize-sibling-calls -fno-stack-clash-protection" }
 // { dg-final { check-function-bodies "**" "" } }
 
 void private_za();
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_5_scp.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_5_scp.c
new file mode 100644 (file)
index 0000000..8b4d6e1
--- /dev/null
@@ -0,0 +1,3 @@
+// { dg-options "-O -fno-optimize-sibling-calls -fstack-clash-protection" }
+
+#include "za_state_5.c"