git.ipfire.org Git - thirdparty/gcc.git/commitdiff
pru: Split 64-bit moves into a sequence of 32-bit moves
author Dimitar Dimitrov <dimitar@dinux.eu>
Sun, 9 Feb 2025 15:55:03 +0000 (17:55 +0200)
committer Dimitar Dimitrov <dimitar@dinux.eu>
Thu, 26 Jun 2025 19:09:11 +0000 (22:09 +0300)
The 64-bit register-to-register moves on PRU are implemented with two
instructions moving 32-bit registers.  Defining a split for the 64-bit
moves allows this to be described in RTL, and thus allows one of the
32-bit moves to be eliminated if its destination register is dead.

Also, split the loading of non-trivial 64-bit integer constants.  The
resulting 32-bit integer constants have a better chance of being loaded
with something more optimal than an "ldi32".

For now do the splits only after register allocation, because LRA does
not yet efficiently handle subregs.  See
https://gcc.gnu.org/pipermail/gcc-patches/2024-May/651366.html

This patch shows a slight improvement for the wikisort benchmark from
embench-iot:

Benchmark          size-before  size-after  difference
---------          -----------  ----------  ----------
aha-mont64          1,648       1,648       0
crc32                 104       104         0
depthconv           1,172       1,172       0
edn                 3,040       3,040       0
huffbench           1,616       1,616       0
matmult-int           748       748         0
md5sum                700       700         0
nettle-aes          2,664       2,664       0
nettle-sha256       5,732       5,732       0
nsichneu           21,372       21,372      0
picojpeg            9,716       9,716       0
qrduino             8,556       8,556       0
sglib-combined      3,724       3,724       0
slre                3,488       3,488       0
statemate           1,132       1,132       0
tarfind               652       652         0
ud                  1,004       1,004       0
wikisort           18,120       18,092      -28
xgboost               300       300         0

gcc/ChangeLog:

* config/pru/pru.md (reg move splitter): New splitter for 64-bit
register moves into two 32-bit moves.
(const_int move splitter): New splitter for 64-bit constant
integer moves into two 32-bit moves.

gcc/testsuite/ChangeLog:

* gcc.target/pru/mov64-subreg-1.c: New test.
* gcc.target/pru/mov64-subreg-2.c: New test.

Signed-off-by: Dimitar Dimitrov <dimitar@dinux.eu>
gcc/config/pru/pru.md
gcc/testsuite/gcc.target/pru/mov64-subreg-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/pru/mov64-subreg-2.c [new file with mode: 0644]

index fcd310613f508ad6d0db8d26dad4b05809b82f96..3504e42e9002e78127811230c5843d3900f84ca5 100644 (file)
   [(set_attr "type" "st,ld,alu,alu,alu,alu,alu,alu")
    (set_attr "length" "4,4,4,4,8,8,8,16")])
 
+; Break 64-bit register-to-register moves into two 32-bit moves.
+; If only one 32-bit subreg of the destination is used, this split
+; allows the move into the other 32-bit subreg of the DI register
+; to be eliminated.
+;
+; The preparation statements choose the operands of the two SImode sets:
+;   - for a zero-extending (promoted unsigned) paradoxical subreg source,
+;     the low word is copied and the high word is set to zero;
+;   - otherwise the two word moves are ordered so that the high source
+;     word is read before an overlapping low destination word is written.
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+       (match_operand:DI 1 "register_operand"))]
+  "
+   /* TODO - LRA does not yet handle subregs efficiently.
+      So it is profitable to split only after register allocation is
+      complete.
+      Once https://gcc.gnu.org/pipermail/gcc-patches/2024-May/651366.html
+      is merged, this condition should be removed to allow splitting
+      before LRA.  */
+   reload_completed
+   /* Sign-extended paradoxical registers require expansion
+      of the proper pattern.  We can do only zero extension here.  */
+   && (SUBREG_P (operands[1]) && paradoxical_subreg_p (operands[1])
+       ? SUBREG_PROMOTED_VAR_P (operands[1])
+         && SUBREG_PROMOTED_UNSIGNED_P (operands[1]) > 0
+       : true)"
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 2) (match_dup 3))]
+  "
+  /* SImode halves of both DImode operands: byte offset 0 is the low
+     word, byte offset 4 is the high word.  */
+  rtx dst_lo = simplify_gen_subreg (SImode, operands[0], DImode, 0);
+  rtx dst_hi = simplify_gen_subreg (SImode, operands[0], DImode, 4);
+  rtx src_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0);
+  rtx src_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4);
+
+  if (SUBREG_P (operands[1]) && paradoxical_subreg_p (operands[1]))
+    {
+      /* The split condition admits paradoxical subregs only when they
+	 are promoted unsigned; re-check that here.  */
+      gcc_assert (SUBREG_PROMOTED_VAR_P (operands[1]));
+      gcc_assert (SUBREG_PROMOTED_UNSIGNED_P (operands[1]) > 0);
+
+      /* Zero extension: copy the low word, clear the high word.  */
+      operands[0] = dst_lo;
+      operands[1] = src_lo;
+      operands[2] = dst_hi;
+      operands[3] = const0_rtx;
+    }
+  else if (!reg_overlap_mentioned_p (dst_lo, src_hi))
+    {
+      /* Writing dst_lo does not clobber src_hi, so emit the low word
+	 move first.  */
+      operands[0] = dst_lo;
+      operands[1] = src_lo;
+      operands[2] = dst_hi;
+      operands[3] = src_hi;
+    }
+  else
+    {
+      /* dst_lo overlaps src_hi: emit the high word move first, so that
+	 src_hi is read before dst_lo is written.  */
+      operands[0] = dst_hi;
+      operands[1] = src_hi;
+      operands[2] = dst_lo;
+      operands[3] = src_lo;
+    }
+  "
+)
+
+; Break loading of non-trivial 64-bit constant integers.  The split
+; does not by itself produce a better code sequence, but it at least
+; allows a non-live 32-bit part of the destination to be dropped, and
+; enables better constant propagation.
+;
+; The split applies only once reload_completed is set.  Constants
+; satisfying the Z, Um or T constraints are not split by this pattern.
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+       (match_operand:DI 1 "const_int_operand"))]
+  "reload_completed
+   && !satisfies_constraint_Z (operands[1])
+   && !satisfies_constraint_Um (operands[1])
+   && !satisfies_constraint_T (operands[1])"
+
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 2) (match_dup 3))]
+  "
+  /* Derive the high-word operands (byte offset 4) first: they are
+     computed from the original DImode operands[0] and operands[1],
+     which the low-word (byte offset 0) assignments below overwrite.  */
+  operands[2] = simplify_gen_subreg (SImode, operands[0], DImode, 4);
+  operands[3] = simplify_gen_subreg (SImode, operands[1], DImode, 4);
+  operands[0] = simplify_gen_subreg (SImode, operands[0], DImode, 0);
+  operands[1] = simplify_gen_subreg (SImode, operands[1], DImode, 0);
+  "
+)
+
 ;
 ; load_multiple pattern(s).
 ;
diff --git a/gcc/testsuite/gcc.target/pru/mov64-subreg-1.c b/gcc/testsuite/gcc.target/pru/mov64-subreg-1.c
new file mode 100644 (file)
index 0000000..9b60aa0
--- /dev/null
@@ -0,0 +1,9 @@
+/* { dg-do assemble } */
+/* { dg-options "-Os" } */
+/* { dg-final { object-size text == 8 } } */
+
+
+unsigned test(char a, unsigned long long b)
+{
+        return b; /* Implicitly truncates the 64-bit B to unsigned.  */
+}
diff --git a/gcc/testsuite/gcc.target/pru/mov64-subreg-2.c b/gcc/testsuite/gcc.target/pru/mov64-subreg-2.c
new file mode 100644 (file)
index 0000000..146cf94
--- /dev/null
@@ -0,0 +1,8 @@
+/* { dg-do assemble } */
+/* { dg-options "-Os" } */
+/* { dg-final { object-size text == 12 } } */
+
+unsigned long long test(void)
+{
+       return 0xffffffff00000000UL; /* Upper 32 bits all ones, lower 32 bits zero.  */
+}