pru: Inline muldi3 when optimizing for speed

author Dimitar Dimitrov <dimitar@dinux.eu>

Wed, 25 Mar 2026 20:34:05 +0000 (22:34 +0200)

committer Dimitar Dimitrov <dimitar@dinux.eu>

Sat, 23 May 2026 06:55:33 +0000 (09:55 +0300)
author Dimitar Dimitrov <dimitar@dinux.eu>
Wed, 25 Mar 2026 20:34:05 +0000 (22:34 +0200)
committer Dimitar Dimitrov <dimitar@dinux.eu>
Sat, 23 May 2026 06:55:33 +0000 (09:55 +0300)
diff --git a/gcc/config/pru/constraints.md b/gcc/config/pru/constraints.md

index 07d82284a548f6003756d624d6ae0acb0a593535..265a3c8f91e54836bfa46d02894bbb5ab2b2cfa3 100644 (file)
--- a/gcc/config/pru/constraints.md
+++ b/gcc/config/pru/constraints.md
@@ -53,7 +53,8 @@
  
  (define_register_constraint "Rmd0" "MULDST_REGS"
    "@internal
-  The multiply destination register.")
+  The multiply destination register."
+  "regno == MULDST_REGNUM")
  
  (define_register_constraint "Rms0" "MULSRC0_REGS"
    "@internal
diff --git a/gcc/config/pru/pru.h b/gcc/config/pru/pru.h

index f469122beea657ef48c7761c6d0b79748f8ca2cb..a370d08c38545d468df077bdd83b2f1072014621 100644 (file)
--- a/gcc/config/pru/pru.h
+++ b/gcc/config/pru/pru.h
@@ -246,7 +246,7 @@ enum reg_class
      /* NO_REGS       */ { 0, 0, 0, 0, 0},                      \
      /* SIB_REGS              */ { 0xf, 0xff000000u, ~0u, 0xffffffu, 0},\
      /* LOOPCNTR_REGS  */ { 0, 0, 0, 0, 0xf},                   \
-    /* MULDST_REGS    */ { 0, 0, 0, 0x00000f00u, 0},           \
+    /* MULDST_REGS    */ { 0, 0, 0, 0x0000ff00u, 0},           \
      /* MULSRC0_REGS   */ { 0, 0, 0, 0x000f0000u, 0},           \
      /* MULSRC1_REGS   */ { 0, 0, 0, 0x00f00000u, 0},           \
      /* REGIO_REGS     */ { 0, 0, 0, 0xff000000u, 0},           \
diff --git a/gcc/config/pru/pru.md b/gcc/config/pru/pru.md

index e62fcf27a863a08357ba739eedbe072b79d178d9..93638963d29e0d21a2f05c34ea0b8323a2eb986f 100644 (file)
--- a/gcc/config/pru/pru.md
+++ b/gcc/config/pru/pru.md
@@ -1220,6 +1220,82 @@
    "nop\;xin\\t0, %0, 4"
    [(set_attr "type" "alu")
     (set_attr "length" "8")])
+
+(define_insn "umulsidi3"
+  [(set (match_operand:DI 0 "pru_muldst_operand"          "=Rmd0")
+       (mult:DI
+         (zero_extend:DI
+           (match_operand:SI 1 "pru_mulsrc0_operand"      "%Rms0"))
+         (zero_extend:DI
+           (match_operand:SI 2 "pru_mulsrc1_operand"      "Rms1"))))]
+  "TARGET_OPT_MUL"
+  "nop\;xin\\t0, %0, 8"
+  [(set_attr "type" "alu")
+   (set_attr "length" "8")])
+
+
+;; If optimizing for speed, prefer to inline 64-bit multiplication,
+;; in order to avoid the overhead of calling a library function.
+(define_expand "muldi3"
+  [(parallel [(set (match_operand:DI 0 "register_operand")
+       (mult:DI (match_operand:DI 1 "register_operand")
+                (match_operand:DI 2 "register_operand")))
+                (clobber (reg:DI MULDST_REGNUM))
+                (clobber (reg:SI MULSRC0_REGNUM))
+                (clobber (reg:SI MULSRC1_REGNUM))
+                ])]
+  "TARGET_OPT_MUL && !optimize_size && can_create_pseudo_p ()"
+{
+  rtx op0_lo = simplify_gen_subreg (SImode, operands[0], DImode, 0);
+  rtx op0_hi = simplify_gen_subreg (SImode, operands[0], DImode, 4);
+  rtx op1_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0);
+  rtx op1_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4);
+  rtx op2_lo = simplify_gen_subreg (SImode, operands[2], DImode, 0);
+  rtx op2_hi = simplify_gen_subreg (SImode, operands[2], DImode, 4);
+
+  gcc_assert (!reload_completed);
+
+  rtx cross_product1 = gen_reg_rtx (SImode);
+  rtx cross_product2 = gen_reg_rtx (SImode);
+  rtx low_product = gen_reg_rtx (DImode);
+  rtx cross_scratch1_hi = gen_reg_rtx (SImode);
+
+  rtx reg_mulsrc0_si = gen_reg_rtx (SImode);
+  rtx reg_mulsrc1_si = gen_reg_rtx (SImode);
+  rtx reg_muldst_di = gen_reg_rtx (DImode);
+
+  /* (al + C * ah) * (bl + C * bh) =   al * bl
+                                       + C * ah * bl
+                                       + C * al * bh
+                                       + C * C * ah * bh  -> discard, overflow
+      Where C=(1 << 32).  */
+
+  emit_move_insn (cross_product1,
+                 gen_rtx_MULT (SImode, op1_hi, op2_lo));
+  emit_move_insn (cross_product2,
+                 gen_rtx_MULT (SImode, op1_lo, op2_hi));
+
+  /* Calculate "al * bl".  */
+  emit_move_insn (reg_mulsrc0_si, op1_lo);
+  emit_move_insn (reg_mulsrc1_si, op2_lo);
+  emit_insn (gen_umulsidi3 (reg_muldst_di, reg_mulsrc0_si, reg_mulsrc1_si));
+  emit_move_insn (low_product, reg_muldst_di);
+
+  emit_move_insn (cross_scratch1_hi,
+                 gen_rtx_PLUS (SImode,
+                               cross_product1,
+                               cross_product2));
+  emit_move_insn (op0_lo,
+                 simplify_gen_subreg (SImode, low_product, DImode, 0));
+  emit_move_insn (op0_hi,
+                 gen_rtx_PLUS (SImode,
+                               simplify_gen_subreg (SImode,
+                                                    low_product,
+                                                    DImode,
+                                                    4),
+                               cross_scratch1_hi));
+  DONE;
+})
  \f
  ;; Prologue, Epilogue and Return
author	Dimitar Dimitrov <dimitar@dinux.eu>
	Wed, 25 Mar 2026 20:34:05 +0000 (22:34 +0200)
committer	Dimitar Dimitrov <dimitar@dinux.eu>
	Sat, 23 May 2026 06:55:33 +0000 (09:55 +0300)
gcc/config/pru/constraints.md		patch \| blob \| blame \| history
gcc/config/pru/pru.h		patch \| blob \| blame \| history
gcc/config/pru/pru.md		patch \| blob \| blame \| history