Improve initial code generation for addsi/adddi

author Shreya Munnangi <smunnangi1@ventanamicro.com>

Tue, 12 Aug 2025 03:42:50 +0000 (21:42 -0600)

committer Jeff Law <jlaw@ventanamicro.com>

Tue, 12 Aug 2025 03:46:04 +0000 (21:46 -0600)
author Shreya Munnangi <smunnangi1@ventanamicro.com>
Tue, 12 Aug 2025 03:42:50 +0000 (21:42 -0600)
committer Jeff Law <jlaw@ventanamicro.com>
Tue, 12 Aug 2025 03:46:04 +0000 (21:46 -0600)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h

index 539321ff95b8379f24cf92868dcaa27aabd37bac..b497325153521664c37e118b4eeba9b419c50c89 100644 (file)
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -143,6 +143,7 @@ extern void riscv_expand_sstrunc (rtx, rtx);
  extern int riscv_register_move_cost (machine_mode, reg_class_t, reg_class_t);
  extern bool synthesize_ior_xor (rtx_code, rtx [3]);
  extern bool synthesize_and (rtx [3]);
+extern bool synthesize_add (rtx [3]);
  
  #ifdef RTX_CODE
  extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool *invert_ptr = 0);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc

index c336584554fecc14946b9b5459aa35af1844b9ee..4935367dd0e03f383bf2cee97d1f66f2a9839f57 100644 (file)
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -15353,6 +15353,100 @@ synthesize_and (rtx operands[3])
    return true;
  }
  
+/* Synthesize OPERANDS[0] = OPERANDS[1] + OPERANDS[2].
+
+    OPERANDS[0] and OPERANDS[1] will be a REG and may be the same
+    REG.
+
+    OPERANDS[2] is a CONST_INT.
+
+    Return TRUE if the operation was fully synthesized and the caller
+    need not generate additional code.  Return FALSE if the operation
+    was not synthesized and the caller is responsible for emitting the
+    proper sequence.  FALSE is only returned when OPERANDS[2] is a
+    SMALL_OPERAND; every other constant is fully handled here.  */
+
+bool
+synthesize_add (rtx operands[3])
+{
+  const HOST_WIDE_INT ival = INTVAL (operands[2]);
+
+  /* Trivial cases that don't need synthesis.  */
+  if (SMALL_OPERAND (ival))
+    return false;
+
+  /* Negate in the unsigned type so that the most negative
+     HOST_WIDE_INT wraps back to itself rather than overflowing.  In
+     that case the negated constant costs the same as the original and
+     the sub sequence below is simply not selected.  */
+  const HOST_WIDE_INT neg_ival
+    = (HOST_WIDE_INT) -(unsigned HOST_WIDE_INT) ival;
+
+  int budget1 = riscv_const_insns (operands[2], true);
+  int budget2 = riscv_const_insns (GEN_INT (neg_ival), true);
+
+  /* If we can emit two addi insns then that's better than synthesizing
+     the constant into a temporary, then adding the temporary to the
+     other input.  The exception is when the constant can be loaded
+     in a single instruction which can issue whenever it's convenient.  */
+  if (SUM_OF_TWO_S12 (ival) && budget1 >= 2)
+    {
+      /* The first addi uses the extreme immediate with the same sign
+         as the constant; the second addi adds whatever is left.  */
+      HOST_WIDE_INT saturated = HOST_WIDE_INT_M1U << (IMM_BITS - 1);
+
+      if (ival >= 0)
+        saturated = ~saturated;
+
+      rtx x = gen_rtx_PLUS (word_mode, operands[1], GEN_INT (saturated));
+      emit_insn (gen_rtx_SET (operands[0], x));
+      rtx output = gen_rtx_PLUS (word_mode, operands[0],
+                                 GEN_INT (ival - saturated));
+      emit_insn (gen_rtx_SET (operands[0], output));
+      return true;
+    }
+
+  /* If we can shift the constant by 1, 2, or 3 bit positions
+     and the result is a cheaper constant, then do so.  */
+  if (TARGET_ZBA)
+    {
+      /* shNadd handles shifts of up to 3 bit positions, and only the
+         trailing zero bits of the constant can be shifted out.  */
+      int max_shct = MIN (ctz_hwi (ival), 3);
+      int best_shct = 0;
+      int best_cost = budget1;
+
+      /* Pick the shift count that actually gives the cheapest
+         constant.  Walking downwards with a strict comparison
+         prefers the largest shift when several counts tie.  */
+      for (int shct = max_shct; shct >= 1; shct--)
+        {
+          int cost = riscv_const_insns (GEN_INT (ival >> shct), true);
+
+          if (cost < best_cost)
+            {
+              best_cost = cost;
+              best_shct = shct;
+            }
+        }
+
+      if (best_shct != 0)
+        {
+          /* The adjusted constant may still need synthesis, so do not
+             copy it directly into register.  Let the expander handle
+             it.  */
+          rtx tmp = force_reg (word_mode, GEN_INT (ival >> best_shct));
+
+          /* Generate shift-add of temporary and operands[1]
+             into the final destination.  */
+          rtx x = gen_rtx_ASHIFT (word_mode, tmp, GEN_INT (best_shct));
+          rtx output = gen_rtx_PLUS (word_mode, x, operands[1]);
+          emit_insn (gen_rtx_SET (operands[0], output));
+          return true;
+        }
+    }
+
+  /* If the negated constant is cheaper than the original, then negate
+     the constant and use sub.  */
+  if (budget2 < budget1)
+    {
+      /* Load -INTVAL (operands[2]) into a temporary.  */
+      rtx tmp = force_reg (word_mode, GEN_INT (neg_ival));
+
+      /* Subtract the negated constant from operands[1].  */
+      rtx output = gen_rtx_MINUS (word_mode, operands[1], tmp);
+      emit_insn (gen_rtx_SET (operands[0], output));
+      return true;
+    }
+
+  /* No add synthesis was found.  Synthesize the constant into
+     a temporary and use that.  */
+  rtx x = force_reg (word_mode, operands[2]);
+  x = gen_rtx_PLUS (word_mode, operands[1], x);
+  emit_insn (gen_rtx_SET (operands[0], x));
+  return true;
+}
  
  /* Initialize the GCC target structure.  */
  #undef TARGET_ASM_ALIGNED_HI_OP
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md

index 578dd43441e2d18340a53646712eb90cba3aa63d..a72604e292183e94ee17c3f267b7b79c49a8731e 100644 (file)
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -712,14 +712,17 @@
     (set_attr "mode" "SI")])
  
  (define_expand "addsi3"
-  [(set (match_operand:SI          0 "register_operand" "=r,r")
-       (plus:SI (match_operand:SI 1 "register_operand" " r,r")
-                (match_operand:SI 2 "arith_operand"    " r,I")))]
+  [(set (match_operand:SI          0 "register_operand")
+       (plus:SI (match_operand:SI 1 "register_operand")
+                (match_operand:SI 2 "reg_or_const_int_operand")))]
    ""
  {
    if (TARGET_64BIT)
      {
        rtx t = gen_reg_rtx (DImode);
+
+      /* SMALL_OPERAND tests an integer value, so it must be given the
+         constant's INTVAL rather than the rtx itself.  Constants that
+         do not fit an addi immediate are loaded into a register.  */
+      if (CONST_INT_P (operands[2]) && !SMALL_OPERAND (INTVAL (operands[2])))
+       operands[2] = force_reg (SImode, operands[2]);
        emit_insn (gen_addsi3_extended (t, operands[1], operands[2]));
        t = gen_lowpart (SImode, t);
        SUBREG_PROMOTED_VAR_P (t) = 1;
@@ -727,9 +730,26 @@
        emit_move_insn (operands[0], t);
        DONE;
      }
+
+  /* Reached only when !TARGET_64BIT.  For a constant addend,
+     synthesize_add may emit a faster sequence; if it does, we are
+     done.  It returns false when it emitted nothing, in which case
+     expansion continues normally.  */
+  if (CONST_INT_P (operands[2]) && synthesize_add (operands))
+    DONE;
+})
+
+;; DImode addition expander, available only when TARGET_64BIT.
+;; Constant addends are first offered to synthesize_add.
+(define_expand "adddi3"
+  [(set (match_operand:DI          0 "register_operand")
+       (plus:DI (match_operand:DI 1 "register_operand")
+                (match_operand:DI 2 "reg_or_const_int_operand")))]
+  "TARGET_64BIT"
+{
+  /* For a constant addend, synthesize_add may emit a faster
+     sequence; if it does, we are done.  It returns false when it
+     emitted nothing, in which case expansion continues normally.  */
+  if (CONST_INT_P (operands[2]) && synthesize_add (operands))
+    DONE;
 })
  
-(define_insn "adddi3"
+(define_insn "*adddi3"
    [(set (match_operand:DI          0 "register_operand" "=r,r")
         (plus:DI (match_operand:DI 1 "register_operand" " r,r")
                  (match_operand:DI 2 "arith_operand"    " r,I")))]
diff --git a/gcc/testsuite/gcc.target/riscv/add-synthesis-1.c b/gcc/testsuite/gcc.target/riscv/add-synthesis-1.c

new file mode 100644 (file)

index 0000000..247096c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/add-synthesis-1.c
@@ -0,0 +1,40 @@
+/* { dg-options "-march=rv32gcb -mabi=ilp32d" { target { rv32 } } } */
+/* { dg-options "-march=rv64gcb -mabi=lp64d" { target { rv64 } } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+
+
+/* Use long when XLEN is 64 and int otherwise.  */
+#if  __riscv_xlen == 64
+#define TYPE long
+#else
+#define TYPE int
+#endif
+
+/* T (C) defines foo_C, which returns x + C.  TM (C) defines foo_MC,
+   which returns x + -C.  */
+#define T(C)  TYPE foo_##C (TYPE x) { return x + C; }
+#define TM(C)  TYPE foo_M##C (TYPE x) { return x + -C; }
+
+/* These cases were selected because they all can be synthesized
+   at expansion time without synthesizing the constant directly.
+
+   That makes the assembler scan testing simpler.  I've verified
+   by hand that cases that should synthesize the constant do in
+   fact still generate code that way.  */
+T (2050)
+T (4094)
+T (4100)
+T (8200)
+
+TM (2049)
+TM (4096)
+TM (4100)
+TM (8200)
+
+#if  __riscv_xlen == 64
+TM (0x200000000)
+#endif
+
+/* We have 4 (rv32) or 5 (rv64) tests which should use shNadd insns
+   and 4 which should use paired addi insns.  */
+/* { dg-final { scan-assembler-times "sh.add\t" 4 { target { rv32 } } } } */
+/* { dg-final { scan-assembler-times "sh.add\t" 5 { target { rv64 } } } } */
+/* { dg-final { scan-assembler-times "addi\t" 8 } } */
author	Shreya Munnangi <smunnangi1@ventanamicro.com>
	Tue, 12 Aug 2025 03:42:50 +0000 (21:42 -0600)
committer	Jeff Law <jlaw@ventanamicro.com>
	Tue, 12 Aug 2025 03:46:04 +0000 (21:46 -0600)
gcc/config/riscv/riscv-protos.h		patch \| blob \| blame \| history
gcc/config/riscv/riscv.cc		patch \| blob \| blame \| history
gcc/config/riscv/riscv.md		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/riscv/add-synthesis-1.c	[new file with mode: 0644]	patch \| blob