return true;
}
+/* Synthesize OPERANDS[0] = OPERANDS[1] + OPERANDS[2].
+
+   OPERANDS[0] and OPERANDS[1] will be a REG and may be the same
+   REG.
+
+   OPERANDS[2] is a CONST_INT.
+
+   Return TRUE if the operation was fully synthesized and the caller
+   need not generate additional code.  Return FALSE if the operation
+   was not synthesized and the caller is responsible for emitting the
+   proper sequence.  FALSE is only returned when OPERANDS[2] already
+   satisfies SMALL_OPERAND; every other constant is handled here.  */
+
+bool
+synthesize_add (rtx operands[3])
+{
+  const HOST_WIDE_INT ival = INTVAL (operands[2]);
+
+  /* Trivial cases that don't need synthesis.  */
+  if (SMALL_OPERAND (ival))
+    return false;
+
+  /* Negate in the unsigned domain so that a constant equal to the
+     minimum HOST_WIDE_INT does not overflow in the host compiler.  */
+  const HOST_WIDE_INT neg_ival
+    = (HOST_WIDE_INT) -(unsigned HOST_WIDE_INT) ival;
+
+  /* Cost of materializing the constant and its negation.  */
+  int budget1 = riscv_const_insns (operands[2], true);
+  int budget2 = riscv_const_insns (GEN_INT (neg_ival), true);
+
+  /* If we can emit two addi insns then that's better than synthesizing
+     the constant into a temporary, then adding the temporary to the
+     other input.  The exception is when the constant can be loaded
+     in a single instruction which can issue whenever it's convenient.  */
+  if (SUM_OF_TWO_S12 (ival) && budget1 >= 2)
+    {
+      /* The first addend is the extreme value of the signed
+         IMM_BITS-wide range: the minimum for a negative constant,
+         the maximum for a positive one.  */
+      HOST_WIDE_INT saturated = HOST_WIDE_INT_M1U << (IMM_BITS - 1);
+
+      if (ival >= 0)
+        saturated = ~saturated;
+
+      /* Whatever is left over is added by the second insn.  */
+      HOST_WIDE_INT rest = ival - saturated;
+
+      /* The second add reads OPERANDS[0], so this remains correct when
+         OPERANDS[0] and OPERANDS[1] are the same register.  */
+      rtx x = gen_rtx_PLUS (word_mode, operands[1], GEN_INT (saturated));
+      emit_insn (gen_rtx_SET (operands[0], x));
+      rtx output = gen_rtx_PLUS (word_mode, operands[0], GEN_INT (rest));
+      emit_insn (gen_rtx_SET (operands[0], output));
+      return true;
+    }
+
+  /* If we can shift the constant by 1, 2, or 3 bit positions
+     and the result is a cheaper constant, then do so.  */
+  if (TARGET_ZBA
+      && (((ival % 2) == 0
+           && budget1 > riscv_const_insns (GEN_INT (ival >> 1), true))
+          || ((ival % 4) == 0
+              && budget1 > riscv_const_insns (GEN_INT (ival >> 2), true))
+          || ((ival % 8) == 0
+              && budget1 > riscv_const_insns (GEN_INT (ival >> 3), true))))
+    {
+      /* Load the shifted constant into a temporary.
+
+         NOTE(review): the shift count used is the largest one the
+         constant allows, which is not necessarily the one the cost
+         test above found to be cheaper -- confirm this is intended.  */
+      int shct = ctz_hwi (ival);
+
+      /* We can handle shifting up to 3 bit positions via shNadd.  */
+      if (shct > 3)
+        shct = 3;
+
+      /* The adjusted constant may still need synthesis, so do not copy
+         it directly into register.  Let the expander handle it.  */
+      rtx tmp = force_reg (word_mode, GEN_INT (ival >> shct));
+
+      /* Generate shift-add of temporary and operands[1]
+         into the final destination.  */
+      rtx x = gen_rtx_ASHIFT (word_mode, tmp, GEN_INT (shct));
+      rtx output = gen_rtx_PLUS (word_mode, x, operands[1]);
+      emit_insn (gen_rtx_SET (operands[0], output));
+      return true;
+    }
+
+  /* If the negated constant is cheaper than the original, then negate
+     the constant and use sub.
+
+     NOTE(review): this presumes a smaller riscv_const_insns result
+     always means a cheaper constant -- confirm it cannot report a
+     small sentinel value for a constant it declines to synthesize.  */
+  if (budget2 < budget1)
+    {
+      /* Load -INTVAL (operands[2]) into a temporary.  */
+      rtx tmp = force_reg (word_mode, GEN_INT (neg_ival));
+
+      /* Subtract the negated constant from operands[1], which is the
+         same as adding the original constant.  */
+      rtx output = gen_rtx_MINUS (word_mode, operands[1], tmp);
+      emit_insn (gen_rtx_SET (operands[0], output));
+      return true;
+    }
+
+  /* No add synthesis was found.  Synthesize the constant into
+     a temporary and use that.  */
+  rtx x = force_reg (word_mode, operands[2]);
+  x = gen_rtx_PLUS (word_mode, operands[1], x);
+  emit_insn (gen_rtx_SET (operands[0], x));
+  return true;
+}
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
(set_attr "mode" "SI")])
(define_expand "addsi3"
- [(set (match_operand:SI 0 "register_operand" "=r,r")
- (plus:SI (match_operand:SI 1 "register_operand" " r,r")
- (match_operand:SI 2 "arith_operand" " r,I")))]
+ [(set (match_operand:SI 0 "register_operand")
+ (plus:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "reg_or_const_int_operand")))]
""
{
if (TARGET_64BIT)
{
rtx t = gen_reg_rtx (DImode);
+
+ /* SMALL_OPERAND tests an integer value, so extract it from the
+ CONST_INT with INTVAL rather than passing the rtx itself. Only
+ constants that are not small operands are loaded into a register
+ before being handed to addsi3_extended. */
+ if (CONST_INT_P (operands[2]) && !SMALL_OPERAND (INTVAL (operands[2])))
+ operands[2] = force_reg (SImode, operands[2]);
emit_insn (gen_addsi3_extended (t, operands[1], operands[2]));
t = gen_lowpart (SImode, t);
SUBREG_PROMOTED_VAR_P (t) = 1;
emit_move_insn (operands[0], t);
DONE;
}
+
+ /* We may be able to find a faster sequence, if so, then we are
+ done. Otherwise let expansion continue normally. */
+ if (CONST_INT_P (operands[2]) && synthesize_add (operands))
+ DONE;
+})
+
+(define_expand "adddi3"
+  [(set (match_operand:DI 0 "register_operand")
+        (plus:DI (match_operand:DI 1 "register_operand")
+                 (match_operand:DI 2 "reg_or_const_int_operand")))]
+  "TARGET_64BIT"
+{
+  /* A register addend needs no help and falls through to normal
+     expansion.  A constant addend is offered to synthesize_add; when
+     that emits the complete sequence there is nothing left to do.  */
+  if (CONST_INT_P (operands[2]))
+    {
+      if (synthesize_add (operands))
+        DONE;
+    }
})
-(define_insn "adddi3"
+(define_insn "*adddi3"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(plus:DI (match_operand:DI 1 "register_operand" " r,r")
(match_operand:DI 2 "arith_operand" " r,I")))]
--- /dev/null
+/* { dg-options "-march=rv32gcb -mabi=ilp32d" { target { rv32 } } } */
+/* { dg-options "-march=rv64gcb -mabi=lp64d" { target { rv64 } } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+
+
+#if __riscv_xlen == 64
+#define TYPE long
+#else
+#define TYPE int
+#endif
+
+#define T(C) TYPE foo_##C (TYPE x) { return x + C; }
+#define TM(C) TYPE foo_M##C (TYPE x) { return x + -C; }
+
+/* T (C) defines foo_C, which returns x + C; TM (C) defines foo_MC,
+ which returns x + -C. TYPE is the register-width integer type
+ for the selected __riscv_xlen.
+
+ These cases were selected because they all can be synthesized
+ at expansion time without synthesizing the constant directly.
+
+ That makes the assembler scan testing simpler. I've verified
+ by hand that cases that should synthesize the constant do in
+ fact still generate code that way. */
+T (2050)
+T (4094)
+T (4100)
+T (8200)
+
+TM (2049)
+TM (4096)
+TM (4100)
+TM (8200)
+
+#if __riscv_xlen == 64
+TM (0x200000000)
+#endif
+
+/* Four of the tests (five on rv64, which also builds the
+ 0x200000000 case) should use shNadd insns, and four should use
+ paired addi insns, i.e. eight addi insns in total. */
+/* { dg-final { scan-assembler-times "sh.add\t" 4 { target { rv32 } } } } */
+/* { dg-final { scan-assembler-times "sh.add\t" 5 { target { rv64 } } } } */
+/* { dg-final { scan-assembler-times "addi\t" 8 } } */