This patch improves the 4-byte ASHIFT insns.
1) It adds a "r,r,C15" alternative for improved long << 15.
2) It adds 3-operand alternatives (depending on options) and
splits them after peephole2 / before avr-fuse-move into
a 3-operand byte shift and a 2-operand residual bit shift.
For better control, it introduces a new option -msplit-bit-shift
that's activated at -O2 and higher by default. 2) is also
performed at -Os, but not at -Oz.
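
As an illustration of 2), a minimal C sketch (hypothetical, not part
of the patch) of what the split means for a 32-bit shift:

    uint32_t shl17 (uint32_t x)
    {
        // One whole-byte shift (<< 16, just register moves / MOVW)
        // plus a 1-bit residual shift, instead of 17 single-bit shifts.
        return (x << 16) << 1;
    }
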
PR target/117726
gcc/
* config/avr/avr.opt (-msplit-bit-shift): Add new optimization option.
* common/config/avr/avr-common.cc (avr_option_optimization_table)
[OPT_LEVELS_2_PLUS]: Turn on -msplit-bit-shift.
* config/avr/avr.h (machine_function.n_avr_fuse_add_executed):
New bool component.
* config/avr/avr.md (attr "isa") <2op, 3op>: Add new values.
(attr "enabled"): Handle them.
(ashlsi3, *ashlsi3, *ashlsi3_const): Add "r,r,C15" alternative.
Add "r,0,C4l" and "r,r,C4l" alternatives (depending on 2op / 3op).
(define_split) [avr_split_bit_shift]: Add 2 new ashift:ALL4 splitters.
(define_peephole2) [ashift:ALL4]: Add (match_dup 3) so that the scratch
won't overlap with the output operand of the matched insn.
(*ashl<mode>3_const_split): Remove unused ashift:ALL4 splitter.
* config/avr/avr-passes.cc (emit_valid_insn)
(emit_valid_move_clobbercc): Move out of anonymous namespace.
(make_avr_pass_fuse_add) <gate>: Don't override.
<execute>: Set n_avr_fuse_add_executed according to
func->machine->n_avr_fuse_add_executed.
(pass_data avr_pass_data_split_after_peephole2): New object.
(avr_pass_split_after_peephole2): New rtl_opt_pass.
(avr_emit_shift): New static function.
(avr_shift_is_3op, avr_split_shift_p, avr_split_shift)
(make_avr_pass_split_after_peephole2): New functions.
* config/avr/avr-passes.def (avr_pass_split_after_peephole2):
Insert new pass after pass_peephole2.
* config/avr/avr-protos.h
(n_avr_fuse_add_executed, avr_shift_is_3op, avr_split_shift_p)
(avr_split_shift, avr_optimize_size_level)
(make_avr_pass_split_after_peephole2): New prototypes.
* config/avr/avr.cc (n_avr_fuse_add_executed): New global variable.
(avr_optimize_size_level): New function.
(avr_set_current_function): Set n_avr_fuse_add_executed
according to cfun->machine->n_avr_fuse_add_executed.
(ashlsi3_out) [case 15]: Output optimized code for this offset.
(avr_rtx_costs_1) [ASHIFT, SImode]: Adjust costs of offsets 15, 16.
* config/avr/constraints.md (C4a, C4l, C4r): New constraints.
* pass_manager.h (pass_manager): Adjust comments.
{ OPT_LEVELS_2_PLUS, OPT_mfuse_add_, NULL, 2 },
{ OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_mfuse_move_, NULL, 3 },
{ OPT_LEVELS_2_PLUS, OPT_mfuse_move_, NULL, 23 },
+ { OPT_LEVELS_2_PLUS, OPT_msplit_bit_shift, NULL, 1 },
// Stick to the "old" placement of the subreg lowering pass.
{ OPT_LEVELS_1_PLUS, OPT_fsplit_wide_types_early, NULL, 1 },
/* Allow optimizer to introduce store data races. This used to be the
#define FIRST_GPR (AVR_TINY ? REG_18 : REG_2)
+
+// Emit pattern PAT, and ICE when the insn is not valid / not recognized.
+
+static rtx_insn *
+emit_valid_insn (rtx pat)
+{
+ rtx_insn *insn = emit_insn (pat);
+
+ if (! valid_insn_p (insn)) // Also runs recog().
+ fatal_insn ("emit unrecognizable insn", insn);
+
+ return insn;
+}
+
+// Emit a single_set with an optional scratch operand. This function
+// asserts that the new insn is valid and recognized.
+
+static rtx_insn *
+emit_valid_move_clobbercc (rtx dest, rtx src, rtx scratch = NULL_RTX)
+{
+ rtx pat = scratch
+ ? gen_gen_move_clobbercc_scratch (dest, src, scratch)
+ : gen_gen_move_clobbercc (dest, src);
+
+ return emit_valid_insn (pat);
+}
+
+
namespace
{
return single_set (insn);
}
-// Emit pattern PAT, and ICE when the insn is not valid / not recognized.
-
-static rtx_insn *
-emit_valid_insn (rtx pat)
-{
- rtx_insn *insn = emit_insn (pat);
-
- if (! valid_insn_p (insn)) // Also runs recog().
- fatal_insn ("emit unrecognizable insn", insn);
-
- return insn;
-}
-
-// Emit a single_set with an optional scratch operand. This function
-// asserts that the new insn is valid and recognized.
-
-static rtx_insn *
-emit_valid_move_clobbercc (rtx dest, rtx src, rtx scratch = NULL_RTX)
-{
- rtx pat = scratch
- ? gen_gen_move_clobbercc_scratch (dest, src, scratch)
- : gen_gen_move_clobbercc (dest, src);
-
- return emit_valid_insn (pat);
-}
// One bit for each GRP in REG_0 ... REG_31.
using gprmask_t = uint32_t;
return make_avr_pass_fuse_add (m_ctxt);
}
- bool gate (function *) final override
+ unsigned int execute (function *func) final override
{
- return optimize && avr_fuse_add > 0;
+ func->machine->n_avr_fuse_add_executed += 1;
+ n_avr_fuse_add_executed = func->machine->n_avr_fuse_add_executed;
+
+ if (optimize && avr_fuse_add > 0)
+ return execute1 (func);
+ return 0;
}
- unsigned int execute (function *) final override;
+ unsigned int execute1 (function *);
struct Some_Insn
{
as PRE_DEC + PRE_DEC for two adjacent locations. */
unsigned int
-avr_pass_fuse_add::execute (function *func)
+avr_pass_fuse_add::execute1 (function *func)
{
df_note_add_problem ();
df_analyze ();
}
+\f
+//////////////////////////////////////////////////////////////////////////////
+// Split insns after peephole2 / before avr-fuse-move.
+static const pass_data avr_pass_data_split_after_peephole2 =
+{
+ RTL_PASS, // type
+ "", // name (will be patched)
+ OPTGROUP_NONE, // optinfo_flags
+ TV_DF_SCAN, // tv_id
+ 0, // properties_required
+ 0, // properties_provided
+ 0, // properties_destroyed
+ 0, // todo_flags_start
+ 0 // todo_flags_finish
+};
+
+class avr_pass_split_after_peephole2 : public rtl_opt_pass
+{
+public:
+ avr_pass_split_after_peephole2 (gcc::context *ctxt, const char *name)
+ : rtl_opt_pass (avr_pass_data_split_after_peephole2, ctxt)
+ {
+ this->name = name;
+ }
+
+ unsigned int execute (function *) final override
+ {
+ if (avr_shift_is_3op ())
+ split_all_insns ();
+ return 0;
+ }
+
+}; // avr_pass_split_after_peephole2
+
+} // anonymous namespace
+
+
+/* Whether some shift insn alternatives are 3-operand insns or
+   2-operand insns.  The 3op alternatives allow the source and the
+   destination register of the shift to be different right from the
+   start, because the splitter will split the 3op shift into a 3op byte
+   shift and a 2op residual bit shift.
+   (When the residual shift has an offset of one less than the bitsize,
+   then the residual shift is also a 3op insn.)  */
+
+bool
+avr_shift_is_3op ()
+{
+ // Don't split for OPTIMIZE_SIZE_MAX (-Oz).
+ // For OPTIMIZE_SIZE_BALANCED (-Os), we still split because
+  // the size overhead (if it exists at all) is marginal.
+
+ return (avr_split_bit_shift
+ && optimize > 0
+ && avr_optimize_size_level () < OPTIMIZE_SIZE_MAX);
+}
+
+
+/* Implement constraints `C4a', `C4l' and `C4r'.
+   Whether we split an N_BYTES shift of code CODE in { ASHIFTRT,
+   LSHIFTRT, ASHIFT } into a byte shift and a residual bit shift,
+   i.e. whether OFFSET is in { 17...23, 25...30 }.  */
+
+bool
+avr_split_shift_p (int n_bytes, int offset, rtx_code)
+{
+ gcc_assert (n_bytes == 4);
+
+ return (avr_shift_is_3op ()
+ && offset % 8 != 0 && IN_RANGE (offset, 17, 30));
+}
+
+
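+// Emit insns for a shift  DEST = SRC <CODE> OFF  with constant OFF.
+// When OFF is one less than the mode's bit size, emit a 3-operand shift
+// insn right away; otherwise, move SRC to DEST first (if they differ)
+// and emit a 2-operand shift insn.  SCRATCH is a QImode scratch or NULL_RTX.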
+static void
+avr_emit_shift (rtx_code code, rtx dest, rtx src, int off, rtx scratch)
+{
+ machine_mode mode = GET_MODE (dest);
+ rtx shift;
+
+ if (off == GET_MODE_BITSIZE (mode) - 1)
+ {
+ shift = gen_rtx_fmt_ee (code, mode, src, GEN_INT (off));
+ }
+ else
+ {
+ if (REGNO (dest) != REGNO (src))
+ emit_valid_move_clobbercc (dest, src);
+ shift = gen_rtx_fmt_ee (code, mode, dest, GEN_INT (off));
+ }
+
+ emit_valid_move_clobbercc (dest, shift, scratch);
+}
+
+
+/* Worker for a define_split that runs when -msplit-bit-shift is on.
+ Split a shift of code CODE into a 3op byte shift and a residual bit shift.
+ Return 'true' when a split has been performed and insns have been emitted.
+ Otherwise, return 'false'. */
+
+bool
+avr_split_shift (rtx xop[], rtx scratch, rtx_code code)
+{
+ scratch = scratch && REG_P (scratch) ? scratch : NULL_RTX;
+ rtx dest = xop[0];
+ rtx src = xop[1];
+ int ioff = INTVAL (xop[2]);
+
+ gcc_assert (GET_MODE_SIZE (GET_MODE (dest)) == 4);
+
+ if (code == ASHIFT)
+ {
+ if (ioff >= 25)
+ {
+ rtx dst8 = avr_byte (dest, 3);
+ rtx src8 = avr_byte (src, 0);
+ avr_emit_shift (code, dst8, src8, ioff % 8, NULL_RTX);
+ emit_valid_move_clobbercc (avr_byte (dest, 2), const0_rtx);
+ emit_valid_move_clobbercc (avr_word (dest, 0), const0_rtx);
+ return true;
+ }
+ else if (ioff >= 17)
+ {
+ rtx dst16 = avr_word (dest, 2);
+ rtx src16 = avr_word (src, 0);
+ avr_emit_shift (code, dst16, src16, ioff % 16, scratch);
+ emit_valid_move_clobbercc (avr_word (dest, 0), const0_rtx);
+ return true;
+ }
+ else
+ gcc_unreachable ();
+ }
+ else
+ gcc_unreachable ();
+
+ return false;
+}
+
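+// A minimal C model of the two ASHIFT cases above (illustration only,
+// not compiler code; assumes the AVR's little-endian byte order):
+//
+//   uint32_t model_ashift (uint32_t x, int off) // off in 17..30, off % 8 != 0
+//   {
+//     return off >= 25
+//       ? (uint32_t) (uint8_t) (x << (off % 8)) << 24    // only byte 3 is live
+//       : (uint32_t) (uint16_t) (x << (off % 16)) << 16; // only the high word
+//   }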
+
+namespace
+{
+
\f
//////////////////////////////////////////////////////////////////////////////
// Determine whether an ISR may use the __gcc_isr pseudo-instruction.
{
return new avr_pass_fuse_move (ctxt, "avr-fuse-move");
}
+
+// Split insns after peephole2 / before avr-fuse-move.
+
+rtl_opt_pass *
+make_avr_pass_split_after_peephole2 (gcc::context *ctxt)
+{
+ return new avr_pass_split_after_peephole2 (ctxt, "avr-split-after-peephole2");
+}
- The RTL peepholer may optimize insns involving lower registers. */
INSERT_PASS_AFTER (pass_peephole2, 1, avr_pass_fuse_move);
+
+ /* Run an instance of post-reload split prior to avr-fuse-move.
+ Purpose is to split 3-operand shift insns into a 3-operand shift
+ with a byte offset, and a 2-operand residual shift after
+ RTL peepholes but prior to the avr-fuse-move pass. */
+
+INSERT_PASS_AFTER (pass_peephole2, 1, avr_pass_split_after_peephole2);
extern rtx ccn_reg_rtx;
extern rtx cczn_reg_rtx;
+extern int n_avr_fuse_add_executed;
+extern bool avr_shift_is_3op ();
+extern bool avr_split_shift_p (int n_bytes, int offset, rtx_code);
+extern bool avr_split_shift (rtx xop[], rtx xscratch, rtx_code);
+
+extern int avr_optimize_size_level ();
+
#endif /* RTX_CODE */
#ifdef REAL_VALUE_TYPE
extern rtl_opt_pass *make_avr_pass_recompute_notes (gcc::context *);
extern rtl_opt_pass *make_avr_pass_casesi (gcc::context *);
extern rtl_opt_pass *make_avr_pass_ifelse (gcc::context *);
+extern rtl_opt_pass *make_avr_pass_split_after_peephole2 (gcc::context *);
#ifdef RTX_CODE
extern bool avr_casei_sequence_check_operands (rtx *xop);
extern bool avr_split_fake_addressing_move (rtx_insn *insn, rtx *operands);
bool avr_need_copy_data_p = false;
bool avr_has_rodata_p = false;
+/* Counts how often pass avr-fuse-add has been executed.  It is kept in
+ sync with cfun->machine->n_avr_fuse_add_executed and serves as an
+ insn condition for shift insn splitters. */
+int n_avr_fuse_add_executed = 0;
+
+
\f
/* Transform UP into lowercase and write the result to LO.
You must provide enough space for LO. Return LO. */
}
+/* Return the current level of size optimization:  0 when not optimizing
+   for size, OPTIMIZE_SIZE_BALANCED for -Os, OPTIMIZE_SIZE_MAX for -Oz.
+   Uses the setting of the current function when available.  */
+
+int
+avr_optimize_size_level ()
+{
+  return cfun && cfun->decl
+    ? opt_for_fn (cfun->decl, optimize_size)
+    : optimize_size;
+}
+
+
/* Implement `INIT_EXPANDERS'. */
/* The function works like a singleton. */
if (decl == NULL_TREE
|| current_function_decl == NULL_TREE
|| current_function_decl == error_mark_node
- || ! cfun->machine
- || cfun->machine->attributes_checked_p)
+ || ! cfun->machine)
+ return;
+
+ n_avr_fuse_add_executed = cfun->machine->n_avr_fuse_add_executed;
+
+ if (cfun->machine->attributes_checked_p)
return;
location_t loc = DECL_SOURCE_LOCATION (decl);
/* Generate asm equivalent for various shifts. This only handles cases
- that are not already carefully hand-optimized in ?sh??i3_out.
+ that are not already carefully hand-optimized in ?sh<mode>3_out.
OPERANDS[0] resp. %0 in TEMPL is the operand to be shifted.
OPERANDS[2] is the shift count as CONST_INT, MEM or REG.
{
int reg0 = true_regnum (operands[0]);
int reg1 = true_regnum (operands[1]);
+ bool reg1_unused_after_p = reg_unused_after (insn, operands[1]);
if (plen)
*plen = 0;
"mov %B0,%A1" CR_TAB
"mov %C0,%B1" CR_TAB
"mov %D0,%C1", operands, plen, 4);
+ case 15:
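+      /* x << 15 is done as (x << 16) rotated right by one bit:  bit 16
+         of x is saved first (in carry resp. in T), the low word of x is
+         moved to the high word of the result, the low bytes are cleared,
+         and the upper three bytes are shifted/rotated right by one.  */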
+ avr_asm_len (reg1_unused_after_p
+ ? "lsr %C1"
+ : "bst %C1,0", operands, plen, 1);
+ if (reg0 + 2 != reg1)
+ {
+ if (AVR_HAVE_MOVW)
+ avr_asm_len ("movw %C0,%A1", operands, plen, 1);
+ else
+ avr_asm_len ("mov %C0,%A1" CR_TAB
+ "mov %D0,%B1", operands, plen, 2);
+ }
+ return reg1_unused_after_p
+ ? avr_asm_len ("clr %A0" CR_TAB
+ "clr %B0" CR_TAB
+ "ror %D0" CR_TAB
+ "ror %C0" CR_TAB
+ "ror %B0", operands, plen, 5)
+ : avr_asm_len ("clr %A0" CR_TAB
+ "clr %B0" CR_TAB
+ "lsr %D0" CR_TAB
+ "ror %C0" CR_TAB
+ "ror %B0" CR_TAB
+ "bld %D0,7", operands, plen, 6);
case 16:
if (reg0 + 2 == reg1)
return avr_asm_len ("clr %B0" CR_TAB
break;
case 1:
case 8:
- case 16:
*total = COSTS_N_INSNS (4);
break;
+ case 15:
+ *total = COSTS_N_INSNS (8 - AVR_HAVE_MOVW);
+ break;
+ case 16:
+ *total = COSTS_N_INSNS (4 - AVR_HAVE_MOVW);
+ break;
case 31:
*total = COSTS_N_INSNS (6);
break;
/* 'true' if this function references .L__stack_usage like with
__builtin_return_address. */
bool use_L__stack_usage;
+
+  /* Counts how many times the execute() method of the avr-fuse-add
+     pass has been invoked.  The count is increased even when the
+     optimization itself is not run.  The purpose of this variable is
+     to provide information about where in the pass sequence we are.  */
+ int n_avr_fuse_add_executed;
};
/* AVR does not round pushes, but the existence of this macro is
;; no_xmega: non-XMEGA core xmega : XMEGA core
;; no_adiw: ISA has no ADIW, SBIW adiw : ISA has ADIW, SBIW
+;; The following ISA attributes are actually not architecture specific,
+;; but depend on (optimization) options.  This is because the "enabled"
+;; attribute can't depend on more than one other attribute.  This means
+;; that 2op and 3op must work for all ISAs, and hence a 'flat' attribute
+;; scheme can be used (as opposed to a true Cartesian product).
+
+;; 2op : insn is a 2-operand insn 3op : insn is a 3-operand insn
+
(define_attr "isa"
"mov,movw, rjmp,jmp, ijmp,eijmp, lpm,lpmx, elpm,elpmx, no_xmega,xmega,
no_adiw,adiw,
+ 2op,3op,
standard"
(const_string "standard"))
(define_attr "enabled" ""
-  (cond [(eq_attr "isa" "standard")
-         (const_int 1)
-         (and (eq_attr "isa" "mov")
-              (match_test "!AVR_HAVE_MOVW"))
-         (const_int 1)
-         (and (eq_attr "isa" "movw")
-              (match_test "AVR_HAVE_MOVW"))
-         (const_int 1)
-         (and (eq_attr "isa" "rjmp")
-              (match_test "!AVR_HAVE_JMP_CALL"))
-         (const_int 1)
-         (and (eq_attr "isa" "jmp")
-              (match_test "AVR_HAVE_JMP_CALL"))
-         (const_int 1)
-         (and (eq_attr "isa" "ijmp")
-              (match_test "!AVR_HAVE_EIJMP_EICALL"))
-         (const_int 1)
-         (and (eq_attr "isa" "eijmp")
-              (match_test "AVR_HAVE_EIJMP_EICALL"))
-         (const_int 1)
-         (and (eq_attr "isa" "lpm")
-              (match_test "!AVR_HAVE_LPMX"))
-         (const_int 1)
-         (and (eq_attr "isa" "lpmx")
-              (match_test "AVR_HAVE_LPMX"))
-         (const_int 1)
-         (and (eq_attr "isa" "elpm")
-              (match_test "AVR_HAVE_ELPM && !AVR_HAVE_ELPMX"))
-         (const_int 1)
-         (and (eq_attr "isa" "elpmx")
-              (match_test "AVR_HAVE_ELPMX"))
-         (const_int 1)
-         (and (eq_attr "isa" "xmega")
-              (match_test "AVR_XMEGA"))
-         (const_int 1)
-         (and (eq_attr "isa" "no_xmega")
-              (match_test "!AVR_XMEGA"))
-         (const_int 1)
-         (and (eq_attr "isa" "adiw")
-              (match_test "AVR_HAVE_ADIW"))
-         (const_int 1)
-         (and (eq_attr "isa" "no_adiw")
-              (match_test "!AVR_HAVE_ADIW"))
-         (const_int 1)
-         ] (const_int 0)))
+  (if_then_else
+   (ior (eq_attr "isa" "standard")
+
+        (and (eq_attr "isa" "mov")
+             (match_test "!AVR_HAVE_MOVW"))
+
+        (and (eq_attr "isa" "movw")
+             (match_test "AVR_HAVE_MOVW"))
+
+        (and (eq_attr "isa" "rjmp")
+             (match_test "!AVR_HAVE_JMP_CALL"))
+
+        (and (eq_attr "isa" "jmp")
+             (match_test "AVR_HAVE_JMP_CALL"))
+
+        (and (eq_attr "isa" "ijmp")
+             (match_test "!AVR_HAVE_EIJMP_EICALL"))
+
+        (and (eq_attr "isa" "eijmp")
+             (match_test "AVR_HAVE_EIJMP_EICALL"))
+
+        (and (eq_attr "isa" "lpm")
+             (match_test "!AVR_HAVE_LPMX"))
+
+        (and (eq_attr "isa" "lpmx")
+             (match_test "AVR_HAVE_LPMX"))
+
+        (and (eq_attr "isa" "elpm")
+             (match_test "AVR_HAVE_ELPM && !AVR_HAVE_ELPMX"))
+
+        (and (eq_attr "isa" "elpmx")
+             (match_test "AVR_HAVE_ELPMX"))
+
+        (and (eq_attr "isa" "xmega")
+             (match_test "AVR_XMEGA"))
+
+        (and (eq_attr "isa" "no_xmega")
+             (match_test "!AVR_XMEGA"))
+
+        (and (eq_attr "isa" "adiw")
+             (match_test "AVR_HAVE_ADIW"))
+
+        (and (eq_attr "isa" "no_adiw")
+             (match_test "!AVR_HAVE_ADIW"))
+
+        (and (eq_attr "isa" "2op")
+             (match_test "!avr_shift_is_3op ()"))
+
+        (and (eq_attr "isa" "3op")
+             (match_test "avr_shift_is_3op ()"))
+        )
+   (const_int 1)
+   (const_int 0)))
;; Define mode iterators
;; "ashlsq3" "ashlusq3"
;; "ashlsa3" "ashlusa3"
(define_insn_and_split "ashl<mode>3"
- [(set (match_operand:ALL4 0 "register_operand" "=r,r,r,r ,r,r,r")
- (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,0,r ,0,0,0")
- (match_operand:QI 2 "nop_general_operand" "r,L,P,O C31,K,n,Qm")))]
+ [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r ,r,r")
+ (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,0 ,r ,0,0")
+ (match_operand:QI 2 "nop_general_operand" "r,LPK,O C15 C31,C4l,C4l,n,Qm")))]
""
"#"
"&& reload_completed"
[(parallel [(set (match_dup 0)
(ashift:ALL4 (match_dup 1)
(match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ (clobber (reg:CC REG_CC))])]
+ ""
+ [(set_attr "isa" "*,*,*,2op,3op,*,*")])
(define_insn "*ashl<mode>3"
- [(set (match_operand:ALL4 0 "register_operand" "=r,r,r,r ,r,r,r")
- (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,0,r ,0,0,0")
- (match_operand:QI 2 "nop_general_operand" "r,L,P,O C31,K,n,Qm")))
+ [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r ,r,r")
+ (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,0 ,r ,0,0")
+ (match_operand:QI 2 "nop_general_operand" "r,LPK,O C15 C31,C4l,C4l,n,Qm")))
(clobber (reg:CC REG_CC))]
"reload_completed"
{
return ashlsi3_out (insn, operands, NULL);
}
- [(set_attr "length" "8,0,4,5,8,10,12")
- (set_attr "adjust_len" "ashlsi")])
+ [(set_attr "length" "12")
+ (set_attr "adjust_len" "ashlsi")
+ (set_attr "isa" "*,*,*,2op,3op,*,*")])
;; Optimize if a scratch register from LD_REGS happens to be available.
[(set_attr "length" "0,2,2,4,10")
(set_attr "adjust_len" "ashlhi")])
+
+;; Split shift into a byte shift and a residual bit shift (without scratch)
+(define_split
+ [(parallel [(set (match_operand:ALL4 0 "register_operand")
+ (ashift:ALL4 (match_operand:ALL4 1 "register_operand")
+ (match_operand:QI 2 "const_int_operand")))
+ (clobber (reg:CC REG_CC))])]
+ "avr_split_bit_shift
+ && n_avr_fuse_add_executed >= 1
+ && satisfies_constraint_C4l (operands[2])"
+ [(parallel [(set (match_dup 0)
+ (ashift:ALL4 (match_dup 1)
+ (match_dup 3)))
+ (clobber (reg:CC REG_CC))])
+ (parallel [(set (match_dup 0)
+ (ashift:ALL4 (match_dup 0)
+ (match_dup 4)))
+ (clobber (reg:CC REG_CC))])]
+ {
+ if (avr_split_shift (operands, NULL_RTX, ASHIFT))
+ DONE;
+ else if (REGNO (operands[0]) == REGNO (operands[1]))
+ FAIL;
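+    // Otherwise fall back to a generic split:  a byte-aligned shift by
+    // offset & ~7 followed by a residual bit shift by offset & 7,
+    // e.g. 19 = 16 + 3.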
+ int offset = INTVAL (operands[2]);
+ operands[3] = GEN_INT (offset & ~7);
+ operands[4] = GEN_INT (offset & 7);
+ })
+
+;; Split shift into a byte shift and a residual bit shift (with scratch)
+(define_split
+ [(parallel [(set (match_operand:ALL4 0 "register_operand")
+ (ashift:ALL4 (match_operand:ALL4 1 "register_operand")
+ (match_operand:QI 2 "const_int_operand")))
+ (clobber (match_operand:QI 3 "scratch_or_d_register_operand"))
+ (clobber (reg:CC REG_CC))])]
+ "avr_split_bit_shift
+ && n_avr_fuse_add_executed >= 1
+ && satisfies_constraint_C4l (operands[2])"
+ [(parallel [(set (match_dup 0)
+ (ashift:ALL4 (match_dup 1)
+ (match_dup 4)))
+ (clobber (reg:CC REG_CC))])
+ (parallel [(set (match_dup 0)
+ (ashift:ALL4 (match_dup 0)
+ (match_dup 5)))
+ (clobber (match_dup 3))
+ (clobber (reg:CC REG_CC))])]
+ {
+ if (avr_split_shift (operands, operands[3], ASHIFT))
+ DONE;
+ else if (REGNO (operands[0]) == REGNO (operands[1]))
+ FAIL;
+ int offset = INTVAL (operands[2]);
+ operands[4] = GEN_INT (offset & ~7);
+ operands[5] = GEN_INT (offset & 7);
+ })
+
+
(define_peephole2
[(match_scratch:QI 3 "d")
(parallel [(set (match_operand:ALL4 0 "register_operand" "")
(ashift:ALL4 (match_operand:ALL4 1 "register_operand" "")
(match_operand:QI 2 "const_int_operand" "")))
- (clobber (reg:CC REG_CC))])]
+ (clobber (reg:CC REG_CC))])
+ ;; $3 must not overlap with the output of the insn above.
+ (match_dup 3)]
""
[(parallel [(set (match_dup 0)
(ashift:ALL4 (match_dup 1)
(clobber (match_dup 3))
(clobber (reg:CC REG_CC))])])
-;; "*ashlsi3_const"
-;; "*ashlsq3_const" "*ashlusq3_const"
-;; "*ashlsa3_const" "*ashlusa3_const"
-(define_insn_and_split "*ashl<mode>3_const_split"
- [(set (match_operand:ALL4 0 "register_operand" "=r,r,r ,r")
- (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,r ,0")
- (match_operand:QI 2 "const_int_operand" "L,P,O C31,n")))
- (clobber (match_scratch:QI 3 "=X,X,X ,&d"))]
- "reload_completed"
- "#"
- "&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ashift:ALL4 (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
(define_insn "*ashl<mode>3_const"
- [(set (match_operand:ALL4 0 "register_operand" "=r,r,r ,r")
- (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,r ,0")
- (match_operand:QI 2 "const_int_operand" "L,P,O C31,n")))
- (clobber (match_scratch:QI 3 "=X,X,X ,&d"))
+ [(set (match_operand:ALL4 0 "register_operand" "=r ,r ,r ,r ,r")
+ (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r ,0 ,r ,0")
+ (match_operand:QI 2 "const_int_operand" "LP,O C15 C31,C4l,C4l,n")))
+ (clobber (match_operand:QI 3 "scratch_or_d_register_operand" "=X ,X ,&d ,&d ,&d"))
(clobber (reg:CC REG_CC))]
"reload_completed"
{
return ashlsi3_out (insn, operands, NULL);
}
- [(set_attr "length" "0,4,5,10")
- (set_attr "adjust_len" "ashlsi")])
+ [(set_attr "length" "10")
+ (set_attr "adjust_len" "ashlsi")
+ (set_attr "isa" "*,*,2op,3op,*")])
(define_expand "ashlpsi3"
[(parallel [(set (match_operand:PSI 0 "register_operand" "")
Target Mask(ACCUMULATE_OUTGOING_ARGS) Optimization
Optimization. Accumulate outgoing function arguments and acquire/release the needed stack space for outgoing function arguments in function prologue/epilogue. Without this option, outgoing arguments are pushed before calling a function and popped afterwards. This option can lead to reduced code size for functions that call many functions that get their arguments on the stack like, for example printf.
+msplit-bit-shift
+Target Var(avr_split_bit_shift) Init(0) Optimization
+Optimization. Split shifts of 4-byte values into a byte shift and a residual bit shift.
+
mstrict-X
Target Var(avr_strict_X) Init(0) Optimization
Optimization. When accessing RAM, use X as imposed by the hardware, i.e. just use pre-decrement, post-increment and indirect addressing with the X register. Without this option, the compiler may assume that there is an addressing mode X+const similar to Y+const and Z+const and emit instructions to emulate such an addressing mode for X.
(and (match_code "const_int,symbol_ref,const")
(match_test "const_0mod256_operand (op, HImode)")))
+(define_constraint "C4a"
+  "A constant integer shift offset for a 4-byte ASHIFTRT that qualifies for being split."
+ (and (match_code "const_int")
+ (match_test "avr_split_shift_p (4, ival, ASHIFTRT)")))
+
+(define_constraint "C4r"
+  "A constant integer shift offset for a 4-byte LSHIFTRT that qualifies for being split."
+ (and (match_code "const_int")
+ (match_test "avr_split_shift_p (4, ival, LSHIFTRT)")))
+
+(define_constraint "C4l"
+  "A constant integer shift offset for a 4-byte ASHIFT that qualifies for being split."
+ (and (match_code "const_int")
+ (match_test "avr_split_shift_p (4, ival, ASHIFT)")))
+
+
;; CONST_FIXED is no element of 'n' so cook our own.
;; "i" or "s" would match but because the insn uses iterators that cover
;; INT_MODE, "i" or "s" is not always possible.
void execute_early_local_passes ();
unsigned int execute_pass_mode_switching ();
- /* Various passes are manually cloned by epiphany. */
+ /* Various passes are manually cloned by avr and epiphany. */
opt_pass *get_pass_split_all_insns () const {
return pass_split_all_insns_1;
}