git.ipfire.org Git - thirdparty/gcc.git/commit
AVR: target/84211 - Add a post reload register optimization pass.
This introduces a new post reload pass that tracks known values held
in registers and performs optimizations based on that knowledge.
It runs between the two instances of the RTL peephole pass.
The optimizations are activated by the new option -mfuse-move=<0,23>, whose
argument (0 to 23) is a 3:2:2:2 mixed radix value, radices listed from digit 3 down to digit 0:
Digit 0: Activates try_fuse:
Tries to use a MOVW instead of two LDIs.
Digit 1: Activates try_bin_arg1:
Simplify the 2nd operand of a binary operation, for example
X xor Y can be simplified to X when Y = 0. When Y is an
expensive constant that's already held in some register R, then
the expression can be replaced by X xor R.
Digit 2: Activates try_split_any:
Split multi-byte operations like shifts into 8-bit instructions.
Digit 3: Activates try_split_ldi:
Decompose LDI-like insns into a sequence of instructions with better
performance. For example, R2[4] = 0x1ff may be performed as:
CLR R5
CLR R4
MOVW R2, R4
INC R3
DEC R2
Digit 3 can have a value of 0, 1 or 2, where value=2 may come up
with code that performs better than with value=1 at the expense of
reduced traceability of the generated assembly code.
Here are some examples:
Without optimization | With optimization
==================== | =================
long long fn_zero (void)
{
return 0;
}
ldi r18, 0 ; movqi_insn | ldi r18, 0 ; movqi_insn
ldi r19, 0 ; movqi_insn | ldi r19, 0 ; movqi_insn
ldi r20, 0 ; movqi_insn | movw r20, r18 ; *movhi
ldi r21, 0 ; movqi_insn |
ldi r22, 0 ; movqi_insn | movw r22, r18 ; *movhi
ldi r23, 0 ; movqi_insn |
ldi r24, 0 ; movqi_insn | movw r24, r18 ; *movhi
ldi r25, 0 ; movqi_insn |
ret | ret
int fn_eq0 (char c)
{
return c == 0;
}
mov r18, r24 ; movqi_insn | mov r18, r24 ; movqi_insn
ldi r24, 1 ; *movhi | ldi r24, 1 ; *movhi
ldi r25, 0 | ldi r25, 0
cp r18, ZERO ; cmpqi3 | cpse r18, ZERO ; peephole
breq .+4 ; branch |
ldi r24, 0 ; *movhi | ldi r24, 0 ; movqi_insn
ldi r25, 0 |
ret | ret
unsigned fn_crc (unsigned x, unsigned y)
{
for (char i = 8; i--; x <<= 1)
y ^= (x ^ y) & 0x80 ? 79u : 0u;
return y;
}
movw r18, r24 ; *movhi | movw r18, r24 ; *movhi
movw r24, r22 ; *movhi | movw r24, r22 ; *movhi
ldi r22, 8 ; movqi_insn | ldi r22, 8 ; movqi_insn
.L13: | .L13:
movw r30, r18 ; *movhi | movw r30, r18 ; *movhi
eor r30, r24 ; *xorqi3 | eor r30, r24 ; *xorqi3
eor r31, r25 ; *xorqi3 | eor r31, r25 ; *xorqi3
mov r20, r30 ; *andhi3 | mov r20, r30 ; *andqi3
andi r20, 1<<7 | andi r20, 1<<7
clr r21 |
sbrs r30, 7 ; *sbrx_branchhi | sbrc r30, 7 ; *sbrx_branchhi
rjmp .+4 |
ldi r20, 79 ; movqi_insn | ldi r20, 79 ; movqi_insn
ldi r21, 0 ; movqi_insn |
eor r24, r20 ; *xorqi3 | eor r24, r20 ; *xorqi3
eor r25, r21 ; *xorqi3 |
lsl r18 ; *ashlhi3_const | lsl r18 ; *ashlhi3_const
rol r19 | rol r19
subi r22, 1 ; *op8.for.cczn.plus | subi r22, 1 ; *op8.for.cczn.plus
brne .L13 ; branch_ZN | brne .L13 ; branch_ZN
ret | ret
#define SPDR (*(uint8_t volatile*) 0x2c)
void fn_PR49807 (long big)
{
SPDR = big >> 24;
SPDR = big >> 16;
SPDR = big >> 8;
SPDR = big;
}
movw r20, r22 ; *movhi | movw r20, r22 ; *movhi
movw r22, r24 ; *movhi | movw r22, r24 ; *movhi
mov r24, r23 ; *ashrsi3_const |
clr r27 |
sbrc r24,7 |
com r27 |
mov r25, r27 |
mov r26, r27 |
out 0xc, r24 ; movqi_insn | out 0xc, r23 ; movqi_insn
movw r24, r22 ; *ashrsi3_const |
clr r27 |
sbrc r25, 7 |
com r27 |
mov r26, r27 |
out 0xc, r24 ; movqi_insn | out 0xc, r24 ; movqi_insn
clr r27 ; *ashrsi3_const |
sbrc r23, 7 |
dec r27 |
mov r26, r23 |
mov r25, r22 |
mov r24, r21 |
out 0xc, r24 ; movqi_insn | out 0xc, r21 ; movqi_insn
out 0xc, r20 ; movqi_insn | out 0xc, r20 ; movqi_insn
ret | ret
PR target/84211
gcc/
* doc/invoke.texi (AVR Options) [-mfuse-move]: Document new option.
* common/config/avr/avr-common.cc (avr_option_optimization_table):
Set -mfuse-move= depending on optimization level.
* config/avr/avr.opt (-mfuse-move, -mfuse-move=): New options.
* config/avr/t-avr (avr-passes.o): Depend on avr-passes-fuse-move.h.
* config/avr/avr-passes-fuse-move.h: New file, used by avr-passes.cc.
* config/avr/avr-passes.def (avr_pass_fuse_move): Insert new pass.
* config/avr/avr-passes.cc (INCLUDE_ARRAY): Define it.
(insn-attr.h): Include it.
(avr_pass_data_fuse_move): New const pass_data.
(avr_pass_fuse_move): New public rtl_opt_pass class.
(make_avr_pass_fuse_move): New function.
(gprmask_t): New typedef.
(next_nondebug_insn_bb, prev_nondebug_insn_bb)
(single_set_with_scratch, size_to_mask, size_to_mode)
(emit_valid_insn, emit_valid_move_clobbercc)
(gpr_regno_p, regmask, has_bits_in)
(find_arith, find_arith2, any_shift_p): New local functions.
(AVRasm): New namespace.
(FUSE_MOVE_MAX_MODESIZE): New define.
(avr-passes-fuse-move.h): New include.
(memento_t, absint_t, absins_byte_t, absint_val_t)
(optimize_data_t, insn_optimizedata_t, find_plies_data_t)
(insninfo_t, bbinfo_t, ply_t, plies_t): New structs / classes.
* config/avr/avr-protos.h (avr_chunk, avr_byte, avr_word, avr_int8)
(avr_uint8, avr_int16, avr_uint16)
(avr_out_set_some, avr_set_some_operation)
(output_reload_in_const, make_avr_pass_fuse_move): New protos.
(avr_dump): Depend macro definition on GCC_DUMPFILE_H.
* config/avr/avr.cc (avr_option_override): Insert after
pass "avr-fuse-move" instead of after "peephole2".
(avr_chunk, avr_byte, avr_word, avr_int8, avr_uint8, avr_int16)
(avr_uint16, output_reload_in_const): Functions are no longer static.
(avr_out_set_some, avr_set_some_operation): New functions.
(ashrqi3_out, ashlqi3_out) [offset=7]: Handle "r,r,C07" alternative.
(avr_out_insert_notbit): Comment also allows QImode.
(avr_adjust_insn_length) [ADJUST_LEN_SET_SOME]: Handle case.
* config/avr/avr.md (adjust_len) <set_some>: New attribute value.
(set_some): New insn.
(andqi3, *andqi3): Add "r,r,Cb1" alternative.
(ashrqi3, *ashrqi3, ashlqi3, *ashlqi3): Add a "r,r,C07" alternative.
(gen_move_clobbercc_scratch): New emit helper.
* config/avr/constraints.md (Cb1): New constraint.
* config/avr/predicates.md (dreg_or_0_operand, set_some_operation): New.
* config/avr/avr-log.cc (avr_forward_to_printf): New static func.
(avr_log_vadump): Use it to recognize more formats.
gcc/testsuite/
* gcc.target/avr/torture/test-gprs.h: New file.
* gcc.target/avr/torture/pr84211-fuse-move-1.c: New test.
* gcc.target/avr/torture/pr84211-fuse-move-2.c: New test.
16 files changed: