--- /dev/null
+/* Support for avr-passes.cc for AVR 8-bit microcontrollers.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+
+/* FIXME: The documentation in hard-reg-set.h is wrong in that it states
+ that HARD_REG_SET is a scalar iff HARD_REG_SET is a macro.
+ This is not the case: HARD_REG_SET is a typedef no matter what.
+ So in order to get the lower 32 bits (and maybe more) as a scalar
+ we have to invoke type traits as we can't #ifdef HARD_REG_SET */
+template<typename T, typename ELT, bool = std::is_same<T, ELT>::value>
+struct elt0_getter;
+
+// All hard regs fit in one HARD_REG_ELT_TYPE: T === ELT.
+template<typename T, typename ELT>
+struct elt0_getter<T, ELT, true>
+{
+ static inline const ELT &get (const T &t)
+ {
+ return t;
+ }
+};
+
+// HARD_REG_SET is not a scalar but a composite with HARD_REG_ELT_TYPE elts[].
+template<typename T, typename ELT>
+struct elt0_getter<T, ELT, false>
+{
+ static inline const ELT &get (const T &t)
+ {
+ return t.elts[0];
+ }
+};
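+
+// Illustrative usage:  memento_t::operator&= below reads the low
+// HARD_REG_ELT_TYPE of a HARD_REG_SET like
+//
+//   HARD_REG_ELT_TYPE lo
+//     = elt0_getter<HARD_REG_SET, HARD_REG_ELT_TYPE>::get (hrs);
+//
+// which compiles no matter whether HARD_REG_SET is a scalar or a struct.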
+
+
+// To track known values held in General Purpose Registers R2 ... R31.
+
+struct memento_t
+{
+ // One bit for each GPR.
+ gprmask_t known = 0;
+
+ std::array<uint8_t, REG_32> values;
+
+ static gprmask_t fixed_regs_mask;
+
+ void apply (const ply_t &);
+
+ void apply_insn (rtx_insn *insn, bool unused)
+ {
+ apply_insn1 (insn, unused);
+ known &= ~memento_t::fixed_regs_mask;
+ }
+
+private:
+ void apply_insn1 (rtx_insn *, bool);
+
+public:
+ bool knows (int rno, int n = 1) const
+ {
+ gcc_checking_assert (gpr_regno_p (rno, n));
+ const gprmask_t mask = regmask (rno, n);
+ return (known & mask) == mask;
+ }
+
+ uint8_t operator[] (int rno) const
+ {
+ gcc_checking_assert (gpr_regno_p (rno));
+ return values[rno];
+ }
+
+ // Set the 8-bit register number DEST as known to hold value VAL.
+ void set_value (int dest, int val)
+ {
+ gcc_checking_assert (gpr_regno_p (dest, 1));
+ values[dest] = (uint8_t) val;
+ set_known (dest);
+ }
+
+ void copy_value (int dest, int src)
+ {
+ gcc_checking_assert (gpr_regno_p (dest, 1));
+ gcc_checking_assert (gpr_regno_p (src, 1));
+ values[dest] = values[src];
+ set_known (dest, knows (src));
+ }
+
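+ // Copy the known state and values of the N_BYTES registers starting
+ // at SRC to the registers starting at DEST.  The copy direction is
+ // chosen such that overlapping ranges are handled correctly.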
+ void copy_values (int dest, int src, int n_bytes)
+ {
+ gcc_checking_assert (gpr_regno_p (dest, n_bytes));
+ gcc_checking_assert (gpr_regno_p (src, n_bytes));
+ if (dest < src)
+ for (int n = 0; n < n_bytes; ++n)
+ copy_value (n + dest, n + src);
+ else if (dest > src)
+ for (int n = n_bytes - 1; n >= 0; --n)
+ copy_value (n + dest, n + src);
+ }
+
+ // Get the value as a CONST_INT or NULL_RTX when any byte is unknown.
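+ // For example (illustrative): if R24 is known to hold 0x34 and R25 is
+ // known to hold 0x12, then get_value_as_const_int (REG_24, 2) yields
+ // const_int 0x1234 in HImode; the lower regno holds the lower byte.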
+ rtx get_value_as_const_int (int regno, int n_bytes) const
+ {
+ gcc_checking_assert (gpr_regno_p (regno, n_bytes));
+
+ if (! knows (regno, n_bytes))
+ return NULL_RTX;
+
+ const machine_mode mode = size_to_mode (n_bytes);
+ uint64_t val = 0;
+
+ for (int i = n_bytes - 1; i >= 0; --i)
+ val = 256 * val + values[regno + i];
+
+ return gen_int_mode (val, mode);
+ }
+
+ // Copy the known state and the value (provided it is known) from
+ // register SRC to register DEST.
+ void copy_values (rtx dest, rtx src)
+ {
+ if (REG_P (dest) && REG_P (src)
+ && GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (GET_MODE (dest)))
+ {
+ int n_bytes = std::min (GET_MODE_SIZE (GET_MODE (src)),
+ GET_MODE_SIZE (GET_MODE (dest)));
+ copy_values (REGNO (dest), REGNO (src), n_bytes);
+ }
+ }
+
+ void set_values (rtx dest, rtx src)
+ {
+ gcc_assert (REG_P (dest) && CONST_INT_P (src));
+ int regno = REGNO (dest);
+ for (int i = 0; i < GET_MODE_SIZE (GET_MODE (dest)); ++i)
+ set_value (regno + i, avr_uint8 (src, i));
+ }
+
+ // Value >= 0 of the i-th reg or -1 if unknown.
+ int value (int i) const
+ {
+ gcc_checking_assert (gpr_regno_p (i));
+ return knows (i) ? (int) values[i] : -1;
+ }
+
+ // Value >= 0 of the rno-th reg[n] or -1 if unknown.
+ int64_t value (int rno, int n, bool strict = true) const
+ {
+ gcc_assert (n <= 4);
+ gcc_checking_assert (gpr_regno_p (rno, n));
+ if (! knows (rno, n))
+ {
+ if (! strict)
+ return -1;
+ gcc_unreachable ();
+ }
+
+ uint64_t val = 0;
+ for (int r = rno + n - 1; r >= rno; --r)
+ val = 256 * val + values[r];
+
+ return val;
+ }
+
+ void set_known (int r, bool kno = true)
+ {
+ gcc_checking_assert (gpr_regno_p (r));
+ known = kno
+ ? known | (1u << r)
+ : known & ~(1u << r);
+ }
+
+ void set_unknown (int r)
+ {
+ gcc_checking_assert (gpr_regno_p (r));
+ set_known (r, false);
+ }
+
+ int n_known () const
+ {
+ return popcount_hwi (known);
+ }
+
+ // Hamming byte distance of R[n] to VAL.
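+ // For example (illustrative): if R24 / R25 are known to hold 0x34 / 0x56,
+ // then hamming (REG_24, 2, 0x1234) is 1:  the low byte matches, the high
+ // byte does not.  Unknown bytes always count as a mismatch.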
+ int hamming (int r, int n, uint64_t val) const
+ {
+ gcc_assert (n <= 8);
+ gcc_checking_assert (gpr_regno_p (r, n));
+
+ int ham = 0;
+ for (int i = 0; i < n; ++i)
+ ham += value (r + i) != (uint8_t) (val >> (8 * i));
+
+ return ham;
+ }
+
+ // Calculate the Hamming byte distance, ignoring regs in IGNORES.
+ int distance_to (const memento_t &that, gprmask_t ignores = 0) const
+ {
+ int d = 0;
+ for (int r = FIRST_GPR; r < REG_32; ++r)
+ if (! (ignores & (1u << r)))
+ d += value (r) != that.value (r);
+ return d;
+ }
+
+ // Return true when *this and THAT are the same, with the only allowed
+ // exceptions as of mask IGNORES.
+ bool equals (const memento_t &that, gprmask_t ignores) const
+ {
+ if ((known & ~ignores) != (that.known & ~ignores))
+ return false;
+
+ for (int r = FIRST_GPR; r < REG_32; ++r)
+ if (! (ignores & (1u << r)))
+ if (value (r) != that.value (r))
+ return false;
+
+ return true;
+ }
+
+ // Return TRUE iff the N_BYTES registers starting at REGNO are known
+ // to contain VAL.
+ bool have_value (int rno, int n_bytes, int val) const
+ {
+ gcc_assert (n_bytes <= 4);
+ for (int i = rno; i < rno + n_bytes; ++i)
+ if (value (i) != (uint8_t) val)
+ return false;
+ else
+ val >>= 8;
+
+ return true;
+ }
+
+ // The regno of a d-reg that has a known value, or 0 if none found.
+ int known_dregno (void) const
+ {
+ const gprmask_t dregs = known & 0xffff0000 & ~memento_t::fixed_regs_mask;
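+ // clz_hwi (1) - clz_hwi (dregs) is the bit position of the most
+ // significant set bit in dregs, i.e. the highest non-fixed d-regno
+ // with a known value.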
+ return dregs ? clz_hwi (1) - clz_hwi (dregs) : 0;
+ }
+
+ // Return a regno for a register that contains value VAL8 and that does
+ // not overlap with the registers mentioned in EXCLUDES. Else return 0.
+ int regno_with_value (uint8_t val8, gprmask_t excludes) const
+ {
+ for (int r = REG_31; r >= FIRST_GPR; --r)
+ if (value (r) == val8
+ && ! (regmask (r, 1) & excludes))
+ return r;
+ return 0;
+ }
+
+ // Return a regno for a 16-bit reg that contains value HI8:LO8 and that does
+ // not overlap with the registers mentioned in EXCLUDES. Else return 0.
+ int reg16_with_value (uint8_t lo8, uint8_t hi8, gprmask_t excludes) const
+ {
+ for (int r = REG_30; r >= FIRST_GPR; r -= 2)
+ if (! (regmask (r, 2) & excludes)
+ && value (r) == lo8
+ && value (r + 1) == hi8)
+ return r;
+ return 0;
+ }
+
+ void operator&= (const HARD_REG_SET &hrs)
+ {
+ known &= elt0_getter<HARD_REG_SET, HARD_REG_ELT_TYPE>::get (hrs);
+ }
+
+ // Coalesce register knowledge about *this and THAT.
+ void coalesce (const memento_t &that)
+ {
+ known &= that.known;
+
+ for (int i = FIRST_GPR; i < REG_32; ++i)
+ if (values[i] != that.values[i])
+ set_unknown (i);
+ }
+
+ void dump (const char *msg = nullptr, FILE *f = dump_file) const
+ {
+ if (f)
+ {
+ msg = msg && msg[0] ? msg : "%s\n";
+ const char *const xs = strstr (msg, "%s");
+ gcc_assert (xs);
+
+ fprintf (f, "%.*s", (int) (xs - msg), msg);
+ fprintf (f, " (%d known): ", n_known ());
+ for (int i = FIRST_GPR; i < REG_32; ++i)
+ if (knows (i))
+ fprintf (f, " r%d=%02x", i, values[i]);
+
+ fprintf (f, "%s", xs + strlen ("%s"));
+ }
+ }
+}; // memento_t
+
+
+// In avr-fuse-move, a possible step towards an optimal code sequence
+// to load a compile-time constant.  A ply_t represents one or two
+// instructions, but there is no strict 1-to-1 correspondence between
+// ply_ts and insns:  a sequence of ply_ts can be mapped to a sequence
+// of insns, where two or more ply_ts may map to a single insn and
+// vice versa.
+
+struct ply_t
+{
+ // The destination register with .size in { 1, 2 }.
+ int regno;
+ int size;
+
+ // The performed operation where .arg represents an optional source operand.
+ // .code may be one of: SET (ldi, clr, ldi+mov), REG (mov, movw), NEG (neg),
+ // NOT (com), PRE_INC (inc), PRE_DEC (dec), ROTATE (swap), ASHIFT (lsl),
+ // LSHIFTRT (lsr), ASHIFTRT (asr), PLUS (add), MINUS (sub), AND (and),
+ // IOR (or), XOR (eor), SS_PLUS (adiw, sbiw), MOD (set+bld, clt+bld, bld).
+ rtx_code code;
+ int arg;
+
+ // Code size in terms of words / instructions.  Extra costs for, say,
+ // a CLT prior to a sequence of BLDs, are added to the first such ply_t.
+ int cost;
+
+ // We only consider ply_ts that reduce the Hamming distance by 0, 1 or 2.
+ // There are exotic cases where the Hamming distance temporarily increases,
+ // but we don't consider them.  (They may fall out of the algorithm anyway,
+ // for example when a "set_some" insn is used that restores its scratch.)
+ int dhamming = 1;
+
+ // Whether this is a SET that's intended for insn "set_some"'s payload.
+ bool in_set_some = false;
+
+ // 0 or an upper scratch register.  One is needed for SETs of a lower reg.
+ // SETs in a set_some don't need a scratch.
+ int scratch = 0;
+
+ // Statistics.
+ static int n_ply_ts;
+ static int max_n_ply_ts;
+
+ gprmask_t mask_dest () const
+ {
+ return regmask (regno, size);
+ }
+
+ gprmask_t mask_src () const
+ {
+ if (code == SET)
+ return 0;
+ else if (code == REG)
+ return regmask (arg, size);
+ else if (code == PLUS || code == MINUS || code == AND
+ || code == IOR || code == XOR)
+ return regmask (arg, size) | mask_dest ();
+ else
+ return mask_dest ();
+ }
+
+ bool is_movw () const
+ {
+ return size == 2 && code == REG;
+ }
+
+ bool is_adiw () const
+ {
+ return size == 2 && code == SS_PLUS;
+ }
+
+ bool is_bld () const
+ {
+ return code == MOD;
+ }
+
+ // A BLD setting one bit.
+ bool is_setbld () const
+ {
+ return is_bld () && popcount_hwi (arg) == 1;
+ }
+
+ // A BLD clearing one bit.
+ bool is_cltbld () const
+ {
+ return is_bld () && popcount_hwi (arg) == 7;
+ }
+
+ rtx_code bld_rtx_code () const
+ {
+ return select<rtx_code>()
+ : is_setbld () ? IOR
+ : is_cltbld () ? AND
+ : UNKNOWN;
+ }
+
+ // Is *P a BLD of the same kind?
+ bool is_same_bld (const ply_t *p) const
+ {
+ gcc_assert (is_bld ());
+ return p && bld_rtx_code () == p->bld_rtx_code ();
+ }
+
+ int bld_bitno () const
+ {
+ gcc_assert (is_bld ());
+ int bit = exact_log2 (popcount_hwi (arg) == 1 ? arg : 0xff ^ arg);
+ gcc_assert (IN_RANGE (bit, 0, 7));
+
+ return bit;
+ }
+
+ bool needs_scratch () const
+ {
+ return code == SET && AVRasm::ldi_needs_scratch (regno, arg);
+ }
+
+ // Return true when *this modifies (changes *AND* uses) the result
+ // generated by *P.
+ bool changes_result_of (const ply_t *p) const
+ {
+ return code != REG && code != SET && (mask_dest() & p->mask_dest());
+ }
+
+ bool overrides (const ply_t *p) const
+ {
+ return code == REG || code == SET
+ ? mask_dest () & p->mask_dest ()
+ : false;
+ }
+
+ bool commutes_with (const ply_t *p, int scratch = 0) const
+ {
+ if (code == SET || p->code == SET)
+ {
+ // SETs will be emitted as a group, where they commute.
+ if (code == SET && p->code == SET)
+ return true;
+
+ // Grant more flexibility to move around expensive SETs.
+ if (! scratch
+ && (needs_scratch () || p->needs_scratch ()))
+ return false;
+ }
+
+ if (is_bld () || p->is_bld ())
+ {
+ // BLD requires a previous SET or CLT, which means that BLDs of the
+ // same kind should occur as a contiguous sequence.  This limits
+ // re-ordering for the purpose of canonicalization of instruction
+ // ordering.
+ return ((is_cltbld () && p->is_cltbld ())
+ || (is_setbld () && p->is_setbld ()));
+ }
+
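+ // Apart from that, two plies commute iff they operate on disjoint
+ // registers and neither of them touches the scratch reg (if any).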
+ gprmask_t msrc = 1u << scratch;
+ gprmask_t m1 = mask_dest() | mask_src();
+ gprmask_t m2 = p->mask_dest() | p->mask_src();
+ return (m1 & m2) == 0 && ((m1 | m2) & msrc) == 0;
+ }
+
+ // Expected insn name; used in dumps.
+ const char *insn_name () const
+ {
+ if (code == SET)
+ return select<const char *>()
+ : in_set_some ? "set_some"
+ : scratch && needs_scratch () ? "*reload_inqi"
+ : "movqi_insn";
+
+ return "???";
+ }
+
+ void dump (int level = 0, FILE *f = dump_file) const
+ {
+ if (f)
+ {
+ if (level)
+ avr_fdump (f, ";; .%d ply_t R%d[%d] = %C", level, regno, size, code);
+ else
+ avr_fdump (f, ";; ply_t R%d[%d] = %C", regno, size, code);
+ if (code == REG || is_adiw ())
+ fprintf (f, " %d", arg);
+ else if (code == PLUS || code == MINUS || code == AND
+ || code == IOR || code == XOR)
+ fprintf (f, " R%d", arg);
+ else if (is_setbld ())
+ fprintf (f, " BLD |= 0x%02x", arg);
+ else if (is_cltbld ())
+ fprintf (f, " BLD &= 0x%02x", arg);
+ else
+ fprintf (f, " 0x%x = %d", arg, arg);
+
+ const char *const name = insn_name ();
+ fprintf (f, ", cost=%d, dhamm=%d", cost, dhamming);
+ if (name && name[0] != '?')
+ fprintf (f, ", \"%s\"", name);
+ fprintf (f, "\n");
+ }
+ }
+
+ // Helper for dump_plys: Value of the destination.
+ int dest_value (const memento_t &memo) const
+ {
+ return memo.value (regno, size);
+ }
+
+ // Helper for dump_plys: Value of 1st source arg provided it is a register.
+ int src1_value (const memento_t &memo) const
+ {
+ int rsrc = regno;
+
+ switch (code)
+ {
+ default:
+ return -1;
+
+ case REG:
+ gcc_assert (size == 1 || size == 2);
+ rsrc = arg;
+ break;
+
+ case SS_PLUS:
+ gcc_assert (size == 2);
+ break;
+
+ case NEG: case NOT: case PRE_DEC: case PRE_INC:
+ case ASHIFT: case LSHIFTRT: case ASHIFTRT: case ROTATE:
+ case AND: case IOR: case XOR: case MOD:
+ case PLUS: case MINUS:
+ gcc_assert (size == 1);
+ break;
+ }
+
+ return memo.value (rsrc, size);
+ }
+
+ // Helper for dump_plys: Value of 2nd source argument.
+ int src2_value (const memento_t &memo) const
+ {
+ switch (code)
+ {
+ default:
+ break;
+
+ case AND: case IOR: case XOR:
+ case PLUS: case MINUS:
+ gcc_assert (size == 1);
+
+ return memo.value (arg, 1);
+ }
+
+ return -1;
+ }
+
+ // Dumping a solution (or parts of it) is tedious because displaying
+ // the specific action of a ply_t requires the GPR state prior to it.
+ static void dump_plys (FILE *f, int level, int len,
+ const ply_t *const ps[], const memento_t &m0)
+ {
+ if (f)
+ {
+ memento_t memo = m0;
+
+ for (int i = 0; i < len; ++i)
+ ps[i]->dump (level, memo, f);
+ }
+ }
+
+ void dump (int level, memento_t &memo, FILE *f = dump_file) const
+ {
+ if (! f)
+ return;
+
+ const ply_t &p = *this;
+
+ // Keep track of chars in the current line for neat alignment.
+ int cs = level > 0
+ ? fprintf (f, ";; .%d ", level)
+ : fprintf (f, ";; ");
+ cs += fprintf (f, "ply_t %-4s R%d[%d] = ", p.mnemonic (), p.regno, p.size);
+
+ const int x = p.src1_value (memo);
+ const int y = p.src2_value (memo);
+
+ memo.apply (p);
+
+ const int z = p.dest_value (memo);
+
+ switch (p.code)
+ {
+ default:
+ fprintf (f, "%s ???", rtx_name[p.code]);
+ gcc_unreachable ();
+ break;
+
+ case REG:
+ cs += fprintf (f, "R%d = 0x%0*x", p.arg, 2 * p.size, x);
+ break;
+
+ case SET:
+ cs += fprintf (f, "0x%02x = %d, \"%s\"", p.arg, p.arg, insn_name ());
+ break;
+
+ case PRE_DEC: case PRE_INC:
+ case ASHIFT: case LSHIFTRT: case ASHIFTRT: case ROTATE:
+ cs += fprintf (f, "R%d %s = 0x%02x = 0x%02x %s",
+ p.regno, p.op_str (), z, x, p.op_str ());
+ break;
+
+ case NEG: case NOT:
+ cs += fprintf (f, "%sR%d = 0x%02x = %s0x%02x",
+ p.op_str (), p.regno, z, p.op_str (), x);
+ break;
+
+ case PLUS: case MINUS:
+ case AND: case IOR: case XOR:
+ cs += fprintf (f, "R%d %s R%d = 0x%02x = 0x%02x %s 0x%02x",
+ p.regno, p.op_str (), p.arg, z, x, p.op_str (), y);
+ break;
+
+ case SS_PLUS: // ADIW / SBIW
+ {
+ int arg = (int16_t) p.arg;
+ char op = arg < 0 ? '-' : '+';
+ cs += fprintf (f, "R%d %c %d = 0x%04x = 0x%04x %c %d", p.regno,
+ op, std::abs (arg), z, x, op, std::abs (arg));
+ }
+ break;
+
+ case MOD: // BLD
+ {
+ const char opc = "&|" [p.is_setbld ()];
+ cs += fprintf (f, "R%d %c 0x%02x = 0x%02x = 0x%02x %c bit%d",
+ p.regno, opc, p.arg, z, x, opc, p.bld_bitno ());
+ }
+ break;
+ }
+
+ cs += fprintf (f, ", ");
+
+ while (cs++ < 56)
+ fputc (' ', f);
+
+ fprintf (f, "cost=%d, dhamm=%d\n", p.cost, p.dhamming);
+ }
+
+ // AVR mnemonic; used in dumps.
+ const char *mnemonic () const
+ {
+ if (is_bld ())
+ {
+ static char s_bld[] = "BLD*";
+ s_bld[3] = '0' + bld_bitno ();
+ return s_bld;
+ }
+
+ return select<const char *>()
+ : code == LSHIFTRT ? "LSR"
+ : code == ASHIFTRT ? "ASR"
+ : code == ASHIFT ? "LSL"
+ : code == ROTATE ? "SWAP"
+ : code == PRE_DEC ? "DEC"
+ : code == PRE_INC ? "INC"
+ : code == MINUS ? "SUB"
+ : code == PLUS ? "ADD"
+ : code == NEG ? "NEG"
+ : code == NOT ? "COM"
+ : code == AND ? "AND"
+ : code == IOR ? "OR"
+ : code == XOR ? "EOR"
+ : code == REG ? size == 1 ? "MOV" : "MOVW"
+ : code == SET ? arg == 0 ? "CLR" : "LDI"
+ : code == SS_PLUS ? arg < 0 ? "SBIW" : "ADIW"
+ : rtx_name[code];
+ }
+
+ // Return a string of length 1 for CODE, or "?".
+ static const char *code_name_str1 (rtx_code code)
+ {
+ return select<const char *>()
+ : code == NEG ? "-"
+ : code == NOT ? "~"
+ : code == AND ? "&"
+ : code == IOR ? "|"
+ : code == XOR ? "^"
+ : code == PLUS ? "+"
+ : code == MINUS ? "-"
+ : "?";
+ }
+
+ // Short semantics representation used in dumps.
+ const char *op_str () const
+ {
+ return select<const char *>()
+ : code == LSHIFTRT ? ">> 1"
+ : code == ASHIFTRT ? ">> 1"
+ : code == ASHIFT ? "<< 1"
+ : code == ROTATE ? ">>> 4"
+ : code == PRE_DEC ? "- 1"
+ : code == PRE_INC ? "+ 1"
+ : code == SS_PLUS ? "+"
+ : *(ply_t::code_name_str1 (code)) != '?' ? ply_t::code_name_str1 (code)
+ : rtx_name[code];
+ }
+}; // ply_t
+
+
+// A set of ply_t's.  We prefer std::array (with some expected upper
+// bound for the number of ply_t's as generated by bbinfo_t::get_plies())
+// over std::vector. That way, all plies_t are only allocated once as
+// elements of avr_pass_fuse_move::BInfo.
+
+struct plies_t
+{
+ int n_plies;
+ std::array<ply_t, 50> plies;
+
+ int emit_insns (const insninfo_t &, const memento_t &) const;
+ int emit_sets (const insninfo_t&, int &n_insns, const memento_t&, int) const;
+ int emit_blds (const insninfo_t &, int &n_insns, int i0) const;
+ void add_plies_movw (int regno, int size, uint64_t, int, const memento_t &);
+
+ void reset ()
+ {
+ n_plies = 0;
+ }
+
+ void add (const ply_t &ply)
+ {
+ if (n_plies < (int) plies.size ())
+ {
+ plies[n_plies++] = ply;
+ ply_t::n_ply_ts += 1;
+ }
+ else
+ avr_dump (";; WARNING: plies_t is full\n");
+ }
+
+ void add (ply_t, const ply_t *prev, const memento_t &, bool maybe_set_some);
+
+ plies_t () {}
+
+ plies_t (int n, const ply_t *const ps[])
+ {
+ gcc_assert (n <= (int) plies.size ());
+ for (int i = 0; i < n; ++i)
+ plies[i] = *ps[i];
+ n_plies = n;
+ }
+
+ static int max_n_plies;
+}; // plies_t
+
+
+// An 8-bit value leaf of absint_byte_t.
+// May be known to equal an 8-bit value.
+// May be known to equal the content of an 8-bit GPR.
+struct absint_val_t
+{
+ int16_t val8 = -1;
+ int8_t regno = 0;
+
+ absint_val_t () {}
+
+ bool knows_val8 () const
+ {
+ gcc_assert (IN_RANGE (val8, -1, 0xff));
+ return val8 >= 0;
+ }
+
+ bool knows_regno () const
+ {
+ gcc_assert (IN_RANGE (regno, 0, REG_31));
+ return regno;
+ }
+
+ bool clueless () const
+ {
+ return ! knows_val8 () && ! knows_regno ();
+ }
+
+ gprmask_t reg_mask () const
+ {
+ return regno ? regmask (regno, 1) : 0;
+ }
+
+ void dump (FILE *f = dump_file) const
+ {
+ if (f)
+ {
+ if (knows_regno ())
+ fprintf (f, "r%d%s", regno, knows_val8 () ? "=" : "");
+ if (knows_val8 ())
+ fprintf (f, "%02x", val8);
+ else if (! knows_regno ())
+ fprintf (f, "--");
+ }
+ }
+}; // absint_val_t
+
+
+// One byte in AbsInt.
+class absint_byte_t
+{
+ // "SET": the value is .x0.
+ rtx_code code = UNKNOWN;
+ absint_val_t x0;
+ absint_val_t x1;
+
+public:
+
+ const absint_val_t &arg (int i) const
+ {
+ gcc_assert (IN_RANGE (i, 0, arity () - 1));
+ return i == 1 ? x1 : x0;
+ }
+
+ rtx_code get_code () const
+ {
+ return code;
+ }
+
+ absint_byte_t () {}
+
+ absint_byte_t (absint_val_t x)
+ : code(x.clueless () ? UNKNOWN : SET), x0(x)
+ {}
+
+ // new = <code> A0 where CODE is a unary operation.
+ absint_byte_t (rtx_code c, const absint_byte_t &a0)
+ : code(c)
+ {
+ switch (code)
+ {
+ default:
+ gcc_unreachable ();
+
+ case NOT:
+ if (a0.can (CONST_INT))
+ init_val8 (absint_byte_t::eval (code, a0.val8 ()));
+ else if (a0.can (REG))
+ x0 = a0.x0;
+ else if (a0.can (NOT))
+ init_regno (a0.regno ());
+ else
+ code = UNKNOWN;
+ break;
+
+ case SIGN_EXTEND:
+ if (a0.can (CONST_INT))
+ init_val8 (absint_byte_t::eval (code, a0.val8 ()));
+ else if (a0.can (REG))
+ x0 = a0.x0;
+ else
+ code = UNKNOWN;
+ break;
+ }
+ }
+
+ // new = A0 <code> A1 where CODE is a binary operation.
+ absint_byte_t (rtx_code c, const absint_byte_t &a0, const absint_byte_t &a1)
+ : code(c)
+ {
+ gcc_assert (c == AND || c == IOR || c == XOR || code == PLUS);
+
+ if (a1.is_image1 (c))
+ *this = a1;
+ else if (a0.is_image1 (c))
+ *this = a0;
+ else if (a1.is_neutral (c))
+ *this = a0;
+ else if (a0.is_neutral (c))
+ *this = a1;
+ else if (a0.can (CONST_INT) && a1.can (CONST_INT))
+ init_val8 (absint_byte_t::eval (code, a0.val8 (), a1.val8 ()));
+ else if (a0.can (REG) && a1.can (CONST_INT))
+ {
+ x0 = a0.x0;
+ x1 = a1.x0;
+ if (code == XOR && a1.val8 () == 0xff)
+ code = NOT;
+ }
+ else if (a0.can (CONST_INT) && a1.can (REG))
+ {
+ x0 = a1.x0;
+ x1 = a0.x0;
+ if (code == XOR && a0.val8 () == 0xff)
+ code = NOT;
+ }
+ else if (a0.can (REG) && a1.can (REG))
+ {
+ x0.regno = std::min (a0.regno (), a1.regno ());
+ x1.regno = std::max (a0.regno (), a1.regno ());
+ }
+ else
+ code = UNKNOWN;
+ }
+
+ int arity () const
+ {
+ return select<int>()
+ : code == UNKNOWN ? 0
+ : code == SET || code == NOT || code == SIGN_EXTEND ? 1
+ : code == AND || code == IOR || code == XOR || code == PLUS ? 2
+ : bad_case<int> ();
+ }
+
+ // Return a byte with 8 signs according to code EXT.
+ absint_byte_t get_signs (rtx_code ext) const
+ {
+ return select<absint_byte_t>()
+ : ext == ZERO_EXTEND ? absint_byte_t::from_val8 (0)
+ : ext == SIGN_EXTEND ? absint_byte_t (SIGN_EXTEND, *this)
+ : ext == LSHIFTRT ? absint_byte_t::from_val8 (0)
+ : ext == ASHIFTRT ? absint_byte_t (SIGN_EXTEND, *this)
+ : bad_case<absint_byte_t> ();
+ }
+
+ gprmask_t reg_mask () const
+ {
+ return select<gprmask_t>()
+ : code == SET ? x0.reg_mask ()
+ : arity () == 1 ? x0.reg_mask ()
+ : arity () == 2 ? x0.reg_mask () | x1.reg_mask ()
+ : bad_case<gprmask_t> ();
+ }
+
+ bool check () const
+ {
+ return select<bool>()
+ : arity () >= 1 && x0.clueless () ? false
+ : arity () == 2 && x1.clueless () ? false
+ : true;
+ }
+
+ static inline uint8_t eval (rtx_code code, uint8_t x)
+ {
+ return select<int>()
+ : code == NOT ? ~x
+ : code == SIGN_EXTEND ? (x >= 0x80 ? 0xff : 0x00)
+ : bad_case<int> ();
+ }
+
+ static inline uint8_t eval (rtx_code code, uint8_t x, uint8_t y)
+ {
+ return select<int>()
+ : code == AND ? x & y
+ : code == IOR ? x | y
+ : code == XOR ? x ^ y
+ : code == PLUS ? x + y
+ : bad_case<int> ();
+ }
+
+ bool is_neutral (rtx_code c) const
+ {
+ return can (CONST_INT) && val8 () == AVRasm::neutral_val (c);
+ }
+
+ bool is_image1 (rtx_code c) const
+ {
+ return can (CONST_INT) && val8 () == AVRasm::image1_val (c);
+ }
+
+ bool can (rtx_code c) const
+ {
+ if (code == SET)
+ gcc_assert (IN_RANGE (x0.val8, 0, 0xff) || gpr_regno_p (x0.regno));
+
+ if (c == CONST_INT)
+ return code == SET && x0.knows_val8 ();
+ else if (c == REG)
+ return code == SET && x0.knows_regno ();
+ else if (c == VALUE)
+ return code != UNKNOWN;
+ else if (c == UNKNOWN
+ || c == SET || c == NOT || c == SIGN_EXTEND
+ || c == AND || c == IOR || c == XOR || c == PLUS)
+ return code == c;
+
+ gcc_unreachable ();
+ }
+
+ // Return the known byte value in 0...0xff, or -1 if unknown and ! STRICT.
+ int val8 (bool strict = true) const
+ {
+ gcc_assert (! strict || code == SET);
+ gcc_assert (! strict || can (CONST_INT));
+ return can (CONST_INT) ? x0.val8 : -1;
+ }
+
+ int regno (bool strict = true) const
+ {
+ gcc_assert (! strict || code == SET);
+ gcc_assert (! strict || can (REG));
+ return can (REG) ? x0.regno : 0;
+ }
+
+ void init_val8 (int v)
+ {
+ gcc_assert (IN_RANGE (v, 0, 0xff));
+ x0.val8 = v;
+ x0.regno = 0;
+ code = SET;
+ }
+
+ void init_regno (int r)
+ {
+ gcc_assert (gpr_regno_p (r));
+ x0.val8 = -1;
+ x0.regno = r;
+ code = SET;
+ }
+
+ void learn_val8 (int v)
+ {
+ gcc_assert (IN_RANGE (v, 0, 0xff));
+ gcc_assert (code == SET || code == UNKNOWN);
+ x0.val8 = v;
+ code = SET;
+ }
+
+ void learn_regno (int r)
+ {
+ gcc_assert (gpr_regno_p (r));
+ gcc_assert (code == SET || code == UNKNOWN);
+ x0.regno = r;
+ code = SET;
+ }
+
+ static inline absint_byte_t from_val8 (int val, bool strict = true)
+ {
+ gcc_assert (IN_RANGE (val, -1, 0xff));
+ gcc_assert (! strict || val >= 0);
+ absint_byte_t b;
+ if (val >= 0)
+ b.init_val8 (val);
+
+ return b;
+ }
+
+ // When *this is a binary operation whose 2nd operand is a known constant,
+ // try to find a GPR that holds the same value (according to MEMO), so
+ // that the result may serve as a new set_src via to_rtx().  Return the
+ // alternative, or an UNKNOWN absint_byte_t when there is none.
+ absint_byte_t find_alternative_binary (const memento_t &memo) const
+ {
+ gprmask_t excludes = x1.knows_regno () ? regmask (x1.regno, 1) : 0;
+ absint_byte_t alt = *this;
+
+ if (arity () == 2
+ && x0.knows_regno ()
+ && x1.knows_val8 ()
+ && (! x1.knows_regno () || x0.regno != x1.regno)
+ && (alt.x1.regno = memo.regno_with_value (x1.val8, excludes)))
+ {
+ if (dump_flags & TDF_FOLDING)
+ {
+ alt.dump (";; AI.alternative AI=[%s]");
+ dump (" can replace AI=[%s]\n");
+ }
+
+ return alt;
+ }
+
+ return absint_byte_t {};
+ }
+
+ rtx to_rtx () const
+ {
+ if (arity () == 2)
+ {
+ gcc_assert (x0.knows_regno ());
+ gcc_assert (x1.knows_regno ());
+ rtx op0 = gen_rtx_REG (QImode, x0.regno);
+ rtx op1 = gen_rtx_REG (QImode, x1.regno);
+ return gen_rtx_fmt_ee (code, QImode, op0, op1);
+ }
+
+ gcc_unreachable ();
+ }
+
+ void dump (const char *msg = nullptr, FILE *f = dump_file) const
+ {
+ if (f)
+ {
+ msg = msg && msg[0] ? msg : "%s";
+ const char *const xs = strstr (msg, "%s");
+ gcc_assert (xs);
+
+ fprintf (f, "%.*s", (int) (xs - msg), msg);
+ if (code == UNKNOWN)
+ fprintf (f, "--");
+ else if (code == SET)
+ x0.dump (f);
+ else if (code == NOT)
+ {
+ fprintf (f, "~");
+ x0.dump (f);
+ }
+ else if (code == SIGN_EXTEND)
+ {
+ fprintf (f, "signs(");
+ x0.dump (f);
+ fprintf (f, ")");
+ }
+ else if (arity () == 2)
+ {
+ x0.dump (f);
+ fprintf (f, "%s", ply_t::code_name_str1 (code));
+ x1.dump (f);
+ }
+ else
+ gcc_unreachable ();
+
+ fprintf (f, "%s", xs + strlen ("%s"));
+ }
+ }
+}; // absint_byte_t
+
+
+struct bbinfo_t
+{
+ // All BBs of the current function.
+ static bbinfo_t *bb_info;
+
+ // bbinfo_t holds additional information for this basic block.
+ basic_block bb;
+
+ // Known values held in GPRs.
+ memento_t regs;
+
+ // Represents the "time" when the value was set. When we have the choice
+ // between several registers to copy from, we use the first (oldest) set.
+ // This can avoid copy-chains.
+ std::array<int, REG_32> ticks;
+ static int tick;
+
+ // Whether the corresponding BB has been processed and optimized.
+ bool done;
+
+ static void optimize_one_function (function *func);
+ void optimize_one_block (bool &changed);
+ void enter ();
+ void leave ();
+
+ // Used when finding a best plies_t. This object is only needed
+ // once and can be shared between all basic blocks.
+ struct find_plies_data_t
+ {
+ // These are used by [run_]find_plies()
+ const ply_t *ply_stack[N_BEST_PLYS];
+ plies_t plies[N_BEST_PLYS];
+ plies_t solution;
+ // Register knowledge at start of recursive algo.
+ memento_t regs0;
+ int max_ply_cost;
+ int movmode_cost;
+ int n_best_plys;
+ int n_get_plies; // Only for bookkeeping / statistics.
+ }; // find_plies_data_t
+
+ static find_plies_data_t *fpd;
+ static bool try_fuse_p;
+ static bool try_bin_arg1_p;
+ static bool try_simplify_p;
+ static bool try_split_ldi_p;
+ static bool try_split_any_p;
+ static bool use_arith_p;
+ static bool use_set_some_p;
+
+ static void get_plies (plies_t &, const insninfo_t &, const memento_t &,
+ const ply_t *);
+ static void find_plies (int depth, const insninfo_t &, const memento_t &);
+ bool run_find_plies (const insninfo_t &, const memento_t &) const;
+}; // bbinfo_t
#define IN_TARGET_CODE 1
+#define INCLUDE_ARRAY
#define INCLUDE_VECTOR
#include "config.h"
#include "system.h"
#include "cfgrtl.h"
#include "context.h"
#include "tree-pass.h"
+#include "insn-attr.h"
+
+
+#define CONST_INT_OR_FIXED_P(X) (CONST_INT_P (X) || CONST_FIXED_P (X))
+
+#define FIRST_GPR (AVR_TINY ? REG_18 : REG_2)
+
+namespace
+{
+
+/////////////////////////////////////////////////////////////////////////////
+// Before we start with the actual code, introduce some helpers that are
+// quite generic, though up to now only avr-fuse-add makes use of them.
+
+/* Get the next / previous NONDEBUG_INSN_P after INSN in basic block BB.
+ This assumes we are in CFG layout mode so that BLOCK_FOR_INSN()
+ can be used. */
+
+static rtx_insn *
+next_nondebug_insn_bb (basic_block bb, rtx_insn *insn, bool forward = true)
+{
+ while (insn)
+ {
+ insn = forward ? NEXT_INSN (insn) : PREV_INSN (insn);
+
+ if (insn && NONDEBUG_INSN_P (insn))
+ return BLOCK_FOR_INSN (insn) == bb ? insn : nullptr;
+ }
+
+ return insn;
+}
+
+static rtx_insn *
+prev_nondebug_insn_bb (basic_block bb, rtx_insn *insn)
+{
+ return next_nondebug_insn_bb (bb, insn, false);
+}
+
+
+/* Like `single_set' with the addition that it sets REGNO_SCRATCH when the
+ insn is a single_set with a QImode scratch register. When the insn has
+ no QImode scratch or just a scratch:QI, then set REGNO_SCRATCH = 0.
+ The assumption is that the function is only used after the splits for
+ REG_CC so that the pattern is a parallel with 2 elements (INSN has no
+ scratch operand), or 3 elements (INSN does have a scratch operand). */
+
+static rtx
+single_set_with_scratch (rtx_insn *insn, int ®no_scratch)
+{
+ regno_scratch = 0;
+
+ if (! INSN_P (insn))
+ return NULL_RTX;
+
+ rtx set, clo, reg, pat = PATTERN (insn);
+
+ // Search for SET + CLOBBER(QI) + CLOBBER(CC).
+ if (GET_CODE (pat) == PARALLEL
+ && XVECLEN (pat, 0) == 3
+ && GET_CODE (set = XVECEXP (pat, 0, 0)) == SET
+ // At this pass, all insns are endowed with clobber(CC).
+ && GET_CODE (clo = XVECEXP (pat, 0, 2)) == CLOBBER
+ && GET_MODE (XEXP (clo, 0)) == CCmode
+ && GET_CODE (clo = XVECEXP (pat, 0, 1)) == CLOBBER
+ && REG_P (reg = XEXP (clo, 0))
+ && GET_MODE (reg) == QImode)
+ {
+ regno_scratch = REGNO (reg);
+ return set;
+ }
+
+ return single_set (insn);
+}
+
+// Emit pattern PAT, and ICE when the insn is not valid / not recognized.
+
+static rtx_insn *
+emit_valid_insn (rtx pat)
+{
+ rtx_insn *insn = emit_insn (pat);
+
+ if (! valid_insn_p (insn)) // Also runs recog().
+ fatal_insn ("emit unrecognizable insn", insn);
+
+ return insn;
+}
+
+// Emit a single_set with an optional scratch operand. This function
+// asserts that the new insn is valid and recognized.
+
+static rtx_insn *
+emit_valid_move_clobbercc (rtx dest, rtx src, rtx scratch = NULL_RTX)
+{
+ rtx pat = scratch
+ ? gen_gen_move_clobbercc_scratch (dest, src, scratch)
+ : gen_gen_move_clobbercc (dest, src);
+
+ return emit_valid_insn (pat);
+}
+
+// One bit for each GPR in REG_0 ... REG_31.
+using gprmask_t = uint32_t;
+
+// True when this is a valid GPR number for ordinary code, e.g.
+// registers wider than one byte have to start at an even regno.
+// TMP_REG and ZERO_REG are not considered valid, even though
+// the C source can use register vars with them.
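+// For example (illustrative): gpr_regno_p (REG_24, 2) is true, whereas
+// gpr_regno_p (REG_24 + 1, 2) is false (odd start) and
+// gpr_regno_p (REG_24, 5) is false (unsupported size).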
+static inline bool
+gpr_regno_p (int regno, int n_bytes = 1)
+{
+ return (IN_RANGE (regno, FIRST_GPR, REG_32 - n_bytes)
+ // Size in { 1, 2, 3, 4, 8 } bytes.
+ && ((1u << n_bytes) & 0x11e)
+ // Registers >= 2 bytes start at an even regno.
+ && (n_bytes == 1 || regno % 2 == 0));
+}
+
+// There are cases where the C source defines local reg vars
+// for R1 etc. The assumption is that this is handled before
+// calling this function, e.g. by skipping code when a register
+// overlaps with a fixed register.
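+// For example (illustrative): regmask (REG_18, 4) == 0xf << REG_18
+// == 0x003c0000, i.e. one bit for each of R18 ... R21.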
+static inline gprmask_t
+regmask (int regno, int size)
+{
+ gcc_checking_assert (gpr_regno_p (regno, size));
+ gprmask_t bits = (1u << size) - 1;
+
+ return bits << regno;
+}
+
+// Mask for hard register X that's some GPR, including fixed regs like R0.
+static gprmask_t
+regmask (rtx x)
+{
+ gcc_assert (REG_P (x));
+ gprmask_t bits = (1u << GET_MODE_SIZE (GET_MODE (x))) - 1;
+
+ return bits << REGNO (x);
+}
+
+
+// Whether X has bits in the range [B0 ... B1]
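+// For example (illustrative): has_bits_in (regmask (REG_24, 2), 16, 31)
+// is true since R24 and R25 lie in the upper half, whereas testing the
+// same mask against [0, 15] yields false.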
+static inline bool
+has_bits_in (gprmask_t x, int b0, int b1)
+{
+ if (b0 > b1 || b0 > 31 || b1 < 0)
+ return false;
+
+ const gprmask_t m = (2u << (b1 - b0)) - 1;
+ return x & (m << b0);
+}
+
+
+template<typename T>
+T bad_case ()
+{
+ gcc_unreachable ();
+}
+
+#define select false ? bad_case
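+
+// The two helpers above allow multi-way selection as an expression
+// (illustrative sketch):
+//
+//   int x = select<int>()
+//     : cond_a ? 1
+//     : cond_b ? 2
+//     : bad_case<int> ();
+//
+// "select<T>()" expands to "false ? bad_case<T>()", so the construct is
+// just a chain of conditional operators, and bad_case<T>() ICEs when no
+// condition holds.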
+
+
+namespace AVRasm
+{
+ // Returns true when a scratch reg is needed in order to get the
+ // (signed or unsigned) 8-bit value VAL into some GPR.
+ // When it's about costs rather than the sheer requirement for a
+ // scratch, see also AVRasm::constant_cost.
+ static inline bool ldi_needs_scratch (int regno, int val)
+ {
+ return regno < REG_16 && IN_RANGE (val & 0xff, 2, 254);
+ }
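+
+ // For example (illustrative): ldi_needs_scratch (REG_14, 0x80) is true,
+ // whereas ldi_needs_scratch (REG_16, 0x80) is false (LDI works on R16
+ // and up), and ldi_needs_scratch (REG_14, 0) is false, presumably
+ // because 0, 1 and 255 can be produced by CLR and INC / DEC.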
+
+ // Return a byte value x >= 0 such that x <code> y == y for all y, or -1.
+ static inline int neutral_val (rtx_code code)
+ {
+ return select<int>()
+ : code == AND ? 0xff
+ : code == IOR ? 0x00
+ : code == XOR ? 0x00
+ : code == PLUS ? 0
+ : -1;
+ }
+
+ // When there exists a value x such that the image of the function
+ // y -> y <code> x has order 1, then return that x. Else return -1.
+ static inline int image1_val (rtx_code code)
+ {
+ return select<int>()
+ : code == AND ? 0x00
+ : code == IOR ? 0xff
+ : -1;
+ }
+
+ // Cost of 8-bit binary operation x o= VAL provided a scratch is
+ // available as needed.
+ static int constant_cost (rtx_code code, int regno, uint8_t val)
+ {
+ bool needs_scratch_p = select<bool>()
+ : code == PLUS ? regno < REG_16 && val != 1 && val != 0xff
+ : code == XOR ? val != 0xff && (regno < REG_16 || val != 0x80)
+ : code == IOR ? regno < REG_16
+ : code == AND ? regno < REG_16 && val != 0
+ : code == SET ? regno < REG_16 && val != 0
+ : bad_case<bool> ();
+
+ return val == AVRasm::neutral_val (code)
+ ? 0
+ : 1 + needs_scratch_p;
+ }
+}; // AVRasm
+
+
+// Returns the mode mask for a mode size of SIZE bytes.
+static uint64_t size_to_mask (int size)
+{
+ return ((uint64_t) 2 << (8 * size - 1)) - 1;
+}
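+
+// For example (illustrative): size_to_mask (2) == 0xffff.  The
+// "(uint64_t) 2 << (8 * size - 1)" form keeps the shift count at 63 or
+// less, so that size_to_mask (8) yields all-ones without shifting by 64.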
+
+// Return the scalar int mode for a modesize of 1, 2, 3, 4 or 8 bytes.
+static machine_mode size_to_mode (int size)
+{
+ return select<machine_mode>()
+ : size == 1 ? QImode
+ : size == 2 ? HImode
+ : size == 3 ? PSImode
+ : size == 4 ? SImode
+ : size == 8 ? DImode
+ : bad_case<machine_mode> ();
+}
+
+\f
+//////////////////////////////////////////////////////////////////////////////
+// Optimize moves after reload: -mfuse-move=<0,23>
+
+/* The purpose of this pass is to perform optimizations after reload
+ like the following ones:
+
+ Without optimization | With optimization
+ ==================== | =================
+
+ long long fn_zero (void) (1)
+ {
+ return 0;
+ }
+
+ ldi r18, 0 ; movqi_insn | ldi r18, 0 ; movqi_insn
+ ldi r19, 0 ; movqi_insn | ldi r19, 0 ; movqi_insn
+ ldi r20, 0 ; movqi_insn | movw r20, r18 ; *movhi
+ ldi r21, 0 ; movqi_insn |
+ ldi r22, 0 ; movqi_insn | movw r22, r18 ; *movhi
+ ldi r23, 0 ; movqi_insn |
+ ldi r24, 0 ; movqi_insn | movw r24, r18 ; *movhi
+ ldi r25, 0 ; movqi_insn |
+ ret | ret
+
+ int fn_eq0 (char c) (2)
+ {
+ return c == 0;
+ }
+
+ mov r18, r24 ; movqi_insn | mov r18, r24 ; movqi_insn
+ ldi r24, 1 ; *movhi | ldi r24, 1 ; *movhi
+ ldi r25, 0 | ldi r25, 0
+ cp r18, ZERO ; cmpqi3 | cpse r18, ZERO ; peephole
+ breq .+4 ; branch |
+ ldi r24, 0 ; *movhi | ldi r24, 0 ; movqi_insn
+ ldi r25, 0 |
+ ret | ret
+
+ int a, b; (3)
+
+ void fn_store_ab (void)
+ {
+ a = 1;
+ b = -1;
+ }
+
+ ldi r24, 1 ; *movhi | ldi r24, 1 ; *movhi
+ ldi r25, 0 | ldi r25, 0
+ sts a+1, r25 ; *movhi | sts a+1, r25 ; *movhi
+ sts a, r24 | sts a, r24
+ ldi r24, -1 ; *movhi | sbiw r24, 2 ; *addhi3
+ ldi r25, -1 |
+ sts b+1, r25 ; *movhi | sts b+1, r25 ; *movhi
+ sts b, r24 | sts b, r24
+ ret | ret
+
+ unsigned fn_crc (unsigned x, unsigned y) (4)
+ {
+ for (char i = 8; i--; x <<= 1)
+ y ^= (x ^ y) & 0x80 ? 79U : 0U;
+ return y;
+ }
+
+ movw r18, r24 ; *movhi | movw r18, r24 ; *movhi
+ movw r24, r22 ; *movhi | movw r24, r22 ; *movhi
+ ldi r22, 8 ; movqi_insn | ldi r22, 8 ; movqi_insn
+ .L13: | .L13:
+ movw r30, r18 ; *movhi | movw r30, r18 ; *movhi
+ eor r30, r24 ; *xorqi3 | eor r30, r24 ; *xorqi3
+ eor r31, r25 ; *xorqi3 | eor r31, r25 ; *xorqi3
+ mov r20, r30 ; *andhi3 | mov r20, r30 ; *andqi3
+ andi r20, 1<<7 | andi r20, 1<<7
+ clr r21 |
+ sbrs r30, 7 ; *sbrx_branchhi | sbrc r30, 7 ; *sbrx_branchhi
+ rjmp .+4 |
+ ldi r20, 79 ; movqi_insn | ldi r20, 79 ; movqi_insn
+ ldi r21, 0 ; movqi_insn |
+ eor r24, r20 ; *xorqi3 | eor r24, r20 ; *xorqi3
+ eor r25, r21 ; *xorqi3 |
+ lsl r18 ; *ashlhi3_const | lsl r18 ; *ashlhi3_const
+ rol r19 | rol r19
+ subi r22, 1 ; *op8.for.cczn.p| subi r22, 1 ; *op8.for.cczn.plus
+ brne .L13 ; branch_ZN | brne .L13 ; branch_ZN
+ ret | ret
+
+ #define SPDR (*(uint8_t volatile*) 0x2c) (5)
+
+ void fn_PR49807 (long big)
+ {
+ SPDR = big >> 24;
+ SPDR = big >> 16;
+ SPDR = big >> 8;
+ SPDR = big;
+ }
+
+ movw r20, r22 ; *movhi | movw r20, r22 ; *movhi
+ movw r22, r24 ; *movhi | movw r22, r24 ; *movhi
+ mov r24, r23 ; *ashrsi3_const |
+ clr r27 |
+ sbrc r24,7 |
+ com r27 |
+ mov r25, r27 |
+ mov r26, r27 |
+ out 0xc, r24 ; movqi_insn | out 0xc, r23 ; movqi_insn
+ movw r24, r22 ; *ashrsi3_const |
+ clr r27 |
+ sbrc r25, 7 |
+ com r27 |
+ mov r26, r27 |
+ out 0xc, r24 ; movqi_insn | out 0xc, r24 ; movqi_insn
+ clr r27 ; *ashrsi3_const |
+ sbrc r23, 7 |
+ dec r27 |
+ mov r26, r23 |
+ mov r25, r22 |
+ mov r24, r21 |
+ out 0xc, r24 ; movqi_insn | out 0xc, r21 ; movqi_insn
+ out 0xc, r20 ; movqi_insn | out 0xc, r20 ; movqi_insn
+ ret | ret
+
+ The insns of each basic block are traversed from first to last.
+ Each insn is optimized on its own, or may be fused with the
+ previous insn like in example (1).
+ As the insns are traversed, memento_t keeps track of known values
+ held in the GPRs (general purpose registers) R2 ... R31 by simulating
+ the effect of the current insn in memento_t.apply_insn().
+ The basic blocks are traversed in reverse post order so as to
+ maximize the chance that GPRs from all preceding blocks are known,
+ which is the case in example (2).  The traversal of the basic blocks
+ is performed by bbinfo_t.optimize_one_function().
+ bbinfo_t.optimize_one_block() traverses the insns of a BB and tries
+ the following optimizations:
+
+ bbinfo_t::try_fuse_p
+ Try to fuse two 8-bit insns to one MOVW like in (1).
+
+ bbinfo_t::try_simplify_p
+ Only perform the simplest optimizations that don't impede the
+ traceability of the generated code, which are:
+ - Transform operations like Rn = Rn=0 ^ Rm to Rn = Rm.
+ - Remove insns that are no-ops like Rn = Rn ^ Rm=0.
+
+ bbinfo_t::try_bin_arg1_p
+ In insns like EOR Rn, arg1 where arg1 is known or is a reg that
+ dies in the insn, *and* there is a different register Rm that's
+ known to contain the same value, then arg1 is replaced with Rm.
+
+ bbinfo_t::try_split_ldi_p
+ Tries to simplify loads of constants like in examples (1), (2) and (3).
+ It may use arithmetic instructions like AND with registers that
+ are holding known values when this is profitable.
+
+ bbinfo_t::try_split_any_p
+ Split all insns where the operation can be performed on individual
+ bytes, like andsi3. In example (4) the andhi3 can be optimized
+ to an andqi3.
+*/
+
+
+// A basic block with additional information like the GPR state.
+// The main entry point for the pass. Runs various strategies
+// like try_fuse, try_simplify, try_bin_arg1, try_split_ldi, try_split_any
+// depending on -mfuse-move=<0,23>.
+struct bbinfo_t;
+
+// Additional insn information on a REG = non-memory single_set insn
+// for quick access. Only valid when the m_size member is non-zero.
+struct insninfo_t;
+
+// Helper classes with data needed by the try_xxx optimizers.
+struct optimize_data_t;
+struct insn_optimize_data_t;
+
+// Records which GPRs R0 ... R31 are holding a known value,
+// and which values these are.
+struct memento_t;
+
+// Abstract Interpretation of expressions.
+// absint_val_t represents an 8-bit value that equals the content of
+// some GPR, or equals some known value (or both, or none of them).
+// absint_byte_t represents an 8-bit entity that is equivalent to
+// an absint_val_t, or is equivalent to some (unary or binary) operation
+// on absint_val_t's like NOT, AND, IOR, XOR that operate bit-wise (and
+// hence also byte-wise).
+// absint_t represents an array of absint_byte_t's. When some insn is applied
+// to a GPR state, then memento_t.apply_insn() represents the RHS of
+// a single_set as an absint_t, and then applies that result to the GPRs.
+// For example, in int y = x << 8 the representation is x = [r25; r24]
+// and RHS = [r24; 00].
+struct absint_val_t;
+class absint_byte_t;
+struct absint_t;
+
+// A ply_t is a potential step towards an optimal sequence to load a constant
+// value into a multi-byte register. A ply_t loosely relates to one AVR
+// instruction, but it may also represent a sequence of instructions.
+// For example, loading a constant into a lower register when no scratch reg
+// is available may take up to 4 instructions. There is no 1:1 correspondence
+// to insns, either.
+// try_split_ldi determines the best sequence of ply_t's by means of a
+// brute-force search with tree pruning: It's much too complicated to
+// construct a good sequence directly, but there are many conditions that a
+// good sequence will satisfy, implemented in bbinfo_t::find_plies.
+struct ply_t;
+struct plies_t;
+
+// The maximal number of ply_t's in any conceivable optimal solution
+// that is better than what a vanilla mov<mode> generates.
+// This is 6 for modes <= 4 and 8 for modes == 8.
+static constexpr int N_BEST_PLYS = 8;
+
+#define FUSE_MOVE_MAX_MODESIZE 8
+
+#include "avr-passes-fuse-move.h"
+
+// Static members.
+
+gprmask_t memento_t::fixed_regs_mask;
+
+// Statistics.
+int ply_t::n_ply_ts;
+int ply_t::max_n_ply_ts;
+int plies_t::max_n_plies;
+
+bbinfo_t *bbinfo_t::bb_info;
+int bbinfo_t::tick;
+bbinfo_t::find_plies_data_t *bbinfo_t::fpd;
+
+// Which optimizations should be performed.
+bool bbinfo_t::try_fuse_p;
+bool bbinfo_t::try_bin_arg1_p;
+bool bbinfo_t::try_split_ldi_p;
+bool bbinfo_t::try_split_any_p;
+bool bbinfo_t::try_simplify_p;
+bool bbinfo_t::use_arith_p;
+bool bbinfo_t::use_set_some_p;
+
+
+// Abstract Interpretation of expressions.
+// A bunch of absint_byte_t's.
+
+struct absint_t
+{
+ static constexpr int eq_size = FUSE_MOVE_MAX_MODESIZE;
+ std::array<absint_byte_t, eq_size> eq;
+
+ rtx xexp = NULL_RTX;
+ rtx xexp_new = NULL_RTX;
+
+ absint_byte_t &operator[] (int i)
+ {
+ gcc_assert (IN_RANGE (i, 0, absint_t::eq_size - 1));
+ return eq[i];
+ }
+
+ const absint_byte_t &operator[] (int i) const
+ {
+ gcc_assert (IN_RANGE (i, 0, absint_t::eq_size - 1));
+ return eq[i];
+ }
+
+ absint_t () {}
+
+ absint_t (rtx xold)
+ : xexp(xold)
+ {}
+
+ absint_t (rtx xold, rtx xnew, int n_bytes)
+ : xexp(xold), xexp_new(xnew)
+ {
+ gcc_assert (n_bytes <= eq_size);
+ if (xnew)
+ for (int i = 0; i < n_bytes; ++i)
+ eq[i].learn_val8 (avr_uint8 (xnew, i));
+ }
+
+ // CODE != UNKNOWN: Maximal index of a byte with code CODE, or -1.
+ // CODE == UNKNOWN: Maximal index of a byte with known CODE, or -1.
+ int max_knows (rtx_code code = UNKNOWN) const
+ {
+ for (int i = eq_size - 1; i >= 0; --i)
+ if ((code == UNKNOWN && ! eq[i].can (UNKNOWN))
+ || (code != UNKNOWN && eq[i].can (code)))
+ return i;
+ return -1;
+ }
+
+ // CODE != UNKNOWN: Maximal i such that all bytes < i have code CODE.
+ // CODE == UNKNOWN: Maximal i such that all bytes < i have code != UNKNOWN.
+ int end_knows (rtx_code code = UNKNOWN) const
+ {
+ for (int i = 0; i < eq_size; ++i)
+ if ((code == UNKNOWN && eq[i].can (UNKNOWN))
+ || (code != UNKNOWN && ! eq[i].can (code)))
+ return i;
+ return eq_size;
+ }
+
+ // Number of bytes for which there is usable information.
+ int popcount () const
+ {
+ int pop = 0;
+ for (int i = 0; i < eq_size; ++i)
+ pop += ! eq[i].can (UNKNOWN);
+ return pop;
+ }
+
+ // Get the value under the assumption that all eq[].val8 are known.
+ uint64_t get_value (int n_bytes, bool strict = true) const
+ {
+ gcc_assert (IN_RANGE (n_bytes, 1, eq_size));
+ gcc_assert (! strict || end_knows (CONST_INT) >= n_bytes);
+
+ uint64_t val = 0;
+ for (int i = n_bytes - 1; i >= 0; --i)
+ val = 256 * val + eq[i].val8 (strict);
+ return val;
+ }
+
+ // Get n-byte value as a const_int, or NULL_RTX when (partially) unknown.
+ rtx get_value_as_const_int (int n_bytes) const
+ {
+ gcc_checking_assert (gpr_regno_p (REG_24, n_bytes));
+
+ if (end_knows (CONST_INT) < n_bytes)
+ return NULL_RTX;
+
+ const uint64_t val = get_value (n_bytes);
+ const machine_mode mode = size_to_mode (n_bytes);
+
+ return gen_int_mode (val, mode);
+ }
+
+ // Find a 16-bit register that contains the same value as held
+ // in positions I1 and I2 (if any). Return 0 when nothing appropriate
+ // for a MOVW is found.
+ int reg16_with_value (int i1, int i2, const memento_t &memo) const
+ {
+ if (i1 == (i2 ^ 1))
+ {
+ const int lo8 = eq[i1 & ~1].val8 (false);
+ const int hi8 = eq[i1 | 1].val8 (false);
+ if (lo8 >= 0 && hi8 >= 0)
+ return memo.reg16_with_value (lo8, hi8, 0);
+ }
+ return 0;
+ }
+
+ // When X is a REG rtx with a known content as of MEMO, then return
+ // the respective value as a constant in the mode of X.
+ // If X is NULL_RTX, or not a REG, or not known, then return NULL_RTX.
+ static rtx maybe_fold (rtx x, const memento_t &memo)
+ {
+ int n_bytes;
+
+ if (x != NULL_RTX
+ && REG_P (x)
+ && (n_bytes = GET_MODE_SIZE (GET_MODE (x))) <= FUSE_MOVE_MAX_MODESIZE
+ && gpr_regno_p (REGNO (x), n_bytes))
+ {
+ rtx xval = memo.get_value_as_const_int (REGNO (x), n_bytes);
+ if (xval)
+ return avr_chunk (GET_MODE (x), xval, 0);
+ }
+
+ return NULL_RTX;
+ }
+
+ // Try to conclude about the bytes that comprise X. DEST_MODE is the
+ // context mode that is used when X is CONST_INT and has VOIDmode.
+ static absint_t explore (rtx x, const memento_t &memo,
+ machine_mode dest_mode = VOIDmode)
+ {
+ const rtx_code code = GET_CODE (x);
+ bool worth_dumping = dump_file && (dump_flags & TDF_FOLDING);
+
+ const machine_mode mode = GET_MODE (x) == VOIDmode
+ ? dest_mode
+ : GET_MODE (x);
+
+ const int n_bytes = mode == VOIDmode && CONST_INT_P (x)
+ ? absint_t::eq_size
+ : GET_MODE_SIZE (mode);
+
+ if (! IN_RANGE (n_bytes, 1, absint_t::eq_size))
+ return absint_t (x);
+
+ // Eat our own dog food as produced by try_split_ldi.
+
+ rtx xop0 = BINARY_P (x) || UNARY_P (x) ? XEXP (x, 0) : NULL_RTX;
+ rtx xval0 = xop0 && CONST_INT_OR_FIXED_P (xop0)
+ ? xop0
+ : absint_t::maybe_fold (xop0, memo);
+
+ if (UNARY_P (x)
+ && REG_P (xop0)
+ && GET_MODE (xop0) == mode
+ && xval0)
+ {
+ rtx y = simplify_unary_operation (code, mode, xval0, mode);
+ if (y && CONST_INT_OR_FIXED_P (y))
+ return absint_t (x, y, n_bytes);
+ }
+
+ rtx xop1 = BINARY_P (x) ? XEXP (x, 1) : NULL_RTX;
+ rtx xval1 = xop1 && CONST_INT_OR_FIXED_P (xop1)
+ ? xop1
+ : absint_t::maybe_fold (xop1, memo);
+
+ if (BINARY_P (x)
+ && xval0 && xval1)
+ {
+ rtx y = simplify_binary_operation (code, mode, xval0, xval1);
+ if (y && CONST_INT_OR_FIXED_P (y))
+ return absint_t (x, y, n_bytes);
+ }
+
+ // No fold to a constant value was found:
+ // Look at the individual bytes more closely.
+
+ absint_t ai (x);
+
+ switch (code)
+ {
+ default:
+ worth_dumping = false;
+ break;
+
+ case REG:
+ if (END_REGNO (x) <= REG_32
+ && ! (regmask (x) & memento_t::fixed_regs_mask))
+ for (unsigned r = REGNO (x); r < END_REGNO (x); ++r)
+ {
+ ai[r - REGNO (x)].learn_regno (r);
+ if (memo.knows (r))
+ ai[r - REGNO (x)].learn_val8 (memo.value (r));
+ }
+ break;
+
+ CASE_CONST_UNIQUE:
+ ai = absint_t (x, x, n_bytes);
+ break;
+
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ case ROTATE:
+ case ROTATERT:
+ if ((CONST_INT_P (xop1) && INTVAL (xop1) >= 8)
+ // DImode shift offsets for transparent calls are shipped in R16.
+ || n_bytes == 8)
+ ai = explore_shift (x, memo);
+ break;
+
+ case AND:
+ case IOR:
+ case XOR:
+ {
+ const absint_t ai0 = absint_t::explore (xop0, memo, mode);
+ const absint_t ai1 = absint_t::explore (xop1, memo, mode);
+ for (int i = 0; i < n_bytes; ++i)
+ ai[i] = absint_byte_t (code, ai0[i], ai1[i]);
+ }
+ break;
+
+ case NOT:
+ {
+ const absint_t ai0 = absint_t::explore (xop0, memo);
+ for (int i = 0; i < n_bytes; ++i)
+ ai[i] = absint_byte_t (NOT, ai0[i]);
+ }
+ break;
+
+ case ZERO_EXTEND:
+ case SIGN_EXTEND:
+ {
+ const absint_t ai0 = absint_t::explore (xop0, memo);
+ const int ai0_size = GET_MODE_SIZE (GET_MODE (xop0));
+ const absint_byte_t b_signs = ai0[ai0_size - 1].get_signs (code);
+ for (int i = 0; i < n_bytes; ++i)
+ ai[i] = i < ai0_size ? ai0[i] : b_signs;
+ }
+ break;
+
+ case PLUS:
+ case MINUS:
+ if (SCALAR_INT_MODE_P (mode)
+ || ALL_SCALAR_FIXED_POINT_MODE_P (mode))
+ {
+ const absint_t ai0 = absint_t::explore (xop0, memo, mode);
+ const absint_t ai1 = absint_t::explore (xop1, memo, mode);
+ if (code == MINUS)
+ for (int i = 0; i < n_bytes && ai1[i].val8 (false) == 0; ++i)
+ ai[i] = ai0[i];
+
+ if (code == PLUS)
+ for (int i = 0; i < n_bytes; ++i)
+ {
+ if (ai0[i].val8 (false) == 0)
+ ai[i] = ai1[i];
+ else if (ai1[i].val8 (false) == 0)
+ ai[i] = ai0[i];
+ else
+ {
+ ai[i] = absint_byte_t (code, ai0[i], ai1[i]);
+ break;
+ }
+ }
+
+ if (code == PLUS
+ && GET_CODE (xop0) == ZERO_EXTEND
+ && CONST_INT_P (xop1))
+ {
+ rtx exop = XEXP (xop0, 0);
+ int exsize = GET_MODE_SIZE (GET_MODE (exop));
+ rtx lo_xop1 = avr_chunk (GET_MODE (exop), xop1, 0);
+ if (lo_xop1 == const0_rtx)
+ for (int i = exsize; i < n_bytes; ++i)
+ ai[i] = ai1[i];
+ }
+ }
+ break; // PLUS, MINUS
+
+ case MULT:
+ if (GET_MODE (xop0) == mode
+ && SCALAR_INT_MODE_P (mode))
+ {
+ // The constant may be located in xop0's zero_extend...
+ const absint_t ai0 = absint_t::explore (xop0, memo, mode);
+ const absint_t ai1 = absint_t::explore (xop1, memo, mode);
+ const int end0 = ai0.end_knows (CONST_INT);
+ const int end1 = ai1.end_knows (CONST_INT);
+ const uint64_t mul0 = end0 > 0 ? ai0.get_value (end0) : 1;
+ const uint64_t mul1 = end1 > 0 ? ai1.get_value (end1) : 1;
+ // Shifting in off/8 zero bytes from the right.
+ const int off = mul0 * mul1 != 0 ? ctz_hwi (mul0 * mul1) : 0;
+ for (int i = 0; i < off / 8; ++i)
+ ai[i].learn_val8 (0);
+ }
+ break; // MULT
+
+ case BSWAP:
+ if (GET_MODE (xop0) == mode)
+ {
+ const absint_t ai0 = absint_t::explore (xop0, memo);
+ for (int i = 0; i < n_bytes; ++i)
+ ai[i] = ai0[n_bytes - 1 - i];
+ }
+ break;
+ } // switch code
+
+ if (worth_dumping)
+ {
+ avr_dump (";; AI.explore %C:%m ", code, mode);
+ ai.dump ();
+ }
+
+ for (int i = 0; i < n_bytes; ++i)
+ gcc_assert (ai[i].check ());
+
+ return ai;
+ }
+
+ // Helper for the method above.
+ static absint_t explore_shift (rtx x, const memento_t &memo)
+ {
+ absint_t ai (x);
+
+ const rtx_code code = GET_CODE (x);
+ const machine_mode mode = GET_MODE (x);
+ const int n_bytes = GET_MODE_SIZE (mode);
+
+ if (! BINARY_P (x))
+ return ai;
+
+ rtx xop0 = XEXP (x, 0);
+ rtx xop1 = XEXP (x, 1);
+
+ // Look at shift offsets of DImode more closely;
+ // they are in R16 for __lshrdi3 etc. Patch xop1 on success.
+ if (n_bytes == 8
+ && ! CONST_INT_P (xop1)
+ && GET_MODE (xop0) == mode)
+ {
+ const int n_off = GET_MODE_SIZE (GET_MODE (xop1));
+ const absint_t aoff = absint_t::explore (xop1, memo);
+ xop1 = aoff.get_value_as_const_int (n_off);
+ }
+
+ if (! xop1
+ || GET_MODE (xop0) != mode
+ || ! IN_RANGE (n_bytes, 1, FUSE_MOVE_MAX_MODESIZE)
+ || ! CONST_INT_P (xop1)
+ || ! IN_RANGE (INTVAL (xop1), 8, 8 * n_bytes - 1))
+ return ai;
+
+ const int off = INTVAL (xop1);
+ const absint_t ai0 = absint_t::explore (xop0, memo);
+
+ switch (GET_CODE (x))
+ {
+ default:
+ break;
+
+ case ASHIFT:
+ // Shifting in 0x00's from the right.
+ for (int i = 0; i < off / 8; ++i)
+ ai[i].learn_val8 (0);
+ break;
+
+ case LSHIFTRT:
+ case ASHIFTRT:
+ {
+ // Shifting in 0x00's or signs from the left.
+ absint_byte_t b_signs = ai0[n_bytes - 1].get_signs (GET_CODE (x));
+ for (int i = n_bytes - off / 8; i < n_bytes; ++i)
+ ai[i] = b_signs;
+ if (off == 8 * n_bytes - 1)
+ if (code == ASHIFTRT)
+ ai[0] = b_signs;
+ }
+ break;
+ }
+
+ if (off % 8 != 0
+ || ai0.popcount () == 0)
+ return ai;
+
+ // For shift offsets that are a multiple of 8, record the
+ // action on the constituent bytes.
+
+ // Bytes are moving left by this offset (or zero for "none").
+ const int boffL = select<int>()
+ : code == ROTATE || code == ASHIFT ? off / 8
+ : code == ROTATERT ? n_bytes - off / 8
+ : 0;
+
+ // Bytes are moving right by this offset (or zero for "none").
+ const int boffR = select<int>()
+ : code == ROTATERT || code == ASHIFTRT || code == LSHIFTRT ? off / 8
+ : code == ROTATE ? n_bytes - off / 8
+ : 0;
+
+ if (dump_flags & TDF_FOLDING)
+ {
+ avr_dump (";; AI.explore_shift %C:%m ", code, mode);
+ if (boffL)
+ avr_dump ("<< %d%s", 8 * boffL, boffL && boffR ? ", " : "");
+ if (boffR)
+ avr_dump (">> %d", 8 * boffR);
+ avr_dump ("\n");
+ }
+
+ if (boffL)
+ for (int i = 0; i < n_bytes - boffL; ++i)
+ ai[i + boffL] = ai0[i];
+
+ if (boffR)
+ for (int i = boffR; i < n_bytes; ++i)
+ ai[i - boffR] = ai0[i];
+
+ return ai;
+ }
+
+ void dump (const char *msg = nullptr, FILE *f = dump_file) const
+ {
+ if (f)
+ dump (NULL_RTX, msg, f);
+ }
+
+ void dump (rtx dest, const char *msg = nullptr, FILE *f = dump_file) const
+ {
+ if (f)
+ {
+ int regno = dest && REG_P (dest) ? REGNO (dest) : 0;
+
+ msg = msg && msg[0] ? msg : "AI=[%s]\n";
+ const char *const xs = strstr (msg, "%s");
+ gcc_assert (xs);
+
+ fprintf (f, "%.*s", (int) (xs - msg), msg);
+ for (int i = max_knows (); i >= 0; --i)
+ {
+ const int sub_regno = eq[i].regno (false /*nonstrict*/);
+ const bool nop = regno && sub_regno == regno + i;
+ eq[i].dump (nop ? "%s=nop" : "%s", f);
+ fprintf (f, "%s", i ? "; " : xs + strlen ("%s"));
+ }
+ }
+ }
+}; // absint_t
+
+
+// Information for a REG = non-memory single_set.
+
+struct insninfo_t
+{
+ // This is an insn that sets the m_size bytes of m_regno to either
+ // - A compile time constant m_isrc (m_code = CONST_INT), or
+ // - The contents of register number m_rsrc (m_code = REG).
+ int m_size;
+ int m_regno;
+ int m_rsrc;
+ rtx_code m_code;
+ uint64_t m_isrc;
+ rtx_insn *m_insn;
+ rtx m_set = NULL_RTX;
+ rtx m_src = NULL_RTX;
+ int m_scratch = 0; // 0 or the register number of a QImode scratch.
+ rtx_code m_old_code = UNKNOWN;
+
+ // Knowledge about the bytes of the SET_SRC: A byte may have a known
+ // value, may be known to equal some register (e.g. with BSWAP),
+ // or both, or may be unknown.
+ absint_t m_ai;
+
+ // May be set for binary operations.
+ absint_byte_t m_new_src;
+
+ bool init1 (insn_optimize_data_t &, int max_size, const char *purpose);
+
+ // Upper bound for the cost (in words) of a move<mode> insn that
+ // performs a REG = CONST_XXX = .m_isrc move of modesize .m_size.
+ int cost () const;
+ bool combine (const insninfo_t &prev, const insninfo_t &curr);
+ int emit_insn () const;
+
+ bool needs_scratch () const
+ {
+ gcc_assert (m_code == CONST_INT);
+
+ for (int i = 0; i < m_size; ++i)
+ if (AVRasm::ldi_needs_scratch (m_regno, m_isrc >> (8 * i)))
+ return true;
+
+ return false;
+ }
+
+ int hamming (const memento_t &memo) const
+ {
+ gcc_assert (m_code == CONST_INT);
+
+ int h = 0;
+ for (int i = 0; i < m_size; ++i)
+ h += ! memo.have_value (m_regno + i, 1, 0xff & (m_isrc >> (8 * i)));
+
+ return h;
+ }
+
+ // Upper bound for the number of ply_t's of a solution, given Hamming
+ // distance of HAMM (-1 for unknown).
+ int n_best_plys (int hamm = -1) const
+ {
+ gcc_assert (m_code == CONST_INT);
+
+ if (m_size == 8)
+ return (hamm >= 0 ? hamm : m_size);
+ else if (hamm <= 4)
+ return (hamm >= 0 ? hamm : m_size)
+ // The following terms give the max number of MOVWs with a
+ // Hamming difference of less than 2.
+ + (AVR_HAVE_MOVW && m_regno < REG_14) * m_size / 2
+ + (AVR_HAVE_MOVW && m_regno == REG_14) * std::max (0, m_size - 2)
+ - (AVR_HAVE_MOVW && hamm == 4 && (uint32_t) m_isrc % 0x10001 == 0);
+ else
+ gcc_unreachable ();
+ }
+}; // insninfo_t
+
+
+struct insn_optimize_data_t
+{
+ // Known values held in GPRs prior to the action of .insn / .ii.
+ memento_t ®s;
+ rtx_insn *insn;
+ insninfo_t ii;
+ bool unused;
+
+ insn_optimize_data_t () = delete;
+
+ insn_optimize_data_t (memento_t &memo)
+ : regs(memo)
+ {}
+}; // insn_optimize_data_t
+
+struct optimize_data_t
+{
+ insn_optimize_data_t prev;
+ insn_optimize_data_t curr;
+
+ // Number >= 0 of new insns that replace the curr insn and maybe also the
+ // prev insn. -1 when no replacement has been found.
+ int n_new_insns = -1;
+
+ // .prev will be removed provided we have (potentially zero) new insns.
+ bool delete_prev_p = false;
+
+ // Ignore these GPRs when comparing the simulation results of
+ // old and new insn sequences. Usually some scratch reg(s).
+ gprmask_t ignore_mask = 0;
+
+ optimize_data_t () = delete;
+
+ optimize_data_t (memento_t &prev_regs, memento_t &curr_regs)
+ : prev(prev_regs), curr(curr_regs)
+ {}
+
+ bool try_fuse (bbinfo_t *);
+ bool try_bin_arg1 (bbinfo_t *);
+ bool try_simplify (bbinfo_t *);
+ bool try_split_ldi (bbinfo_t *);
+ bool try_split_any (bbinfo_t *);
+ bool fail (const char *reason);
+ bool emit_signs (int r_sign, gprmask_t);
+ void emit_move_mask (int dest, int src, int n_bytes, gprmask_t &);
+ rtx_insn *emit_sequence (basic_block, rtx_insn *);
+ bool get_2ary_operands (rtx_code &, const absint_byte_t &,
+ insn_optimize_data_t &, int r_dest,
+ absint_val_t &, absint_val_t &, int &ex_cost);
+ rtx_insn *emit_and_apply_move (memento_t &, rtx dest, rtx src);
+
+ // M2 is the state of GPRs as the sequence starts; M1 is the state one before.
+ static void apply_sequence (const std::vector<rtx_insn *> &insns,
+ memento_t &m1, memento_t &m2)
+ {
+ gcc_assert (insns.size () >= 1);
+
+ for (auto &i : insns)
+ {
+ m1 = m2;
+ m2.apply_insn (i, false);
+ }
+ }
+}; // optimize_data_t
+
+
+// Emit INSNS before .curr.insn, replacing .curr.insn and also .prev.insn when
+// .delete_prev_p is on. Adjusts .curr.regs and .prev.regs accordingly.
+rtx_insn *
+optimize_data_t::emit_sequence (basic_block bb, rtx_insn *insns)
+{
+ gcc_assert (n_new_insns >= 0);
+
+ // The old insns will be replaced by and simulated...
+ const std::vector<rtx_insn *> old_insns = delete_prev_p
+ ? std::vector<rtx_insn *> { prev.insn, curr.insn }
+ : std::vector<rtx_insn *> { curr.insn };
+
+ // ...against the new insns.
+ std::vector<rtx_insn *> new_insns;
+ for (rtx_insn *i = insns; i; i = NEXT_INSN (i))
+ new_insns.push_back (i);
+
+ rtx_insn *new_curr_insn;
+
+ memento_t &m1 = prev.regs;
+ memento_t &m2 = curr.regs;
+
+ if (new_insns.empty ())
+ {
+ if (delete_prev_p)
+ {
+ m2 = m1;
+ m1.known = 0;
+ new_curr_insn = prev_nondebug_insn_bb (bb, prev.insn);
+ }
+ else
+ new_curr_insn = prev.insn;
+ }
+ else
+ {
+ // We are going to emit at least one new insn. Simulate the effect of
+ // the new sequence and compare it against the effect of the old one.
+ // Both effects must be the same (modulo scratch regs).
+
+ memento_t n1 = m1;
+ memento_t n2 = m2;
+
+ if (delete_prev_p)
+ {
+ m2 = m1, m1.known = 0;
+ n2 = n1, n1.known = 0;
+ }
+
+ avr_dump (";; Applying new route...\n");
+ optimize_data_t::apply_sequence (new_insns, n1, n2);
+
+ avr_dump (";; Applying old route...\n");
+ optimize_data_t::apply_sequence (old_insns, m1, m2);
+ avr_dump ("\n");
+
+ if (! m2.equals (n2, ignore_mask))
+ {
+ // When we come here, then
+ // - We have a genuine bug, and/or
+ // - We did produce insns that are opaque to absint_t's explore().
+ avr_dump ("INCOMPLETE APPLICATION:\n");
+ m2.dump ("regs old route=%s\n\n");
+ n2.dump ("regs new route=%s\n\n");
+ avr_dump ("The new insns are:\n%L", insns);
+
+ fatal_insn ("incomplete application of insn", insns);
+ }
+
+ // Use N1 and N2 as the new GPR states. Even though they are equal
+ // modulo ignore_mask, N2 may know more about GPRs when it doesn't
+ // clobber the scratch reg.
+ m1 = n1;
+ m2 = n2;
+
+ emit_insn_before (insns, curr.insn);
+
+ new_curr_insn = new_insns.back ();
+ }
+
+ if (delete_prev_p)
+ SET_INSN_DELETED (prev.insn);
+
+ SET_INSN_DELETED (curr.insn);
+
+ return new_curr_insn;
+}
+
+
+const pass_data avr_pass_data_fuse_move =
+{
+ RTL_PASS, // type
+ "", // name (will be patched)
+ OPTGROUP_NONE, // optinfo_flags
+ TV_MACH_DEP, // tv_id
+ 0, // properties_required
+ 0, // properties_provided
+ 0, // properties_destroyed
+ 0, // todo_flags_start
+ TODO_df_finish | TODO_df_verify // todo_flags_finish
+};
+
+
+class avr_pass_fuse_move : public rtl_opt_pass
+{
+public:
+ avr_pass_fuse_move (gcc::context *ctxt, const char *name)
+ : rtl_opt_pass (avr_pass_data_fuse_move, ctxt)
+ {
+ this->name = name;
+ }
+
+ unsigned int execute (function *func) final override
+ {
+ if (optimize > 0 && avr_fuse_move > 0)
+ {
+ df_note_add_problem ();
+ df_analyze ();
+
+ bbinfo_t::optimize_one_function (func);
+ }
+
+ return 0;
+ }
+}; // avr_pass_fuse_move
+
+
+// Append PLY to .plies[]. A SET or BLD ply may start a new sequence of
+// SETs or BLDs and gets assigned the overhead of the sequence like for an
+// initial SET or CLT instruction. A SET ply may be added in two flavours:
+// One that starts a sequence of single_sets, and one that represents the
+// payload of a set_some insn. MEMO is the GPR state prior to PLY.
+void
+plies_t::add (ply_t ply, const ply_t *prev, const memento_t &memo,
+ bool maybe_set_some)
+{
+ if (ply.code == SET)
+ {
+ if (prev && prev->code == SET)
+ {
+ // Proceed with the SET sequence flavour.
+ ply.in_set_some = prev->in_set_some;
+
+ if (ply.in_set_some)
+ ply.scratch = 0;
+ else if (! ply.scratch && ply.needs_scratch ())
+ ply.cost += 2;
+ }
+ else
+ {
+ // The 1st SET in a sequence. May use set_some to set
+ // all bytes in one insn, or a bunch of single_sets.
+
+ // Route1: Bunch of single_sets.
+ const int ply_cost = ply.cost;
+ if (! ply.scratch && ply.needs_scratch ())
+ ply.cost += 2;
+ ply.in_set_some = false;
+
+ add (ply);
+
+ if (maybe_set_some)
+ {
+ // Route 2: One set_some: The 1st SET gets all the overhead.
+ ply.scratch = 0;
+ ply.cost = ply_cost + 1 + ! memo.known_dregno ();
+ ply.in_set_some = true;
+ }
+ }
+ } // SET
+ else if (ply.is_bld ())
+ {
+ // The first BLD in a series of BLDs gets the extra costs
+ // for the SET / CLT that precedes the BLDs.
+ ply.cost += ! ply.is_same_bld (prev);
+ }
+
+ add (ply);
+}
+
+
+// Emit insns for .plies[] and return the number of emitted insns.
+// The emitted insns represent the effect of II with MEMO, which
+// is the GPR knowledge before II is executed.
+int
+plies_t::emit_insns (const insninfo_t &ii, const memento_t &memo) const
+{
+ int n_insns = 0;
+
+ for (int i = 0; i < n_plies; ++i)
+ {
+ const ply_t &p = plies[i];
+
+ // SETs and BLDs are dumped by their emit_xxxs().
+ if (p.code != SET && ! p.is_bld ())
+ p.dump ();
+
+ rtx src1 = NULL_RTX;
+ rtx src2 = NULL_RTX;
+ rtx dest = NULL_RTX;
+ rtx xscratch = NULL_RTX;
+ rtx_code code = p.code;
+
+ switch (p.code)
+ {
+ default:
+ avr_dump ("\n\n;; Bad ply_t:\n");
+ p.dump (i + 1);
+ gcc_unreachable ();
+ break;
+
+ case REG: // *movhi = MOVW; movqi_insn = MOV
+ dest = gen_rtx_REG (p.size == 1 ? QImode : HImode, p.regno);
+ src1 = gen_rtx_REG (p.size == 1 ? QImode : HImode, p.arg);
+ break;
+
+ case SET: // movqi_insn = LDI, CLR; set_some = (LDI + MOV) ** size.
+ i += emit_sets (ii, n_insns, memo, i) - 1;
+ continue;
+
+ case MOD: // *ior<mode>3, *and<mode>3 = SET + BLD... / CLT + BLD...
+ i += emit_blds (ii, n_insns, i) - 1;
+ continue;
+
+ case MINUS: // *subqi3 = SUB
+ case PLUS: // *addqi3 = ADD
+ case AND: // *andqi3 = AND
+ case IOR: // *iorqi3 = OR
+ case XOR: // *xorqi3 = EOR
+ dest = gen_rtx_REG (QImode, p.regno);
+ src2 = gen_rtx_REG (QImode, p.arg);
+ break;
+
+ case PRE_INC: // *addqi3 = INC
+ case PRE_DEC: // *addqi3 = DEC
+ code = PLUS;
+ dest = gen_rtx_REG (QImode, p.regno);
+ src2 = p.code == PRE_INC ? const1_rtx : constm1_rtx;
+ break;
+
+ case NEG: // *negqi2 = NEG
+ case NOT: // *one_cmplqi2 = COM
+ dest = gen_rtx_REG (QImode, p.regno);
+ src1 = dest;
+ break;
+
+ case ROTATE: // *rotlqi3 = SWAP
+ case ASHIFT: // *ashlqi3 = LSL
+ case ASHIFTRT: // *ashrqi3 = ASR
+ case LSHIFTRT: // *lshrqi3 = LSR
+ dest = gen_rtx_REG (QImode, p.regno);
+ src2 = GEN_INT (code == ROTATE ? 4 : 1);
+ break;
+
+ case SS_PLUS: // *addhi3 = ADIW, SBIW
+ code = PLUS;
+ dest = gen_rtx_REG (HImode, p.regno);
+ src2 = gen_int_mode (p.arg, HImode);
+ break;
+ } // switch p.code
+
+ gcc_assert (dest && (! src1) + (! src2) == 1);
+
+ rtx src = code == REG || code == SET
+ ? src1
+ : (src2
+ ? gen_rtx_fmt_ee (code, GET_MODE (dest), dest, src2)
+ : gen_rtx_fmt_e (code, GET_MODE (dest), src1));
+
+ emit_valid_move_clobbercc (dest, src, xscratch);
+ n_insns += 1;
+ }
+
+ return n_insns;
+}
+
+
+// Helper for .emit_insns(). Emit an ior<mode>3 or and<mode>3 insn
+// that is equivalent to a sequence of contiguous BLDs starting at
+// .plies[ISTART]. Updates N_INSNS according to the number of insns emitted
+// and returns the number of consumed plys in .plies[].
+int
+plies_t::emit_blds (const insninfo_t &ii, int &n_insns, int istart) const
+{
+ const ply_t &first = plies[istart];
+
+ gcc_assert (ii.m_size <= 4);
+ gcc_assert (first.is_bld ());
+
+ const rtx_code code = first.is_setbld () ? IOR : AND;
+ const machine_mode mode = size_to_mode (ii.m_size);
+
+ // Determine mask and number of BLDs.
+
+ uint32_t mask = 0;
+ int n_blds = 0;
+
+ for (int i = istart; i < n_plies; ++i, ++n_blds)
+ {
+ const ply_t &p = plies[i];
+ if (! p.is_bld () || ! p.is_same_bld (& first))
+ break;
+
+ // For AND, work on the 1-complement of the mask,
+ // i.e. 1's specify which bits to clear.
+ uint8_t mask8 = code == IOR ? p.arg : ~p.arg;
+ mask |= mask8 << (8 * (p.regno - ii.m_regno));
+ }
+
+ mask = GET_MODE_MASK (mode) & (code == IOR ? mask : ~mask);
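+ // For example, two IOR BLDs that set bit 3 of the low byte and bit 1
+ // of the high byte of an HImode destination combine to mask = 0x0208.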
+
+ if (dump_file)
+ {
+ fprintf (dump_file, ";; emit_blds[%d...%d] R%d[%d]%s=%0*x\n",
+ istart, istart + n_blds - 1, ii.m_regno, ii.m_size,
+ code == IOR ? "|" : "&", 2 * ii.m_size, (int) mask);
+ }
+
+ for (int i = 0; i < n_blds; ++i)
+ plies[i + istart].dump ();
+
+ rtx dest = gen_rtx_REG (mode, ii.m_regno);
+ rtx src = gen_rtx_fmt_ee (code, mode, dest, gen_int_mode (mask, mode));
+ rtx xscratch = mode == QImode ? NULL_RTX : gen_rtx_SCRATCH (QImode);
+
+ emit_valid_move_clobbercc (dest, src, xscratch);
+ n_insns += 1;
+
+ return n_blds;
+}
+
+
+// Emit insns for a contiguous sequence of SET ply_t's starting at
+// .plies[ISTART]. Advances N_INSNS by the number of emitted insns.
+// MEMO is the state of the GPRs before II is executed, where II
+// represents the insn under optimization.
+// The emitted insns are "movqi_insn" or "*reload_inqi"
+// when .plies[ISTART].in_set_some is not set, and one "set_some" insn
+// when .plies[ISTART].in_set_some is set.
+int
+plies_t::emit_sets (const insninfo_t &ii, int &n_insns, const memento_t &memo,
+ int istart) const
+{
+ gcc_assert (plies[istart].code == SET);
+
+ const bool in_set_some = plies[istart].in_set_some;
+
+ // Some d-regno that holds a compile-time constant, or 0.
+ const int known_dregno = memo.known_dregno ();
+
+ // Determine number of contiguous SETs,
+ // and sort them in ps[] such that smaller regnos come first.
+
+ const ply_t *ps[FUSE_MOVE_MAX_MODESIZE];
+ int n_sets = 0;
+
+ for (int i = istart; i < n_plies && plies[i].code == SET; ++i)
+ ps[n_sets++] = & plies[i];
+
+ if (dump_file)
+ {
+ fprintf (dump_file, ";; emit_sets[%d...%d] R%d[%d]=%0*" PRIx64,
+ istart, istart + n_sets - 1, ii.m_regno, ii.m_size,
+ 2 * ii.m_size, ii.m_isrc);
+ fprintf (dump_file, ", scratch=%s%d", "R" + ! ii.m_scratch, ii.m_scratch);
+ fprintf (dump_file, ", known_dreg=%s%d, set_some=%d\n",
+ "R" + ! known_dregno, known_dregno, in_set_some);
+ }
+
+ for (int i = 0; i < n_sets; ++i)
+ ps[i]->dump ();
+
+ // Sort. This is most useful on regs like (reg:SI REG_14).
+ for (int i = 0; i < n_sets - 1; ++i)
+ for (int j = i + 1; j < n_sets; ++j)
+ if (ps[i]->regno > ps[j]->regno)
+ std::swap (ps[i], ps[j]);
+
+ // Prepare operands.
+ rtx dst[FUSE_MOVE_MAX_MODESIZE];
+ rtx src[FUSE_MOVE_MAX_MODESIZE];
+ for (int i = 0; i < n_sets; ++i)
+ {
+ dst[i] = gen_rtx_REG (QImode, ps[i]->regno);
+ src[i] = gen_int_mode (ps[i]->arg, QImode);
+ }
+
+ if (in_set_some)
+ {
+ // Emit a "set_some" insn that sets all of the collected 8-bit SETs.
+ // This is a parallel with n_sets QImode SETs as payload.
+
+ gcc_assert (! known_dregno || memo.knows (known_dregno));
+
+ // A scratch reg...
+ rtx op1 = known_dregno
+ ? gen_rtx_REG (QImode, known_dregno)
+ : const0_rtx;
+ // ...with a known content, so it can be restored without saving.
+ rtx op2 = known_dregno
+ ? gen_int_mode (memo.values[known_dregno], QImode)
+ : const0_rtx;
+ // Target register envelope.
+ rtx op3 = GEN_INT (ii.m_regno);
+ rtx op4 = GEN_INT (ii.m_size);
+
+ // Payload.
+ for (int i = 0; i < n_sets; ++i)
+ dst[i] = gen_rtx_SET (dst[i], src[i]);
+
+ rtvec vec = gen_rtvec (5 + n_sets,
+ gen_rtx_USE (VOIDmode, op1),
+ gen_rtx_USE (VOIDmode, op2),
+ gen_rtx_USE (VOIDmode, op3),
+ gen_rtx_USE (VOIDmode, op4),
+ gen_rtx_CLOBBER (VOIDmode, cc_reg_rtx),
+ dst[0], dst[1], dst[2], dst[3]);
+ rtx pattern = gen_rtx_PARALLEL (VOIDmode, vec);
+
+ emit_valid_insn (pattern);
+ n_insns += 1;
+ }
+ else
+ {
+ // Emit a bunch of movqi_insn / *reload_inqi insns.
+
+ for (int i = 0; i < n_sets; ++i)
+ if (ii.m_scratch
+ && AVRasm::constant_cost (SET, ps[i]->regno, ps[i]->arg) > 1)
+ {
+ rtx scratch = gen_rtx_REG (QImode, ii.m_scratch);
+ bool use_reload_inqi = true;
+ if (use_reload_inqi)
+ {
+ emit_valid_move_clobbercc (dst[i], src[i], scratch);
+ n_insns += 1;
+ }
+ else
+ {
+ emit_valid_move_clobbercc (scratch, src[i]);
+ emit_valid_move_clobbercc (dst[i], scratch);
+ n_insns += 2;
+ }
+ }
+ else
+ {
+ emit_valid_move_clobbercc (dst[i], src[i]);
+ n_insns += 1;
+ }
+ }
+
+ return n_sets;
+}
+
+
+// Try to find an operation such that Y = op (X).
+// Shifts and rotates are regarded as unary operations with
+// an implied 2nd operand.
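+// For example, find_arith (0x13, 0x12) == PRE_INC, and
+// find_arith (0x21, 0x12) == ROTATE (a swap of the nibbles).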
+static rtx_code
+find_arith (uint8_t y, uint8_t x)
+{
+#define RETIF(ex, code) y == (0xff & (ex)) ? code
+ return select<rtx_code>()
+ : RETIF (x + 1, PRE_INC)
+ : RETIF (x - 1, PRE_DEC)
+ : RETIF ((x << 4) | (x >> 4), ROTATE)
+ : RETIF (-x, NEG)
+ : RETIF (~x, NOT)
+ : RETIF (x >> 1, LSHIFTRT)
+ : RETIF (x << 1, ASHIFT)
+ : RETIF ((x >> 1) | (x & 0x80), ASHIFTRT)
+ : UNKNOWN;
+#undef RETIF
+}
+
+
+// Try to find an operation such that Z = X op Y.
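+// For example, find_arith2 (0x30, 0x10, 0x20) == PLUS, and
+// find_arith2 (0x0e, 0x0a, 0x06) == IOR.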
+static rtx_code
+find_arith2 (uint8_t z, uint8_t x, uint8_t y)
+{
+#define RETIF(ex, code) z == (0xff & (ex)) ? code
+ return select<rtx_code>()
+ : RETIF (x + y, PLUS)
+ : RETIF (x - y, MINUS)
+ : RETIF (x & y, AND)
+ : RETIF (x | y, IOR)
+ : RETIF (x ^ y, XOR)
+ : UNKNOWN;
+#undef RETIF
+}
+
+
+// Add plies to .plies[] that represent a MOVW, but only ones that reduce the
+// Hamming distance from REGNO[SIZE] to VAL by exactly DHAMM.
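+// For example, when R20:R21 shall become 0x1234 and R16:R17 is known to
+// hold 0x1234, then MOVW R20,R16 is such a ply with DHAMM = 2.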
+void
+plies_t::add_plies_movw (int regno, int size, uint64_t val,
+ int dhamm, const memento_t &memo)
+{
+ if (! AVR_HAVE_MOVW || size < 2)
+ return;
+
+ for (int i = 0; i < size - 1; i += 2)
+ {
+ // A MOVW that sets fewer than 2 regs to the target value is
+ // not needed for the upper regs.
+ if (dhamm != 2 && regno + i >= REG_16)
+ continue;
+
+ const uint16_t val16 = val >> (8 * i);
+ const uint8_t lo8 = val16;
+ const uint8_t hi8 = val16 >> 8;
+
+ // When one of the target bytes is already as expected, then
+ // no MOVW is needed for an optimal sequence.
+ if (memo.have_value (regno + i, 1, lo8)
+ || memo.have_value (regno + i + 1, 1, hi8))
+ continue;
+
+ const int h_old = memo.hamming (regno + i, 2, val16);
+
+ // Record MOVWs that reduce the Hamming distance by DHAMM as requested.
+ for (int j = FIRST_GPR; j < REG_32; j += 2)
+ if (j != regno + i
+ && memo.knows (j, 2))
+ {
+ const int h_new = memo.hamming (j, 2, val16);
+ if (h_new == h_old - dhamm)
+ add (ply_t { regno + i, 2, REG, j, 1, dhamm });
+ }
+ }
+}
+
+
+// Set PS to plys that reduce the Hamming distance from II.m_regno to
+// compile-time constant II.m_isrc by 2, 1 or 0. PREV is NULL or points
+// to a previous ply_t. MEMO is the GPR state after PREV and prior to the
+// added plys.
+void
+bbinfo_t::get_plies (plies_t &ps, const insninfo_t &ii, const memento_t &memo,
+ const ply_t *prev)
+{
+ ps.reset ();
+
+ fpd->n_get_plies += 1;
+
+ const bool maybe_set_some = (bbinfo_t::use_set_some_p && ii.needs_scratch ());
+
+ // Start with cheap plies, then continue to more expensive ones.
+ const int regno = ii.m_regno;
+ const int size = ii.m_size;
+ const uint64_t val = ii.m_isrc;
+
+ // Find MOVW with a Hamming delta of 2.
+ ps.add_plies_movw (regno, size, val, 2, memo);
+
+ // Find ADIW / SBIW
+ if (AVR_HAVE_ADIW && size >= 2)
+ for (int i = 0; i < size - 1; i += 2)
+ if (regno + i >= REG_24
+ && memo.knows (regno + i, 2))
+ {
+ const int16_t value16 = memo[regno + i] + 256 * memo[regno + i + 1];
+ const int16_t lo16 = val >> (8 * i);
+ const int16_t delta = lo16 - value16;
+ const uint8_t lo8 = val >> (8 * i);
+ const uint8_t hi8 = val >> (8 * i + 8);
+ if (IN_RANGE (delta, -63, 63)
+ && lo8 != memo[regno + i]
+ && hi8 != memo[regno + i + 1])
+ {
+ ps.add (ply_t { regno + i, 2, SS_PLUS, delta, 1, 2 });
+ }
+ }
+
+ // Find 1-reg plies. In an optimal sequence, each 1-reg ply will decrease
+ // the Hamming distance. Thus we only have to consider plies that set
+ // one of the target bytes to the target value VAL. Start with the
+ // high registers since that is the canonical order when two plies commute.
+
+ for (int i = size - 1; i >= 0; --i)
+ {
+ const uint8_t val8 = val >> (8 * i);
+
+ // Nothing to do for this byte when its value is already as desired.
+ if (memo.have_value (regno + i, 1, val8))
+ continue;
+
+ // LDI or CLR.
+ if (regno + i >= REG_16 || val8 == 0)
+ ps.add (ply_t { regno + i, 1, SET, val8, 1 }, prev, memo,
+ maybe_set_some);
+
+ // We may only need to MOV non-zero values since there is CLR,
+ // and only when there is no LDI.
+ if (val8 != 0
+ && regno + i < REG_16)
+ {
+ // MOV where the source register is one of the target regs.
+ for (int j = 0; j < size; ++j)
+ if (j != i)
+ if (memo.have_value (regno + j, 1, val8))
+ ps.add (ply_t { regno + i, 1, REG, regno + j, 1 });
+
+ // MOV where the source register is not a target reg.
+ // FIXME: ticks.
+ for (int j = FIRST_GPR; j < REG_32; ++j)
+ if (! IN_RANGE (j, regno, regno + size - 1))
+ if (memo.have_value (j, 1, val8))
+ ps.add (ply_t { regno + i, 1, REG, j, 1 });
+
+ // LDI + MOV.
+ if (regno + i < REG_16 && val8 != 0)
+ {
+ ply_t p { regno + i, 1, SET, val8, 2 };
+ p.scratch = ii.m_scratch;
+ ps.add (p, prev, memo, maybe_set_some);
+ }
+ }
+ }
+
+ // Arithmetic like INC, DEC or ASHIFT.
+ for (int i = size - 1; i >= 0; --i)
+ if (bbinfo_t::use_arith_p
+ && regno + i < REG_16
+ && memo.knows (regno + i))
+ {
+ const uint8_t y = val >> (8 * i);
+ const uint8_t x = memo[regno + i];
+ rtx_code code;
+
+ if (y == 0 || y == x)
+ continue;
+
+ // INC, DEC, SWAP, LSL, NEG, ...
+ if (UNKNOWN != (code = find_arith (y, x)))
+ {
+ ps.add (ply_t { regno + i, 1, code, x /* dummy */, 1 });
+ continue;
+ }
+
+ // ADD, AND, ...
+ for (int r = FIRST_GPR; r < REG_32; ++r)
+ if (r != regno + i
+ && memo.knows (r)
+ && memo[r] != 0
+ && UNKNOWN != (code = find_arith2 (y, x, memo[r])))
+ {
+ ps.add (ply_t { regno + i, 1, code, r, 1 });
+ }
+
+ if (size < 2 || size > 4)
+ continue;
+
+ // SET + BLD
+ if ((x & y) == x && popcount_hwi (x ^ y) == 1)
+ ps.add (ply_t { regno + i, 1, MOD, x ^ y, 1 },
+ prev, memo, maybe_set_some);
+
+ // CLT + BLD
+ if ((x & y) == y && popcount_hwi (x ^ y) == 1)
+ ps.add (ply_t { regno + i, 1, MOD, x ^ y ^ 0xff, 1 },
+ prev, memo, maybe_set_some);
+ }
+
+ if (bbinfo_t::use_arith_p
+ // For 8-byte values, don't use ply_t's with only a partial reduction
+ // of the hamming distance.
+ && size <= 4)
+ {
+ // Find MOVW with a Hamming delta of 1, then 0.
+ ps.add_plies_movw (regno, size, val, 1, memo);
+ ps.add_plies_movw (regno, size, val, 0, memo);
+ }
+
+ plies_t::max_n_plies = std::max (plies_t::max_n_plies, ps.n_plies);
+}
+
+
+// Try to combine two 8-bit insns PREV and CURR that (effectively)
+// are REG = CONST_INT to one 16-bit such insn. Returns true on success.
+bool
+insninfo_t::combine (const insninfo_t &prev, const insninfo_t &curr)
+{
+ if (prev.m_size == 1 && curr.m_size == 1
+ && prev.m_regno == (1 ^ curr.m_regno)
+ && curr.m_code == CONST_INT
+ && prev.m_code == CONST_INT)
+ {
+ m_regno = curr.m_regno & ~1;
+ m_code = CONST_INT;
+ m_size = 2;
+ m_scratch = std::max (curr.m_scratch, prev.m_scratch);
+ m_isrc = m_regno == prev.m_regno
+ ? (uint8_t) prev.m_isrc + 256 * (uint8_t) curr.m_isrc
+ : (uint8_t) curr.m_isrc + 256 * (uint8_t) prev.m_isrc;
+
+ return true;
+ }
+
+ return false;
+}
+
+
+// Return the cost (in terms of words) of the respective mov<mode> insn.
+// This can be used as an upper bound for the ply_t's cost.
+int
+insninfo_t::cost () const
+{
+ if (m_code != CONST_INT)
+ return m_size;
+
+ if (m_regno >= REG_16 || m_isrc == 0)
+ return m_size
+ // MOVW can save one instruction.
+ - (AVR_HAVE_MOVW && m_size == 4 && (uint32_t) m_isrc % 0x10001 == 0);
+
+ // LDI + MOV to a lower reg.
+ if (m_scratch && m_size == 1)
+ return 2;
+
+ if (m_size == 8)
+ {
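+ // LDI can only load to R16 ... R31; hence each non-zero byte that
+ // goes to a lower reg costs one extra instruction (LDI + MOV).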
+ int len = m_size;
+ for (int i = 0; i < m_size; ++i)
+ len += m_regno + i < REG_16 && (0xff & (m_isrc >> (8 * i))) != 0;
+ return len;
+ }
+
+ // All other cases are complicated. Ask the output oracle.
+ const machine_mode mode = size_to_mode (m_size);
+ rtx xscratch = m_scratch ? all_regs_rtx[m_scratch] : NULL_RTX;
+ rtx xop[] = { gen_rtx_REG (mode, m_regno), gen_int_mode (m_isrc, mode) };
+ int len;
+ if (m_size == 4)
+ output_reload_insisf (xop, xscratch, &len);
+ else
+ output_reload_in_const (xop, xscratch, &len, false);
+
+ return len;
+}
+
+// Emit the corresponding REG = REG-or-CONST_INT insn. Returns the number
+// of emitted insns (0 or 1), or aborts when the insn is not of that form.
+int
+insninfo_t::emit_insn () const
+{
+ int n_insns = 0;
+
+ machine_mode mode = size_to_mode (m_size);
+ rtx xsrc = NULL_RTX;
+ rtx xscratch = NULL_RTX;
+
+ gcc_assert (m_size > 0);
+
+ switch (m_code)
+ {
+ default:
+ gcc_unreachable();
+
+ case CONST_INT:
+ xsrc = gen_int_mode (m_isrc, mode);
+ if (m_scratch && m_regno < REG_16)
+ xscratch = gen_rtx_REG (QImode, m_scratch);
+ break;
+
+ case REG:
+ gcc_assert (gpr_regno_p (m_rsrc, m_size));
+ if (m_regno != m_rsrc)
+ xsrc = gen_rtx_REG (mode, m_rsrc);
+ break;
+ }
+
+ if (xsrc)
+ {
+ rtx dest = gen_rtx_REG (mode, m_regno);
+ emit_valid_move_clobbercc (dest, xsrc, xscratch);
+ n_insns += 1;
+ }
+
+ return n_insns;
+}
+
+
+// Entering a basic block means combining known register values from
+// all incoming BBs.
+void
+bbinfo_t::enter ()
+{
+ avr_dump ("\n;; Entering [bb %d]\n", bb->index);
+
+ gcc_assert (! done);
+
+ edge e;
+ edge_iterator ei;
+ gprmask_t pred_known_mask = ~0u;
+ bbinfo_t *bbi = nullptr;
+
+ // A quick iteration over all predecessors / incoming edges to reveal
+ // whether this BB is worth a closer look.
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ {
+ basic_block pred = e->src;
+ bbi = & bb_info[pred->index];
+
+ pred_known_mask &= bbi->regs.known;
+
+ if (dump_file)
+ {
+ avr_dump (";; [bb %d] <- [bb %d] ", e->dest->index, e->src->index);
+ if (bbi->done)
+ bbi->regs.dump ();
+ else
+ avr_dump (" (unknown)\n");
+ }
+ }
+
+ // Only if all predecessors have already been handled can we
+ // have known values when entering the current BB.
+ if (pred_known_mask != 0
+ && bbi != nullptr)
+ {
+ // Initialize current BB info from BI, an arbitrary predecessor.
+
+ regs = bbi->regs;
+
+ // Coalesce the output values from all predecessor BBs. At the
+ // start of the current BB, a value is only known if it is known
+ // in *all* predecessors and *all* these values are the same.
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ {
+ regs.coalesce (bb_info[e->src->index].regs);
+ }
+ }
+
+ if (dump_file)
+ {
+ avr_dump (";; [bb %d] known at start: ", bb->index);
+ if (regs.known)
+ regs.dump ();
+ else
+ avr_dump (" (none)\n");
+ avr_dump ("\n");
+ }
+}
+
+
+void
+bbinfo_t::leave ()
+{
+ done = true;
+
+ if (dump_file)
+ fprintf (dump_file, ";; Leaving [bb %d]\n\n", bb->index);
+}
+
+
+/* Initialize according to INSN which is a 1-byte single_set that's
+ (effectively) a reg = reg or reg = const move. INSN may be the result
+ of the current pass's optimization, e.g. something like INC R2 where R2
+ has a known content. MEMO is the state prior to INSN. Only CONST
+ cases are recorded, plus cases that are non-trivial, for example when
+ an XOR decays to a move. */
+
+bool
+insninfo_t::init1 (insn_optimize_data_t &iod, int max_size,
+ const char *purpose = "")
+{
+ m_size = 0;
+ m_insn = iod.insn;
+ m_old_code = UNKNOWN;
+ iod.unused = false;
+
+ if (! iod.insn
+ || ! (m_set = single_set_with_scratch (iod.insn, m_scratch)))
+ return false;
+
+ rtx dest = SET_DEST (m_set);
+ machine_mode mode = GET_MODE (dest);
+ const int n_bytes = GET_MODE_SIZE (mode);
+ max_size = std::min (max_size, FUSE_MOVE_MAX_MODESIZE);
+
+ if (! REG_P (dest)
+ || END_REGNO (dest) > REG_32
+ || n_bytes > max_size)
+ return false;
+
+ // Omit insns that (explicitly) touch fixed GPRs in any way.
+ using elt0_getter_HRS = elt0_getter<HARD_REG_SET, HARD_REG_ELT_TYPE>;
+ HARD_REG_SET hregs;
+ CLEAR_HARD_REG_SET (hregs);
+ find_all_hard_regs (PATTERN (iod.insn), & hregs);
+ if (memento_t::fixed_regs_mask & (gprmask_t) elt0_getter_HRS::get (hregs))
+ {
+ avr_dump (";; %sinit1 has fixed GPRs\n", purpose);
+ return false;
+ }
+
+ if ((iod.unused = find_reg_note (iod.insn, REG_UNUSED, dest)))
+ return false;
+
+ m_src = SET_SRC (m_set);
+ m_regno = REGNO (dest);
+ const rtx_code src_code = GET_CODE (m_src);
+
+ m_ai = absint_t::explore (m_src, iod.regs, mode);
+
+ if (m_ai.popcount ())
+ {
+ if (m_ai.end_knows (CONST_INT) >= n_bytes)
+ {
+ m_code = CONST_INT;
+ m_old_code = CONSTANT_P (m_src) ? UNKNOWN : src_code;
+ m_isrc = m_ai.get_value (n_bytes);
+ m_size = n_bytes;
+ }
+ else if (! REG_P (m_src)
+ && n_bytes == 1
+ && m_ai.end_knows (REG) >= n_bytes)
+ {
+ m_code = REG;
+ m_old_code = src_code;
+ m_rsrc = m_ai[0].regno ();
+ m_size = n_bytes;
+ }
+ else if (n_bytes == 1)
+ {
+ absint_byte_t &aib = m_new_src;
+ aib = m_ai[0].find_alternative_binary (iod.regs);
+
+ if (aib.arity () == 2
+ && aib.arg (0).regno == m_regno)
+ {
+ m_old_code = src_code;
+ m_code = aib.get_code ();
+ m_size = n_bytes;
+ }
+ }
+ else if (n_bytes >= 2
+ && m_ai.end_knows (VALUE) >= n_bytes)
+ {
+ m_code = src_code;
+ m_size = n_bytes;
+ }
+
+ if (dump_file && m_size != 0)
+ {
+ avr_dump (";; %sinit1 (%C", purpose,
+ m_old_code ? m_old_code : m_code);
+ if (m_old_code)
+ avr_dump ("-> %C", m_code);
+ avr_dump (") insn %d to R%d[%d] := %C:%m = ", INSN_UID (iod.insn),
+ m_regno, n_bytes, src_code, mode);
+
+ m_ai.dump (dest);
+
+ if (dump_flags & TDF_FOLDING)
+ avr_dump ("\n");
+ }
+ }
+
+ return m_size != 0;
+}
+
+
+// The private worker for .apply_insn().
+void
+memento_t::apply_insn1 (rtx_insn *insn, bool unused)
+{
+ gcc_assert (NONDEBUG_INSN_P (insn));
+
+ if (INSN_CODE (insn) == CODE_FOR_set_some)
+ {
+ // This insn only sets some selected bytes of register $3 of
+ // modesize $4. If non-0, then $1 is a QImode scratch d-reg with
+ // a known value of $2.
+
+ const auto &xop = recog_data.operand;
+ extract_insn (insn);
+ gcc_assert (recog_data.n_operands == 7);
+ gcc_assert (set_some_operation (xop[0], VOIDmode));
+
+ const rtx &xscratch = xop[1];
+ const rtx &xscratch_value = xop[2];
+ const int sets_start = 5;
+
+ for (int i = sets_start; i < XVECLEN (xop[0], 0); ++i)
+ {
+ rtx xset = XVECEXP (xop[0], 0, i);
+ avr_dump (";; set_some %r = %r\n", XEXP (xset, 0), XEXP (xset, 1));
+ set_values (XEXP (xset, 0), XEXP (xset, 1));
+ }
+
+ if (REG_P (xscratch))
+ {
+ avr_dump (";; set_some %r = %r restore\n", xscratch, xscratch_value);
+ set_values (xscratch, xscratch_value);
+ }
+
+ return;
+ } // CODE_FOR_set_some
+
+ memento_t mold = *this;
+
+ // When insn changes a register in whatever way, set it to "unknown".
+
+ HARD_REG_SET rset;
+ find_all_hard_reg_sets (insn, &rset, true /* implicit */);
+ known &= ~rset;
+
+ rtx set = single_set (insn);
+ rtx dest;
+
+ if (! set
+ || ! REG_P (dest = SET_DEST (set))
+ || END_REGNO (dest) > REG_32
+ || (regmask (dest) & memento_t::fixed_regs_mask))
+ return;
+
+ rtx src = SET_SRC (set);
+ const rtx_code src_code = GET_CODE (src);
+ const machine_mode mode = GET_MODE (dest);
+ const int n_bytes = GET_MODE_SIZE (mode);
+
+ // Insns that are too complicated or have a poor yield.
+ // Just record which regs are clobbered / changed.
+ if (n_bytes > FUSE_MOVE_MAX_MODESIZE
+ || MEM_P (src)
+ || (REG_P (src) && END_REGNO (src) > REG_32))
+ {
+ // Comparisons may clobber the compared reg when it is unused after.
+ if (src_code == COMPARE
+ && REG_P (XEXP (src, 0))
+ && CONSTANT_P (XEXP (src, 1)))
+ {
+ rtx reg = XEXP (src, 0);
+ for (unsigned r = REGNO (reg); r < END_REGNO (reg); ++r)
+ set_unknown (r);
+ }
+ return;
+ }
+
+ if (unused)
+ return;
+
+ // Simulate the effect of some selected insns that are likely to produce
+ // or propagate known values.
+
+ // Get an abstract representation of src. Bytes may be unknown,
+ // known to equal some 8-bit compile-time constant (CTC) value,
+ // or are known to equal some 8-bit register.
+ // TODO: Currently, only the ai[].val8 knowledge is used.
+ // What's the best way to make use of ai[].regno ?
+
+ absint_t ai = absint_t::explore (src, mold, mode);
+
+ if (ai.popcount ())
+ {
+ avr_dump (";; apply_insn %d R%d[%d] := %C:%m = ", INSN_UID (insn),
+ REGNO (dest), n_bytes, src_code, mode);
+ ai.dump ();
+
+ for (int i = 0; i < n_bytes; ++i)
+ if (ai[i].can (CONST_INT))
+ set_value (i + REGNO (dest), ai[i].val8 ());
+ }
+}
+
+
+void
+memento_t::apply (const ply_t &p)
+{
+ if (p.is_movw ())
+ {
+ copy_value (p.regno, p.arg);
+ copy_value (p.regno + 1, p.arg + 1);
+ }
+ else if (p.is_adiw ())
+ {
+ int val = p.arg + values[p.regno] + 256 * values[1 + p.regno];
+ set_value (p.regno, val);
+ set_value (p.regno + 1, val >> 8);
+ }
+ else if (p.size == 1)
+ {
+ int x = values[p.regno];
+ int y = values[p.arg];
+
+ switch (p.code)
+ {
+ default:
+ gcc_unreachable ();
+ break;
+
+ case REG:
+ copy_value (p.regno, p.arg);
+ break;
+
+ case SET:
+ set_value (p.regno, p.arg);
+ if (p.scratch >= REG_16)
+ set_unknown (p.scratch);
+ break;
+
+ case MOD: // BLD
+ gcc_assert (knows (p.regno));
+ if (popcount_hwi (p.arg) == 1)
+ values[p.regno] |= p.arg;
+ else if (popcount_hwi (p.arg) == 7)
+ values[p.regno] &= p.arg;
+ else
+ gcc_unreachable ();
+ break;
+
+#define DO_ARITH(n_args, code, expr) \
+ case code: \
+ gcc_assert (knows (p.regno)); \
+ if (n_args == 2) \
+ gcc_assert (knows (p.arg)); \
+ set_value (p.regno, expr); \
+ break
+
+ DO_ARITH (1, NEG, -x);
+ DO_ARITH (1, NOT, ~x);
+ DO_ARITH (1, PRE_INC, x + 1);
+ DO_ARITH (1, PRE_DEC, x - 1);
+ DO_ARITH (1, ROTATE, (x << 4) | (x >> 4));
+ DO_ARITH (1, ASHIFT, x << 1);
+ DO_ARITH (1, LSHIFTRT, x >> 1);
+ DO_ARITH (1, ASHIFTRT, (x >> 1) | (x & 0x80));
+
+ DO_ARITH (2, AND, x & y);
+ DO_ARITH (2, IOR, x | y);
+ DO_ARITH (2, XOR, x ^ y);
+ DO_ARITH (2, PLUS, x + y);
+ DO_ARITH (2, MINUS, x - y);
+#undef DO_ARITH
+ }
+ } // size == 1
+ else
+ gcc_unreachable ();
+}
+
+
+// Try to find a sequence of ply_t's that represents an II.m_regno = II.m_isrc
+// insn that sets a reg to a compile-time constant, and that is more
+// efficient than just a move insn. (When try_split_any_p is on, then
+// solutions that perform equally well as a move insn are also allowed.)
+// MEMO0 is the GPR state before II runs. A solution has been found
+// when .fpd->solution has at least one entry. LEN specifies the
+// depth of recursion, which works on the LEN-th ply_t.
+void
+bbinfo_t::find_plies (int len, const insninfo_t &ii, const memento_t &memo0)
+{
+ if (len > fpd->n_best_plys)
+ return;
+
+ memento_t memo = memo0;
+ bool ply_applied_p = false;
+
+ const bool extra = dump_file && (dump_flags & TDF_FOLDING);
+
+ if (extra)
+ {
+ fprintf (dump_file, ";; #%d (HAM=%d): get_plies R%d[%d] = ", len,
+ ii.hamming (fpd->regs0), ii.m_regno, ii.m_size);
+ fprintf (dump_file, "0x%0*" PRIx64 "\n",
+ 2 * ii.m_size, ii.m_isrc & size_to_mask (ii.m_size));
+ }
+
+ plies_t &ps = fpd->plies[len - 1];
+
+ const ply_t *const prev = len >= 2 ? fpd->ply_stack[len - 2] : nullptr;
+ const ply_t *const prev2 = len >= 3 ? fpd->ply_stack[len - 3] : nullptr;
+
+ bbinfo_t::get_plies (ps, ii, memo0, prev);
+
+#define NEXT(reason) \
+ do { \
+ if (extra) \
+ fprintf (dump_file, ";; cont=%s\n", reason); \
+ goto next; \
+ } while (0)
+
+ for (int ip = 0; ip < ps.n_plies; ++ip)
+ {
+ const ply_t &p = ps.plies[ip];
+
+ fpd->ply_stack[len - 1] = &p;
+
+ if (0)
+ next: continue;
+
+ if (extra)
+ ply_t::dump_plys (dump_file, len, 1, fpd->ply_stack + len - 1, memo0);
+
+ // A MOVW with a Hamming distance of < 2 requires more plys.
+ if (p.is_movw () && len + (2 - p.dhamming) > fpd->n_best_plys)
+ NEXT ("movw.plys");
+
+ if (len >= 2)
+ {
+ // Destroying (parts of) the results of the previous ply
+ // won't yield an optimal sequence.
+ if (p.overrides (prev))
+ NEXT ("overrides");
+
+ // When two plys are independent of each other, then only
+ // investigate sequences that operate on the higher reg first.
+ // This canonicalization reduces the number of candidates.
+ if (p.commutes_with (prev, ii.m_scratch)
+ && p.regno > prev->regno)
+ NEXT ("noncanonic");
+
+ // Two subsequent BLDs touching the same register.
+ if (p.is_bld ()
+ && prev->is_bld ()
+ && p.changes_result_of (prev))
+ NEXT ("2bld");
+
+ // When there is a BLD, then at least 2 of the same kind
+ // shall occur in a row.
+ if (prev->is_bld ()
+ && ! p.is_bld ()
+ && (len == 2
+ || (prev->is_setbld () && ! prev2->is_setbld ())
+ || (prev->is_cltbld () && ! prev2->is_cltbld ())))
+ NEXT ("1bld");
+ }
+
+ // The hamming delta of a MOVW may be less than 2, namely 0 or 1.
+ // When the latter is the case, then a reasonable sequence must
+ // modify the result of the MOVW.
+ if (len >= 2
+ && prev->is_movw ()
+ && prev->dhamming == 1
+ && ! p.changes_result_of (prev))
+ NEXT ("movw.dh=1");
+
+ if (len >= 3
+ && prev2->is_movw ()
+ && prev2->dhamming == 0
+ && ! p.changes_result_of (prev2))
+ NEXT ("movw.dh=0");
+
+ // When setting an n-byte destination, then at most n/2 MOVWs
+ // will occur in an optimal sequence.
+ int n_movw = 0;
+ for (int i = 0; i < len; ++i)
+ n_movw += fpd->ply_stack[i]->is_movw ();
+ if (n_movw > ii.m_size / 2)
+ NEXT ("movws");
+
+ if (ply_applied_p)
+ memo = memo0;
+
+ memo.apply (p);
+
+ ply_applied_p = true;
+
+ // Calculate the cost of the sequence we have so far. Scale by some
+ // factor so that we can express that ADIW is more expensive than MOVW
+ // because it is slower, but without defeating MOVW.
+ const int SCALE = 4;
+
+ int penal = 0;
+ int cost = SCALE * 0;
+
+ bool movw_p = 0;
+ for (int i = 0; i < len; ++i)
+ {
+ bool adiw_p = fpd->ply_stack[i]->is_adiw ();
+ cost += SCALE * fpd->ply_stack[i]->cost + adiw_p;
+ penal += adiw_p;
+ movw_p |= fpd->ply_stack[i]->is_movw ();
+ }
+ penal += movw_p;
+
+ const int hamm = ii.hamming (memo);
+
+ // The current Hamming distance yields a lower bound of how many
+ // plys are still required. Consider that future cost already now.
+ int future_cost = AVR_HAVE_MOVW || (AVR_HAVE_ADIW && ii.m_regno >= REG_22)
+ ? (1 + hamm) / 2
+ : hamm;
+
+ // Similarly, when MOVW doesn't decrease the Hamming distance by 2,
+ // then we know that at least 2 - dhamming plys must follow in the
+ // future. (MOVW + ADIW will not occur.)
+ if (p.is_movw ())
+ future_cost = std::max (future_cost, 2 - p.dhamming);
+
+ if (extra && future_cost)
+ avr_dump (";; future cost = %d, dh=%d\n", future_cost, hamm);
+
+ cost += SCALE * future_cost;
+
+ bool profitable = (cost < SCALE * fpd->max_ply_cost
+ || (bbinfo_t::try_split_any_p
+ && cost / SCALE <= fpd->max_ply_cost
+ && cost / SCALE == fpd->movmode_cost));
+ if (! profitable)
+ {
+ if (extra)
+ avr_dump (";; cont=cost %d+%d/%d\n", cost / SCALE, penal, SCALE);
+ continue;
+ }
+
+ if (hamm)
+ {
+ // Go down that rabbit hole.
+ gcc_assert (ply_applied_p);
+ bbinfo_t::find_plies (1 + len, ii, memo);
+ continue;
+ }
+
+ // Found a solution that's better than everything so far.
+
+ // Reduce the upper cost bound according to the found solution.
+ // No future solution will be more expensive.
+ fpd->max_ply_cost = cost / SCALE;
+
+ fpd->solution = plies_t (len, fpd->ply_stack);
+
+ if (dump_file)
+ {
+ avr_dump (";; #%d FOUND COST = %d%s\n", len, cost / SCALE,
+ penal ? " with penalty" : "");
+ ply_t::dump_plys (dump_file, 0, len, fpd->ply_stack, fpd->regs0);
+ if (extra)
+ avr_dump (";; END\n");
+ }
+ } // for ply_t's
+
+#undef NEXT
+}
+
+
+// Run .find_plies() and return true when .fpd->solution is a sequence of ply_t's
+// that represents II, a REG = CONST insn. MEMO is the GPR state prior to II.
+bool
+bbinfo_t::run_find_plies (const insninfo_t &ii, const memento_t &memo) const
+{
+ fpd->solution.reset ();
+ fpd->regs0 = memo;
+ fpd->n_get_plies = 0;
+
+ const int hamm = ii.hamming (memo);
+
+ if (hamm == 0)
+ {
+ avr_dump (";; Found redundant insn %d\n", INSN_UID (ii.m_insn));
+ return true;
+ }
+
+ // Upper bound (in words) for any solution that's better than mov<mode>.
+ // Will be decreased by find_plies() as it finds better solutions.
+ fpd->movmode_cost = ii.cost ();
+ fpd->max_ply_cost = fpd->movmode_cost;
+
+ // With a non-zero Hamming distance, this insn will require at least one
+ // instruction. When the upper bound for required instructions is that
+ // small, then the current insn is good enough.
+ if (fpd->max_ply_cost <= 1)
+ return false;
+
+ fpd->n_best_plys = ii.n_best_plys (hamm);
+ gcc_assert (fpd->n_best_plys <= N_BEST_PLYS);
+
+ if (dump_file)
+ {
+ const uint64_t mask = size_to_mask (ii.m_size);
+ fprintf (dump_file, ";; find_plies R%d[%d] = 0x%0*" PRIx64,
+ ii.m_regno, ii.m_size, 2 * ii.m_size, ii.m_isrc & mask);
+ if (ii.m_scratch)
+ fprintf (dump_file, ", scratch=r%d", ii.m_scratch);
+ memo.dump ("\n;; regs%s\n");
+ }
+
+ avr_dump (";; mov<mode> cost = %d\n", fpd->max_ply_cost);
+ avr_dump (";; max plys = %d\n", fpd->n_best_plys);
+ ply_t::n_ply_ts = 0;
+
+ find_plies (1, ii, memo);
+
+ avr_dump (";; get_plies called %d times\n", fpd->n_get_plies);
+ avr_dump (";; n_ply_ts = %d\n", ply_t::n_ply_ts);
+ ply_t::max_n_ply_ts = std::max (ply_t::max_n_ply_ts, ply_t::n_ply_ts);
+
+ return fpd->solution.n_plies != 0;
+}
+
+
+// Try to fuse two 1-byte insns .prev and .curr to one 2-byte insn (MOVW).
+// Returns true on success, and sets .n_new_insns, .ignore_mask etc.
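+// For example, R24 = 0x34 followed by R25 = 0x12 is treated as
+// R24:R25 = 0x1234, which may be as cheap as one MOVW when some
+// register pair is already known to hold 0x1234.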
+bool
+optimize_data_t::try_fuse (bbinfo_t *bbi)
+{
+ insninfo_t comb;
+
+ if (! prev.ii.m_size
+ || ! curr.ii.m_size
+ || ! comb.combine (prev.ii, curr.ii))
+ return false;
+
+ avr_dump (";; Working on fuse of insn %d + insn %d = 0x%04x\n",
+ INSN_UID (prev.insn), INSN_UID (curr.insn),
+ (unsigned) comb.m_isrc);
+
+ bool found = bbi->run_find_plies (comb, prev.regs);
+ if (found)
+ {
+ avr_dump (";; Found fuse of insns %d and %d\n",
+ INSN_UID (prev.insn), INSN_UID (curr.insn));
+
+ n_new_insns = bbinfo_t::fpd->solution.emit_insns (comb, prev.regs);
+ delete_prev_p = true;
+
+ if (prev.ii.m_scratch)
+ ignore_mask |= regmask (prev.ii.m_scratch, 1);
+ if (curr.ii.m_scratch)
+ ignore_mask |= regmask (curr.ii.m_scratch, 1);
+ ignore_mask &= ~regmask (comb.m_regno, comb.m_size);
+ }
+
+ return found;
+}
+
+
+// Try to replace an arithmetic 1-byte insn by a reg-reg move.
+// Returns true on success, and sets .n_new_insns etc.
+bool
+optimize_data_t::try_simplify (bbinfo_t *)
+{
+ if (curr.ii.m_size == 1
+ && curr.ii.m_old_code != REG
+ && curr.ii.m_code == REG)
+ {
+ avr_dump (";; Found simplify of insn %d\n", INSN_UID (curr.insn));
+
+ n_new_insns = curr.ii.emit_insn ();
+
+ return true;
+ }
+
+ return false;
+}
+
+
+// Try to replace XEXP (*, 1) of a binary operation by a cheaper expression.
+// Returns true on success; sets .n_new_insns, .ignore_mask, .delete_prev_p.
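+// For example, R2 = R2 + 2 costs two INCs; when some GPR is known to
+// hold the value 2, then adding that register instead is cheaper.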
+bool
+optimize_data_t::try_bin_arg1 (bbinfo_t *)
+{
+ if (curr.ii.m_size != 1
+ || curr.ii.m_new_src.arity () != 2
+ || curr.unused)
+ return false;
+
+ avr_dump (";; Working on bin_arg1 insn %d\n", INSN_UID (curr.insn));
+
+ gcc_assert (curr.ii.m_src && BINARY_P (curr.ii.m_src));
+ rtx xarg1_old = XEXP (curr.ii.m_src, 1);
+
+ const absint_byte_t &aib = curr.ii.m_new_src;
+ const absint_val_t &arg0 = aib.arg (0);
+ const absint_val_t &arg1 = aib.arg (1);
+ const absint_val_t &arg1_old = curr.ii.m_ai[0].arg (1);
+
+ rtx src = NULL_RTX;
+
+ if (CONSTANT_P (xarg1_old))
+ {
+ // Sometimes, we allow expensive constants as 2nd operand like
+ // in R2 += 2 which produces two INCs. When we have the
+ // constant handy in a reg, then use that instead of the constant.
+ const rtx_code code = aib.get_code ();
+ gcc_assert (arg1.val8 == (INTVAL (xarg1_old) & 0xff));
+
+ if (AVRasm::constant_cost (code, arg0.regno, arg1.val8) > 1)
+ src = aib.to_rtx ();
+ }
+ else if (REG_P (xarg1_old)
+ && dead_or_set_p (curr.insn, xarg1_old))
+ {
+ src = aib.to_rtx ();
+
+ // The 2nd operand is a reg with a known content that dies
+ // at the current insn. Chances are high that the register
+ // holds a reload value only used by the current insn.
+ if (prev.ii.m_size == 1
+ && rtx_equal_p (xarg1_old, SET_DEST (prev.ii.m_set))
+ && CONSTANT_P (prev.ii.m_src))
+ {
+ avr_dump (";; Found dying reload insn %d\n", INSN_UID (prev.insn));
+
+ delete_prev_p = true;
+ ignore_mask = regmask (arg1_old.regno, 1);
+ }
+ }
+
+ if (src)
+ {
+ rtx dest = SET_DEST (curr.ii.m_set);
+
+ avr_dump (";; Found bin_arg1 for insn %d: ", INSN_UID (curr.insn));
+ avr_dump ("%C:%m %r", curr.ii.m_code, GET_MODE (dest), xarg1_old);
+ aib.dump (" = %s\n");
+
+ emit_valid_move_clobbercc (dest, src);
+ n_new_insns = 1;
+ }
+
+ return src != NULL_RTX;
+}
+
+
+// Try to replace a REG = CONST insn by a cheaper sequence.
+// Returns true on success, and sets .n_new_insns, .ignore_mask etc.
+bool
+optimize_data_t::try_split_ldi (bbinfo_t *bbi)
+{
+ if (! curr.ii.m_size
+ || curr.unused
+ || curr.ii.m_code != CONST_INT
+ || (! bbinfo_t::try_split_any_p
+ // Finding plys will only ever succeed when there are
+ // regs with a known value.
+ && ! (curr.regs.known
+ || (AVR_HAVE_MOVW
+ && curr.ii.m_regno < REG_16 && curr.ii.m_size == 4))))
+ return false;
+
+ avr_dump (";; Working on split_ldi insn %d\n", INSN_UID (curr.insn));
+
+ bool found = bbi->run_find_plies (curr.ii, curr.regs);
+ if (found)
+ {
+ avr_dump (";; Found split for ldi insn %d\n", INSN_UID (curr.insn));
+
+ n_new_insns = bbinfo_t::fpd->solution.emit_insns (curr.ii, curr.regs);
+
+ if (curr.ii.m_scratch)
+ ignore_mask = regmask (curr.ii.m_scratch, 1);
+ }
+
+ return found;
+}
+
+
+// Helper for try_split_any().
+bool
+optimize_data_t::fail (const char *reason)
+{
+ n_new_insns = -1;
+
+ if (dump_file)
+ fprintf (dump_file, ";; Giving up split_any: %s\n", reason);
+
+ return false;
+}
+
+
+// Helper for try_split_any().
+rtx_insn *
+optimize_data_t::emit_and_apply_move (memento_t &memo, rtx dest, rtx src)
+{
+ rtx_insn *insn = emit_valid_move_clobbercc (dest, src);
+ n_new_insns += 1;
+ memo.apply_insn (insn, false);
+
+ return insn;
+}
+
+
+// Set X0 and X1 so that they are operands valid for an andqi3, iorqi3, xorqi3
+// or addqi3 insn with destination R_DEST. The method loads X1 into
+// a scratch reg as needed and records the GPR effect in IOD.regs.
+// EXTRA_COST are extra costs in units of words of insns that cost more
+// than one instruction. This is a helper for try_split_any().
+bool
+optimize_data_t
+ ::get_2ary_operands (rtx_code &code, const absint_byte_t &aib,
+ insn_optimize_data_t &iod, int r_dest,
+ absint_val_t &x0, absint_val_t &x1, int &extra_cost)
+{
+ if (code != IOR && code != AND && code != XOR && code != PLUS)
+ return fail ("2ary: unknown code");
+
+ x0 = aib.arg (0);
+ x1 = aib.arg (1);
+
+ if (! x0.knows_regno ()
+ || x1.clueless ())
+ return fail ("2ary: clueless");
+
+ int val8 = x1.val8;
+ int val8_cost = val8 < 0 ? 100 : AVRasm::constant_cost (code, r_dest, val8);
+
+ if (x0.regno == r_dest
+ && (x1.knows_regno ()
+ || val8_cost <= 1))
+ {
+ if (code == XOR
+ && val8 == 0x80
+ && x0.regno >= REG_16)
+ {
+ // xorxi3 can only "r,0,r".
+ // x0 ^ 0x80 <=> x0 - 0x80.
+ x1.regno = 0;
+ code = MINUS;
+ }
+ return true;
+ }
+
+ const bool and_1_bit = code == AND && popcount_hwi (val8) == 1;
+ // andqi3 has a "r,r,Cb1" alternative where Cb1 has exactly 1 bit set.
+ // This can accommodate bytes of higher AND Cb<N> alternatives.
+ if (x0.regno != r_dest)
+ {
+ if (and_1_bit)
+ {
+ extra_cost += 1 + (r_dest < REG_16);
+ return true;
+ }
+ else if (x1.regno == r_dest)
+ {
+ std::swap (x0, x1);
+ return true;
+ }
+ return fail ("2ary is a 3-operand insn");
+ }
+
+ // Now we have:
+ // 1) r_dest = x0.regno, and
+ // 2) x1 is val8, and
+ // 3) x1 costs 2.
+
+ const bool needs_scratch_p = select<bool>()
+ : code == XOR ? true
+ : code == AND ? popcount_hwi (val8) != 7
+ : code == IOR ? popcount_hwi (val8) != 1
+ : code == PLUS ? IN_RANGE (val8, 3, 0xff - 3)
+ : bad_case<bool> ();
+
+ const int r_val8 = iod.regs.regno_with_value (val8, 0 /* excludes: none */);
+ if (r_val8)
+ {
+ // Found a reg that already holds the constant.
+ x1.val8 = -1;
+ x1.regno = r_val8;
+ return true;
+ }
+ else if (iod.ii.m_scratch)
+ {
+ // Using the insn's scratch reg.
+ rtx xdst = gen_rtx_REG (QImode, iod.ii.m_scratch);
+ rtx xsrc = gen_int_mode (x1.val8, QImode);
+ emit_and_apply_move (iod.regs, xdst, xsrc);
+
+ x1.regno = iod.ii.m_scratch;
+ x1.val8 = -1;
+
+ return true;
+ }
+ else if (! needs_scratch_p)
+ {
+ // Some constants (1 and -1) can be loaded without a scratch.
+ extra_cost += 1;
+ return true;
+ }
+ else if (and_1_bit)
+ {
+ // This can always fall back to BST + CLR + BLD, but may be cheaper.
+ extra_cost += 1 + (r_dest < REG_16);
+ return true;
+ }
+
+ return fail ("2ary: expensive constant");
+}
+
+
+static inline bool
+any_shift_p (rtx_code code)
+{
+ return code == LSHIFTRT || code == ASHIFTRT || code == ASHIFT;
+}
+
+// Try to split .curr into a sequence of 1-byte insns.
+// Returns true on success. Sets .n_new_insns and .ignore_mask.
+bool
+optimize_data_t::try_split_any (bbinfo_t *)
+{
+ if (curr.ii.m_size < 2
+ // Constants are split by split_ldi.
+ || CONSTANT_P (curr.ii.m_src)
+ // Splitting requires knowledge about what to do with each byte.
+ || curr.ii.m_ai.end_knows (VALUE) < curr.ii.m_size)
+ return false;
+
+ avr_dump (";; Working on split_any %C:%m insn %d\n", curr.ii.m_code,
+ GET_MODE (SET_DEST (curr.ii.m_set)), INSN_UID (curr.insn));
+
+ const insninfo_t &ii = curr.ii;
+ const int n_bytes = ii.m_size;
+ int extra_cost = 0;
+ int binop_cost = -1;
+
+ // For plain AND, IOR, XOR get the current cost in units of words.
+ if (BINARY_P (curr.ii.m_src))
+ {
+ const rtx_code code = curr.ii.m_code;
+ if ((code == IOR || code == AND || code == XOR)
+ && REG_P (XEXP (curr.ii.m_src, 0))
+ && CONSTANT_P (XEXP (curr.ii.m_src, 1)))
+ {
+ binop_cost = get_attr_length (curr.insn);
+ avr_dump (";; Competing against %C:%m cost = %d\n", code,
+ GET_MODE (curr.ii.m_src), binop_cost);
+ }
+ }
+
+ // Step 1: Work out conflicts and which sign extends to perform.
+
+ const gprmask_t regs_dest = regmask (ii.m_regno, n_bytes);
+ int r_sign = 0;
+ gprmask_t regs_signs = 0;
+ bool has_lsl = false;
+ bool has_lsr = false;
+
+ for (int i = 0; i < n_bytes; ++i)
+ {
+ const absint_byte_t &aib = ii.m_ai[i];
+ const int r_dest = ii.m_regno + i;
+ const gprmask_t regs_src = aib.reg_mask ();
+
+ // When only regs to the right are used, or only regs to the left
+ // are used, then there's no conflict like the one arising for rotates.
+ // For now, only implement conflict-free splits.
+ has_lsl |= has_bits_in (regs_src & regs_dest, 0, r_dest - 1);
+ has_lsr |= has_bits_in (regs_src & regs_dest, r_dest + 1, 31);
+ if (has_lsl && has_lsr)
+ return fail ("has both << and >>");
+
+ if (aib.get_code () == SIGN_EXTEND)
+ {
+ const absint_val_t x0 = aib.arg (0);
+ if (! r_sign)
+ r_sign = x0.regno;
+ else if (r_sign != x0.regno)
+ return fail ("too many signs");
+
+ // Signs are handled below after all the other bytes.
+ regs_signs |= regmask (r_dest, 1);
+ }
+ }
+
+ // Step 2: Work on the individual bytes and emit according insns.
+
+ n_new_insns = 0;
+ memento_t memo = curr.regs;
+
+ const int step = has_lsl ? -1 : 1;
+ const int istart = step == 1 ? 0 : n_bytes - 1;
+ const int iend = step == 1 ? n_bytes : -1;
+
+ for (int i = istart; i != iend; i += step)
+ {
+ const absint_byte_t &aib = ii.m_ai[i];
+ const int r_dest = ii.m_regno + i;
+ rtx_code code = aib.get_code ();
+ rtx xsrc = NULL_RTX;
+ rtx xdest = gen_rtx_REG (QImode, r_dest);
+
+ if (code == SET)
+ {
+ const int r_src = aib.regno (false);
+ const int val8 = aib.val8 (false);
+ int r16;
+
+ // A no-op...
+ if (r_dest == r_src)
+ continue;
+ // ...or an existing 16-bit constant...
+ else if (AVR_HAVE_MOVW
+ && i + step != iend
+ // Next is not a no-op.
+ && ii.m_ai[i + step].regno (false) != r_dest + step
+ // Eligible for MOVW.
+ && r_dest + step == (r_dest ^ 1)
+ && r_dest % 2 == i % 2
+ && (r16 = ii.m_ai.reg16_with_value (i, i + step, memo)))
+ {
+ xdest = gen_rtx_REG (HImode, r_dest & ~1);
+ xsrc = gen_rtx_REG (HImode, r16);
+ i += step;
+ }
+ // ...or a cheap constant...
+ else if (val8 >= 0
+ && AVRasm::constant_cost (SET, r_dest, val8) <= 1)
+ xsrc = gen_int_mode (val8, QImode);
+ // ...or a reg-reg move...
+ else if (r_src)
+ xsrc = gen_rtx_REG (QImode, r_src);
+ // ...or a costly constant that already exists in some reg...
+ else if (memo.regno_with_value (val8, 0 /* excludes: none */))
+ xsrc = gen_rtx_REG (QImode, memo.regno_with_value (val8, 0));
+ // ...or a costly constant loaded into curr.insn's scratch reg...
+ else if (ii.m_scratch)
+ {
+ rtx xscratch = gen_rtx_REG (QImode, ii.m_scratch);
+ rtx xval8 = gen_int_mode (val8, QImode);
+ emit_and_apply_move (memo, xscratch, xval8);
+ xsrc = xscratch;
+ }
+ // ...or a costly constant (1 or -1) that doesn't need a scratch.
+ else if (! AVRasm::ldi_needs_scratch (r_dest, val8))
+ {
+ extra_cost += 1;
+ xsrc = gen_int_mode (val8, QImode);
+ }
+ else
+ return fail ("expensive val8");
+ } // SET
+ else if (aib.arity () == 1)
+ {
+ if (aib.get_code () == SIGN_EXTEND)
+ // Signs are handled after all the others.
+ continue;
+ else
+ {
+ const absint_val_t x0 = aib.arg (0);
+ rtx xop0 = gen_rtx_REG (QImode, x0.regno);
+ xsrc = gen_rtx_fmt_e (code, QImode, xop0);
+ }
+ } // unary
+ else if (aib.arity () == 2)
+ {
+ absint_val_t x0;
+ absint_val_t x1;
+ insn_optimize_data_t iod (memo);
+ iod.ii = curr.ii;
+
+ if (! get_2ary_operands (code, aib, iod, r_dest, x0, x1, extra_cost))
+ return false;
+ rtx xop0 = gen_rtx_REG (QImode, x0.regno);
+ rtx xop1 = x1.knows_val8 ()
+ ? gen_int_mode (x1.val8, QImode)
+ : gen_rtx_REG (QImode, x1.regno);
+
+ xsrc = gen_rtx_fmt_ee (code, QImode, xop0, xop1);
+ } // binary
+
+ if (! xsrc)
+ return fail ("no source found");
+
+ if (r_sign
+ && (regmask (xdest) & regmask (r_sign, 1)))
+ return fail ("clobbered r_sign");
+
+ emit_and_apply_move (memo, xdest, xsrc);
+ }
+
+ // Step 3: Emit insns for sign extend.
+ // No more need to track memo beyond this point.
+
+ if (! emit_signs (r_sign, regs_signs))
+ return false;
+
+ if (binop_cost >= 0)
+ {
+ avr_dump (";; Expected cost: %d + %d\n", n_new_insns, extra_cost);
+ if (n_new_insns + extra_cost > binop_cost)
+ return fail ("too expensive");
+ }
+
+ if (ii.m_scratch)
+ ignore_mask = regmask (ii.m_scratch, 1);
+
+ return true;
+}
+
+
+// A helper for try_split_any() above.
+// Emit sign extends from R_MSB.7 to all regs in REGS_SIGNS.
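+// For example, with R_MSB = R21 and REGS_SIGNS covering R22 ... R25, this
+// emits R22 = R21 >> 7 (ashrqi3), R23 = R22, and MOVW R24,R22 (with MOVW).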
+bool
+optimize_data_t::emit_signs (const int r_msb, gprmask_t regs_signs)
+{
+ if (! regs_signs)
+ return true;
+ else if (! r_msb)
+ return fail ("fatal: no r_msb given");
+
+ // Pick an arbitrary reg from the sign destinations when the source
+ // isn't one of the signs.
+ const int r_signs = regs_signs & regmask (r_msb, 1)
+ ? r_msb
+ : ctz_hwi (regs_signs);
+
+ // Set all bits in r_signs according to the sign of r_msb using the
+ // r,r,C07 alternative of ashrqi3.
+ rtx xsrc = gen_rtx_fmt_ee (ASHIFTRT, QImode,
+ gen_rtx_REG (QImode, r_msb), GEN_INT (7));
+ emit_valid_move_clobbercc (gen_rtx_REG (QImode, r_signs), xsrc);
+ regs_signs &= ~regmask (r_signs, 1);
+
+ // Set up a 16-bit sign register if possible.
+ int r16_signs = 0;
+ if (regs_signs & regmask (r_signs ^ 1, 1))
+ {
+ emit_move_mask (r_signs ^ 1, r_signs, 1, regs_signs);
+ r16_signs = r_signs & ~1;
+ }
+
+ // Handle all 16-bit sign reg pairs, provided we have MOVW.
+ if (AVR_HAVE_MOVW)
+ for (int r = FIRST_GPR; r < REG_32; r += 2)
+ {
+ const gprmask_t m = regmask (r, 2);
+ if ((m & regs_signs) == m)
+ {
+ if (r16_signs)
+ emit_move_mask (r, r16_signs, 2, regs_signs);
+ else
+ {
+ emit_move_mask (r + 0, r_signs, 1, regs_signs);
+ emit_move_mask (r + 1, r_signs, 1, regs_signs);
+ r16_signs = r;
+ }
+ }
+ }
+
+ // Handle all remaining signs.
+ while (regs_signs)
+ emit_move_mask (ctz_hwi (regs_signs), r_signs, 1, regs_signs);
+
+ return true;
+}
+
+// Helper for the method above. Move N_BYTES registers from R_SRC to R_DST,
+// keeping track of which regs are still to be done in MASK.
+void
+optimize_data_t::emit_move_mask (int r_dst, int r_src, int n_bytes,
+ gprmask_t &mask)
+{
+ const gprmask_t mask_dst = regmask (r_dst, n_bytes);
+ const gprmask_t mask_src = regmask (r_src, n_bytes);
+ gcc_assert ((mask_dst & mask) == mask_dst);
+ gcc_assert ((mask_src & mask) == 0);
+ rtx xdst = gen_rtx_REG (size_to_mode (n_bytes), r_dst);
+ rtx xsrc = gen_rtx_REG (size_to_mode (n_bytes), r_src);
+ emit_valid_move_clobbercc (xdst, xsrc);
+ n_new_insns += 1;
+ mask &= ~mask_dst;
+}
+
+
+void
+bbinfo_t::optimize_one_block (bool &changed)
+{
+ memento_t prev_regs;
+
+ rtx_insn *insn = next_nondebug_insn_bb (bb, BB_HEAD (bb));
+
+ for (rtx_insn *next_insn; insn; insn = next_insn)
+ {
+ next_insn = next_nondebug_insn_bb (bb, insn);
+
+ avr_dump ("\n;; Working on insn %d\n%r\n\n", INSN_UID (insn), insn);
+
+ optimize_data_t od (prev_regs, regs);
+
+ od.prev.insn = prev_nondebug_insn_bb (bb, insn);
+ od.curr.insn = insn;
+
+ od.prev.ii.init1 (od.prev, 1, "IIprev ");
+ od.curr.ii.init1 (od.curr, 8, "IIcurr ");
+
+ start_sequence ();
+
+ bool found = ((bbinfo_t::try_fuse_p && od.try_fuse (this))
+ || (bbinfo_t::try_bin_arg1_p && od.try_bin_arg1 (this))
+ || (bbinfo_t::try_simplify_p && od.try_simplify (this))
+ || (bbinfo_t::try_split_ldi_p && od.try_split_ldi (this))
+ || (bbinfo_t::try_split_any_p && od.try_split_any (this)));
+
+ rtx_insn *new_insns = get_insns ();
+ end_sequence ();
+
+ gcc_assert (found == (od.n_new_insns >= 0));
+
+ ++tick;
+
+ // This insn will become the previous one in the next loop iteration.
+ // Just used in dumps.
+ rtx_insn *new_curr_insn;
+
+ if (! found)
+ {
+ // Nothing changed.
+ avr_dump (";; Keeping old route.\n");
+ gcc_assert (! od.delete_prev_p);
+
+ prev_regs = regs;
+ regs.apply_insn (insn, false);
+
+ new_curr_insn = insn;
+ }
+ else
+ {
+ // We have new_insns.
+ changed = true;
+
+ if (dump_file)
+ {
+ avr_dump ("\n;; EMIT %d new insn%s replacing ",
+ od.n_new_insns, "s" + (od.n_new_insns == 1));
+ if (od.delete_prev_p)
+ avr_dump ("insn %d and ", INSN_UID (od.prev.insn));
+ avr_dump ("insn %d, delete_prev=%d:\n%L\n", INSN_UID (insn),
+ od.delete_prev_p, new_insns);
+ }
+
+ new_curr_insn = od.emit_sequence (bb, new_insns);
+ } // found
+
+ if (dump_file && new_curr_insn)
+ {
+ avr_dump ("\n");
+
+ const int d = regs.distance_to (prev_regs);
+ if (d || new_curr_insn != insn)
+ avr_dump (";; %d regs changed state:\n", d);
+
+ if (new_curr_insn != insn)
+ {
+ avr_dump (";; Befor insn %d", INSN_UID (new_curr_insn));
+ prev_regs.dump ();
+ }
+
+ avr_dump (";; After insn %d", INSN_UID (new_curr_insn));
+ regs.dump ();
+ }
+ } // for BB insns
+}
+
+
+void
+bbinfo_t::optimize_one_function (function *func)
+{
+ bbinfo_t::fpd = XNEW (bbinfo_t::find_plies_data_t);
+ bbinfo_t::bb_info = XCNEWVEC (bbinfo_t, last_basic_block_for_fn (func));
+ int *post_order = XNEWVEC (int, n_basic_blocks_for_fn (func));
+
+ plies_t::max_n_plies = 0;
+
+ using elt0_getter_HRS = elt0_getter<HARD_REG_SET, HARD_REG_ELT_TYPE>;
+ memento_t::fixed_regs_mask = (gprmask_t) elt0_getter_HRS::get (fixed_reg_set);
+
+ // Option -mfuse-move=<0,23> provides a 3:2:2:2 mixed radix value:
+ // -mfuse-move=   0  1  2  3  4  5  6  7  8  9 10  1  2  3  4  5  6  7  8  9 20  1  2  3   Digit
+ // fuse              1     1     1     1     1     1     1     1     1     1     1     1      0
+ // bin_arg1             1  1        1  1        1  1        1  1        1  1        1  1      1
+ // split_any                  1  1  1  1              1  1  1  1              1  1  1  1      2
+ // split_ldi                              1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1      3
+ // use arith                                                      1  1  1  1  1  1  1  1      3
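+ // For example, -mfuse-move=7 enables fuse, bin_arg1 and split_any
+ // (and simplify), whereas -mfuse-move=23 enables all of the above
+ // plus split_ldi, set_some and the arithmetic plies.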
+
+ // Which optimization(s) to perform.
+ bbinfo_t::try_fuse_p = avr_fuse_move & 0x1; // Digit 0 in [0, 1].
+ bbinfo_t::try_bin_arg1_p = avr_fuse_move & 0x2; // Digit 1 in [0, 1].
+ bbinfo_t::try_split_any_p = avr_fuse_move & 0x4; // Digit 2 in [0, 1].
+ bbinfo_t::try_split_ldi_p = avr_fuse_move >> 3; // Digit 3 in [0, 2].
+ bbinfo_t::use_arith_p = (avr_fuse_move >> 3) >= 2; // Digit 3 in [0, 2].
+ bbinfo_t::use_set_some_p = bbinfo_t::try_split_ldi_p; // Digit 3 in [0, 2].
+ bbinfo_t::try_simplify_p = avr_fuse_move != 0;
+
+ // Topologically sort BBs from last to first.
+
+ const int n_post_order = post_order_compute (post_order, false, false);
+ bool changed = false;
+
+ // Traverse the BBs from first to last in order to increase the chance
+ // that register values from all incoming edges are known.
+
+ for (int n = n_post_order - 1; n >= 0; --n)
+ {
+ basic_block bb = BASIC_BLOCK_FOR_FN (func, post_order[n]);
+
+ bbinfo_t::bb_info[bb->index].bb = bb;
+ bbinfo_t::bb_info[bb->index].enter ();
+ bbinfo_t::bb_info[bb->index].optimize_one_block (changed);
+ bbinfo_t::bb_info[bb->index].leave ();
+ }
+
+ if (plies_t::max_n_plies)
+ avr_dump (";; max_n_plies=%d\n", (int) plies_t::max_n_plies);
+
+ if (changed)
+ {
+ df_note_add_problem ();
+ df_analyze ();
+ }
+
+ XDELETEVEC (post_order);
+ XDELETEVEC (bbinfo_t::bb_info);
+ XDELETE (bbinfo_t::fpd);
+}
+
+} // anonymous namespace
+
namespace
{
{
return new avr_pass_recompute_notes (ctxt, "avr-notes-free-cfg");
}
+
+// Optimize moves after reload.
+
+rtl_opt_pass *
+make_avr_pass_fuse_move (gcc::context *ctxt)
+{
+ return new avr_pass_fuse_move (ctxt, "avr-fuse-move");
+}