{ OPT_LEVELS_2_PLUS, OPT_mfuse_move_, NULL, 23 },
{ OPT_LEVELS_2_PLUS, OPT_msplit_bit_shift, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_msplit_ldst, NULL, 1 },
+ { OPT_LEVELS_2_PLUS, OPT_muse_nonzero_bits, NULL, 1 },
// Stick to the "old" placement of the subreg lowering pass.
{ OPT_LEVELS_1_PLUS, OPT_fsplit_wide_types_early, NULL, 1 },
/* Allow optimizer to introduce store data races. This used to be the
#include "target.h"
#include "rtl.h"
#include "tree.h"
+#include "diagnostic-core.h"
#include "cfghooks.h"
#include "cfganal.h"
#include "df.h"
}
+\f
+//////////////////////////////////////////////////////////////////////////////
+// Split insns with nonzero_bits() after combine.
+
+static const pass_data avr_pass_data_split_nzb =
+{
+ RTL_PASS, // type
+ "", // name (will be patched)
+ OPTGROUP_NONE, // optinfo_flags
+ TV_DF_SCAN, // tv_id
+ 0, // properties_required
+ 0, // properties_provided
+ 0, // properties_destroyed
+ 0, // todo_flags_start
+ 0 // todo_flags_finish
+};
+
+class avr_pass_split_nzb : public rtl_opt_pass
+{
+public:
+ avr_pass_split_nzb (gcc::context *ctxt, const char *name)
+ : rtl_opt_pass (avr_pass_data_split_nzb, ctxt)
+ {
+ this->name = name;
+ }
+
+ unsigned int execute (function *) final override
+ {
+ if (avropt_use_nonzero_bits)
+ split_nzb_insns ();
+ return 0;
+ }
+
+ void split_nzb_insns ();
+
+}; // avr_pass_split_nzb
+
+
+void
+avr_pass_split_nzb::split_nzb_insns ()
+{
+ rtx_insn *next;
+
+ for (rtx_insn *insn = get_insns (); insn; insn = next)
+ {
+ next = NEXT_INSN (insn);
+
+ if (INSN_P (insn)
+ && single_set (insn)
+ && get_attr_nzb (insn) == NZB_YES)
+ {
+ rtx_insn *last = try_split (PATTERN (insn), insn, 1 /*last*/);
+
+ // The nonzero_bits() insns *must* split. If not: ICE.
+ if (last == insn)
+ {
+ debug_rtx (insn);
+ internal_error ("failed to split insn");
+ }
+ }
+ }
+}
+
+
\f
//////////////////////////////////////////////////////////////////////////////
// Split shift insns after peephole2 / before avr-fuse-move.
return new avr_pass_casesi (ctxt, "avr-casesi");
}
+
+rtl_opt_pass *
+make_avr_pass_split_nzb (gcc::context *ctxt)
+{
+ return new avr_pass_split_nzb (ctxt, "avr-split-nzb");
+}
+
// Try to replace 2 cbranch insns with 1 comparison and 2 branches.
rtl_opt_pass *
INSERT_PASS_AFTER (pass_expand, 1, avr_pass_casesi);
+/* Some combine insns have nonzero_bits() in their condition, though insns
+ should not use such stuff in their condition. Therefore, we split such
+ insns into something without nonzero_bits() in their condition right after
+ insn combine.
+
+ Since neither split_all_insns() nor split_all_insns_noflow() work at that
+ point (presumably since there are splits involving branches), we split
+ respective insns (and only such insns) by hand. Respective insns are
+ tagged with insn attribute nzb = "yes" so that they are easy to spot. */
+
+INSERT_PASS_AFTER (pass_combine, 1, avr_pass_split_nzb);
+
/* If-else decision trees generated for switch / case may produce sequences
like
extern int avr_jump_mode (rtx x, rtx_insn *insn, int = 0);
extern bool test_hard_reg_class (enum reg_class rclass, rtx x);
extern bool jump_over_one_insn_p (rtx_insn *insn, rtx dest);
+extern bool avr_nonzero_bits_lsr_operands_p (rtx_code, rtx *);
extern void avr_final_prescan_insn (rtx_insn *insn, rtx *operand,
int num_operands);
extern rtl_opt_pass *make_avr_pass_recompute_notes (gcc::context *);
extern rtl_opt_pass *make_avr_pass_casesi (gcc::context *);
extern rtl_opt_pass *make_avr_pass_ifelse (gcc::context *);
+extern rtl_opt_pass *make_avr_pass_split_nzb (gcc::context *);
extern rtl_opt_pass *make_avr_pass_split_after_peephole2 (gcc::context *);
#ifdef RTX_CODE
extern bool avr_casei_sequence_check_operands (rtx *xop);
}
}
+ // Insns with nonzero_bits() == 1 in the condition.
+ if (avropt_use_nonzero_bits
+ && mode == QImode
+ && (code == AND || code == IOR || code == XOR)
+ && REG_P (XEXP (x, 1)))
+ {
+ // "*nzb=1.<code>.lsr_split"
+ // "*nzb=1.<code>.lsr.not_split"
+ bool is_nzb = (GET_CODE (XEXP (x, 0)) == LSHIFTRT
+ && (REG_P (XEXP (XEXP (x, 0), 0))
+ || GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR)
+ && const_0_to_7_operand (XEXP (XEXP (x, 0), 1), QImode));
+ // "*nzb=1.<code>.zerox_split"
+ // "*nzb=1.<code>.zerox.not_split"
+ is_nzb |= (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
+ && (REG_P (XEXP (XEXP (x, 0), 0))
+ || GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR)
+ && const1_operand (XEXP (XEXP (x, 0), 1), QImode)
+ && const_0_to_7_operand (XEXP (XEXP (x, 0), 2), QImode));
+ // "*nzb=1.<code>.ge0_split"
+ is_nzb |= (GET_CODE (XEXP (x, 0)) == GE
+ && REG_P (XEXP (XEXP (x, 0), 0))
+ && const0_operand (XEXP (XEXP (x, 0), 1), QImode));
+ if (is_nzb)
+ {
+ *total = COSTS_N_INSNS (code == XOR ? 3 : 2);
+ return true;
+ }
+ }
+
+ // Insn "*nzb=1.ior.ashift_split" with nonzero_bits() == 1 in the condition.
+ if (avropt_use_nonzero_bits
+ && mode == QImode
+ && code == IOR
+ && REG_P (XEXP (x, 1))
+ && GET_CODE (XEXP (x, 0)) == ASHIFT
+ && REG_P (XEXP (XEXP (x, 0), 0))
+ && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
+ {
+ *total = COSTS_N_INSNS (2);
+ return true;
+ }
+
+
switch (code)
{
case CONST_INT:
*total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed);
return true;
+ case GE:
+ if (mode == QImode
+ && REG_P (XEXP (x, 0))
+ && XEXP (x, 1) == const0_rtx)
+ {
+ *total = COSTS_N_INSNS (3);
+ return true;
+ }
+ break;
+
+ case ZERO_EXTRACT:
+ if (mode == QImode
+ && REG_P (XEXP (x, 0))
+ && XEXP (x, 1) == const1_rtx
+ && CONST_INT_P (XEXP (x, 2)))
+ {
+ int bpos = INTVAL (XEXP (x, 2));
+ *total = COSTS_N_INSNS (bpos == 0 ? 1 : bpos == 1 ? 2 : 3);
+ return true;
+ }
+ break;
+
case COMPARE:
switch (GET_MODE (XEXP (x, 0)))
{
}
+/* A helper for the insn condition of "*nzb=1.<code>.lsr[.not]_split"
+ where <code> is AND, IOR or XOR. Return true when
+
+ OP[0] <code>= OP[1] >> OP[2]
+
+ can be performed by means of the code of "*nzb=1.<code>.zerox", i.e.
+
+ OP[0] <code>= OP[1].OP[2]
+
+ For example, when OP[0] is in { 0, 1 }, then R24 &= R10.4
+ can be performed by means of SBRS R10,4 $ CLR R24.
+ Notice that the constraint of OP[3] is "0". */
+
+bool
+avr_nonzero_bits_lsr_operands_p (rtx_code code, rtx *op)
+{
+ if (reload_completed)
+ return false;
+
+ const auto offs = INTVAL (op[2]);
+ const auto op1_non0 = nonzero_bits (op[1], QImode);
+ const auto op3_non0 = nonzero_bits (op[3], QImode);
+
+ switch (code)
+ {
+ default:
+ gcc_unreachable ();
+
+ case IOR:
+ case XOR:
+ return op1_non0 >> offs == 1;
+
+ case AND:
+ return op3_non0 == 1;
+ }
+
+ return false;
+}
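+
+/* A small host-side sketch (not part of the port, only illustrative) of the
+   identities the helper above relies on:
+
+     #include <assert.h>
+
+     int main (void)
+     {
+       for (unsigned x = 0; x < 256; ++x)
+         for (unsigned offs = 0; offs < 8; ++offs)
+           {
+             // AND: when the other operand B is known to be 0 or 1, only
+             // the single bit X.OFFS of the shifted value matters.
+             for (unsigned b = 0; b <= 1; ++b)
+               assert (((x >> offs) & b) == (((x >> offs) & 1) & b));
+
+             // IOR / XOR: when all bits of X above OFFS are known to be
+             // zero, the shifted value is exactly the single bit X.OFFS.
+             if ((x & ~((2u << offs) - 1)) == 0)
+               assert ((x >> offs) == ((x >> offs) & 1));
+           }
+       return 0;
+     }  */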
+
+
/* Worker function for cpymemhi expander.
XOP[0] Destination as MEM:BLK
XOP[1] Source " "
[UNSPEC_STRLEN
UNSPEC_CPYMEM
UNSPEC_INDEX_JMP
+ UNSPEC_NZB
UNSPEC_FMUL
UNSPEC_FMULS
UNSPEC_FMULSU
no"
(const_string "no"))
+(define_attr "nzb"
+ "yes, no"
+ (const_string "no"))
+
;; Flavours of instruction set architecture (ISA), used in enabled attribute
;; mov : ISA has no MOVW movw : ISA has MOVW
DONE;
})
+;; Patterns for -muse-nonzero-bits use nonzero_bits() in their condition,
+;; which enables some additional optimizations.
+;; Since combine may add clobber of REG_CC, we must make sure that there are
+;; no other routes to synthesize such patterns. We use an UNSPEC for that.
+;; As insns are not supposed to use stuff like nonzero_bits() in their
+;; condition, we split the insns right after combine. For CFG reasons we have
+;; to do the splits by hand in avr_pass_split_nzb. All insns that must be
+;; split by that pass must have insn attribute "nzb" set to "yes". Moreover,
+;; the insns to split must be single_sets and must not touch control flow.
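+;;
+;; As an illustration (a sketch only; the exact code depends on what combine
+;; forms and on register allocation), a C fragment like
+;;
+;;     uint8_t f (uint8_t x, uint8_t y)
+;;     {
+;;       uint8_t b = y & 1;     // nonzero_bits (b) == 1
+;;       return b & (x >> 4);   // may match "*nzb=1.and.lsr_split"
+;;     }
+;;
+;; may be emitted by "*nzb=1.and.zerox" as a skip sequence like
+;;     SBRS x,4 $ CLR <result>
+;; instead of a shift followed by an AND.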
+
+(define_code_attr nzb_constr_rdr [(and "r") (ior "d") (xor "r")])
+(define_code_attr nzb_use1_nnr [(and "n") (ior "n") (xor "r")])
+
+(define_insn_and_split "*nzb=1.<code>.zerox_split"
+ [(set (match_operand:QI 0 "register_operand")
+ (bitop:QI (zero_extract:QI (match_operand:QI 1 "register_operand")
+ (const_int 1)
+ (match_operand:QI 2 "const_0_to_7_operand"))
+ (match_operand:QI 3 "register_operand")))]
+ "optimize && avropt_use_nonzero_bits
+ && !reload_completed
+ && (<CODE> == IOR || <CODE> == XOR
+ || nonzero_bits (operands[3], QImode) == 1)"
+ { gcc_unreachable (); }
+ "optimize && avropt_use_nonzero_bits
+ && !reload_completed"
+ [(parallel [(set (match_dup 0)
+ (bitop:QI (zero_extract:QI (match_dup 1)
+ (const_int 1)
+ (match_dup 2))
+ (unspec:QI [(match_dup 3)
+ ] UNSPEC_NZB)))
+ (use (const_int 1))
+ (clobber (reg:CC REG_CC))])]
+ ""
+ [(set_attr "nzb" "yes")])
+
+(define_insn "*nzb=1.<code>.zerox"
+ [(set (match_operand:QI 0 "register_operand" "=<nzb_constr_rdr>")
+ (bitop:QI (zero_extract:QI (match_operand:QI 1 "register_operand" "r")
+ (const_int 1)
+ (match_operand:QI 2 "const_0_to_7_operand" "n"))
+ (unspec:QI [(match_operand:QI 3 "register_operand" "0")
+ ] UNSPEC_NZB)))
+ (use (match_operand:QI 4 "nonmemory_operand" "<nzb_use1_nnr>"))
+ (clobber (reg:CC REG_CC))]
+ "optimize && avropt_use_nonzero_bits"
+ {
+ if (<CODE> == AND)
+ return "sbrs %1,%2\;clr %0";
+ else if (<CODE> == IOR)
+ return "sbrc %1,%2\;ori %0,1";
+ else if (<CODE> == XOR)
+ return "sbrc %1,%2\;eor %0,%4";
+ else
+ gcc_unreachable ();
+ }
+ [(set_attr "length" "2")])
+
+(define_insn_and_split "*nzb=1.<code>.lsr_split"
+ [(set (match_operand:QI 0 "register_operand")
+ (bitop:QI (lshiftrt:QI (match_operand:QI 1 "register_operand")
+ (match_operand:QI 2 "const_0_to_7_operand"))
+ (match_operand:QI 3 "register_operand")))]
+ "optimize && avropt_use_nonzero_bits
+ && !reload_completed
+ && avr_nonzero_bits_lsr_operands_p (<CODE>, operands)"
+ { gcc_unreachable (); }
+ "optimize && avropt_use_nonzero_bits
+ && !reload_completed"
+ [(parallel [(set (match_dup 0)
+ (bitop:QI (zero_extract:QI (match_dup 1)
+ (const_int 1)
+ (match_dup 2))
+ (unspec:QI [(match_dup 3)
+ ] UNSPEC_NZB)))
+ (use (const_int 1))
+ (clobber (reg:CC REG_CC))])]
+ ""
+ [(set_attr "nzb" "yes")])
+
+(define_insn_and_split "*nzb=1.<code>.zerox.not_split"
+ [(set (match_operand:QI 0 "register_operand")
+ (bitop:QI (zero_extract:QI (xor:QI (match_operand:QI 1 "register_operand")
+ (match_operand:QI 4 "const_int_operand"))
+ (const_int 1)
+ (match_operand:QI 2 "const_0_to_7_operand"))
+ (match_operand:QI 3 "register_operand")))]
+ "optimize && avropt_use_nonzero_bits
+ && !reload_completed
+ && INTVAL (operands[2]) == exact_log2 (0xff & INTVAL (operands[4]))
+ && (<CODE> == IOR
+ || nonzero_bits (operands[3], QImode) == 1)"
+ { gcc_unreachable (); }
+ "optimize && avropt_use_nonzero_bits
+ && !reload_completed"
+ ; "*nzb=1.<code>.zerox.not"
+ [(parallel [(set (match_dup 0)
+ (bitop:QI (zero_extract:QI (not:QI (match_dup 1))
+ (const_int 1)
+ (match_dup 2))
+ (unspec:QI [(match_dup 3)
+ ] UNSPEC_NZB)))
+ (use (const_int 1))
+ (clobber (reg:CC REG_CC))])]
+ ""
+ [(set_attr "nzb" "yes")])
+
+(define_insn_and_split "*nzb=1.<code>.lsr.not_split"
+ [(set (match_operand:QI 0 "register_operand")
+ (bitop:QI (lshiftrt:QI (xor:QI (match_operand:QI 1 "register_operand")
+ (match_operand:QI 4 "const_int_operand"))
+ (match_operand:QI 2 "const_0_to_7_operand"))
+ (match_operand:QI 3 "register_operand")))]
+ "optimize && avropt_use_nonzero_bits
+ && !reload_completed
+ && INTVAL (operands[2]) == exact_log2 (0xff & INTVAL (operands[4]))
+ && avr_nonzero_bits_lsr_operands_p (<CODE>, operands)"
+ { gcc_unreachable (); }
+ "optimize && avropt_use_nonzero_bits
+ && !reload_completed"
+ ; "*nzb=1.<code>.zerox.not"
+ [(parallel [(set (match_dup 0)
+ (bitop:QI (zero_extract:QI (not:QI (match_dup 1))
+ (const_int 1)
+ (match_dup 2))
+ (unspec:QI [(match_dup 3)
+ ] UNSPEC_NZB)))
+ (use (const_int 1))
+ (clobber (reg:CC REG_CC))])]
+ ""
+ [(set_attr "nzb" "yes")])
+
+(define_insn_and_split "*nzb=1.<code>.ge0_split"
+ [(set (match_operand:QI 0 "register_operand")
+ (bitop:QI (ge:QI (match_operand:QI 1 "register_operand")
+ (const_int 0))
+ (match_operand:QI 2 "register_operand")))]
+ "optimize && avropt_use_nonzero_bits
+ && !reload_completed
+ && (<CODE> == IOR || <CODE> == XOR
+ || nonzero_bits (operands[2], QImode) == 1)"
+ { gcc_unreachable (); }
+ "optimize && avropt_use_nonzero_bits
+ && !reload_completed"
+ ; "*nzb=1.<code>.zerox.not"
+ [(parallel [(set (match_dup 0)
+ (bitop:QI (zero_extract:QI (not:QI (match_dup 1))
+ (const_int 1)
+ (const_int 7))
+ (unspec:QI [(match_dup 2)
+ ] UNSPEC_NZB)))
+ (use (const_int 1))
+ (clobber (reg:CC REG_CC))])]
+ ""
+ [(set_attr "nzb" "yes")])
+
+(define_insn "*nzb=1.<code>.zerox.not"
+ [(set (match_operand:QI 0 "register_operand" "=<nzb_constr_rdr>")
+ (bitop:QI (zero_extract:QI (not:QI (match_operand:QI 1 "register_operand" "r"))
+ (const_int 1)
+ (match_operand:QI 2 "const_0_to_7_operand" "n"))
+ (unspec:QI [(match_operand:QI 3 "register_operand" "0")
+ ] UNSPEC_NZB)))
+ (use (match_operand:QI 4 "nonmemory_operand" "<nzb_use1_nnr>"))
+ (clobber (reg:CC REG_CC))]
+ "optimize && avropt_use_nonzero_bits"
+ {
+ if (<CODE> == AND)
+ return "sbrc %1,%2\;clr %0";
+ else if (<CODE> == IOR)
+ return "sbrs %1,%2\;ori %0,1";
+ else if (<CODE> == XOR)
+ return "sbrs %1,%2\;eor %0,%4";
+ else
+ gcc_unreachable ();
+ }
+ [(set_attr "length" "2")])
+
+(define_insn_and_split "*nzb=1.ior.ashift_split"
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (ior:QI (ashift:QI (match_operand:QI 1 "register_operand" "r")
+ (match_operand:QI 2 "const_0_to_7_operand" "n"))
+ (match_operand:QI 3 "register_operand" "0")))]
+ "optimize && avropt_use_nonzero_bits
+ && !reload_completed
+ && nonzero_bits (operands[1], QImode) == 1"
+ { gcc_unreachable (); }
+ "optimize && avropt_use_nonzero_bits
+ && !reload_completed"
+ [(parallel [(set (match_dup 0)
+ (unspec:QI [(ior:QI (ashift:QI (match_dup 1)
+ (match_dup 2))
+ (match_dup 3))
+ ] UNSPEC_NZB))
+ (clobber (reg:CC REG_CC))])]
+ ""
+ [(set_attr "nzb" "yes")])
+
+(define_insn "*nzb=1.ior.ashift"
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (unspec:QI [(ior:QI (ashift:QI (match_operand:QI 1 "register_operand" "r")
+ (match_operand:QI 2 "const_0_to_7_operand" "n"))
+ (match_operand:QI 3 "register_operand" "0"))
+ ] UNSPEC_NZB))
+ (clobber (reg:CC REG_CC))]
+ "optimize && avropt_use_nonzero_bits"
+ "sbrc %1,0\;ori %0,1<<%2"
+ [(set_attr "length" "2")])
+
;; Work around PR115307: Early passes expand isinf/f/l to a bloat.
;; These passes do not consider costs, and there is no way to
Target Var(avropt_pr118012) UInteger Init(1) Undocumented
This option is on per default in order to work around PR118012.
+muse-nonzero-bits
+Target Var(avropt_use_nonzero_bits) UInteger Init(0) Optimization
+Optimization. Allow the use of nonzero_bits() in some insn conditions.
+
mshort-calls
Target RejectNegative Mask(SHORT_CALLS)
This option is used internally for multilib generation and selection. Assume RJMP / RCALL can target all program memory.
-mdouble=@var{bits} -mlong-double=@var{bits} -mno-call-main
-mn_flash=@var{size} -mfract-convert-truncate -mno-interrupts
-mmain-is-OS_task -mrelax -mrmw -mstrict-X -mtiny-stack
--mrodata-in-ram -msplit-bit-shift -msplit-ldst
--mshort-calls -mskip-bug -nodevicelib -nodevicespecs
+-mrodata-in-ram -msplit-bit-shift -msplit-ldst -mshort-calls
+-mskip-bug -muse-nonzero-bits -nodevicelib -nodevicespecs
-Waddr-space-convert -Wmisspelled-isr}
@emph{Blackfin Options} (@ref{Blackfin Options})
Split multi-byte loads and stores into several byte loads and stores.
This optimization is turned on per default for @option{-O2} and higher.
+@opindex muse-nonzero-bits
+@item -muse-nonzero-bits
+Enable additional insn patterns for single-bit operations that only apply
+when some operand bits are known to be zero.
+This optimization is turned on per default for @option{-O2} and higher.
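+
+As an example (a sketch only; the generated code depends on optimization
+level and register allocation), with this option a function like
+
+@smallexample
+unsigned char f (unsigned char flags, unsigned char x)
+@{
+  unsigned char b = x & 1;  /* known to be 0 or 1 */
+  return flags | (b << 3);
+@}
+@end smallexample
+
+@noindent
+may be compiled to a skip sequence like @code{SBRC} followed by @code{ORI}
+instead of a shift and an OR.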
+
@end table
@anchor{eind}
--- /dev/null
+/* { dg-do run } */
+/* { dg-additional-options "-std=gnu99 -Wno-pedantic" } */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#define BITNO_I 7
+#define BITNO_T 6
+#define BITNO_H 5
+#define BITNO_S 4
+#define BITNO_V 3
+#define BITNO_N 2
+#define BITNO_Z 1
+#define BITNO_C 0
+
+#define I (1u << BITNO_I)
+#define T (1u << BITNO_T)
+#define H (1u << BITNO_H)
+#define S (1u << BITNO_S)
+#define V (1u << BITNO_V)
+#define N (1u << BITNO_N)
+#define Z (1u << BITNO_Z)
+#define C (1u << BITNO_C)
+
+#define bit(a, x) ((bool) ((a) & (1u << (x))))
+
+typedef union
+{
+ uint8_t val;
+ struct
+ {
+ bool c:1;
+ bool z:1;
+ bool n:1;
+ bool v:1;
+ bool s:1;
+ bool h:1;
+ bool t:1;
+ bool i:1;
+ };
+} sreg_t;
+
+
+typedef struct
+{
+ sreg_t sreg;
+ uint8_t mask;
+ uint16_t result;
+} flags_t;
+
+flags_t flags_sub (uint8_t d, uint8_t r)
+{
+ uint8_t res = d - r;
+ bool R7 = bit (res, 7);
+
+ bool Rd7 = bit (d, 7);
+ bool Rd3 = bit (d, 3);
+
+ bool R3 = bit (res, 3);
+ bool Rr7 = bit (r, 7);
+ bool Rr3 = bit (r, 3);
+
+ sreg_t s = { 0 };
+
+ s.v = (Rd7 & !Rr7 & !R7) | (!Rd7 & Rr7 & R7);
+ s.n = R7;
+ s.z = res == 0;
+ s.c = (!Rd7 & Rr7) | (Rr7 & R7) | (R7 & !Rd7);
+ s.h = (!Rd3 & Rr3) | (Rr3 & R3) | (R3 & !Rd3);
+ s.s = s.n ^ s.v;
+
+ return (flags_t) { s, H | S | V | N | Z | C, res };
+}
+
+flags_t flags_sbc (uint8_t d, uint8_t r, sreg_t sreg)
+{
+ uint8_t res = d - r - sreg.c;
+ bool R7 = bit (res, 7);
+
+ bool Rd7 = bit (d, 7);
+ bool Rd3 = bit (d, 3);
+
+ bool R3 = bit (res, 3);
+ bool Rr7 = bit (r, 7);
+ bool Rr3 = bit (r, 3);
+
+ sreg_t s = { 0 };
+
+ s.v = (Rd7 & !Rr7 & !R7) | (!Rd7 & Rr7 & R7);
+ s.n = R7;
+ s.z = (res == 0) & sreg.z;
+ s.c = (!Rd7 & Rr7) | (Rr7 & R7) | (R7 & !Rd7);
+ s.h = (!Rd3 & Rr3) | (Rr3 & R3) | (R3 & !Rd3);
+ s.s = s.n ^ s.v;
+
+ return (flags_t) { s, H | S | V | N | Z | C, res };
+}
+
+flags_t flags_neg (uint8_t d)
+{
+ uint8_t res = -d;
+ bool R7 = bit (res, 7);
+ bool R6 = bit (res, 6);
+ bool R5 = bit (res, 5);
+ bool R4 = bit (res, 4);
+ bool R3 = bit (res, 3);
+ bool R2 = bit (res, 2);
+ bool R1 = bit (res, 1);
+ bool R0 = bit (res, 0);
+
+ bool Rd3 = bit (d, 3);
+
+ sreg_t s = { 0 };
+
+ s.v = R7 & !R6 & !R5 & !R4 & !R3 & !R2 & !R1 & !R0;
+ s.n = R7;
+ s.z = res == 0;
+ s.c = R7 | R6 | R5 | R4 | R3 | R2 | R1 | R0;
+ s.h = R3 | Rd3;
+ s.s = s.n ^ s.v;
+
+ return (flags_t) { s, H | S | V | N | Z | C, res };
+}
+
+flags_t flags_ror (uint8_t d, sreg_t sreg)
+{
+ uint8_t res = (d + 0x100 * sreg.c) >> 1;
+
+ sreg_t s = { 0 };
+
+ s.c = bit (d, 0);
+ s.z = res == 0;
+ s.n = bit (res, 7);
+ s.v = s.n ^ s.c;
+ s.s = s.n ^ s.v;
+
+ return (flags_t) { s, S | V | N | Z | C, res };
+}
+
+flags_t flags_add (uint8_t d, uint8_t r)
+{
+ uint8_t res = d + r;
+ bool R7 = bit (res, 7);
+
+ bool Rd7 = bit (d, 7);
+ bool Rd3 = bit (d, 3);
+
+ bool R3 = bit (res, 3);
+ bool Rr7 = bit (r, 7);
+ bool Rr3 = bit (r, 3);
+
+ sreg_t s = { 0 };
+
+ s.v = (Rd7 & Rr7 & !R7) | (!Rd7 & !Rr7 & R7);
+ s.n = R7;
+ s.z = res == 0;
+ s.c = (Rd7 & Rr7) | (Rr7 & !R7) | (!R7 & Rd7);
+ s.h = (Rd3 & Rr3) | (Rr3 & !R3) | (!R3 & Rd3);
+ s.s = s.n ^ s.v;
+
+ return (flags_t) { s, H | S | V | N | Z | C, res };
+}
+
+static inline
+sreg_t sreg_sub (uint8_t d, uint8_t r, uint8_t sreg, uint8_t result)
+{
+ __asm ("out __SREG__,%[sreg]" "\n\t"
+ "sub %[d],%[r]" "\n\t"
+ "in %[sreg],__SREG__"
+ : [sreg] "+r" (sreg), [d] "+r" (d)
+ : [r] "r" (r));
+ if (d != result)
+ exit (__LINE__);
+ return (sreg_t) sreg;
+}
+
+static inline
+sreg_t sreg_sbc (uint8_t d, uint8_t r, uint8_t sreg, uint8_t result)
+{
+ __asm ("out __SREG__,%[sreg]" "\n\t"
+ "sbc %[d],%[r]" "\n\t"
+ "in %[sreg],__SREG__"
+ : [sreg] "+r" (sreg), [d] "+r" (d)
+ : [r] "r" (r));
+ if (d != result)
+ exit (__LINE__);
+ return (sreg_t) sreg;
+}
+
+static inline
+sreg_t sreg_neg (uint8_t d, uint8_t sreg, uint8_t result)
+{
+ __asm ("out __SREG__,%[sreg]" "\n\t"
+ "neg %[d]" "\n\t"
+ "in %[sreg],__SREG__"
+ : [sreg] "+r" (sreg), [d] "+r" (d));
+ if (d != result)
+ exit (__LINE__);
+ return (sreg_t) sreg;
+}
+
+static inline
+sreg_t sreg_ror (uint8_t d, uint8_t sreg, uint8_t result)
+{
+ __asm ("out __SREG__,%[sreg]" "\n\t"
+ "ror %[d]" "\n\t"
+ "in %[sreg],__SREG__"
+ : [sreg] "+r" (sreg), [d] "+r" (d));
+ if (d != result)
+ exit (__LINE__);
+ return (sreg_t) sreg;
+}
+
+static inline
+sreg_t sreg_add (uint8_t d, uint8_t r, uint8_t sreg, uint8_t result)
+{
+ __asm ("out __SREG__,%[sreg]" "\n\t"
+ "add %[d],%[r]" "\n\t"
+ "in %[sreg],__SREG__"
+ : [sreg] "+r" (sreg), [d] "+r" (d)
+ : [r] "r" (r));
+ if (d != result)
+ exit (__LINE__);
+ return (sreg_t) sreg;
+}
+
+void test_sub (uint8_t d, uint8_t r, sreg_t sreg)
+{
+ sreg_t s0 = sreg_sub (d, r, sreg.val, d - r);
+ flags_t f = flags_sub (d, r);
+ if ((f.sreg.val & f.mask) != (s0.val & f.mask))
+ exit (__LINE__);
+}
+
+void test_sbc (uint8_t d, uint8_t r, sreg_t sreg)
+{
+ sreg_t s0 = sreg_sbc (d, r, sreg.val, d - r - sreg.c);
+ flags_t f = flags_sbc (d, r, sreg);
+ if ((f.sreg.val & f.mask) != (s0.val & f.mask))
+ exit (__LINE__);
+}
+
+void test_neg (uint8_t d, sreg_t sreg)
+{
+ sreg_t s0 = sreg_neg (d, sreg.val, -d);
+ flags_t f = flags_neg (d);
+ if ((f.sreg.val & f.mask) != (s0.val & f.mask))
+ exit (__LINE__);
+}
+
+void test_add (uint8_t d, uint8_t r, sreg_t sreg)
+{
+ sreg_t s0 = sreg_add (d, r, sreg.val, d + r);
+ flags_t f = flags_add (d, r);
+ if ((f.sreg.val & f.mask) != (s0.val & f.mask))
+ exit (__LINE__);
+}
+
+void test_ror (uint8_t d, sreg_t sreg)
+{
+ sreg_t s0 = sreg_ror (d, sreg.val, (d + 0x100 * sreg.c) >> 1);
+ flags_t f = flags_ror (d, sreg);
+ if ((f.sreg.val & f.mask) != (s0.val & f.mask))
+ exit (__LINE__);
+}
+
+void test_sreg (void)
+{
+ uint8_t d = 0;
+
+ do
+ {
+ uint8_t r = 0;
+ test_neg (d, (sreg_t) { 0x00 });
+ test_neg (d, (sreg_t) { 0xff });
+
+ test_ror (d, (sreg_t) { 0 });
+ test_ror (d, (sreg_t) { C });
+
+ do
+ {
+ test_add (d, r, (sreg_t) { 0x00 });
+ test_add (d, r, (sreg_t) { 0xff });
+
+ test_sub (d, r, (sreg_t) { 0x00 });
+ test_sub (d, r, (sreg_t) { 0xff });
+
+ test_sbc (d, r, (sreg_t) { 0 });
+ test_sbc (d, r, (sreg_t) { C });
+ test_sbc (d, r, (sreg_t) { Z });
+ test_sbc (d, r, (sreg_t) { C | Z });
+ } while (++r);
+ } while (++d);
+}
+
+int main (void)
+{
+ test_sreg();
+ return 0;
+}