+2016-01-25 Claudiu Zissulescu <claziss@synopsys.com>
+
+ * config/arc/arc.c (TARGET_DWARF_REGISTER_SPAN): Define.
+ (arc_init): Check the validity of the mll64 option.
+ (arc_save_restore): Use double load/store instruction.
+ (arc_expand_movmem): Likewise.
+ (arc_split_move): Don't split if we have double load/store
+ instructions. Return void instead of the insn sequence.
+ (arc_process_double_reg_moves): Change function to return boolean
+ instead of a sequence of instructions.
+ (arc_dwarf_register_span): New function.
+ * config/arc/arc-protos.h (arc_split_move): Change prototype.
+ * config/arc/arc.h (TARGET_CPU_CPP_BUILTINS): Define __ARC_LL64__.
+ * config/arc/arc.md (*movdi_insn): Emit ldd/std instructions.
+ (*movdf_insn): Likewise.
+ * config/arc/arc.opt (mll64): New option.
+ * config/arc/predicates.md (even_register_operand): New predicate.
+ * doc/invoke.texi (ARC Options): Add mll64 documentation.
+
2016-01-25 Richard Biener <rguenther@suse.de>
PR lto/69393
extern void split_addsi (rtx *);
extern void split_subsi (rtx *);
extern void arc_pad_return (void);
-extern rtx arc_split_move (rtx *);
+extern void arc_split_move (rtx *);
extern int arc_verify_short (rtx_insn *insn, int unalign, int);
extern const char *arc_short_long (rtx_insn *insn, const char *, const char *);
extern rtx arc_regno_use_in (unsigned int, rtx);
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
+#undef TARGET_DWARF_REGISTER_SPAN
+#define TARGET_DWARF_REGISTER_SPAN arc_dwarf_register_span
+
/* Try to keep the (mov:DF _, reg) as early as possible so
that the d<add/sub/mul>h-lr insns appear together and can
use the peephole2 pattern. */
if (TARGET_ATOMIC && !(TARGET_ARC700 || TARGET_HS))
error ("-matomic is only supported for ARC700 or ARC HS cores");
+ /* LL64 ops are only available for ARC HS. */
+ if (TARGET_LL64 && !TARGET_HS)
+ error ("-mll64 is only supported for ARC HS cores");
+
arc_init_reg_tables ();
/* Initialize array for PRINT_OPERAND_PUNCT_VALID_P. */
for (regno = 0; regno <= 31; regno++)
{
- if ((gmask & (1L << regno)) != 0)
+ machine_mode mode = SImode;
+ bool found = false;
+
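+ /* With -mll64, save or restore an even-odd register pair with a
+ single 64-bit load/store when both registers need saving. */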
+ if (TARGET_LL64
+ && (regno % 2 == 0)
+ && ((gmask & (1L << regno)) != 0)
+ && ((gmask & (1L << (regno+1))) != 0))
+ {
+ found = true;
+ mode = DImode;
+ }
+ else if ((gmask & (1L << regno)) != 0)
{
- rtx reg = gen_rtx_REG (SImode, regno);
+ found = true;
+ mode = SImode;
+ }
+
+ if (found)
+ {
+ rtx reg = gen_rtx_REG (mode, regno);
rtx addr, mem;
int cfa_adjust = *first_offset;
gcc_assert (SMALL_INT (offset));
addr = plus_constant (Pmode, base_reg, offset);
}
- mem = gen_frame_mem (SImode, addr);
+ mem = gen_frame_mem (mode, addr);
if (epilogue_p)
{
rtx insn =
else
frame_move_inc (mem, reg, base_reg, addr);
offset += UNITS_PER_WORD;
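+ /* A 64-bit save/restore consumes two words and two registers. */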
+ if (mode == DImode)
+ {
+ offset += UNITS_PER_WORD;
+ ++regno;
+ }
} /* if */
} /* for */
}/* if */
return addr;
}
-/* Like move_by_pieces, but take account of load latency,
- and actual offset ranges.
- Return true on success. */
+/* Like move_by_pieces, but take account of load latency, and actual
+ offset ranges. Return true on success. */
bool
arc_expand_movmem (rtx *operands)
size = INTVAL (operands[2]);
/* move_by_pieces_ninsns is static, so we can't use it. */
if (align >= 4)
- n_pieces = (size + 2) / 4U + (size & 1);
+ {
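+ /* With -mll64, count 8-byte pieces (a 4-byte remainder bumps
+ the count), plus possible 2- and 1-byte tails. E.g. size 14:
+ (14+4)/8 + ((14>>1)&1) + (14&1) = 2 + 1 + 0 = 3 pieces. */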
+ if (TARGET_LL64)
+ n_pieces = (size + 4) / 8U + ((size >> 1) & 1) + (size & 1);
+ else
+ n_pieces = (size + 2) / 4U + (size & 1);
+ }
else if (align == 2)
n_pieces = (size + 1) / 2U;
else
n_pieces = size;
if (n_pieces >= (unsigned int) (optimize_size ? 3 : 15))
return false;
- if (piece > 4)
+ /* Force 32-bit aligned and larger copies to use 64-bit transfers,
+ if possible. */
+ if (TARGET_LL64 && (piece >= 4) && (size >= 8))
+ piece = 8;
+ else if (piece > 4)
piece = 4;
dst_addr = force_offsettable (XEXP (operands[0], 0), size, 0);
src_addr = force_offsettable (XEXP (operands[1], 0), size, 0);
rtx tmp;
machine_mode mode;
- if (piece > size)
- piece = size & -size;
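+ /* Halve the piece size until it no longer overshoots the
+ remaining byte count. */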
+ while (piece > size)
+ piece >>= 1;
mode = smallest_mode_for_size (piece * BITS_PER_UNIT, MODE_INT);
/* If we don't re-use temporaries, the scheduler gets carried away,
and the register pressure gets unnecessarily high. */
Operand 0: destination register
Operand 1: source register */
-static rtx
+static bool
arc_process_double_reg_moves (rtx *operands)
{
rtx dest = operands[0];
rtx src = operands[1];
- rtx val;
enum usesDxState { none, srcDx, destDx, maxDx };
enum usesDxState state = none;
}
if (state == none)
- return NULL_RTX;
-
- start_sequence ();
+ return false;
if (state == srcDx)
{
else
gcc_unreachable ();
- val = get_insns ();
- end_sequence ();
- return val;
+ return true;
}
-/* operands 0..1 are the operands of a 64 bit move instruction.
- split it into two moves with operands 2/3 and 4/5. */
+/* Operands 0..1 are the operands of a 64-bit move instruction.
+ Split it into two moves. */
-rtx
+void
arc_split_move (rtx *operands)
{
machine_mode mode = GET_MODE (operands[0]);
int i;
int swap = 0;
rtx xop[4];
- rtx val;
if (TARGET_DPFP)
{
- val = arc_process_double_reg_moves (operands);
- if (val)
- return val;
+ if (arc_process_double_reg_moves (operands))
+ return;
}
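+
+ /* With -mll64, a 64-bit move between memory and an even-odd
+ register pair needs no split; the ldd/std alternatives of the
+ move patterns handle it as a single instruction. */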
+ if (TARGET_LL64
+ && ((memory_operand (operands[0], mode)
+ && even_register_operand (operands[1], mode))
+ || (memory_operand (operands[1], mode)
+ && even_register_operand (operands[0], mode))))
+ {
+ emit_move_insn (operands[0], operands[1]);
+ return;
+ }
+
for (i = 0; i < 2; i++)
{
if (MEM_P (operands[i]) && auto_inc_p (XEXP (operands[i], 0)))
swap = 2;
gcc_assert (!reg_overlap_mentioned_p (xop[2], xop[1]));
}
- operands[2+swap] = xop[0];
- operands[3+swap] = xop[1];
- operands[4-swap] = xop[2];
- operands[5-swap] = xop[3];
- start_sequence ();
- emit_insn (gen_rtx_SET (operands[2], operands[3]));
- emit_insn (gen_rtx_SET (operands[4], operands[5]));
- val = get_insns ();
- end_sequence ();
- return val;
+ emit_move_insn (xop[0 + swap], xop[1 + swap]);
+ emit_move_insn (xop[2 - swap], xop[3 - swap]);
}
/* Select between the instruction output templates s_tmpl (for short INSNs)
return true;
}
+/* Return a parallel of registers to represent where to find the
+ register pieces if required, otherwise NULL_RTX. */
+
+static rtx
+arc_dwarf_register_span (rtx rtl)
+{
+ machine_mode mode = GET_MODE (rtl);
+ unsigned regno;
+ rtx p;
+
+ if (GET_MODE_SIZE (mode) != 8)
+ return NULL_RTX;
+
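+ /* An 8-byte value occupies two consecutive 32-bit registers;
+ describe it to DWARF as a parallel of the two registers. */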
+ p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+ regno = REGNO (rtl);
+ XVECEXP (p, 0, 0) = gen_rtx_REG (SImode, regno);
+ XVECEXP (p, 0, 1) = gen_rtx_REG (SImode, regno + 1);
+
+ return p;
+}
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-arc.h"
builtin_define ("__ARC_NORM__");\
builtin_define ("__Xnorm"); \
} \
+ if (TARGET_LL64) \
+ { \
+ builtin_define ("__ARC_LL64__");\
+ } \
if (TARGET_MUL64_SET) \
builtin_define ("__ARC_MUL64__");\
if (TARGET_MULMAC_32BY16_SET) \
}")
(define_insn_and_split "*movdi_insn"
- [(set (match_operand:DI 0 "move_dest_operand" "=w,w,r,m")
+ [(set (match_operand:DI 0 "move_dest_operand" "=w, w,r,m")
(match_operand:DI 1 "move_double_src_operand" "c,Hi,m,c"))]
"register_operand (operands[0], DImode)
|| register_operand (operands[1], DImode)"
switch (which_alternative)
{
default:
- case 0 :
- /* We normally copy the low-numbered register first. However, if
- the first register operand 0 is the same as the second register of
- operand 1, we must copy in the opposite order. */
- if (REGNO (operands[0]) == REGNO (operands[1]) + 1)
- return \"mov%? %R0,%R1\;mov%? %0,%1\";
- else
- return \"mov%? %0,%1\;mov%? %R0,%R1\";
- case 1 :
- return \"mov%? %L0,%L1\;mov%? %H0,%H1\";
- case 2 :
- /* If the low-address word is used in the address, we must load it
- last. Otherwise, load it first. Note that we cannot have
- auto-increment in that case since the address register is known to be
- dead. */
- if (refers_to_regno_p (REGNO (operands[0]), operands[1]))
- return \"ld%V1 %R0,%R1\;ld%V1 %0,%1\";
- else switch (GET_CODE (XEXP(operands[1], 0)))
- {
- case POST_MODIFY: case POST_INC: case POST_DEC:
- return \"ld%V1 %R0,%R1\;ld%U1%V1 %0,%1\";
- case PRE_MODIFY: case PRE_INC: case PRE_DEC:
- return \"ld%U1%V1 %0,%1\;ld%V1 %R0,%R1\";
- default:
- return \"ld%U1%V1 %0,%1\;ld%U1%V1 %R0,%R1\";
- }
- case 3 :
- switch (GET_CODE (XEXP(operands[0], 0)))
- {
- case POST_MODIFY: case POST_INC: case POST_DEC:
- return \"st%V0 %R1,%R0\;st%U0%V0 %1,%0\";
- case PRE_MODIFY: case PRE_INC: case PRE_DEC:
- return \"st%U0%V0 %1,%0\;st%V0 %R1,%R0\";
- default:
- return \"st%U0%V0 %1,%0\;st%U0%V0 %R1,%R0\";
- }
+ return \"#\";
+
+ case 2:
+ if (TARGET_LL64
+ && ((even_register_operand (operands[0], DImode)
+ && memory_operand (operands[1], DImode))
+ || (memory_operand (operands[0], DImode)
+ && even_register_operand (operands[1], DImode))))
+ return \"ldd%U1%V1 %0,%1%&\";
+ return \"#\";
+
+ case 3:
+ if (TARGET_LL64
+ && ((even_register_operand (operands[0], DImode)
+ && memory_operand (operands[1], DImode))
+ || (memory_operand (operands[0], DImode)
+ && even_register_operand (operands[1], DImode))))
+ return \"std%U0%V0 %1,%0\";
+ return \"#\";
}
}"
- "&& reload_completed && optimize"
- [(set (match_dup 2) (match_dup 3)) (set (match_dup 4) (match_dup 5))]
- "arc_split_move (operands);"
+ "reload_completed"
+ [(const_int 0)]
+ {
+ arc_split_move (operands);
+ DONE;
+ }
[(set_attr "type" "move,move,load,store")
;; ??? The ld/st values could be 4 if it's [reg,bignum].
- (set_attr "length" "8,16,16,16")])
+ (set_attr "length" "8,16,*,*")])
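+
+;; Example (register names illustrative): with -mll64 and an even-odd
+;; pair, alternative 2 can emit a single "ldd r0,[r2]" and alternative
+;; 3 a single "std r0,[r2]" in place of two 32-bit ld/st instructions.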
;; Floating point move insns.
""
"if (prepare_move_operands (operands, DFmode)) DONE;")
-(define_insn "*movdf_insn"
+(define_insn_and_split "*movdf_insn"
[(set (match_operand:DF 0 "move_dest_operand" "=D,r,c,c,r,m")
(match_operand:DF 1 "move_double_src_operand" "r,D,c,E,m,c"))]
"register_operand (operands[0], DFmode) || register_operand (operands[1], DFmode)"
- "#"
+ "*
+{
+ switch (which_alternative)
+ {
+ default:
+ return \"#\";
+ case 4:
+ if (TARGET_LL64
+ && ((even_register_operand (operands[0], DFmode)
+ && memory_operand (operands[1], DFmode))
+ || (memory_operand (operands[0], DFmode)
+ && even_register_operand (operands[1], DFmode))))
+ return \"ldd%U1%V1 %0,%1%&\";
+ return \"#\";
+
+ case 5:
+ if (TARGET_LL64
+ && ((even_register_operand (operands[0], DFmode)
+ && memory_operand (operands[1], DFmode))
+ || (memory_operand (operands[0], DFmode)
+ && even_register_operand (operands[1], DFmode))))
+ return \"std%U0%V0 %1,%0\";
+ return \"#\";
+ }
+}"
+ "reload_completed"
+ [(const_int 0)]
+ {
+ arc_split_move (operands);
+ DONE;
+ }
[(set_attr "type" "move,move,move,move,load,store")
(set_attr "predicable" "no,no,yes,yes,no,no")
;; ??? The ld/st values could be 16 if it's [reg,bignum].
(set_attr "length" "4,16,8,16,16,16")])
-(define_split
- [(set (match_operand:DF 0 "move_dest_operand" "")
- (match_operand:DF 1 "move_double_src_operand" ""))]
- "reload_completed"
- [(match_dup 2)]
- "operands[2] = arc_split_move (operands);")
-
(define_insn_and_split "*movdf_insn_nolrsr"
[(set (match_operand:DF 0 "register_operand" "=r")
(match_operand:DF 1 "arc_double_register_operand" "D"))
matomic
Target Report Mask(ATOMIC)
Enable atomic instructions.
+
+mll64
+Target Report Mask(LL64)
+Enable double load/store instructions for ARC HS.
(match_code "reg" "0")))
(define_predicate "any_mem_operand"
- (match_code "mem"))
\ No newline at end of file
+ (match_code "mem"))
+
+; Special predicate to match an even-odd double register pair.
+(define_predicate "even_register_operand"
+ (match_code "reg")
+ {
+ if ((GET_MODE (op) != mode) && (mode != VOIDmode))
+ return 0;
+
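+ /* Before reload a pseudo may still be allocated to an even
+ register, so accept pseudos here; hard registers must have an
+ even register number. */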
+ return (REG_P (op) && ((REGNO (op) >= FIRST_PSEUDO_REGISTER)
+ || ((REGNO (op) & 1) == 0)));
+ })
-mmixed-code -mq-class -mRcq -mRcw -msize-level=@var{level} @gol
-mtune=@var{cpu} -mmultcost=@var{num} @gol
-munalign-prob-threshold=@var{probability} -mmpy-option=@var{multo} @gol
--mdiv-rem -mcode-density}
+-mdiv-rem -mcode-density -mll64}
@emph{ARM Options}
@gccoptlist{-mapcs-frame -mno-apcs-frame @gol
@opindex mcode-density
Enable code density instructions for ARC EM, default on for ARC HS.
+@item -mll64
+@opindex mll64
+Enable double load/store operations for ARC HS cores.
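+
+For instance (a minimal sketch, not taken from the compiler sources),
+a copy of a naturally aligned 64-bit object:
+
+@example
+void copy64 (unsigned long long *d, unsigned long long *s)
+@{
+  *d = *s;  /* Candidate for a single ldd/std pair.  */
+@}
+@end example
+
+@noindent
+may then be emitted as one @code{ldd}/@code{std} pair instead of two
+32-bit loads and stores.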
+
@item -mmpy-option=@var{multo}
@opindex mmpy-option
Compile ARCv2 code with a multiplier design option. @samp{wlh1} is