if (arm_reg_or_long_shift_imm (operands[2], GET_MODE (operands[2]))
&& (REG_P (operands[2]) || INTVAL(operands[2]) != 32))
{
+ operands[2] = convert_modes (QImode, SImode, operands[2], 0);
emit_insn (gen_mve_lsll (operands[0], operands[1], operands[2]));
DONE;
}
if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
&& arm_reg_or_long_shift_imm (operands[2], GET_MODE (operands[2])))
{
+ operands[2] = convert_modes (QImode, SImode, operands[2], 0);
emit_insn (gen_mve_asrl (operands[0], operands[1], operands[2]));
DONE;
}
;; in ARM/Thumb-2 state: Da, Db, Dc, Dd, Dn, DN, Dm, Dl, DL, Do, Dv, Dy, Di,
;; Dj, Ds, Dt, Dp, Dz, Tu, Te
;; in Thumb-1 state: Pa, Pb, Pc, Pd, Pe
-;; in Thumb-2 state: Ha, Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py, Pz, Rd, Rf, Rb, Ra,
-;; Rg, Ri
-;; in all states: Pg
+;; in Thumb-2 state: Ha, Pg, Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py, Pz, Ra, Rb,
+;; Rd, Rf, Rg, Ri
;; The following memory constraints have been used:
;; in ARM/Thumb-2 state: Uh, Ut, Uv, Un, Um, Us, Uo, Up, Uf, Ux, Ul, Uz
"dlstp.<dlstp_elemsize>\t%|lr, %0"
[(set_attr "type" "mve_misc")])
+
+;;
;; Scalar shifts
-(define_insn "mve_asrl"
+;;
+;; immediate shift amounts have to be in the [1..32] range
+;;
+;; shift amounts stored in a register can be negative, in which case
+;; the shift is reversed (asrl, lsll only)
+;; since RTL expects shift amounts to be unsigned, make sure the
+;; negative case is handled, in case simplify_rtx could optimize:
+;; (set (reg:SI 1) (const_int -5))
+;; (set (reg:DI 2) (ashift:DI (reg:DI 3) (reg:SI 1)))
+;; into:
+;; (set (reg:DI 2) (ashift:DI (reg:DI 3) (const_int -5)))
+
+;; General pattern for asrl
+;; Dispatch on the shift amount (operand 2):
+;; - non-negative constant: emit the immediate pattern as-is;
+;; - negative constant: a negative asrl amount reverses the shift, so
+;;   emit lsll with the negated amount (keeps RTL shift counts
+;;   non-negative, see the simplify_rtx note above);
+;; - register: emit the run-time pattern, which models both directions.
+(define_expand "mve_asrl"
+ [(set (match_operand:DI 0 "arm_general_register_operand" "")
+ (ashiftrt:DI (match_operand:DI 1 "arm_general_register_operand" "")
+ (match_operand:QI 2 "reg_or_int_operand" "")))]
+ "TARGET_HAVE_MVE"
+{
+ rtx amount = operands[2];
+ if (CONST_INT_P (amount))
+ {
+ HOST_WIDE_INT ival = INTVAL (amount);
+
+ if (ival >= 0)
+ /* Right shift. */
+ emit_insn (gen_mve_asrl_imm (operands[0], operands[1], amount));
+ else
+ /* Left shift. */
+ emit_insn (gen_mve_lsll_imm (operands[0], operands[1],
+ GEN_INT (-ival)));
+ DONE;
+ }
+
+ emit_insn (gen_mve_asrl_internal (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+;; asrl with an immediate shift amount
+;; alternative 1 (Pg) matches the encodable [1..32] range and emits
+;; asrl directly; alternative 2 (I) accepts any other immediate and is
+;; split into SImode shifts/moves
+(define_insn_and_split "mve_asrl_imm"
+ [(set (match_operand:DI 0 "arm_general_register_operand" "=r,r")
+ (ashiftrt:DI (match_operand:DI 1 "arm_general_register_operand" "0,r")
+ (match_operand:QI 2 "immediate_operand" "Pg,I")))]
+ "TARGET_HAVE_MVE"
+ "asrl%?\\t%Q0, %R1, %2"
+ "&& !satisfies_constraint_Pg (operands[2])"
+ [(clobber (const_int 0))]
+ "
+ rtx amount = operands[2];
+ HOST_WIDE_INT ival = INTVAL (amount);
+
+ /* shift amount in [1..32] is already handled by the Pg constraint. */
+
+ /* Shift by 0, it is just a move. */
+ if (ival == 0)
+ {
+ emit_insn (gen_movdi (operands[0], operands[1]));
+ DONE;
+ }
+
+ /* ival < 0 should have already been handled by mve_asrl. */
+ gcc_assert (ival > 32);
+
+ /* Shift amount above immediate range (ival > 32).
+ out_hi gets the sign bit
+ out_lo gets in_hi >> (ival - 32) or >> 31 if ival >= 64.
+ If ival >= 64, the result is either 0 or -1, depending on the
+ input sign. */
+ rtx in_hi = gen_highpart (SImode, operands[1]);
+ rtx out_lo = gen_lowpart (SImode, operands[0]);
+ rtx out_hi = gen_highpart (SImode, operands[0]);
+
+ emit_insn (gen_rtx_SET (out_lo,
+ gen_rtx_fmt_ee (ASHIFTRT,
+ SImode,
+ in_hi,
+ GEN_INT (MIN (ival - 32,
+ 31)))));
+ /* Copy sign bit, which is OK even if out_lo == in_hi: an arithmetic
+ right shift preserves the sign bit, so reading in_hi after the SET
+ above still yields the correct bit 31. */
+ emit_insn (gen_rtx_SET (out_hi,
+ gen_rtx_fmt_ee (ASHIFTRT,
+ SImode,
+ in_hi,
+ GEN_INT (31))));
+ DONE;
+ "
+ [(set_attr "predicable" "yes,yes")
+ (set_attr "length" "4,8")])
+
+;; asrl with the shift amount in a register: the amount's sign is only
+;; known at run time, so the RTL describes both directions explicitly:
+;; shift right by the amount when it is non-negative, otherwise shift
+;; left by the negated amount (matching the reversed-shift behavior
+;; described in the section header above).
+(define_insn "mve_asrl_internal"
[(set (match_operand:DI 0 "arm_general_register_operand" "=r")
- (ashiftrt:DI (match_operand:DI 1 "arm_general_register_operand" "0")
- (match_operand:SI 2 "arm_reg_or_long_shift_imm" "rPg")))]
+ (if_then_else:DI
+ (ge:QI (match_operand:QI 2 "arm_general_register_operand" "r")
+ (const_int 0))
+ (ashiftrt:DI (match_operand:DI 1 "arm_general_register_operand" "0")
+ (match_dup 2))
+ (ashift:DI (match_dup 1) (neg:QI (match_dup 2)))))]
"TARGET_HAVE_MVE"
"asrl%?\\t%Q0, %R1, %2"
[(set_attr "predicable" "yes")])
-(define_insn "mve_lsll"
+;; General pattern for lsll
+;; Dispatch on the shift amount (operand 2):
+;; - non-negative constant: emit the immediate pattern as-is;
+;; - negative constant: a negative lsll amount reverses the shift, so
+;;   emit a logical right shift (lshrdi3) with the negated amount;
+;; - register: emit the run-time pattern, which models both directions.
+(define_expand "mve_lsll"
+ [(set (match_operand:DI 0 "arm_general_register_operand" "")
+ (ashift:DI (match_operand:DI 1 "arm_general_register_operand" "")
+ (match_operand:QI 2 "reg_or_int_operand" "")))]
+ "TARGET_HAVE_MVE"
+{
+ rtx amount = operands[2];
+ if (CONST_INT_P (amount))
+ {
+ HOST_WIDE_INT ival = INTVAL (amount);
+
+ if (ival >= 0)
+ /* Left shift. */
+ emit_insn (gen_mve_lsll_imm (operands[0], operands[1], amount));
+ else
+ /* Right shift. */
+ emit_insn (gen_lshrdi3 (operands[0], operands[1],
+ GEN_INT (-ival)));
+ DONE;
+ }
+
+ emit_insn (gen_mve_lsll_internal (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+;; lsll with an immediate shift amount
+;; alternative 1 (Pg) matches the encodable [1..32] range and emits
+;; lsll directly; alternative 2 (I) accepts any other immediate and is
+;; split into SImode shifts/moves
+(define_insn_and_split "mve_lsll_imm"
+ [(set (match_operand:DI 0 "arm_general_register_operand" "=r,r")
+ (ashift:DI (match_operand:DI 1 "arm_general_register_operand" "0,r")
+ (match_operand:QI 2 "immediate_operand" "Pg,I")))]
+ "TARGET_HAVE_MVE"
+ "lsll%?\\t%Q0, %R1, %2"
+ "&& !satisfies_constraint_Pg (operands[2])"
+ [(clobber (const_int 0))]
+ "
+ rtx amount = operands[2];
+ HOST_WIDE_INT ival = INTVAL (amount);
+
+ /* shift amount in [1..32] is already handled by the Pg constraint. */
+
+ /* Shift by 0, it is just a move. */
+ if (ival == 0)
+ {
+ emit_insn (gen_movdi (operands[0], operands[1]));
+ DONE;
+ }
+
+ /* Shift amount >= operand width (64 bits), result is 0. */
+ if (ival >= 64)
+ {
+ emit_insn (gen_movdi (operands[0], const0_rtx));
+ DONE;
+ }
+
+ /* ival < 0 should have already been handled by mve_lsll. */
+ gcc_assert (ival > 32);
+
+ /* Shift amount above immediate range: 32 < ival < 64.
+ out_hi gets in_lo << (ival - 32), out_lo becomes 0. */
+ rtx in_lo = gen_lowpart (SImode, operands[1]);
+ rtx out_lo = gen_lowpart (SImode, operands[0]);
+ rtx out_hi = gen_highpart (SImode, operands[0]);
+ emit_insn (gen_rtx_SET (out_hi,
+ gen_rtx_fmt_ee (ASHIFT,
+ SImode,
+ in_lo,
+ GEN_INT (ival - 32))));
+ emit_insn (gen_rtx_SET (out_lo, const0_rtx));
+ DONE;
+ "
+ [(set_attr "predicable" "yes,yes")
+ (set_attr "length" "4,8")])
+
+;; lsll with the shift amount in a register: the amount's sign is only
+;; known at run time, so the RTL describes both directions explicitly:
+;; shift left by the amount when it is non-negative, otherwise shift
+;; logically right by the negated amount (matching the reversed-shift
+;; behavior described in the section header above).
+(define_insn "mve_lsll_internal"
[(set (match_operand:DI 0 "arm_general_register_operand" "=r")
- (ashift:DI (match_operand:DI 1 "arm_general_register_operand" "0")
- (match_operand:SI 2 "arm_reg_or_long_shift_imm" "rPg")))]
+ (if_then_else:DI
+ (ge:QI (match_operand:QI 2 "arm_general_register_operand" "r")
+ (const_int 0))
+ (ashift:DI (match_operand:DI 1 "arm_general_register_operand" "0")
+ (match_dup 2))
+ (lshiftrt:DI (match_dup 1) (neg:QI (match_dup 2)))))]
"TARGET_HAVE_MVE"
"lsll%?\\t%Q0, %R1, %2"
[(set_attr "predicable" "yes")])
--- /dev/null
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/* Check that calling asrl with an out of range shift amount is not interpreted
+ as undefined behavior, and that we actually use the asrl instruction (except
+ if a negative shift amount can be handled by lsll). Check code generation
+ for various special cases:
+ 1 <= amount <= 32
+ -32 <= amount <= -1
+ 32 < amount < 64
+ -64 < amount < -32
+ amount >= 64
+ amount <= -64
+ amount == 0
+ amount unknown at compile time. */
+#include "arm_mve.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ /* Positive shift amount in [1..32] range, use the immediate:
+
+ asrl r0, r1, #3 */
+/*
+**foo_3:
+** ...
+** asrl (?:ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+), #3(?: @.*|)
+** ...
+*/
+int64_t
+foo_3 (int64_t value)
+{
+ return asrl (value, 3);
+}
+
+ /* Negative shift amount in [-32..-1] range, reverse shift (lsll) with the
+ opposite shift amount as immediate:
+
+ lsll r0, r1, #3 */
+/*
+**foo_m3:
+** ...
+** lsll (?:ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+), #3(?: @.*|)
+** ...
+*/
+int64_t
+foo_m3 (int64_t value)
+{
+ return asrl (value, -3);
+}
+
+ /* Out of [1..32] range positive shift amount, but < 64.
+ lo_out = hi_in >> (amount - 32)
+ hi_out = hi_in >> 31 (to copy the sign bit)
+
+ asrs r0, r1, #1
+ asrs r1, r1, #31 */
+/*
+**foo_33:
+** ...
+** asrs (?:ip|fp|r[0-9]+), (ip|fp|r[0-9]+), #1(?: @.*|)
+** asrs (?:ip|fp|r[0-9]+), \1, #31(?: @.*|)
+** ...
+*/
+int64_t
+foo_33 (int64_t value)
+{
+ return asrl (value, 33);
+}
+
+ /* Out of [-32..-1] range negative shift amount, but > -64. Reverse shift
+ (lsll equivalent) in [33..64] range:
+ hi_out = lo_in << (-amount - 32)
+ lo_out = 0
+
+ lsls r1, r0, #1
+ movs r0, #0 */
+/*
+**foo_m33:
+** ...
+** lsls (?:ip|fp|r[0-9]+), (ip|fp|r[0-9]+), #1(?: @.*|)
+** movs \1, #0(?: @.*|)
+** ...
+*/
+int64_t
+foo_m33 (int64_t value)
+{
+ return asrl (value, -33);
+}
+
+ /* Out of range positive shift amount (>= 64)
+ lo_out = hi_in >> 31 (copy sign bit)
+ hi_out = hi_in >> 31
+
+ asrs r0, r1, #31
+ mov r1, r0 */
+/*
+**foo_65:
+** ...
+** asrs (ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+), #31(?: @.*|)
+** mov (?:ip|fp|r[0-9]+), \1(?: @.*|)
+** ...
+*/
+int64_t
+foo_65 (int64_t value)
+{
+ return asrl (value, 65);
+}
+
+ /* Out of range negative shift amount (<= -64), result is 0.
+
+ movs r0, #0
+ movs r1, #0 */
+/*
+**foo_m65:
+** ...
+** movs (ip|fp|r[0-9]+), #0(?: @.*|)
+** movs (ip|fp|r[0-9]+), #0(?: @.*|)
+** ...
+*/
+int64_t
+foo_m65 (int64_t value)
+{
+ return asrl (value, -65);
+}
+
+ /* shift amount == 0, use a mov, which is optimized out. */
+/*
+**foo_0:
+** bx lr
+** ...
+*/
+int64_t
+foo_0 (int64_t value)
+{
+ return asrl (value, 0);
+}
+
+ /* Unknown shift amount, use the register variant.
+
+ asrl r0, r1, r2 */
+/*
+**foo_var:
+** ...
+** asrl (?:ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+)(?: @.*|)
+** ...
+*/
+int64_t
+foo_var (int64_t value, int32_t amount)
+{
+ return asrl (value, amount);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
--- /dev/null
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/* Check that calling lsll with an out of range shift amount is not interpreted
+ as undefined behavior, and that we actually use the lsll instruction (except
+ if a negative shift amount can be handled by asrl). Check code generation
+ for various special cases:
+ 1 <= amount <= 32
+ -32 <= amount <= -1
+ 32 < amount < 64
+ -64 < amount < -32
+ amount >= 64
+ amount <= -64
+ amount == 0
+ amount unknown at compile time. */
+#include "arm_mve.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ /* Positive shift amount in [1..32] range, use the immediate:
+
+ lsll r0,r1,#3 */
+/*
+**foo_3:
+** ...
+** lsll (?:ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+), #3(?: @.*|)
+** ...
+*/
+uint64_t
+foo_3 (uint64_t value)
+{
+ return lsll (value, 3);
+}
+
+ /* Negative shift amount in [-32..-1] range, reverse shift (lsrl) with the
+ opposite shift amount as immediate:
+
+ lsrl r0, r1, #3 */
+/*
+**foo_m3:
+** ...
+** lsrl (?:ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+), #3(?: @.*|)
+** ...
+*/
+uint64_t
+foo_m3 (uint64_t value)
+{
+ return lsll (value, -3);
+}
+
+ /* Out of [1..32] range positive shift amount, but < 64.
+ high_out = low_in << (amount - 32) (using lsls, not lsll)
+ low_out = 0
+
+ lsls r1,r0,#1
+ movs r0, #0 */
+/*
+**foo_33:
+** ...
+** lsls (?:ip|fp|r[0-9]+), (ip|fp|r[0-9]+), #1(?: @.*|)
+** movs \1, #0(?: @.*|)
+** ...
+*/
+uint64_t
+foo_33 (uint64_t value)
+{
+ return lsll (value, 33);
+}
+
+ /* Out of [-32..-1] range negative shift amount, but > -64. Reverse shift
+ (lsrl equivalent) in [33..64] range:
+ lo_out = hi_in >> (-amount - 32)
+ hi_out = 0
+
+ lsrs r0, r1, #1
+ movs r1, #0 */
+/*
+**foo_m33:
+** ...
+** lsrs (?:ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+), #1(?: @.*|)
+** movs (?:ip|fp|r[0-9]+), #0(?: @.*|)
+** ...
+*/
+uint64_t
+foo_m33 (uint64_t value)
+{
+ return lsll (value, -33);
+}
+
+ /* Out of range positive shift amount (>= 64), result is 0.
+
+ movs r0, #0
+ movs r1, #0 */
+/*
+**foo_65:
+** ...
+** movs (ip|fp|r[0-9]+), #0(?: @.*|)
+** movs (ip|fp|r[0-9]+), #0(?: @.*|)
+** ...
+*/
+uint64_t
+foo_65 (uint64_t value)
+{
+ return lsll (value, 65);
+}
+
+ /* Out of range negative shift amount (<= -64), result is 0, because lsll
+ uses an unsigned input.
+
+ movs r0, #0
+ movs r1, #0 */
+/*
+**foo_m65:
+** ...
+** movs (ip|fp|r[0-9]+), #0(?: @.*|)
+** movs (ip|fp|r[0-9]+), #0(?: @.*|)
+** ...
+*/
+uint64_t
+foo_m65 (uint64_t value)
+{
+ return lsll (value, -65);
+}
+
+ /* shift amount == 0, use a mov, which is optimized out. */
+/*
+**foo_0:
+** bx lr
+** ...
+*/
+uint64_t
+foo_0 (uint64_t value)
+{
+ return lsll (value, 0);
+}
+
+ /* Unknown shift amount, use the register variant.
+
+ lsll r0, r1, r2 */
+/*
+**foo_var:
+** ...
+** lsll (?:ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+)(?: @.*|)
+** ...
+*/
+uint64_t
+foo_var (uint64_t value, int32_t amount)
+{
+ return lsll (value, amount);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */