[(set_attr "sve_type" "sve_int_general")]
)
+;; Shift an SVE vector left and insert a scalar into element 0, for the
+;; special case where the vector being shifted (operand 1) is the all-zero
+;; constant.  The result is then scalar operand 2 in element 0 with every
+;; other element zero, so a single scalar FMOV between the <Vetype> views of
+;; the two registers is emitted.
+;; NOTE(review): this presumably relies on the scalar FMOV clearing the
+;; remaining bits of the destination vector register -- confirm against the
+;; architecture reference.
+(define_insn "*aarch64_vec_shl_insert_into_zero_<mode>"
+ [(set (match_operand:SVE_FULL_HSD 0 "register_operand")
+ (unspec:SVE_FULL_HSD
+ [(match_operand:SVE_FULL_HSD 1 "aarch64_simd_imm_zero")
+ (match_operand:<VEL> 2 "register_operand")]
+ UNSPEC_INSR))]
+ "TARGET_SVE"
+ {@ [ cons: =0 , 1 , 2 ]
+ [ w , Dz , w ] fmov\t%<Vetype>0, %<Vetype>2
+ }
+ [(set_attr "type" "neon_move")]
+)
+
+;; Byte-element (VNx16QI) variant of inserting a scalar into element 0 of an
+;; all-zero vector.  The scalar is moved with an FMOV on the H view of the
+;; registers and the .h elements of the result are then ANDed with #0xff.
+;; The length attribute is 8 for this two-instruction sequence.
+;; NOTE(review): the AND presumably clears the byte above the inserted
+;; element that the halfword FMOV also copies -- confirm.
+(define_insn "*aarch64_vec_shl_insert_into_zero_vnx16qi"
+ [(set (match_operand:VNx16QI 0 "register_operand")
+ (unspec:VNx16QI
+ [(match_operand:VNx16QI 1 "aarch64_simd_imm_zero")
+ (match_operand:QI 2 "register_operand")]
+ UNSPEC_INSR))]
+ "TARGET_SVE"
+ {@ [ cons: =0 , 1 , 2 ; attrs: length ]
+ [ w , Dz , w ; 8 ] fmov\t%h0, %h2\;and\t%0.h, %0.h, #0xff
+ }
+ [(set_attr "type" "neon_move")]
+)
+
+;; Variant of inserting a scalar into element 0 of an all-zero vector in
+;; which the scalar (operand 2) is a memory operand.  This applies to every
+;; SVE_FULL mode and emits a single scalar LDR into the <Vetype> view of the
+;; destination register.
+;; NOTE(review): this presumably relies on the scalar LDR clearing the
+;; remaining bits of the destination vector register -- confirm against the
+;; architecture reference.
+(define_insn "*aarch64_vec_shl_insert_from_load_<mode>"
+ [(set (match_operand:SVE_FULL 0 "register_operand")
+ (unspec:SVE_FULL
+ [(match_operand:SVE_FULL 1 "aarch64_simd_imm_zero")
+ (match_operand:<VEL> 2 "memory_operand")]
+ UNSPEC_INSR))]
+ "TARGET_SVE"
+ {@ [ cons: =0 , 1 , 2 ]
+ [ w , Dz , m ] ldr\t%<Vetype>0, %2
+ }
+ [(set_attr "type" "neon_move")]
+)
+
;; -------------------------------------------------------------------------
;; ---- [INT] Linear series
;; -------------------------------------------------------------------------
return gen_rtx_MEM (mode, force_reg (Pmode, addr));
}
-/* Emit a load/store from a subreg of SRC to a subreg of DEST.
- The subregs have mode NEW_MODE. Use only for reg<->mem moves. */
-void
-aarch64_emit_load_store_through_mode (rtx dest, rtx src, machine_mode new_mode)
-{
- gcc_assert ((MEM_P (dest) && register_operand (src, VOIDmode))
- || (MEM_P (src) && register_operand (dest, VOIDmode)));
- auto mode = GET_MODE (dest);
- auto int_mode = aarch64_sve_int_mode (mode);
- if (MEM_P (src))
- {
- rtx tmp = force_reg (new_mode, adjust_address (src, new_mode, 0));
- tmp = force_lowpart_subreg (int_mode, tmp, new_mode);
- emit_move_insn (dest, force_lowpart_subreg (mode, tmp, int_mode));
- }
- else
- {
- src = force_lowpart_subreg (int_mode, src, mode);
- emit_move_insn (adjust_address (dest, new_mode, 0),
- force_lowpart_subreg (new_mode, src, int_mode));
- }
-}
-
/* PRED is a predicate that is known to contain PTRUE.
For 128-bit VLS loads/stores, emit LDR/STR.
Else, emit an SVE predicated move from SRC to DEST. */
return;
}
+/* Move between a register and memory while accessing the memory in mode
+   NEW_MODE rather than in the mode of DEST.  Exactly one of DEST and SRC
+   must be a MEM and the other must be a register operand.  */
+void
+aarch64_emit_load_store_through_mode (rtx dest, rtx src, machine_mode new_mode)
+{
+  gcc_assert ((MEM_P (dest) && register_operand (src, VOIDmode))
+              || (MEM_P (src) && register_operand (dest, VOIDmode)));
+  auto mode = GET_MODE (dest);
+  auto int_mode = aarch64_sve_int_mode (mode);
+
+  /* Store: view the register in the integer mode, take its NEW_MODE
+     lowpart and write that to the re-moded memory reference.  */
+  if (!MEM_P (src))
+    {
+      rtx int_src = force_lowpart_subreg (int_mode, src, mode);
+      emit_move_insn (adjust_address (dest, new_mode, 0),
+                      force_lowpart_subreg (new_mode, int_src, int_mode));
+      return;
+    }
+
+  /* Load: fetch the value in NEW_MODE first.  */
+  rtx loaded = force_reg (new_mode, adjust_address (src, new_mode, 0));
+  rtx int_value;
+  if (VECTOR_MODE_P (new_mode))
+    int_value = force_lowpart_subreg (int_mode, loaded, new_mode);
+  else
+    {
+      /* A scalar NEW_MODE is inserted into an all-zero vector.  The insert
+         is done in INT_MODE itself unless the element mode differs from
+         NEW_MODE or INT_MODE is a partial vector mode; in those cases it
+         goes through the full SVE mode whose element is NEW_MODE, because
+         inserting into partial vectors is not supported.  */
+      auto vec_flags = aarch64_classify_vector_mode (int_mode);
+      bool needs_full_insert = (GET_MODE_INNER (int_mode) != new_mode
+                                || (vec_flags & VEC_PARTIAL));
+      machine_mode insert_mode = int_mode;
+      if (needs_full_insert)
+        insert_mode
+          = aarch64_full_sve_mode (as_a <scalar_mode> (new_mode)).require ();
+
+      /* Start from an explicitly zeroed register so that everything other
+         than the inserted element is zero.  */
+      rtx zeroed = force_reg (insert_mode, CONST0_RTX (insert_mode));
+      emit_insr (zeroed, loaded);
+      int_value = zeroed;
+      if (insert_mode != int_mode)
+        int_value = force_lowpart_subreg (int_mode, zeroed, insert_mode);
+    }
+  emit_move_insn (dest, force_lowpart_subreg (mode, int_value, int_mode));
+}
+
/* Check whether VALUE is a vector constant in which every element
is either a power of 2 or a negated power of 2. If so, return
a constant vector of log2s, and flip CODE between PLUS and MINUS
--- /dev/null
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2" } */
+
+#include <arm_sve.h>
+
+extern void abort (void) __attribute__ ((noreturn));
+
+/* Flag read by a ().  It is volatile, so its value must be read from
+   memory each time rather than being assumed to be 1.  */
+volatile int cond = 1;
+
+/* Return the current value of COND.  The function is marked noipa.  */
+int __attribute__ ((noipa))
+a (void)
+{
+ return cond;
+}
+
+/* Define test_NAME (), which exercises a one-element SVE load of TYPE
+   using the BITS-bit intrinsics.
+
+   The function allocates a single TYPE, stores 0 to it and, if a ()
+   returns nonzero, overwrites it with 1 and sets C to 2.  It then loads
+   the element with svld1 under an SV_VL1 predicate, compares the loaded
+   vector against zero with svcmpgt under an all-true predicate, and uses
+   svptest_any under the SV_VL1 predicate to obtain F.  It aborts if F is
+   nonzero while C is not 2, and otherwise returns F.  The allocation is
+   not freed.  */
+#define TEST_LOAD(TYPE, NAME, BITS) \
+ int __attribute__ ((noipa)) \
+ test_##NAME (void) \
+ { \
+ TYPE *g = __builtin_malloc (sizeof (TYPE)); \
+ int c = 0; \
+ if (!g) \
+ abort (); \
+ g[0] = 0; \
+ if (a ()) \
+ { \
+ g[0] = 1; \
+ c = 2; \
+ } \
+ svint##BITS##_t d \
+ = svld1_s##BITS (svptrue_pat_b##BITS (SV_VL1), g); \
+ svbool_t e = svcmpgt_s##BITS (svptrue_b##BITS (), d, \
+ svdup_n_s##BITS (0)); \
+ int f = svptest_any (svptrue_pat_b##BITS (SV_VL1), e); \
+ if (f && c != 2) \
+ abort (); \
+ return f; \
+ }
+
+/* One instance per element size: 8, 16, 32 and 64 bits.  */
+TEST_LOAD (signed char, byte, 8)
+TEST_LOAD (short, short, 16)
+TEST_LOAD (int, int, 32)
+TEST_LOAD (long, long, 64)
+
+/* Run the variants in order of increasing element size.  Each one must
+   return nonzero; abort at the first that does not.  */
+int
+main (void)
+{
+  if (!test_byte ())
+    abort ();
+  if (!test_short ())
+    abort ();
+  if (!test_int ())
+    abort ();
+  if (!test_long ())
+    abort ();
+  return 0;
+}