extern int arm_coproc_mem_operand_wb (rtx, int);
extern int neon_vector_mem_operand (rtx, int, bool);
extern int mve_vector_mem_operand (machine_mode, rtx, bool);
-bool arm_mve_mode_and_operands_type_check (machine_mode, rtx, rtx);
extern int neon_struct_mem_operand (rtx);
extern rtx *neon_vcmla_lane_prepare_operands (rtx *);
/* Match:
(plus (reg)
- (const)). */
+ (const))
+
+ The encoded immediate for 16-bit modes is multiplied by 2,
+ while the encoded immediate for 32-bit and 64-bit modes is
+ multiplied by 4. */
+ int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
if (GET_CODE (ind) == PLUS
&& REG_P (XEXP (ind, 0))
&& REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
&& CONST_INT_P (XEXP (ind, 1))
- && INTVAL (XEXP (ind, 1)) > -1024
- && INTVAL (XEXP (ind, 1)) < 1024
- && (INTVAL (XEXP (ind, 1)) & 3) == 0)
+ && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
+ && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
return TRUE;
return FALSE;
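For reference, the effect of the new bound can be sketched with a small standalone helper (the name offset_ok_for_vldr_vstr is made up for illustration and is not a GCC function; the range and scaling follow the comment above):

#include <stdbool.h>

/* Assumed rule, mirroring the test above: factor = MIN (mode size, 4),
   so HFmode (size 2) allows offsets in [-510, 510] in steps of 2 and
   SFmode/DFmode allow [-1020, 1020] in steps of 4.  */
static bool
offset_ok_for_vldr_vstr (int mode_size, int offset)
{
  int factor = mode_size < 4 ? mode_size : 4;
  return (offset >= -255 * factor
          && offset <= 255 * factor
          && (offset & (factor - 1)) == 0);
}

/* For example, offset_ok_for_vldr_vstr (2, 510) and (4, -1020) hold,
   while (2, 512), (4, 1024) and (4, 2) do not.  */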
struct gcc_target targetm = TARGET_INITIALIZER;
-bool
-arm_mve_mode_and_operands_type_check (machine_mode mode, rtx op0, rtx op1)
-{
- if (!(TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT))
- return true;
- else if (mode == E_BFmode)
- return false;
- else if ((s_register_operand (op0, mode) && MEM_P (op1))
- || (s_register_operand (op1, mode) && MEM_P (op0)))
- return false;
- return true;
-}
-
#include "gt-arm.h"
(define_insn "*arm32_mov<mode>"
[(set (match_operand:HFBF 0 "nonimmediate_operand" "=r,m,r,r")
(match_operand:HFBF 1 "general_operand" " m,r,r,F"))]
- "TARGET_32BIT && !TARGET_HARD_FLOAT
+ "TARGET_32BIT
+ && !TARGET_HARD_FLOAT
+ && !TARGET_HAVE_MVE
&& ( s_register_operand (operands[0], <MODE>mode)
|| s_register_operand (operands[1], <MODE>mode))"
"*
(define_memory_constraint "Uj"
"@internal
- In ARM/Thumb-2 state an VFP load/store address which does not support
- writeback at all (eg vldr.16)."
+ In ARM/Thumb-2 state a VFP load/store address that supports writeback
+ for Neon but not for MVE."
(and (match_code "mem")
- (match_test "TARGET_32BIT && arm_coproc_mem_operand_no_writeback (op)")))
+ (match_test "TARGET_32BIT")
+ (match_test "TARGET_HAVE_MVE
+ ? arm_coproc_mem_operand_no_writeback (op)
+ : neon_vector_mem_operand (op, 2, true)")))
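As a rough model of what the reworked "Uj" constraint now accepts (a hedged sketch only; uj_accepts and addr_form are invented for illustration, and the offset range/alignment checks are omitted):

#include <stdbool.h>

enum addr_form { BASE, BASE_PLUS_OFFSET, BASE_WITH_WRITEBACK };

static bool
uj_accepts (bool have_mve, enum addr_form form)
{
  if (have_mve)
    /* arm_coproc_mem_operand_no_writeback: plain base or base+offset,
       never writeback.  */
    return form == BASE || form == BASE_PLUS_OFFSET;
  /* neon_vector_mem_operand (op, 2, ...): vld1/vst1 element addressing,
     which allows post-increment writeback but no immediate offset.  */
  return form == BASE || form == BASE_WITH_WRITEBACK;
}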
(define_memory_constraint "Uy"
"@internal
(set_attr "arch" "t2,any,any,any,a,t2,any,any,any,any,any,any")]
)
-(define_insn "*mov_load_vfp_hf16"
- [(set (match_operand:HF 0 "s_register_operand" "=t")
- (match_operand:HF 1 "memory_operand" "Uj"))]
- "TARGET_HAVE_MVE_FLOAT"
- "vldr.16\\t%0, %E1"
-)
-
-(define_insn "*mov_store_vfp_hf16"
- [(set (match_operand:HF 0 "memory_operand" "=Uj")
- (match_operand:HF 1 "s_register_operand" "t"))]
- "TARGET_HAVE_MVE_FLOAT"
- "vstr.16\\t%1, %E0"
-)
-
;; HFmode and BFmode moves
(define_insn "*mov<mode>_vfp_<mode>16"
[(set (match_operand:HFBF 0 "nonimmediate_operand"
- "= ?r,?m,t,r,t,r,t, t, Um,r")
+ "= ?r,?m,t,r,t,r,t, t, Uj,r")
(match_operand:HFBF 1 "general_operand"
- " m,r,t,r,r,t,Dv,Um,t, F"))]
+ " m,r,t,r,r,t,Dv,Uj,t, F"))]
"TARGET_32BIT
- && TARGET_VFP_FP16INST
- && arm_mve_mode_and_operands_type_check (<MODE>mode, operands[0],
- operands[1])
+ && (TARGET_VFP_FP16INST || TARGET_HAVE_MVE)
&& (s_register_operand (operands[0], <MODE>mode)
|| s_register_operand (operands[1], <MODE>mode))"
{
case 6: /* S register from immediate. */
return \"vmov.f16\\t%0, %1\t%@ __<fporbf>\";
case 7: /* S register from memory. */
- return \"vld1.16\\t{%z0}, %A1\";
+ if (TARGET_HAVE_MVE)
+ return \"vldr.16\\t%0, %1\";
+ else
+ return \"vld1.16\\t{%z0}, %A1\";
case 8: /* Memory from S register. */
- return \"vst1.16\\t{%z1}, %A0\";
+ if (TARGET_HAVE_MVE)
+ return \"vstr.16\\t%1, %0\";
+ else
+ return \"vst1.16\\t{%z1}, %A0\";
case 9: /* ARM register from constant. */
{
long bits;
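The practical effect of the new TARGET_HAVE_MVE branches is just a choice between the scalar VLDR.16/VSTR.16 forms and the Neon element load/store forms; a hedged summary (hf16_mem_template is an illustrative helper, not part of the pattern):

static const char *
hf16_mem_template (int have_mve, int load)
{
  if (have_mve)
    /* MVE: scalar half-precision load/store with an optional immediate
       offset (matched by the "Uj" constraint above).  */
    return load ? "vldr.16\t%0, %1" : "vstr.16\t%1, %0";
  /* Neon: single-element load/store, base-register addressing only.  */
  return load ? "vld1.16\t{%z0}, %A1" : "vst1.16\t{%z1}, %A0";
}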
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O -mfloat-abi=hard -mfp16-format=ieee" } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/*
+** r_w:
+** vmov.f16 r0, s0 @ __fp16
+** bx lr
+*/
+void
+r_w (_Float16 s0)
+{
+ register _Float16 r0 asm ("r0");
+ r0 = s0;
+ asm volatile ("" :: "r" (r0));
+}
+
+/*
+** w_r:
+** vmov.f16 s0, r0 @ __fp16
+** bx lr
+*/
+_Float16
+w_r ()
+{
+ register _Float16 r0 asm ("r0");
+ asm volatile ("" : "=r" (r0));
+ return r0;
+}
+
+/*
+** w_w:
+** vmov s1, s0 @ __fp16
+** bx lr
+*/
+void
+w_w (_Float16 s0)
+{
+ register _Float16 s1 asm ("s1");
+ s1 = s0;
+ asm volatile ("" :: "w" (s1));
+}
+
+/*
+** r_m_m128:
+** sub (r[0-9]+), r0, #256
+** ldrh r1, \[\1\] @ __fp16
+** bx lr
+*/
+void
+r_m_m128 (_Float16 *r0)
+{
+ register _Float16 r1 asm ("r1");
+ r1 = r0[-128];
+ asm volatile ("" :: "r" (r1));
+}
+
+/*
+** r_m_m127:
+** ldrh r1, \[r0, #-254\] @ __fp16
+** bx lr
+*/
+void
+r_m_m127 (_Float16 *r0)
+{
+ register _Float16 r1 asm ("r1");
+ r1 = r0[-127];
+ asm volatile ("" :: "r" (r1));
+}
+
+/*
+** r_m_m1:
+** ldrh r1, \[r0, #-2\] @ __fp16
+** bx lr
+*/
+void
+r_m_m1 (_Float16 *r0)
+{
+ register _Float16 r1 asm ("r1");
+ r1 = r0[-1];
+ asm volatile ("" :: "r" (r1));
+}
+
+/*
+** r_m_0:
+** ldrh r1, \[r0\] @ __fp16
+** bx lr
+*/
+void
+r_m_0 (_Float16 *r0)
+{
+ register _Float16 r1 asm ("r1");
+ r1 = r0[0];
+ asm volatile ("" :: "r" (r1));
+}
+
+/*
+** r_m_1:
+** ldrh r1, \[r0, #2\] @ __fp16
+** bx lr
+*/
+void
+r_m_1 (_Float16 *r0)
+{
+ register _Float16 r1 asm ("r1");
+ r1 = r0[1];
+ asm volatile ("" :: "r" (r1));
+}
+
+/*
+** r_m_255:
+** ldrh r1, \[r0, #510\] @ __fp16
+** bx lr
+*/
+void
+r_m_255 (_Float16 *r0)
+{
+ register _Float16 r1 asm ("r1");
+ r1 = r0[255];
+ asm volatile ("" :: "r" (r1));
+}
+
+/*
+** r_m_256:
+** ldrh r1, \[r0, #512\] @ __fp16
+** bx lr
+*/
+void
+r_m_256 (_Float16 *r0)
+{
+ register _Float16 r1 asm ("r1");
+ r1 = r0[256];
+ asm volatile ("" :: "r" (r1));
+}
+
+/* ??? This could be done in one instruction, but without mve.fp,
+ it makes more sense for memory_operand to enforce the GPR range. */
+/*
+** w_m_m128:
+** sub (r[0-9]+), r0, #256
+** vldr.16 s0, \[\1\]
+** bx lr
+*/
+void
+w_m_m128 (_Float16 *r0)
+{
+ register _Float16 s0 asm ("s0");
+ s0 = r0[-128];
+ asm volatile ("" :: "w" (s0));
+}
+
+/*
+** w_m_m127:
+** vldr.16 s0, \[r0, #-254\]
+** bx lr
+*/
+void
+w_m_m127 (_Float16 *r0)
+{
+ register _Float16 s0 asm ("s0");
+ s0 = r0[-127];
+ asm volatile ("" :: "w" (s0));
+}
+
+/*
+** w_m_m1:
+** vldr.16 s0, \[r0, #-2\]
+** bx lr
+*/
+void
+w_m_m1 (_Float16 *r0)
+{
+ register _Float16 s0 asm ("s0");
+ s0 = r0[-1];
+ asm volatile ("" :: "w" (s0));
+}
+
+/*
+** w_m_0:
+** vldr.16 s0, \[r0\]
+** bx lr
+*/
+void
+w_m_0 (_Float16 *r0)
+{
+ register _Float16 s0 asm ("s0");
+ s0 = r0[0];
+ asm volatile ("" :: "w" (s0));
+}
+
+/*
+** w_m_1:
+** vldr.16 s0, \[r0, #2\]
+** bx lr
+*/
+void
+w_m_1 (_Float16 *r0)
+{
+ register _Float16 s0 asm ("s0");
+ s0 = r0[1];
+ asm volatile ("" :: "w" (s0));
+}
+
+/*
+** w_m_255:
+** vldr.16 s0, \[r0, #510\]
+** bx lr
+*/
+void
+w_m_255 (_Float16 *r0)
+{
+ register _Float16 s0 asm ("s0");
+ s0 = r0[255];
+ asm volatile ("" :: "w" (s0));
+}
+
+/*
+** w_m_256:
+** add (r[0-9]+), r0, #512
+** vldr.16 s0, \[\1\]
+** bx lr
+*/
+void
+w_m_256 (_Float16 *r0)
+{
+ register _Float16 s0 asm ("s0");
+ s0 = r0[256];
+ asm volatile ("" :: "w" (s0));
+}
+
+/*
+** m_m128_r:
+** sub (r[0-9]+), r0, #256
+** strh r1, \[\1\] @ __fp16
+** bx lr
+*/
+void
+m_m128_r (_Float16 *r0)
+{
+ register _Float16 r1 asm ("r1");
+ asm volatile ("" : "=r" (r1));
+ r0[-128] = r1;
+}
+
+/*
+** m_m127_r:
+** strh r1, \[r0, #-254\] @ __fp16
+** bx lr
+*/
+void
+m_m127_r (_Float16 *r0)
+{
+ register _Float16 r1 asm ("r1");
+ asm volatile ("" : "=r" (r1));
+ r0[-127] = r1;
+}
+
+/*
+** m_m1_r:
+** strh r1, \[r0, #-2\] @ __fp16
+** bx lr
+*/
+void
+m_m1_r (_Float16 *r0)
+{
+ register _Float16 r1 asm ("r1");
+ asm volatile ("" : "=r" (r1));
+ r0[-1] = r1;
+}
+
+/*
+** m_0_r:
+** strh r1, \[r0\] @ __fp16
+** bx lr
+*/
+void
+m_0_r (_Float16 *r0)
+{
+ register _Float16 r1 asm ("r1");
+ asm volatile ("" : "=r" (r1));
+ r0[0] = r1;
+}
+
+/*
+** m_1_r:
+** strh r1, \[r0, #2\] @ __fp16
+** bx lr
+*/
+void
+m_1_r (_Float16 *r0)
+{
+ register _Float16 r1 asm ("r1");
+ asm volatile ("" : "=r" (r1));
+ r0[1] = r1;
+}
+
+/*
+** m_255_r:
+** strh r1, \[r0, #510\] @ __fp16
+** bx lr
+*/
+void
+m_255_r (_Float16 *r0)
+{
+ register _Float16 r1 asm ("r1");
+ asm volatile ("" : "=r" (r1));
+ r0[255] = r1;
+}
+
+/*
+** m_256_r:
+** strh r1, \[r0, #512\] @ __fp16
+** bx lr
+*/
+void
+m_256_r (_Float16 *r0)
+{
+ register _Float16 r1 asm ("r1");
+ asm volatile ("" : "=r" (r1));
+ r0[256] = r1;
+}
+
+/* ??? This could be done in one instruction, but without mve.fp,
+ it makes more sense for memory_operand to enforce the GPR range. */
+/*
+** m_m128_w:
+** sub (r[0-9]+), r0, #256
+** vstr.16 s0, \[\1\]
+** bx lr
+*/
+void
+m_m128_w (_Float16 *r0)
+{
+ register _Float16 s0 asm ("s0");
+ asm volatile ("" : "=w" (s0));
+ r0[-128] = s0;
+}
+
+/*
+** m_m127_w:
+** vstr.16 s0, \[r0, #-254\]
+** bx lr
+*/
+void
+m_m127_w (_Float16 *r0)
+{
+ register _Float16 s0 asm ("s0");
+ asm volatile ("" : "=w" (s0));
+ r0[-127] = s0;
+}
+
+/*
+** m_m1_w:
+** vstr.16 s0, \[r0, #-2\]
+** bx lr
+*/
+void
+m_m1_w (_Float16 *r0)
+{
+ register _Float16 s0 asm ("s0");
+ asm volatile ("" : "=w" (s0));
+ r0[-1] = s0;
+}
+
+/*
+** m_0_w:
+** vstr.16 s0, \[r0\]
+** bx lr
+*/
+void
+m_0_w (_Float16 *r0)
+{
+ register _Float16 s0 asm ("s0");
+ asm volatile ("" : "=w" (s0));
+ r0[0] = s0;
+}
+
+/*
+** m_1_w:
+** vstr.16 s0, \[r0, #2\]
+** bx lr
+*/
+void
+m_1_w (_Float16 *r0)
+{
+ register _Float16 s0 asm ("s0");
+ asm volatile ("" : "=w" (s0));
+ r0[1] = s0;
+}
+
+/*
+** m_255_w:
+** vstr.16 s0, \[r0, #510\]
+** bx lr
+*/
+void
+m_255_w (_Float16 *r0)
+{
+ register _Float16 s0 asm ("s0");
+ asm volatile ("" : "=w" (s0));
+ r0[255] = s0;
+}
+
+/*
+** m_256_w:
+** add (r[0-9]+), r0, #512
+** vstr.16 s0, \[\1\]
+** bx lr
+*/
+void
+m_256_w (_Float16 *r0)
+{
+ register _Float16 s0 asm ("s0");
+ asm volatile ("" : "=w" (s0));
+ r0[256] = s0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O -mfloat-abi=hard" } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/*
+** r_w:
+** vmov r0, s0
+** bx lr
+*/
+void
+r_w (float s0)
+{
+ register float r0 asm ("r0");
+ r0 = s0;
+ asm volatile ("" :: "r" (r0));
+}
+
+/*
+** w_r:
+** vmov s0, r0
+** bx lr
+*/
+float
+w_r ()
+{
+ register float r0 asm ("r0");
+ asm volatile ("" : "=r" (r0));
+ return r0;
+}
+
+/*
+** w_w:
+** vmov.f32 s1, s0
+** bx lr
+*/
+void
+w_w (float s0)
+{
+ register float s1 asm ("s1");
+ s1 = s0;
+ asm volatile ("" :: "w" (s1));
+}
+
+/*
+** r_m_m64:
+** sub (r[0-9]+), r0, #256
+** ldr r1, \[\1\] @ float
+** bx lr
+*/
+void
+r_m_m64 (float *r0)
+{
+ register float r1 asm ("r1");
+ r1 = r0[-64];
+ asm volatile ("" :: "r" (r1));
+}
+
+/*
+** r_m_m63:
+** ldr r1, \[r0, #-252\] @ float
+** bx lr
+*/
+void
+r_m_m63 (float *r0)
+{
+ register float r1 asm ("r1");
+ r1 = r0[-63];
+ asm volatile ("" :: "r" (r1));
+}
+
+/*
+** r_m_m1:
+** ldr r1, \[r0, #-4\] @ float
+** bx lr
+*/
+void
+r_m_m1 (float *r0)
+{
+ register float r1 asm ("r1");
+ r1 = r0[-1];
+ asm volatile ("" :: "r" (r1));
+}
+
+/*
+** r_m_0:
+** ldr r1, \[r0\] @ float
+** bx lr
+*/
+void
+r_m_0 (float *r0)
+{
+ register float r1 asm ("r1");
+ r1 = r0[0];
+ asm volatile ("" :: "r" (r1));
+}
+
+/*
+** r_m_1:
+** ldr r1, \[r0, #4\] @ float
+** bx lr
+*/
+void
+r_m_1 (float *r0)
+{
+ register float r1 asm ("r1");
+ r1 = r0[1];
+ asm volatile ("" :: "r" (r1));
+}
+
+/*
+** r_m_255:
+** ldr r1, \[r0, #1020\] @ float
+** bx lr
+*/
+void
+r_m_255 (float *r0)
+{
+ register float r1 asm ("r1");
+ r1 = r0[255];
+ asm volatile ("" :: "r" (r1));
+}
+
+/*
+** r_m_256:
+** add (r[0-9]+), r0, #1024
+** ldr r1, \[\1\] @ float
+** bx lr
+*/
+void
+r_m_256 (float *r0)
+{
+ register float r1 asm ("r1");
+ r1 = r0[256];
+ asm volatile ("" :: "r" (r1));
+}
+
+/* ??? This could be done in one instruction, but without mve.fp,
+ it makes more sense for memory_operand to enforce the GPR range. */
+/*
+** w_m_m64:
+** sub (r[0-9]+), r0, #256
+** vldr.32 s0, \[\1\]
+** bx lr
+*/
+void
+w_m_m64 (float *r0)
+{
+ register float s0 asm ("s0");
+ s0 = r0[-64];
+ asm volatile ("" :: "w" (s0));
+}
+
+/*
+** w_m_m63:
+** vldr.32 s0, \[r0, #-252\]
+** bx lr
+*/
+void
+w_m_m63 (float *r0)
+{
+ register float s0 asm ("s0");
+ s0 = r0[-63];
+ asm volatile ("" :: "w" (s0));
+}
+
+/*
+** w_m_m1:
+** vldr.32 s0, \[r0, #-4\]
+** bx lr
+*/
+void
+w_m_m1 (float *r0)
+{
+ register float s0 asm ("s0");
+ s0 = r0[-1];
+ asm volatile ("" :: "w" (s0));
+}
+
+/*
+** w_m_0:
+** vldr.32 s0, \[r0\]
+** bx lr
+*/
+void
+w_m_0 (float *r0)
+{
+ register float s0 asm ("s0");
+ s0 = r0[0];
+ asm volatile ("" :: "w" (s0));
+}
+
+/*
+** w_m_1:
+** vldr.32 s0, \[r0, #4\]
+** bx lr
+*/
+void
+w_m_1 (float *r0)
+{
+ register float s0 asm ("s0");
+ s0 = r0[1];
+ asm volatile ("" :: "w" (s0));
+}
+
+/*
+** w_m_255:
+** vldr.32 s0, \[r0, #1020\]
+** bx lr
+*/
+void
+w_m_255 (float *r0)
+{
+ register float s0 asm ("s0");
+ s0 = r0[255];
+ asm volatile ("" :: "w" (s0));
+}
+
+/*
+** w_m_256:
+** add (r[0-9]+), r0, #1024
+** vldr.32 s0, \[\1\]
+** bx lr
+*/
+void
+w_m_256 (float *r0)
+{
+ register float s0 asm ("s0");
+ s0 = r0[256];
+ asm volatile ("" :: "w" (s0));
+}
+
+/*
+** m_m64_r:
+** sub (r[0-9]+), r0, #256
+** str r1, \[\1\] @ float
+** bx lr
+*/
+void
+m_m64_r (float *r0)
+{
+ register float r1 asm ("r1");
+ asm volatile ("" : "=r" (r1));
+ r0[-64] = r1;
+}
+
+/*
+** m_m63_r:
+** str r1, \[r0, #-252\] @ float
+** bx lr
+*/
+void
+m_m63_r (float *r0)
+{
+ register float r1 asm ("r1");
+ asm volatile ("" : "=r" (r1));
+ r0[-63] = r1;
+}
+
+/*
+** m_m1_r:
+** str r1, \[r0, #-4\] @ float
+** bx lr
+*/
+void
+m_m1_r (float *r0)
+{
+ register float r1 asm ("r1");
+ asm volatile ("" : "=r" (r1));
+ r0[-1] = r1;
+}
+
+/*
+** m_0_r:
+** str r1, \[r0\] @ float
+** bx lr
+*/
+void
+m_0_r (float *r0)
+{
+ register float r1 asm ("r1");
+ asm volatile ("" : "=r" (r1));
+ r0[0] = r1;
+}
+
+/*
+** m_1_r:
+** str r1, \[r0, #4\] @ float
+** bx lr
+*/
+void
+m_1_r (float *r0)
+{
+ register float r1 asm ("r1");
+ asm volatile ("" : "=r" (r1));
+ r0[1] = r1;
+}
+
+/*
+** m_255_r:
+** str r1, \[r0, #1020\] @ float
+** bx lr
+*/
+void
+m_255_r (float *r0)
+{
+ register float r1 asm ("r1");
+ asm volatile ("" : "=r" (r1));
+ r0[255] = r1;
+}
+
+/*
+** m_256_r:
+** add (r[0-9]+), r0, #1024
+** str r1, \[\1\] @ float
+** bx lr
+*/
+void
+m_256_r (float *r0)
+{
+ register float r1 asm ("r1");
+ asm volatile ("" : "=r" (r1));
+ r0[256] = r1;
+}
+
+/* ??? This could be done in one instruction, but without mve.fp,
+ it makes more sense for memory_operand to enforce the GPR range. */
+/*
+** m_m64_w:
+** sub (r[0-9]+), r0, #256
+** vstr.32 s0, \[\1\]
+** bx lr
+*/
+void
+m_m64_w (float *r0)
+{
+ register float s0 asm ("s0");
+ asm volatile ("" : "=w" (s0));
+ r0[-64] = s0;
+}
+
+/*
+** m_m63_w:
+** vstr.32 s0, \[r0, #-252\]
+** bx lr
+*/
+void
+m_m63_w (float *r0)
+{
+ register float s0 asm ("s0");
+ asm volatile ("" : "=w" (s0));
+ r0[-63] = s0;
+}
+
+/*
+** m_m1_w:
+** vstr.32 s0, \[r0, #-4\]
+** bx lr
+*/
+void
+m_m1_w (float *r0)
+{
+ register float s0 asm ("s0");
+ asm volatile ("" : "=w" (s0));
+ r0[-1] = s0;
+}
+
+/*
+** m_0_w:
+** vstr.32 s0, \[r0\]
+** bx lr
+*/
+void
+m_0_w (float *r0)
+{
+ register float s0 asm ("s0");
+ asm volatile ("" : "=w" (s0));
+ r0[0] = s0;
+}
+
+/*
+** m_1_w:
+** vstr.32 s0, \[r0, #4\]
+** bx lr
+*/
+void
+m_1_w (float *r0)
+{
+ register float s0 asm ("s0");
+ asm volatile ("" : "=w" (s0));
+ r0[1] = s0;
+}
+
+/*
+** m_255_w:
+** vstr.32 s0, \[r0, #1020\]
+** bx lr
+*/
+void
+m_255_w (float *r0)
+{
+ register float s0 asm ("s0");
+ asm volatile ("" : "=w" (s0));
+ r0[255] = s0;
+}
+
+/*
+** m_256_w:
+** add (r[0-9]+), r0, #1024
+** vstr.32 s0, \[\1\]
+** bx lr
+*/
+void
+m_256_w (float *r0)
+{
+ register float s0 asm ("s0");
+ asm volatile ("" : "=w" (s0));
+ r0[256] = s0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O -mfloat-abi=hard" } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/*
+** r_w:
+** vmov r0, r1, d0
+** bx lr
+*/
+void
+r_w (double d0)
+{
+ register double r0 asm ("r0");
+ r0 = d0;
+ asm volatile ("" :: "r" (r0));
+}
+
+/*
+** w_r:
+** vmov d0, r0, r1
+** bx lr
+*/
+double
+w_r ()
+{
+ register double r0 asm ("r0");
+ asm volatile ("" : "=r" (r0));
+ return r0;
+}
+
+/*
+** w_w:
+** (
+** vmov.f32 s2, s0
+** vmov.f32 s3, s1
+** |
+** vmov.f32 s3, s1
+** vmov.f32 s2, s0
+** )
+** bx lr
+*/
+void
+w_w (double d0)
+{
+ register double d1 asm ("d1");
+ d1 = d0;
+ asm volatile ("" :: "w" (d1));
+}
+
+/*
+** r_m_m32:
+** sub (r[0-9]+), r0, #256
+** ldrd r2, \[\1\]
+** bx lr
+*/
+void
+r_m_m32 (double *r0)
+{
+ register double r2 asm ("r2");
+ r2 = r0[-32];
+ asm volatile ("" :: "r" (r2));
+}
+
+/*
+** r_m_m31:
+** ldrd r2, \[r0, #-248\]
+** bx lr
+*/
+void
+r_m_m31 (double *r0)
+{
+ register double r2 asm ("r2");
+ r2 = r0[-31];
+ asm volatile ("" :: "r" (r2));
+}
+
+/*
+** r_m_m1:
+** ldrd r2, \[r0, #-8\]
+** bx lr
+*/
+void
+r_m_m1 (double *r0)
+{
+ register double r2 asm ("r2");
+ r2 = r0[-1];
+ asm volatile ("" :: "r" (r2));
+}
+
+/*
+** r_m_0:
+** ldrd r2, \[r0\]
+** bx lr
+*/
+void
+r_m_0 (double *r0)
+{
+ register double r2 asm ("r2");
+ r2 = r0[0];
+ asm volatile ("" :: "r" (r2));
+}
+
+/*
+** r_m_1:
+** ldrd r2, \[r0, #8\]
+** bx lr
+*/
+void
+r_m_1 (double *r0)
+{
+ register double r2 asm ("r2");
+ r2 = r0[1];
+ asm volatile ("" :: "r" (r2));
+}
+
+/*
+** r_m_127:
+** ldrd r2, \[r0, #1016\]
+** bx lr
+*/
+void
+r_m_127 (double *r0)
+{
+ register double r2 asm ("r2");
+ r2 = r0[127];
+ asm volatile ("" :: "r" (r2));
+}
+
+/*
+** r_m_128:
+** add (r[0-9]+), r0, #1024
+** ldrd r2, \[\1\]
+** bx lr
+*/
+void
+r_m_128 (double *r0)
+{
+ register double r2 asm ("r2");
+ r2 = r0[128];
+ asm volatile ("" :: "r" (r2));
+}
+
+/* ??? This could be done in one instruction, but without mve.fp,
+ it makes more sense for memory_operand to enforce the GPR range. */
+/*
+** w_m_m32:
+** sub (r[0-9]+), r0, #256
+** vldr.64 d0, \[\1\]
+** bx lr
+*/
+void
+w_m_m32 (double *r0)
+{
+ register double d0 asm ("d0");
+ d0 = r0[-32];
+ asm volatile ("" :: "w" (d0));
+}
+
+/*
+** w_m_m31:
+** vldr.64 d0, \[r0, #-248\]
+** bx lr
+*/
+void
+w_m_m31 (double *r0)
+{
+ register double d0 asm ("d0");
+ d0 = r0[-31];
+ asm volatile ("" :: "w" (d0));
+}
+
+/*
+** w_m_m1:
+** vldr.64 d0, \[r0, #-8\]
+** bx lr
+*/
+void
+w_m_m1 (double *r0)
+{
+ register double d0 asm ("d0");
+ d0 = r0[-1];
+ asm volatile ("" :: "w" (d0));
+}
+
+/*
+** w_m_0:
+** vldr.64 d0, \[r0\]
+** bx lr
+*/
+void
+w_m_0 (double *r0)
+{
+ register double d0 asm ("d0");
+ d0 = r0[0];
+ asm volatile ("" :: "w" (d0));
+}
+
+/*
+** w_m_1:
+** vldr.64 d0, \[r0, #8\]
+** bx lr
+*/
+void
+w_m_1 (double *r0)
+{
+ register double d0 asm ("d0");
+ d0 = r0[1];
+ asm volatile ("" :: "w" (d0));
+}
+
+/*
+** w_m_127:
+** vldr.64 d0, \[r0, #1016\]
+** bx lr
+*/
+void
+w_m_127 (double *r0)
+{
+ register double d0 asm ("d0");
+ d0 = r0[127];
+ asm volatile ("" :: "w" (d0));
+}
+
+/*
+** w_m_128:
+** add (r[0-9]+), r0, #1024
+** vldr.64 d0, \[\1\]
+** bx lr
+*/
+void
+w_m_128 (double *r0)
+{
+ register double d0 asm ("d0");
+ d0 = r0[128];
+ asm volatile ("" :: "w" (d0));
+}
+
+/*
+** m_m32_r:
+** sub (r[0-9]+), r0, #256
+** strd r2, \[\1\]
+** bx lr
+*/
+void
+m_m32_r (double *r0)
+{
+ register double r2 asm ("r2");
+ asm volatile ("" : "=r" (r2));
+ r0[-32] = r2;
+}
+
+/*
+** m_m31_r:
+** strd r2, \[r0, #-248\]
+** bx lr
+*/
+void
+m_m31_r (double *r0)
+{
+ register double r2 asm ("r2");
+ asm volatile ("" : "=r" (r2));
+ r0[-31] = r2;
+}
+
+/*
+** m_m1_r:
+** strd r2, \[r0, #-8\]
+** bx lr
+*/
+void
+m_m1_r (double *r0)
+{
+ register double r2 asm ("r2");
+ asm volatile ("" : "=r" (r2));
+ r0[-1] = r2;
+}
+
+/*
+** m_0_r:
+** strd r2, \[r0\]
+** bx lr
+*/
+void
+m_0_r (double *r0)
+{
+ register double r2 asm ("r2");
+ asm volatile ("" : "=r" (r2));
+ r0[0] = r2;
+}
+
+/*
+** m_1_r:
+** strd r2, \[r0, #8\]
+** bx lr
+*/
+void
+m_1_r (double *r0)
+{
+ register double r2 asm ("r2");
+ asm volatile ("" : "=r" (r2));
+ r0[1] = r2;
+}
+
+/*
+** m_127_r:
+** strd r2, \[r0, #1016\]
+** bx lr
+*/
+void
+m_127_r (double *r0)
+{
+ register double r2 asm ("r2");
+ asm volatile ("" : "=r" (r2));
+ r0[127] = r2;
+}
+
+/*
+** m_128_r:
+** add (r[0-9]+), r0, #1024
+** strd r2, \[\1\]
+** bx lr
+*/
+void
+m_128_r (double *r0)
+{
+ register double r2 asm ("r2");
+ asm volatile ("" : "=r" (r2));
+ r0[128] = r2;
+}
+
+/* ??? This could be done in one instruction, but without mve.fp,
+ it makes more sense for memory_operand to enforce the GPR range. */
+/*
+** m_m32_w:
+** sub (r[0-9]+), r0, #256
+** vstr.64 d0, \[\1\]
+** bx lr
+*/
+void
+m_m32_w (double *r0)
+{
+ register double d0 asm ("d0");
+ asm volatile ("" : "=w" (d0));
+ r0[-32] = d0;
+}
+
+/*
+** m_m31_w:
+** vstr.64 d0, \[r0, #-248\]
+** bx lr
+*/
+void
+m_m31_w (double *r0)
+{
+ register double d0 asm ("d0");
+ asm volatile ("" : "=w" (d0));
+ r0[-31] = d0;
+}
+
+/*
+** m_m1_w:
+** vstr.64 d0, \[r0, #-8\]
+** bx lr
+*/
+void
+m_m1_w (double *r0)
+{
+ register double d0 asm ("d0");
+ asm volatile ("" : "=w" (d0));
+ r0[-1] = d0;
+}
+
+/*
+** m_0_w:
+** vstr.64 d0, \[r0\]
+** bx lr
+*/
+void
+m_0_w (double *r0)
+{
+ register double d0 asm ("d0");
+ asm volatile ("" : "=w" (d0));
+ r0[0] = d0;
+}
+
+/*
+** m_1_w:
+** vstr.64 d0, \[r0, #8\]
+** bx lr
+*/
+void
+m_1_w (double *r0)
+{
+ register double d0 asm ("d0");
+ asm volatile ("" : "=w" (d0));
+ r0[1] = d0;
+}
+
+/*
+** m_127_w:
+** vstr.64 d0, \[r0, #1016\]
+** bx lr
+*/
+void
+m_127_w (double *r0)
+{
+ register double d0 asm ("d0");
+ asm volatile ("" : "=w" (d0));
+ r0[127] = d0;
+}
+
+/*
+** m_128_w:
+** add (r[0-9]+), r0, #1024
+** vstr.64 d0, \[\1\]
+** bx lr
+*/
+void
+m_128_w (double *r0)
+{
+ register double d0 asm ("d0");
+ asm volatile ("" : "=w" (d0));
+ r0[128] = d0;
+}
pDst[i] = high;
}
-/* { dg-final { scan-assembler {vldr\.16\ts[0-9]+, \[r[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {vstr\.16\ts[0-9]+, \[r[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {vldr\.16\ts[0-9]+, \[r[0-9]+(, #-?[0-9]+)?\]\n} } } */
+/* { dg-final { scan-assembler-not {vldr\.16\t[^\n]*\]!} } } */
+/* { dg-final { scan-assembler-not {vstr\.16\t[^\n]*\]!} } } */