]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
[AArch64] Improve vst4_lane intrinsics
authorJames Greenhalgh <james.greenhalgh@arm.com>
Mon, 28 Apr 2014 21:05:51 +0000 (21:05 +0000)
committerJames Greenhalgh <jgreenhalgh@gcc.gnu.org>
Mon, 28 Apr 2014 21:05:51 +0000 (21:05 +0000)
gcc/

* config/aarch64/aarch64-builtins.c
(aarch64_types_storestruct_lane_qualifiers): New.
(TYPES_STORESTRUCT_LANE): Likewise.
* config/aarch64/aarch64-simd-builtins.def (st2_lane): New.
(st3_lane): Likewise.
(st4_lane): Likewise.
* config/aarch64/aarch64-simd.md (vec_store_lanesoi_lane<mode>): New.
(vec_store_lanesci_lane<mode>): Likewise.
(vec_store_lanesxi_lane<mode>): Likewise.
(aarch64_st2_lane<VQ:mode>): Likewise.
(aarch64_st3_lane<VQ:mode>): Likewise.
(aarch64_st4_lane<VQ:mode>): Likewise.
* config/aarch64/aarch64.md (unspec): Add UNSPEC_ST{2,3,4}_LANE.
* config/aarch64/arm_neon.h
(__ST2_LANE_FUNC): Rewrite using builtins, update use points to
use new macro arguments.
(__ST3_LANE_FUNC): Likewise.
(__ST4_LANE_FUNC): Likewise.
* config/aarch64/iterators.md (V_TWO_ELEM): New.
(V_THREE_ELEM): Likewise.
(V_FOUR_ELEM): Likewise.

From-SVN: r209880

gcc/ChangeLog
gcc/config/aarch64/aarch64-builtins.c
gcc/config/aarch64/aarch64-simd-builtins.def
gcc/config/aarch64/aarch64-simd.md
gcc/config/aarch64/aarch64.md
gcc/config/aarch64/arm_neon.h
gcc/config/aarch64/iterators.md

index 41b291e9a55b8c8cd3c9f8251e3555ad89542af7..1799a435274a707d311d380baec27acc5c5a9a6f 100644 (file)
@@ -1,3 +1,27 @@
+2014-04-28  James Greenhalgh  <james.greenhalgh@arm.com>
+
+       * config/aarch64/aarch64-builtins.c
+       (aarch64_types_storestruct_lane_qualifiers): New.
+       (TYPES_STORESTRUCT_LANE): Likewise.
+       * config/aarch64/aarch64-simd-builtins.def (st2_lane): New.
+       (st3_lane): Likewise.
+       (st4_lane): Likewise.
+       * config/aarch64/aarch64-simd.md (vec_store_lanesoi_lane<mode>): New.
+       (vec_store_lanesci_lane<mode>): Likewise.
+       (vec_store_lanesxi_lane<mode>): Likewise.
+       (aarch64_st2_lane<VQ:mode>): Likewise.
+       (aarch64_st3_lane<VQ:mode>): Likewise.
+       (aarch64_st4_lane<VQ:mode>): Likewise.
+       * config/aarch64/aarch64.md (unspec): Add UNSPEC_ST{2,3,4}_LANE.
+       * config/aarch64/arm_neon.h
+       (__ST2_LANE_FUNC): Rewrite using builtins, update use points to
+       use new macro arguments.
+       (__ST3_LANE_FUNC): Likewise.
+       (__ST4_LANE_FUNC): Likewise.
+       * config/aarch64/iterators.md (V_TWO_ELEM): New.
+       (V_THREE_ELEM): Likewise.
+       (V_FOUR_ELEM): Likewise.
+
 2014-04-28  David Malcolm  <dmalcolm@redhat.com>
 
        * doc/gimple.texi: Replace the description of the now-defunct
index 4616ad24c07fe8212d2090d63ccd9b265bd71bdb..a3019828a93264dd69c469d788a9753c6dc4be94 100644 (file)
@@ -246,6 +246,11 @@ aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_void, qualifier_pointer_map_mode, qualifier_none };
 #define TYPES_STORE1 (aarch64_types_store1_qualifiers)
 #define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_void, qualifier_pointer_map_mode,
+      qualifier_none, qualifier_none };
+#define TYPES_STORESTRUCT_LANE (aarch64_types_storestruct_lane_qualifiers)
 
 #define CF0(N, X) CODE_FOR_aarch64_##N##X
 #define CF1(N, X) CODE_FOR_##N##X##1
index fa332ae5948de56db65039488b397bbd362e6a8b..339e8f86a4bbfb83e26eb169102c4a365d114c03 100644 (file)
   BUILTIN_VQ (STORESTRUCT, st3, 0)
   BUILTIN_VQ (STORESTRUCT, st4, 0)
 
+  BUILTIN_VQ (STORESTRUCT_LANE, st2_lane, 0)
+  BUILTIN_VQ (STORESTRUCT_LANE, st3_lane, 0)
+  BUILTIN_VQ (STORESTRUCT_LANE, st4_lane, 0)
+
   BUILTIN_VQW (BINOP, saddl2, 0)
   BUILTIN_VQW (BINOP, uaddl2, 0)
   BUILTIN_VQW (BINOP, ssubl2, 0)
index c05767b2045addf0d9be9136cd4f711d537251ce..108bc8d88931e67e6c7eeb77774a01bb391a1ced 100644 (file)
   [(set_attr "type" "neon_store2_2reg<q>")]
 )
 
+(define_insn "vec_store_lanesoi_lane<mode>"
+  [(set (match_operand:<V_TWO_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
+       (unspec:<V_TWO_ELEM> [(match_operand:OI 1 "register_operand" "w")
+                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
+                   (match_operand:SI 2 "immediate_operand" "i")]
+                   UNSPEC_ST2_LANE))]
+  "TARGET_SIMD"
+  "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0"
+  [(set_attr "type" "neon_store3_one_lane<q>")]
+)
+
 (define_insn "vec_load_lanesci<mode>"
   [(set (match_operand:CI 0 "register_operand" "=w")
        (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
   [(set_attr "type" "neon_store3_3reg<q>")]
 )
 
+(define_insn "vec_store_lanesci_lane<mode>"
+  [(set (match_operand:<V_THREE_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
+       (unspec:<V_THREE_ELEM> [(match_operand:CI 1 "register_operand" "w")
+                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
+                   (match_operand:SI 2 "immediate_operand" "i")]
+                   UNSPEC_ST3_LANE))]
+  "TARGET_SIMD"
+  "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0"
+  [(set_attr "type" "neon_store3_one_lane<q>")]
+)
+
 (define_insn "vec_load_lanesxi<mode>"
   [(set (match_operand:XI 0 "register_operand" "=w")
        (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
   [(set_attr "type" "neon_store4_4reg<q>")]
 )
 
+(define_insn "vec_store_lanesxi_lane<mode>"
+  [(set (match_operand:<V_FOUR_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
+       (unspec:<V_FOUR_ELEM> [(match_operand:XI 1 "register_operand" "w")
+                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
+                   (match_operand:SI 2 "immediate_operand" "i")]
+                   UNSPEC_ST4_LANE))]
+  "TARGET_SIMD"
+  "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0"
+  [(set_attr "type" "neon_store4_one_lane<q>")]
+)
+
 ;; Reload patterns for AdvSIMD register list operands.
 
 (define_expand "mov<mode>"
   DONE;
 })
 
+(define_expand "aarch64_st2_lane<VQ:mode>"
+ [(match_operand:DI 0 "register_operand" "r")
+  (match_operand:OI 1 "register_operand" "w")
+  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
+  (match_operand:SI 2 "immediate_operand")]
+  "TARGET_SIMD"
+{
+  enum machine_mode mode = <V_TWO_ELEM>mode;
+  rtx mem = gen_rtx_MEM (mode, operands[0]);
+  operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
+
+  emit_insn (gen_vec_store_lanesoi_lane<VQ:mode> (mem,
+                                                 operands[1],
+                                                 operands[2]));
+  DONE;
+})
+
+(define_expand "aarch64_st3_lane<VQ:mode>"
+ [(match_operand:DI 0 "register_operand" "r")
+  (match_operand:CI 1 "register_operand" "w")
+  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
+  (match_operand:SI 2 "immediate_operand")]
+  "TARGET_SIMD"
+{
+  enum machine_mode mode = <V_THREE_ELEM>mode;
+  rtx mem = gen_rtx_MEM (mode, operands[0]);
+  operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
+
+  emit_insn (gen_vec_store_lanesci_lane<VQ:mode> (mem,
+                                                 operands[1],
+                                                 operands[2]));
+  DONE;
+})
+
+(define_expand "aarch64_st4_lane<VQ:mode>"
+ [(match_operand:DI 0 "register_operand" "r")
+  (match_operand:XI 1 "register_operand" "w")
+  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
+  (match_operand:SI 2 "immediate_operand")]
+  "TARGET_SIMD"
+{
+  enum machine_mode mode = <V_FOUR_ELEM>mode;
+  rtx mem = gen_rtx_MEM (mode, operands[0]);
+  operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
+
+  emit_insn (gen_vec_store_lanesxi_lane<VQ:mode> (mem,
+                                                 operands[1],
+                                                 operands[2]));
+  DONE;
+})
+
 (define_expand "aarch64_st1<VALL:mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:VALL 1 "register_operand")]
index 7965db4c9c79c2e5b400b3639df6e2ef15b288e6..a004f9da1c88c73f409a94ca4c1fa794f07dc2fb 100644 (file)
@@ -98,6 +98,9 @@
     UNSPEC_ST2
     UNSPEC_ST3
     UNSPEC_ST4
+    UNSPEC_ST2_LANE
+    UNSPEC_ST3_LANE
+    UNSPEC_ST4_LANE
     UNSPEC_TLS
     UNSPEC_TLSDESC
     UNSPEC_USHL_2S
index 9f1fa98e6fb50d3c8b426733eeceee606a5bfe27..e5c5057bc3d0ad7a6f86e3c323a182ef53cb6e46 100644 (file)
@@ -14452,131 +14452,224 @@ __LD4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
 __LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
 __LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
 
-#define __ST2_LANE_FUNC(intype, ptrtype, regsuffix,                    \
-                       lnsuffix, funcsuffix, Q)                        \
-  typedef struct { ptrtype __x[2]; } __ST2_LANE_STRUCTURE_##intype;    \
-  __extension__ static __inline void                                   \
-  __attribute__ ((__always_inline__))                                  \
-  vst2 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr,                     \
-                                    intype b, const int c)             \
-  {                                                                    \
-    __ST2_LANE_STRUCTURE_##intype *__p =                               \
-                               (__ST2_LANE_STRUCTURE_##intype *)ptr;   \
-    __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t"    \
-            "st2 {v16." #lnsuffix ", v17." #lnsuffix "}[%2], %0\n\t"   \
-            : "=Q"(*__p)                                               \
-            : "Q"(b), "i"(c)                                           \
-            : "v16", "v17");                                           \
-  }
-
-__ST2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,)
-__ST2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
-__ST2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
-__ST2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
-__ST2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
-__ST2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
-__ST2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
-__ST2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
-__ST2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
-__ST2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
-__ST2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
-__ST2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
-__ST2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
-__ST2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
-__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
-__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
-__ST2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
-__ST2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
-__ST2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
-__ST2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
-__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
-__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
-__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
-__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
-
-#define __ST3_LANE_FUNC(intype, ptrtype, regsuffix,                    \
-                       lnsuffix, funcsuffix, Q)                        \
-  typedef struct { ptrtype __x[3]; } __ST3_LANE_STRUCTURE_##intype;    \
-  __extension__ static __inline void                                   \
-  __attribute__ ((__always_inline__))                                  \
-  vst3 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr,                     \
-                                    intype b, const int c)             \
-  {                                                                    \
-    __ST3_LANE_STRUCTURE_##intype *__p =                               \
-                               (__ST3_LANE_STRUCTURE_##intype *)ptr;   \
-    __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t"   \
-            "st3 {v16." #lnsuffix " - v18." #lnsuffix "}[%2], %0\n\t"  \
-            : "=Q"(*__p)                                               \
-            : "Q"(b), "i"(c)                                           \
-            : "v16", "v17", "v18");                                    \
-  }
-
-__ST3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,)
-__ST3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
-__ST3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
-__ST3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
-__ST3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
-__ST3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
-__ST3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
-__ST3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
-__ST3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
-__ST3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
-__ST3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
-__ST3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
-__ST3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
-__ST3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
-__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
-__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
-__ST3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
-__ST3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
-__ST3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
-__ST3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
-__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
-__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
-__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
-__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
-
-#define __ST4_LANE_FUNC(intype, ptrtype, regsuffix,                    \
-                       lnsuffix, funcsuffix, Q)                        \
-  typedef struct { ptrtype __x[4]; } __ST4_LANE_STRUCTURE_##intype;    \
-  __extension__ static __inline void                                   \
-  __attribute__ ((__always_inline__))                                  \
-  vst4 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr,                     \
-                                    intype b, const int c)             \
-  {                                                                    \
-    __ST4_LANE_STRUCTURE_##intype *__p =                               \
-                               (__ST4_LANE_STRUCTURE_##intype *)ptr;   \
-    __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t"   \
-            "st4 {v16." #lnsuffix " - v19." #lnsuffix "}[%2], %0\n\t"  \
-            : "=Q"(*__p)                                               \
-            : "Q"(b), "i"(c)                                           \
-            : "v16", "v17", "v18", "v19");                             \
-  }
-
-__ST4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,)
-__ST4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
-__ST4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
-__ST4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
-__ST4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
-__ST4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
-__ST4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
-__ST4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
-__ST4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
-__ST4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
-__ST4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
-__ST4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
-__ST4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
-__ST4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
-__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
-__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
-__ST4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
-__ST4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
-__ST4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
-__ST4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
-__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
-__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
-__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
-__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
+#define __ST2_LANE_FUNC(intype, largetype, ptrtype,                         \
+                       mode, ptr_mode, funcsuffix, signedtype)              \
+__extension__ static __inline void                                          \
+__attribute__ ((__always_inline__))                                         \
+vst2_lane_ ## funcsuffix (ptrtype *__ptr,                                   \
+                         intype __b, const int __c)                         \
+{                                                                           \
+  __builtin_aarch64_simd_oi __o;                                            \
+  largetype __temp;                                                         \
+  __temp.val[0]                                                                     \
+    = vcombine_##funcsuffix (__b.val[0],                                    \
+                            vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
+  __temp.val[1]                                                                     \
+    = vcombine_##funcsuffix (__b.val[1],                                    \
+                            vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
+  __o = __builtin_aarch64_set_qregoi##mode (__o,                            \
+                                           (signedtype) __temp.val[0], 0);  \
+  __o = __builtin_aarch64_set_qregoi##mode (__o,                            \
+                                           (signedtype) __temp.val[1], 1);  \
+  __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *)  \
+                                    __ptr, __o, __c);                       \
+}
+
+__ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v4sf, sf, f32,
+                float32x4_t)
+__ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, v2df, df, f64,
+                float64x2_t)
+__ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v16qi, qi, p8, int8x16_t)
+__ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v8hi, hi, p16,
+                int16x8_t)
+__ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v16qi, qi, s8, int8x16_t)
+__ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v8hi, hi, s16, int16x8_t)
+__ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v4si, si, s32, int32x4_t)
+__ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, v2di, di, s64, int64x2_t)
+__ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v16qi, qi, u8, int8x16_t)
+__ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v8hi, hi, u16,
+                int16x8_t)
+__ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v4si, si, u32,
+                int32x4_t)
+__ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, v2di, di, u64,
+                int64x2_t)
+
+#undef __ST2_LANE_FUNC
+#define __ST2_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix)       \
+__extension__ static __inline void                                         \
+__attribute__ ((__always_inline__))                                        \
+vst2q_lane_ ## funcsuffix (ptrtype *__ptr,                                 \
+                          intype __b, const int __c)                       \
+{                                                                          \
+  union { intype __i;                                                      \
+         __builtin_aarch64_simd_oi __o; } __temp = { __b };                \
+  __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
+                                   __ptr, __temp.__o, __c);                \
+}
+
+__ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32)
+__ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64)
+__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8)
+__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16)
+__ST2_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8)
+__ST2_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16)
+__ST2_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32)
+__ST2_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64)
+__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8)
+__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16)
+__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32)
+__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64)
+
+#define __ST3_LANE_FUNC(intype, largetype, ptrtype,                         \
+                       mode, ptr_mode, funcsuffix, signedtype)              \
+__extension__ static __inline void                                          \
+__attribute__ ((__always_inline__))                                         \
+vst3_lane_ ## funcsuffix (ptrtype *__ptr,                                   \
+                         intype __b, const int __c)                         \
+{                                                                           \
+  __builtin_aarch64_simd_ci __o;                                            \
+  largetype __temp;                                                         \
+  __temp.val[0]                                                                     \
+    = vcombine_##funcsuffix (__b.val[0],                                    \
+                            vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
+  __temp.val[1]                                                                     \
+    = vcombine_##funcsuffix (__b.val[1],                                    \
+                            vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
+  __temp.val[2]                                                                     \
+    = vcombine_##funcsuffix (__b.val[2],                                    \
+                            vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
+  __o = __builtin_aarch64_set_qregci##mode (__o,                            \
+                                           (signedtype) __temp.val[0], 0);  \
+  __o = __builtin_aarch64_set_qregci##mode (__o,                            \
+                                           (signedtype) __temp.val[1], 1);  \
+  __o = __builtin_aarch64_set_qregci##mode (__o,                            \
+                                           (signedtype) __temp.val[2], 2);  \
+  __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *)  \
+                                    __ptr, __o, __c);                       \
+}
+
+__ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v4sf, sf, f32,
+                float32x4_t)
+__ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, v2df, df, f64,
+                float64x2_t)
+__ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v16qi, qi, p8, int8x16_t)
+__ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v8hi, hi, p16,
+                int16x8_t)
+__ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v16qi, qi, s8, int8x16_t)
+__ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v8hi, hi, s16, int16x8_t)
+__ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v4si, si, s32, int32x4_t)
+__ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, v2di, di, s64, int64x2_t)
+__ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v16qi, qi, u8, int8x16_t)
+__ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v8hi, hi, u16,
+                int16x8_t)
+__ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v4si, si, u32,
+                int32x4_t)
+__ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, v2di, di, u64,
+                int64x2_t)
+
+#undef __ST3_LANE_FUNC
+#define __ST3_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix)       \
+__extension__ static __inline void                                         \
+__attribute__ ((__always_inline__))                                        \
+vst3q_lane_ ## funcsuffix (ptrtype *__ptr,                                 \
+                          intype __b, const int __c)                       \
+{                                                                          \
+  union { intype __i;                                                      \
+         __builtin_aarch64_simd_ci __o; } __temp = { __b };                \
+  __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
+                                   __ptr, __temp.__o, __c);                \
+}
+
+__ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32)
+__ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64)
+__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8)
+__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16)
+__ST3_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8)
+__ST3_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16)
+__ST3_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32)
+__ST3_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64)
+__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8)
+__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16)
+__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32)
+__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64)
+
+#define __ST4_LANE_FUNC(intype, largetype, ptrtype,                         \
+                       mode, ptr_mode, funcsuffix, signedtype)              \
+__extension__ static __inline void                                          \
+__attribute__ ((__always_inline__))                                         \
+vst4_lane_ ## funcsuffix (ptrtype *__ptr,                                   \
+                         intype __b, const int __c)                         \
+{                                                                           \
+  __builtin_aarch64_simd_xi __o;                                            \
+  largetype __temp;                                                         \
+  __temp.val[0]                                                                     \
+    = vcombine_##funcsuffix (__b.val[0],                                    \
+                            vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
+  __temp.val[1]                                                                     \
+    = vcombine_##funcsuffix (__b.val[1],                                    \
+                            vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
+  __temp.val[2]                                                                     \
+    = vcombine_##funcsuffix (__b.val[2],                                    \
+                            vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
+  __temp.val[3]                                                                     \
+    = vcombine_##funcsuffix (__b.val[3],                                    \
+                            vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
+  __o = __builtin_aarch64_set_qregxi##mode (__o,                            \
+                                           (signedtype) __temp.val[0], 0);  \
+  __o = __builtin_aarch64_set_qregxi##mode (__o,                            \
+                                           (signedtype) __temp.val[1], 1);  \
+  __o = __builtin_aarch64_set_qregxi##mode (__o,                            \
+                                           (signedtype) __temp.val[2], 2);  \
+  __o = __builtin_aarch64_set_qregxi##mode (__o,                            \
+                                           (signedtype) __temp.val[3], 3);  \
+  __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *)  \
+                                    __ptr, __o, __c);                       \
+}
+
+__ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v4sf, sf, f32,
+                float32x4_t)
+__ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, v2df, df, f64,
+                float64x2_t)
+__ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v16qi, qi, p8, int8x16_t)
+__ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v8hi, hi, p16,
+                int16x8_t)
+__ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v16qi, qi, s8, int8x16_t)
+__ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v8hi, hi, s16, int16x8_t)
+__ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v4si, si, s32, int32x4_t)
+__ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, v2di, di, s64, int64x2_t)
+__ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v16qi, qi, u8, int8x16_t)
+__ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v8hi, hi, u16,
+                int16x8_t)
+__ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v4si, si, u32,
+                int32x4_t)
+__ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, v2di, di, u64,
+                int64x2_t)
+
+#undef __ST4_LANE_FUNC
+#define __ST4_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix)       \
+__extension__ static __inline void                                         \
+__attribute__ ((__always_inline__))                                        \
+vst4q_lane_ ## funcsuffix (ptrtype *__ptr,                                 \
+                          intype __b, const int __c)                       \
+{                                                                          \
+  union { intype __i;                                                      \
+         __builtin_aarch64_simd_xi __o; } __temp = { __b };                \
+  __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
+                                   __ptr, __temp.__o, __c);                \
+}
+
+__ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32)
+__ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64)
+__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8)
+__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16)
+__ST4_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8)
+__ST4_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16)
+__ST4_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32)
+__ST4_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64)
+__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8)
+__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16)
+__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32)
+__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64)
 
 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
 vaddlv_s32 (int32x2_t a)
index fd1eb482f0f43280a06e13cd9cb04fd0112fdb1b..c537c3780eea95fa315c82bb36ac7f91f0f920fd 100644 (file)
 
 (define_mode_attr VSTRUCT_DREG [(OI "TI") (CI "EI") (XI "OI")])
 
+;; Mode of pair of elements for each vector mode, to define transfer
+;; size for structure lane/dup loads and stores.
+(define_mode_attr V_TWO_ELEM [(V8QI "HI")   (V16QI "HI")
+                              (V4HI "SI")   (V8HI "SI")
+                              (V2SI "V2SI") (V4SI "V2SI")
+                              (DI "V2DI")   (V2DI "V2DI")
+                              (V2SF "V2SF") (V4SF "V2SF")
+                              (DF "V2DI")   (V2DF "V2DI")])
+
+;; Similar, for three elements.
+(define_mode_attr V_THREE_ELEM [(V8QI "BLK") (V16QI "BLK")
+                                (V4HI "BLK") (V8HI "BLK")
+                                (V2SI "BLK") (V4SI "BLK")
+                                (DI "EI")    (V2DI "EI")
+                                (V2SF "BLK") (V4SF "BLK")
+                                (DF "EI")    (V2DF "EI")])
+
+;; Similar, for four elements.
+(define_mode_attr V_FOUR_ELEM [(V8QI "SI")   (V16QI "SI")
+                               (V4HI "V4HI") (V8HI "V4HI")
+                               (V2SI "V4SI") (V4SI "V4SI")
+                               (DI "OI")     (V2DI "OI")
+                               (V2SF "V4SF") (V4SF "V4SF")
+                               (DF "OI")     (V2DF "OI")])
+
+
 ;; Mode for atomic operation suffixes
 (define_mode_attr atomic_sfx
   [(QI "b") (HI "h") (SI "") (DI "")])