return result;
}
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vld1_f32 (const float32_t * a)
-{
- float32x2_t result;
- __asm__ ("ld1 {%0.2s}, %1"
- : "=w"(result)
- : "Utv"(({const float32x2_t *_a = (float32x2_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
-vld1_f64 (const float64_t * a)
-{
- float64x1_t result;
- __asm__ ("ld1 {%0.1d}, %1"
- : "=w"(result)
- : "Utv"(*a)
- : /* No clobbers */);
- return result;
-}
-
#define vld1_lane_f32(a, b, c) \
__extension__ \
({ \
result; \
})
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vld1_p8 (const poly8_t * a)
-{
- poly8x8_t result;
- __asm__ ("ld1 {%0.8b}, %1"
- : "=w"(result)
- : "Utv"(({const poly8x8_t *_a = (poly8x8_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-vld1_p16 (const poly16_t * a)
-{
- poly16x4_t result;
- __asm__ ("ld1 {%0.4h}, %1"
- : "=w"(result)
- : "Utv"(({const poly16x4_t *_a = (poly16x4_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vld1_s8 (const int8_t * a)
-{
- int8x8_t result;
- __asm__ ("ld1 {%0.8b}, %1"
- : "=w"(result)
- : "Utv"(({const int8x8_t *_a = (int8x8_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vld1_s16 (const int16_t * a)
-{
- int16x4_t result;
- __asm__ ("ld1 {%0.4h}, %1"
- : "=w"(result)
- : "Utv"(({const int16x4_t *_a = (int16x4_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vld1_s32 (const int32_t * a)
-{
- int32x2_t result;
- __asm__ ("ld1 {%0.2s}, %1"
- : "=w"(result)
- : "Utv"(({const int32x2_t *_a = (int32x2_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vld1_s64 (const int64_t * a)
-{
- int64x1_t result;
- __asm__ ("ld1 {%0.1d}, %1"
- : "=w"(result)
- : "Utv"(*a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vld1_u8 (const uint8_t * a)
-{
- uint8x8_t result;
- __asm__ ("ld1 {%0.8b}, %1"
- : "=w"(result)
- : "Utv"(({const uint8x8_t *_a = (uint8x8_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vld1_u16 (const uint16_t * a)
-{
- uint16x4_t result;
- __asm__ ("ld1 {%0.4h}, %1"
- : "=w"(result)
- : "Utv"(({const uint16x4_t *_a = (uint16x4_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vld1_u32 (const uint32_t * a)
-{
- uint32x2_t result;
- __asm__ ("ld1 {%0.2s}, %1"
- : "=w"(result)
- : "Utv"(({const uint32x2_t *_a = (uint32x2_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vld1_u64 (const uint64_t * a)
-{
- uint64x1_t result;
- __asm__ ("ld1 {%0.1d}, %1"
- : "=w"(result)
- : "Utv"(*a)
- : /* No clobbers */);
- return result;
-}
-
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_dup_f32 (const float32_t * a)
{
return result;
}
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-vld1q_f32 (const float32_t * a)
-{
- float32x4_t result;
- __asm__ ("ld1 {%0.4s}, %1"
- : "=w"(result)
- : "Utv"(({const float32x4_t *_a = (float32x4_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vld1q_f64 (const float64_t * a)
-{
- float64x2_t result;
- __asm__ ("ld1 {%0.2d}, %1"
- : "=w"(result)
- : "Utv"(({const float64x2_t *_a = (float64x2_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
#define vld1q_lane_f32(a, b, c) \
__extension__ \
({ \
result; \
})
-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-vld1q_p8 (const poly8_t * a)
-{
- poly8x16_t result;
- __asm__ ("ld1 {%0.16b}, %1"
- : "=w"(result)
- : "Utv"(({const poly8x16_t *_a = (poly8x16_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-vld1q_p16 (const poly16_t * a)
-{
- poly16x8_t result;
- __asm__ ("ld1 {%0.16b}, %1"
- : "=w"(result)
- : "Utv"(({const poly16x8_t *_a = (poly16x8_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vld1q_s8 (const int8_t * a)
-{
- int8x16_t result;
- __asm__ ("ld1 {%0.16b}, %1"
- : "=w"(result)
- : "Utv"(({const int8x16_t *_a = (int8x16_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vld1q_s16 (const int16_t * a)
-{
- int16x8_t result;
- __asm__ ("ld1 {%0.8h}, %1"
- : "=w"(result)
- : "Utv"(({const int16x8_t *_a = (int16x8_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vld1q_s32 (const int32_t * a)
-{
- int32x4_t result;
- __asm__ ("ld1 {%0.4s}, %1"
- : "=w"(result)
- : "Utv"(({const int32x4_t *_a = (int32x4_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vld1q_s64 (const int64_t * a)
-{
- int64x2_t result;
- __asm__ ("ld1 {%0.2d}, %1"
- : "=w"(result)
- : "Utv"(({const int64x2_t *_a = (int64x2_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vld1q_u8 (const uint8_t * a)
-{
- uint8x16_t result;
- __asm__ ("ld1 {%0.16b}, %1"
- : "=w"(result)
- : "Utv"(({const uint8x16_t *_a = (uint8x16_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vld1q_u16 (const uint16_t * a)
-{
- uint16x8_t result;
- __asm__ ("ld1 {%0.8h}, %1"
- : "=w"(result)
- : "Utv"(({const uint16x8_t *_a = (uint16x8_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vld1q_u32 (const uint32_t * a)
-{
- uint32x4_t result;
- __asm__ ("ld1 {%0.4s}, %1"
- : "=w"(result)
- : "Utv"(({const uint32x4_t *_a = (uint32x4_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vld1q_u64 (const uint64_t * a)
-{
- uint64x2_t result;
- __asm__ ("ld1 {%0.2d}, %1"
- : "=w"(result)
- : "Utv"(({const uint64x2_t *_a = (uint64x2_t *) a; *_a;}))
- : /* No clobbers */);
- return result;
-}
-
#define vmla_lane_f32(a, b, c, d) \
__extension__ \
({ \
result; \
})
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_f32 (float32_t * a, float32x2_t b)
-{
- __asm__ ("st1 {%1.2s},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_f64 (float64_t * a, float64x1_t b)
-{
- __asm__ ("st1 {%1.1d},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
#define vst1_lane_f32(a, b, c) \
__extension__ \
({ \
: "memory"); \
})
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_p8 (poly8_t * a, poly8x8_t b)
-{
- __asm__ ("st1 {%1.8b},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_p16 (poly16_t * a, poly16x4_t b)
-{
- __asm__ ("st1 {%1.4h},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_s8 (int8_t * a, int8x8_t b)
-{
- __asm__ ("st1 {%1.8b},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_s16 (int16_t * a, int16x4_t b)
-{
- __asm__ ("st1 {%1.4h},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_s32 (int32_t * a, int32x2_t b)
-{
- __asm__ ("st1 {%1.2s},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_s64 (int64_t * a, int64x1_t b)
-{
- __asm__ ("st1 {%1.1d},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_u8 (uint8_t * a, uint8x8_t b)
-{
- __asm__ ("st1 {%1.8b},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_u16 (uint16_t * a, uint16x4_t b)
-{
- __asm__ ("st1 {%1.4h},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_u32 (uint32_t * a, uint32x2_t b)
-{
- __asm__ ("st1 {%1.2s},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1_u64 (uint64_t * a, uint64x1_t b)
-{
- __asm__ ("st1 {%1.1d},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_f32 (float32_t * a, float32x4_t b)
-{
- __asm__ ("st1 {%1.4s},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_f64 (float64_t * a, float64x2_t b)
-{
- __asm__ ("st1 {%1.2d},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
#define vst1q_lane_f32(a, b, c) \
__extension__ \
: "memory"); \
})
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_p8 (poly8_t * a, poly8x16_t b)
-{
- __asm__ ("st1 {%1.16b},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_p16 (poly16_t * a, poly16x8_t b)
-{
- __asm__ ("st1 {%1.8h},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_s8 (int8_t * a, int8x16_t b)
-{
- __asm__ ("st1 {%1.16b},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_s16 (int16_t * a, int16x8_t b)
-{
- __asm__ ("st1 {%1.8h},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_s32 (int32_t * a, int32x4_t b)
-{
- __asm__ ("st1 {%1.4s},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_s64 (int64_t * a, int64x2_t b)
-{
- __asm__ ("st1 {%1.2d},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_u8 (uint8_t * a, uint8x16_t b)
-{
- __asm__ ("st1 {%1.16b},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_u16 (uint16_t * a, uint16x8_t b)
-{
- __asm__ ("st1 {%1.8h},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_u32 (uint32_t * a, uint32x4_t b)
-{
- __asm__ ("st1 {%1.4s},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
-__extension__ static __inline void __attribute__ ((__always_inline__))
-vst1q_u64 (uint64_t * a, uint64x2_t b)
-{
- __asm__ ("st1 {%1.2d},[%0]"
- :
- : "r"(a), "w"(b)
- : "memory");
-}
-
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
{
return (uint64x1_t) __builtin_aarch64_dup_lane_scalarv2di ((int64x2_t) a, b);
}
+/* vld1 */
+
+/* Load a float32x2_t from A via the ld1v2sf builtin; A is passed as a
+   pointer to const __builtin_aarch64_simd_sf.  */
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vld1_f32 (const float32_t *a)
+{
+ return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a);
+}
+
+/* Return the float64_t stored at A; a plain dereference, no builtin.
+   NOTE(review): relies on float64x1_t accepting a float64_t value in
+   this header -- confirm against the typedef.  */
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vld1_f64 (const float64_t *a)
+{
+ return *a;
+}
+
+/* Load a poly8x8_t from A via the ld1v8qi builtin; the builtin's result
+   is cast to poly8x8_t.  */
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vld1_p8 (const poly8_t *a)
+{
+ return (poly8x8_t)
+ __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
+}
+
+/* Load a poly16x4_t from A via the ld1v4hi builtin; the builtin's result
+   is cast to poly16x4_t.  */
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vld1_p16 (const poly16_t *a)
+{
+ return (poly16x4_t)
+ __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
+}
+
+/* Load an int8x8_t from A via the ld1v8qi builtin; the result is
+   returned without a cast.  */
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vld1_s8 (const int8_t *a)
+{
+ return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
+}
+
+/* Load an int16x4_t from A via the ld1v4hi builtin; the result is
+   returned without a cast.  */
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vld1_s16 (const int16_t *a)
+{
+ return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
+}
+
+/* Load an int32x2_t from A via the ld1v2si builtin; the result is
+   returned without a cast.  */
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vld1_s32 (const int32_t *a)
+{
+ return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
+}
+
+/* Return the int64_t stored at A; a plain dereference, no builtin.
+   NOTE(review): relies on int64x1_t accepting an int64_t value in this
+   header -- confirm against the typedef.  */
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vld1_s64 (const int64_t *a)
+{
+ return *a;
+}
+
+/* Load a uint8x8_t from A via the ld1v8qi builtin; the builtin's result
+   is cast to uint8x8_t.  */
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vld1_u8 (const uint8_t *a)
+{
+ return (uint8x8_t)
+ __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
+}
+
+/* Load a uint16x4_t from A via the ld1v4hi builtin; the builtin's result
+   is cast to uint16x4_t.  */
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vld1_u16 (const uint16_t *a)
+{
+ return (uint16x4_t)
+ __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
+}
+
+/* Load a uint32x2_t from A via the ld1v2si builtin; the builtin's result
+   is cast to uint32x2_t.  */
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vld1_u32 (const uint32_t *a)
+{
+ return (uint32x2_t)
+ __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
+}
+
+/* Return the uint64_t stored at A; a plain dereference, no builtin.
+   NOTE(review): relies on uint64x1_t accepting a uint64_t value in this
+   header -- confirm against the typedef.  */
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vld1_u64 (const uint64_t *a)
+{
+ return *a;
+}
+
+/* vld1q */
+
+/* Load a float32x4_t from A via the ld1v4sf builtin; A is passed as a
+   pointer to const __builtin_aarch64_simd_sf.  */
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vld1q_f32 (const float32_t *a)
+{
+ return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a);
+}
+
+/* Load a float64x2_t from A via the ld1v2df builtin; A is passed as a
+   pointer to const __builtin_aarch64_simd_df.  */
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vld1q_f64 (const float64_t *a)
+{
+ return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a);
+}
+
+/* Load a poly8x16_t from A via the ld1v16qi builtin; the builtin's
+   result is cast to poly8x16_t.  */
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vld1q_p8 (const poly8_t *a)
+{
+ return (poly8x16_t)
+ __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
+}
+
+/* Load a poly16x8_t from A via the ld1v8hi builtin; the builtin's
+   result is cast to poly16x8_t.  */
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vld1q_p16 (const poly16_t *a)
+{
+ return (poly16x8_t)
+ __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
+}
+
+/* Load an int8x16_t from A via the ld1v16qi builtin; the result is
+   returned without a cast.  */
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vld1q_s8 (const int8_t *a)
+{
+ return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
+}
+
+/* Load an int16x8_t from A via the ld1v8hi builtin; the result is
+   returned without a cast.  */
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vld1q_s16 (const int16_t *a)
+{
+ return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
+}
+
+/* Load an int32x4_t from A via the ld1v4si builtin; the result is
+   returned without a cast.  */
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vld1q_s32 (const int32_t *a)
+{
+ return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
+}
+
+/* Load an int64x2_t from A via the ld1v2di builtin; the result is
+   returned without a cast.  */
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vld1q_s64 (const int64_t *a)
+{
+ return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
+}
+
+/* Load a uint8x16_t from A via the ld1v16qi builtin; the builtin's
+   result is cast to uint8x16_t.  */
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vld1q_u8 (const uint8_t *a)
+{
+ return (uint8x16_t)
+ __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
+}
+
+/* Load a uint16x8_t from A via the ld1v8hi builtin; the builtin's
+   result is cast to uint16x8_t.  */
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vld1q_u16 (const uint16_t *a)
+{
+ return (uint16x8_t)
+ __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
+}
+
+/* Load a uint32x4_t from A via the ld1v4si builtin; the builtin's
+   result is cast to uint32x4_t.  */
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vld1q_u32 (const uint32_t *a)
+{
+ return (uint32x4_t)
+ __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
+}
+
+/* Load a uint64x2_t from A via the ld1v2di builtin; the builtin's
+   result is cast to uint64x2_t.  */
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vld1q_u64 (const uint64_t *a)
+{
+ return (uint64x2_t)
+ __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
+}
+
/* vldn */
__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
return (uint64x1_t) __builtin_aarch64_usri_ndi (__a, __b, __c);
}
+/* vst1 */
+
+/* Store the float32x2_t B to A via the st1v2sf builtin; A is passed as a
+   pointer to __builtin_aarch64_simd_sf.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_f32 (float32_t *a, float32x2_t b)
+{
+ __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b);
+}
+
+/* Write B to *A with a plain assignment; no builtin is used.
+   NOTE(review): relies on float64x1_t being assignable to float64_t in
+   this header -- confirm against the typedef.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_f64 (float64_t *a, float64x1_t b)
+{
+ *a = b;
+}
+
+/* Store the poly8x8_t B to A via the st1v8qi builtin; B is passed cast
+   to int8x8_t.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_p8 (poly8_t *a, poly8x8_t b)
+{
+ __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
+ (int8x8_t) b);
+}
+
+/* Store the poly16x4_t B to A via the st1v4hi builtin; B is passed cast
+   to int16x4_t.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_p16 (poly16_t *a, poly16x4_t b)
+{
+ __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
+ (int16x4_t) b);
+}
+
+/* Store the int8x8_t B to A via the st1v8qi builtin; B is passed
+   without a cast.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_s8 (int8_t *a, int8x8_t b)
+{
+ __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b);
+}
+
+/* Store the int16x4_t B to A via the st1v4hi builtin; B is passed
+   without a cast.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_s16 (int16_t *a, int16x4_t b)
+{
+ __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b);
+}
+
+/* Store the int32x2_t B to A via the st1v2si builtin; B is passed
+   without a cast.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_s32 (int32_t *a, int32x2_t b)
+{
+ __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b);
+}
+
+/* Write B to *A with a plain assignment; no builtin is used.
+   NOTE(review): relies on int64x1_t being assignable to int64_t in this
+   header -- confirm against the typedef.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_s64 (int64_t *a, int64x1_t b)
+{
+ *a = b;
+}
+
+/* Store the uint8x8_t B to A via the st1v8qi builtin; B is passed cast
+   to int8x8_t.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_u8 (uint8_t *a, uint8x8_t b)
+{
+ __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
+ (int8x8_t) b);
+}
+
+/* Store the uint16x4_t B to A via the st1v4hi builtin; B is passed cast
+   to int16x4_t.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_u16 (uint16_t *a, uint16x4_t b)
+{
+ __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
+ (int16x4_t) b);
+}
+
+/* Store the uint32x2_t B to A via the st1v2si builtin; B is passed cast
+   to int32x2_t.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_u32 (uint32_t *a, uint32x2_t b)
+{
+ __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a,
+ (int32x2_t) b);
+}
+
+/* Write B to *A with a plain assignment; no builtin is used.
+   NOTE(review): relies on uint64x1_t being assignable to uint64_t in
+   this header -- confirm against the typedef.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_u64 (uint64_t *a, uint64x1_t b)
+{
+ *a = b;
+}
+
+/* vst1q */
+
+/* Store the float32x4_t B to A via the st1v4sf builtin; A is passed as a
+   pointer to __builtin_aarch64_simd_sf.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_f32 (float32_t *a, float32x4_t b)
+{
+ __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b);
+}
+
+/* Store the float64x2_t B to A via the st1v2df builtin; A is passed as a
+   pointer to __builtin_aarch64_simd_df.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_f64 (float64_t *a, float64x2_t b)
+{
+ __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b);
+}
+
+/* Store the poly8x16_t B to A via the st1v16qi builtin; B is passed cast
+   to int8x16_t.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_p8 (poly8_t *a, poly8x16_t b)
+{
+ __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
+ (int8x16_t) b);
+}
+
+/* Store the poly16x8_t B to A via the st1v8hi builtin; B is passed cast
+   to int16x8_t.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_p16 (poly16_t *a, poly16x8_t b)
+{
+ __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
+ (int16x8_t) b);
+}
+
+/* Store the int8x16_t B to A via the st1v16qi builtin; B is passed
+   without a cast.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_s8 (int8_t *a, int8x16_t b)
+{
+ __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b);
+}
+
+/* Store the int16x8_t B to A via the st1v8hi builtin; B is passed
+   without a cast.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_s16 (int16_t *a, int16x8_t b)
+{
+ __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b);
+}
+
+/* Store the int32x4_t B to A via the st1v4si builtin; B is passed
+   without a cast.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_s32 (int32_t *a, int32x4_t b)
+{
+ __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b);
+}
+
+/* Store the int64x2_t B to A via the st1v2di builtin; B is passed
+   without a cast.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_s64 (int64_t *a, int64x2_t b)
+{
+ __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b);
+}
+
+/* Store the uint8x16_t B to A via the st1v16qi builtin; B is passed cast
+   to int8x16_t.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_u8 (uint8_t *a, uint8x16_t b)
+{
+ __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
+ (int8x16_t) b);
+}
+
+/* Store the uint16x8_t B to A via the st1v8hi builtin; B is passed cast
+   to int16x8_t.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_u16 (uint16_t *a, uint16x8_t b)
+{
+ __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
+ (int16x8_t) b);
+}
+
+/* Store the uint32x4_t B to A via the st1v4si builtin; B is passed cast
+   to int32x4_t.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_u32 (uint32_t *a, uint32x4_t b)
+{
+ __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a,
+ (int32x4_t) b);
+}
+
+/* Store the uint64x2_t B to A via the st1v2di builtin; B is passed cast
+   to int64x2_t.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_u64 (uint64_t *a, uint64x2_t b)
+{
+ __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a,
+ (int64x2_t) b);
+}
+
/* vstn */
__extension__ static __inline void