From: Richard Earnshaw Date: Fri, 8 Dec 2023 16:04:18 +0000 (+0000) Subject: Revert "arm: vst1_types_x4 ACLE intrinsics" X-Git-Tag: basepoints/gcc-15~3806 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=bdd0a50833e2a30ad5795f0c81925c9cea46f9d1;p=thirdparty%2Fgcc.git Revert "arm: vst1_types_x4 ACLE intrinsics" This reverts commit 2f48d846c794ba091b266133f73717361096d454. --- diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index c9bdda39663a..e76be3516d95 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -11258,14 +11258,6 @@ vst1_p64_x3 (poly64_t * __a, poly64x1x3_t __b) __builtin_neon_vst1_x3di ((__builtin_neon_di *) __a, __bu.__o); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_p64_x4 (poly64_t * __a, poly64x1x4_t __b) -{ - union { poly64x1x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x3di ((__builtin_neon_di *) __a, __bu.__o); -} - #pragma GCC pop_options __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -11359,38 +11351,6 @@ vst1_s64_x3 (int64_t * __a, int64x1x3_t __b) __builtin_neon_vst1_x3di ((__builtin_neon_di *) __a, __bu.__o); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_s8_x4 (int8_t * __a, int8x8x4_t __b) -{ - union { int8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x4v8qi ((__builtin_neon_qi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_s16_x4 (int16_t * __a, int16x4x4_t __b) -{ - union { int16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x4v4hi ((__builtin_neon_hi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_s32_x4 (int32_t * __a, int32x2x4_t __b) -{ - union { int32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x4v2si ((__builtin_neon_si *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_s64_x4 (int64_t * __a, int64x1x4_t __b) -{ - union { int64x1x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x4di ((__builtin_neon_di *) __a, __bu.__o); -} - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -11443,24 +11403,6 @@ vst1_f32_x3 (float32_t * __a, float32x2x3_t __b) __builtin_neon_vst1_x3v2sf ((__builtin_neon_sf *) __a, __bu.__o); } -#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_f16_x4 (float16_t * __a, float16x4x4_t __b) -{ - union { float16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x4v4hf (__a, __bu.__o); -} -#endif - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_f32_x4 (float32_t * __a, float32x2x4_t __b) -{ - union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x4v2sf ((__builtin_neon_sf *) __a, __bu.__o); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_u8 (uint8_t * __a, uint8x8_t __b) @@ -11553,38 +11495,6 @@ vst1_u64_x3 (uint64_t * __a, uint64x1x3_t __b) __builtin_neon_vst1_x3di ((__builtin_neon_di *) __a, __bu.__o); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u8_x4 (uint8_t * __a, uint8x8x4_t __b) -{ - union { uint8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x4v8qi ((__builtin_neon_qi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u16_x4 (uint16_t * __a, uint16x4x4_t __b) -{ - union { uint16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x4v4hi ((__builtin_neon_hi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u32_x4 (uint32_t * __a, uint32x2x4_t __b) -{ - union { uint32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x4v2si ((__builtin_neon_si *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u64_x4 (uint64_t * __a, uint64x1x4_t __b) -{ - union { uint64x1x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x4di ((__builtin_neon_di *) __a, __bu.__o); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_p8 (poly8_t * __a, poly8x8_t __b) @@ -11631,22 +11541,6 @@ vst1_p16_x3 (poly16_t * __a, poly16x4x3_t __b) __builtin_neon_vst1_x3v4hi ((__builtin_neon_hi *) __a, __bu.__o); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_p8_x4 (poly8_t * __a, poly8x8x4_t __b) -{ - union { poly8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x4v8qi ((__builtin_neon_qi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_p16_x4 (poly16_t * __a, poly16x4x4_t __b) -{ - union { poly16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x4v4hi ((__builtin_neon_hi *) __a, __bu.__o); -} - #pragma GCC push_options #pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ extern __inline void @@ -20404,14 +20298,6 @@ vst1_bf16_x3 (bfloat16_t * __a, bfloat16x4x3_t __b) __builtin_neon_vst1_x3v4bf ((__builtin_neon_bf *) __a, __bu.__o); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_bf16_x4 (bfloat16_t * __a, bfloat16x4x4_t __b) -{ - union { bfloat16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x4v4bf ((__builtin_neon_bf *) __a, __bu.__o); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_bf16 (bfloat16_t * __a, bfloat16x8_t __b) diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index a4056ec24d96..8b104b1a700b 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -313,7 +313,6 @@ VAR14 (STORE1, vst1, v4bf, v8bf) VAR7 (STORE1, vst1_x2, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) VAR7 (STORE1, vst1_x3, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) -VAR7 (STORE1, vst1_x4, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) VAR14 (STORE1LANE, vst1_lane, v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di, v4bf, v8bf) VAR13 (LOAD1, vld2, diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index dfbaf5a6dc68..5185434d6d93 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -5145,16 +5145,6 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_store1_3reg")] ) -(define_insn "neon_vst1_x4" - [(set (match_operand:OI 0 "neon_struct_operand" "=Um") - (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") - (unspec:VDQX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VST1))] - "TARGET_NEON" - "vst1.\t%h1, %A0" - [(set_attr "type" "neon_store1_4reg")] -) - (define_insn "neon_vst1" [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um") (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")] diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c index 04ca6583552f..5f820a6a496e 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c @@ -115,62 +115,8 @@ void test_vst1_p16_x3 (poly16_t * ptr, poly16x4x3_t val) vst1_p16_x3 (ptr, val); } -void test_vst1_u8_x4 (uint8_t * ptr, uint8x8x4_t val) -{ - vst1_u8_x4 (ptr, val); -} - -void test_vst1_u16_x4 (uint16_t * ptr, uint16x4x4_t val) -{ - vst1_u16_x4 (ptr, val); -} - -void test_vst1_u32_x4 (uint32_t * ptr, uint32x2x4_t val) -{ - vst1_u32_x4 (ptr, val); -} - -void test_vst1_u64_x4 (uint64_t * ptr, uint64x1x4_t val) -{ - vst1_u64_x4 (ptr, val); -} - -void test_vst1_s8_x4 (int8_t * ptr, int8x8x4_t val) -{ - vst1_s8_x4 (ptr, val); -} - -void test_vst1_s16_x4 (int16_t * ptr, int16x4x4_t val) -{ - vst1_s16_x4 (ptr, val); -} - -void test_vst1_s32_x4 (int32_t * ptr, int32x2x4_t val) -{ - vst1_s32_x4 (ptr, val); -} - -void test_vst1_s64_x4 (int64_t * ptr, int64x1x4_t val) -{ - vst1_s64_x4 (ptr, val); -} - -void test_vst1_f32_x4 (float32_t * ptr, float32x2x4_t val) -{ - vst1_f32_x4 (ptr, val); -} - -void test_vst1_p8_x4 (poly8_t * ptr, poly8x8x4_t val) -{ - vst1_p8_x4 (ptr, val); -} - -void test_vst1_p16_x4 (poly16_t * ptr, poly16x4x4_t val) -{ - vst1_p16_x4 (ptr, val); -} -/* { dg-final { scan-assembler-times {vst1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 9 } } */ -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 9 } } */ -/* { dg-final { scan-assembler-times {vst1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 9 } } */ -/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {vst1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {vst1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 4 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c index d919c7d060dc..a3a00ead4682 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c @@ -15,8 +15,4 @@ void test_vst1_bf16_x3 (bfloat16_t * ptr, bfloat16x4x3_t val) vst1_bf16_x3 (ptr, val); } -void test_vst1_bf16_x4 (bfloat16_t * ptr, bfloat16x4x4_t val) -{ - vst1_bf16_x4 (ptr, val); -} -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c index 3d1d1eb7ad14..0a6863e24c6c 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c @@ -15,9 +15,4 @@ void test_vst1_f16_x3 (float16_t * ptr, float16x4x3_t val) vst1_f16_x3 (ptr, val); } -void test_vst1_f16_x4 (float16_t * ptr, float16x4x4_t val) -{ - vst1_f16_x4 (ptr, val); -} - -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c index 62912143481a..5dbd6049bc9f 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c @@ -15,9 +15,4 @@ void test_vst1_p64_x3 (poly64_t * ptr, poly64x1x3_t val) vst1_p64_x3 (ptr, val); } -void test_vst1_p64_x4 (poly64_t * ptr, poly64x1x4_t val) -{ - vst1_p64_x4 (ptr, val); -} - -/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 3 } } */ \ No newline at end of file +/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ \ No newline at end of file