;; VQMOV without 2-element modes.
(define_mode_iterator VQMOV_NO2E [V16QI V8HI V4SI V8HF V8BF V4SF])
+;; Modes that can be duplicated into a 64-bit register.
+(define_mode_iterator VDDUP [V4QI V2QI QI V2HI HI SI
+ V2BF BF V2HF HF SF])
+
+;; Modes that can be duplicated into a 128-bit register.
+(define_mode_iterator VQDUP [V8QI V4QI V2QI QI V4HI V2HI HI V2SI SI DI
+ V4BF V2BF BF V4HF V2HF HF V2SF SF DF])
+
;; Double integer vector modes.
(define_mode_iterator VD_I [V8QI V4HI V2SI DI])
;; The number of bits in a vector element, or controlled by a predicate
;; element.
-(define_mode_attr elem_bits [(VNx16BI "8") (VNx8BI "16")
+(define_mode_attr elem_bits [(V2QI "8") (V4QI "8") (V2HF "16") (V2HI "16")
+ (V2BF "16")
+ (VNx16BI "8") (VNx8BI "16")
(VNx4BI "32") (VNx2BI "64")
(VNx16QI "8") (VNx32QI "8") (VNx64QI "8")
(VNx8HI "16") (VNx16HI "16") (VNx32HI "16")
;; Mode-to-individual element type mapping.
(define_mode_attr Vetype [(V8QI "b") (V16QI "b")
- (V4HI "h") (V8HI "h")
+ (V2QI "b") (V4QI "b")
+ (V4HI "h") (V8HI "h") (V2HI "h")
(V2SI "s") (V4SI "s")
(V2DI "d") (V1DI "d")
- (V4HF "h") (V8HF "h")
- (V2SF "s") (V4SF "s")
+ (V4HF "h") (V8HF "h") (V2HF "h")
+ (V2SF "s") (V4SF "s") (V2BF "h")
(V2DF "d") (V1DF "d")
(V2x8QI "b") (V2x4HI "h")
(V2x2SI "s") (V2x1DI "d")
(V4x2DF "v2df") (V4x8BF "v8bf")])
;; Define element mode for each vector mode.
-(define_mode_attr VEL [(V8QI "QI") (V16QI "QI")
+(define_mode_attr VEL [(V8QI "QI") (V16QI "QI")
+ (V2QI "QI") (V4QI "QI")
(V4HI "HI") (V8HI "HI")
+ (V2HI "HI") (V2HF "HF")
(V2SI "SI") (V4SI "SI")
(DI "DI") (V1DI "DI")
(V2DI "DI")
(SI "SI") (HI "HI")
(QI "QI")
(V4BF "BF") (V8BF "BF")
+ (V2BF "BF")
(V2x8QI "QI") (V2x4HI "HI")
(V2x2SI "SI") (V2x1DI "DI")
(V2x4HF "HF") (V2x2SF "SF")
;; 64-bit container modes the inner or scalar source mode.
(define_mode_attr VCOND [(HI "V4HI") (SI "V2SI")
+ (V2HI "V4HI")
(V4HI "V4HI") (V8HI "V4HI")
(V2SI "V2SI") (V4SI "V2SI")
+ (QI "V8QI") (V2QI "V8QI")
+ (V4QI "V8QI")
(DI "DI") (V2DI "DI")
+ (HF "V4HF") (V2HF "V4HF")
(V4HF "V4HF") (V8HF "V4HF")
+ (BF "V4BF") (V2BF "V4BF")
+ (SF "V2SF")
(V2SF "V2SF") (V4SF "V2SF")
(V2DF "DF")])
+;; Same as above, but in lowercase.
+(define_mode_attr vcond [(HI "v4hi") (SI "v2si")
+ (V2HI "v4hi")
+ (V4HI "v4hi") (V8HI "v4hi")
+ (V2SI "v2si") (V4SI "v2si")
+ (QI "v8qi") (V2QI "v8qi")
+ (V4QI "v8qi")
+ (DI "di") (V2DI "di")
+ (HF "v4hf") (V2HF "v4hf")
+ (V4HF "v4hf") (V8HF "v4hf")
+ (BF "v4bf") (V2BF "v4bf")
+ (SF "v2sf")
+ (V2SF "v2sf") (V4SF "v2sf")
+ (V2DF "df")])
+
;; 128-bit container modes the inner or scalar source mode.
(define_mode_attr VCONQ [(V8QI "V16QI") (V16QI "V16QI")
+ (V4QI "V16QI") (V2QI "V16QI")
(V4HI "V8HI") (V8HI "V8HI")
+ (V2HI "V8HI")
(V2SI "V4SI") (V4SI "V4SI")
(DI "V2DI") (V2DI "V2DI")
(V4HF "V8HF") (V8HF "V8HF")
+ (V2HF "V8HF") (HF "V8HF")
(V4BF "V8BF") (V8BF "V8BF")
+ (V2BF "V8BF") (BF "V8BF")
(V2SF "V4SF") (V4SF "V4SF")
(V2DF "V2DF") (SI "V4SI")
(HI "V8HI") (QI "V16QI")
(SF "V4SF") (DF "V2DF")])
+;; Same as above, but in lowercase.
+(define_mode_attr vconq [(V8QI "v16qi") (V16QI "v16qi")
+ (V4QI "v16qi") (V2QI "v16qi")
+ (V4HI "v8hi") (V8HI "v8hi")
+ (V2HI "v8hi")
+ (V2SI "v4si") (V4SI "v4si")
+ (DI "v2di") (V2DI "v2di")
+ (V4HF "v8hf") (V8HF "v8hf")
+ (V2HF "v8hf") (HF "v8hf")
+ (V4BF "v8bf") (V8BF "v8bf")
+ (V2BF "v8bf") (BF "v8bf")
+ (V2SF "v4sf") (V4SF "v4sf")
+ (V2DF "v2df") (SI "v4si")
+ (HI "v8hi") (QI "v16qi")
+ (SF "v4sf") (DF "v2df")])
+
;; Half modes of all vector modes.
(define_mode_attr VHALF [(V8QI "V4QI") (V16QI "V8QI")
(V4HI "V2HI") (V8HI "V4HI")
(define_mode_attr V2ntype [(V8HI "16b") (V4SI "8h")
(V2DI "4s")])
+;; Register suffix used when duplicating a value of a certain mode
+;; into a full 128-bit AdvSIMD register.
+(define_mode_attr Vqduptype [(QI "16b") (V2QI "8h") (V4QI "4s") (V8QI "2d")
+ (HI "8h") (V2HI "4s") (V4HI "2d")
+ (HF "8h") (V2HF "4s") (V4HF "2d")
+ (BF "8h") (V2BF "4s") (V4BF "2d")
+ (SI "4s") (V2SI "2d")
+ (SF "4s") (V2SF "2d")
+ (DI "2d") (DF "2d")])
+
+;; Register suffix used when duplicating a value of a certain mode
+;; into a partial 64-bit AdvSIMD register.
+(define_mode_attr Vdduptype [(QI "8b") (V2QI "4h") (V4QI "2s") (V8QI "")
+ (HI "4h") (V2HI "2s") (V4HI "")
+ (HF "4h") (V2HF "2s") (V4HF "")
+ (BF "4h") (V2BF "2s") (V4BF "")
+ (SI "2s") (V2SI "")
+ (SF "2s") (V2SF "")
+ (DI "") (DF "")])
+
;; The result of FCVTN on two vectors of the given mode. The result has
;; twice as many QI elements as the input.
(define_mode_attr VPACKB [(V4HF "V8QI") (V8HF "V16QI") (V4SF "V8QI")])
;; Whether a mode fits in W or X registers (i.e. "w" for 32-bit modes
;; and "x" for 64-bit modes).
(define_mode_attr single_wx [(SI "w") (SF "w")
+ (V2QI "w") (V4QI "w")
(V8QI "x") (V4HI "x")
(V4HF "x") (V4BF "x")
+ (V2HI "w") (V2HF "w")
+ (HF "w") (QI "w")
+ (V2BF "w") (BF "w")
+ (HI "w")
(V2SI "x") (V2SF "x")
(DI "x") (DF "x")])
(V8QI "d") (V4HI "d")
(V4HF "d") (V4BF "d")
(V2SI "d") (V2SF "d")
- (DI "d") (DF "d")])
+ (DI "d") (DF "d")
+ (QI "b") (BF "h")
+ (V2HF "s") (HI "h")
+ (V4QI "s") (V2QI "h")
+ (V2HI "s") (V2BF "s")
+ (HF "h")])
;; Whether a double-width mode fits in D or Q registers (i.e. "d" for
;; 32-bit modes and "q" for 64-bit modes).
(V2SI "q") (V2SF "q")
(DI "q") (DF "q")])
-;; Scalar size of a sub-64-bit vector mode.
-(define_mode_attr vstype [(V4QI "s") (V2QI "h")
- (V2HI "s") (V2BF "s") (V2HF "s")])
+;; Scalar size of a sub-128-bit vector or scalar mode.
+(define_mode_attr vstype [(V8QI "d") (V4QI "s") (V2QI "h") (QI "b")
+ (V4HI "d") (V2HI "s") (HI "h")
+ (V2SI "d") (SI "s") (DI "d")
+ (V4BF "d") (V2BF "s") (BF "h")
+ (V4HF "d") (V2HF "s") (HF "h")
+ (V2SF "d") (SF "s") (DF "d")])
;; Define corresponding core/FP element mode for each vector mode.
(define_mode_attr vw [(V8QI "w") (V16QI "w")
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8.2-a+fp16" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_neon.h>
+
+/* Check vector initialization with a repeating sequence of elements. */
+
+#ifndef TESTCASE
+#define TESTCASE(TYPE, ETYPE, T, SZ, NUM, MULT, ...)\
+ TYPE##SZ##MULT##_t test_##TYPE##SZ##_##NUM (ETYPE x0, ETYPE x1, ETYPE x2, ETYPE x3,\
+ ETYPE x4, ETYPE x5, ETYPE x6, ETYPE x7)\
+ {\
+ return (TYPE##SZ##MULT##_t) {__VA_ARGS__};\
+ }
+#endif
+
+#define TEST_8(TYPE, ETYPE, T)\
+ TESTCASE (TYPE, ETYPE, T, 8, 1, x16, x0, x0, x0, x0, x0, x0, x0, x0,\
+ x0, x0, x0, x0, x0, x0, x0, x0)\
+ TESTCASE (TYPE, ETYPE, T, 8, 2, x16, x0, x1, x0, x1, x0, x1, x0, x1,\
+ x0, x1, x0, x1, x0, x1, x0, x1)\
+ TESTCASE (TYPE, ETYPE, T, 8, 3, x16, x0, x1, x2, x3, x0, x1, x2, x3,\
+ x0, x1, x2, x3, x0, x1, x2, x3)\
+ TESTCASE (TYPE, ETYPE, T, 8, 4, x16, x0, x1, x2, x3, x4, x5, x6, x7,\
+ x0, x1, x2, x3, x4, x5, x6, x7)\
+ TESTCASE (TYPE, ETYPE, T, 8, 5, x16, x0, 0, x0, 0, x0, 0, x0, 0,\
+ x0, 0, x0, 0, x0, 0, x0, 0)\
+ TESTCASE (TYPE, ETYPE, T, 8, 6, x16, 0, x0, 0, x0, 0, x0, 0, x0,\
+ 0, x0, 0, x0, 0, x0, 0, x0)\
+ TESTCASE (TYPE, ETYPE, T, 8, 7, x16, x0, x1, 0, 1, x0, x1, 0, 1,\
+ x0, x1, 0, 1, x0, x1, 0, 1)\
+ TESTCASE (TYPE, ETYPE, T, 8, 8, x16, 0, 1, x0, x1, 0, 1, x0, x1,\
+ 0, 1, x0, x1, 0, 1, x0, x1)\
+ TESTCASE (TYPE, ETYPE, T, 8, 9, x16, x0, 0, x1, 1, x0, 0, x1, 1,\
+ x0, 0, x1, 1, x0, 0, x1, 1)\
+ TESTCASE (TYPE, ETYPE, T, 8, 10, x16, x0, 0, x1, 1, x2, 2, x3, 3,\
+ x0, 0, x1, 1, x2, 2, x3, 3)\
+ TESTCASE (TYPE, ETYPE, T, 8, 11, x16, 0, x0, 1, x1, 2, x2, 3, x3,\
+ 0, x0, 1, x1, 2, x2, 3, x3)\
+ TESTCASE (TYPE, ETYPE, T, 8, 12, x16, x0, x1, 0, 1, x2, x3, 2, 3,\
+ x0, x1, 0, 1, x2, x3, 2, 3)\
+ TESTCASE (TYPE, ETYPE, T, 8, 13, x16, 0, 1, x0, x1, 2, 3, x2, x3,\
+ 0, 1, x0, x1, 2, 3, x2, x3)
+
+#define TEST_16(TYPE, ETYPE, T)\
+ TESTCASE (TYPE, ETYPE, T, 16, 1, x8, x0, x0, x0, x0, x0, x0, x0, x0)\
+ TESTCASE (TYPE, ETYPE, T, 16, 2, x8, x0, x1, x0, x1, x0, x1, x0, x1)\
+ TESTCASE (TYPE, ETYPE, T, 16, 3, x8, x0, x1, x2, x3, x0, x1, x2, x3)\
+ TESTCASE (TYPE, ETYPE, T, 16, 4, x8, x0, 0, x0, 0, x0, 0, x0, 0)\
+ TESTCASE (TYPE, ETYPE, T, 16, 5, x8, 0, x0, 0, x0, 0, x0, 0, x0)\
+ TESTCASE (TYPE, ETYPE, T, 16, 6, x8, x0, x1, 0, 1, x0, x1, 0, 1)\
+ TESTCASE (TYPE, ETYPE, T, 16, 7, x8, 0, 1, x0, x1, 0, 1, x0, x1)\
+ TESTCASE (TYPE, ETYPE, T, 16, 8, x8, 0, x0, 1, x1, 0, x0, 1, x1)\
+
+#define TEST_32(TYPE, ETYPE, T)\
+ TESTCASE (TYPE, ETYPE, T, 32, 1, x4, x0, x0, x0, x0)\
+ TESTCASE (TYPE, ETYPE, T, 32, 2, x4, x0, x1, x0, x1)\
+ TESTCASE (TYPE, ETYPE, T, 32, 3, x4, x0, 0, x0, 0)\
+ TESTCASE (TYPE, ETYPE, T, 32, 4, x4, 0, x0, 0, x0)
+
+#define TEST_64(TYPE, ETYPE, T)\
+ TESTCASE (TYPE, ETYPE, T, 64, 1, x2, x0, x0)
+
+TEST_8(int, int8_t, s)
+
+TEST_16(float, float, f)
+TEST_16(int, int16_t, s)
+
+TEST_32(float, float, f)
+TEST_32(int, int32_t, s)
+
+TEST_64(float, double, f)
+TEST_64(int, int64_t, s)
+
+/*
+** test_int8_1:
+** dup v0\.16b, w0
+** ret
+*/
+
+/*
+** test_int8_2:
+** bfi w0, w1, 8, 8
+** dup v0\.8h, w0
+** ret
+*/
+
+/*
+** test_int8_3:
+** bfi w0, w1, 8, 8
+** bfi w0, w2, 16, 8
+** bfi w0, w3, 24, 8
+** dup v0\.4s, w0
+** ret
+*/
+
+/*
+** test_int8_4:
+** bfi w0, w2, 8, 8
+** bfi w1, w3, 8, 8
+** bfi w0, w4, 16, 8
+** bfi w1, w5, 16, 8
+** bfi w0, w6, 24, 8
+** bfi w1, w7, 24, 8
+** dup v31\.2s, w0
+** dup v0\.2s, w1
+** zip1 v0\.16b, v31\.16b, v0\.16b
+** ret
+*/
+
+/*
+** test_int8_5:
+** mov w1, 0
+** bfi w1, w0, 0, 8
+** dup v0\.8h, w1
+** ret
+*/
+
+/*
+** test_int8_6:
+** mov w1, 0
+** bfi w1, w0, 8, 8
+** dup v0\.8h, w1
+** ret
+*/
+
+/*
+** test_int8_7:
+** mov w2, 16777472
+** bfi w2, w0, 0, 8
+** bfi w2, w1, 8, 8
+** dup v0\.4s, w2
+** ret
+*/
+
+/*
+** test_int8_8:
+** mov w2, 16777472
+** bfi w2, w0, 16, 8
+** bfi w2, w1, 24, 8
+** dup v0\.4s, w2
+** ret
+*/
+
+/*
+** test_int8_9:
+** mov w2, 16777216
+** bfi w2, w0, 0, 8
+** bfi w2, w1, 16, 8
+** dup v0\.4s, w2
+** ret
+*/
+
+/*
+** test_int8_10:
+** bfi w0, w1, 8, 8
+** bfi w0, w2, 16, 8
+** bfi w0, w3, 24, 8
+** dup v31\.2s, w0
+** adrp x0, .LANCHOR[0-9]+
+** ldr d0, \[x0, #:lo12:.LANCHOR[0-9]+\]
+** zip1 v0\.16b, v31\.16b, v0\.16b
+** ret
+*/
+
+/*
+** test_int8_11:
+** bfi w0, w1, 8, 8
+** adrp x4, .LANCHOR[0-9]+
+** bfi w0, w2, 16, 8
+** ldr d0, \[x4, #:lo12:\.LANCHOR[0-9]+\]
+** bfi w0, w3, 24, 8
+** dup v31\.2s, w0
+** zip1 v0\.16b, v0\.16b, v31\.16b
+** ret
+*/
+
+/*
+** test_int8_12:
+** mov w4, 33685504
+** bfi w4, w0, 0, 8
+** mov w0, 257
+** movk w0, 0x303, lsl 16
+** bfi w0, w1, 0, 8
+** bfi w4, w2, 16, 8
+** bfi w0, w3, 16, 8
+** dup v31\.2s, w4
+** dup v0\.2s, w0
+** zip1 v0\.16b, v31\.16b, v0\.16b
+** ret
+*/
+
+/*
+** test_int8_13:
+** mov w4, 33685504
+** bfi w4, w0, 8, 8
+** mov w0, 257
+** movk w0, 0x303, lsl 16
+** bfi w0, w1, 8, 8
+** bfi w4, w2, 24, 8
+** bfi w0, w3, 24, 8
+** dup v31\.2s, w4
+** dup v0\.2s, w0
+** zip1 v0\.16b, v31\.16b, v0\.16b
+** ret
+*/
+
+/*
+** test_float16_1:
+** fcvt h0, s0
+** dup v0\.8h, v0\.h\[0\]
+** ret
+*/
+
+/*
+** test_float16_2:
+** fcvt h1, s1
+** fcvt h0, s0
+** ins v0\.h\[1\], v1\.h\[0\]
+** dup v0\.4s, v0\.s\[0\]
+** ret
+*/
+
+/*
+** test_float16_3:
+** uzp1 v2\.2s, v0\.2s, v2\.2s
+** uzp1 v3\.2s, v1\.2s, v3\.2s
+** zip1 v3\.4s, v2\.4s, v3\.4s
+** fcvtn v0\.4h, v3\.4s
+** uzp1 v0\.2d, v0\.2d, v0\.2d
+** ret
+*/
+
+/*
+** test_float16_4:
+** fcvt h0, s0
+** movi v31\.2d, #0
+** ins v31\.h\[0\], v0\.h\[0\]
+** dup v0\.4s, v31\.s\[0\]
+** ret
+*/
+
+/*
+** test_float16_5:
+** fcvt h0, s0
+** movi v31\.2d, #0
+** ins v31\.h\[1\], v0\.h\[0\]
+** dup v0\.4s, v31\.s\[0\]
+** ret
+*/
+
+/*
+** test_float16_6:
+** fcvt h1, s1
+** fcvt h0, s0
+** movi v31\.2d, #0
+** mov w0, 1006648320
+** umov w1, v1\.h\[0\]
+** ins v31\.h\[0\], v0\.h\[0\]
+** bfi w0, w1, 0, 16
+** dup v31\.2s, v31\.s\[0\]
+** dup v0\.2s, w0
+** zip1 v0\.8h, v31\.8h, v0\.8h
+** ret
+*/
+
+/*
+** test_float16_7:
+** fcvt h1, s1
+** fcvt h0, s0
+** movi v31\.2d, #0
+** mov w0, 1006648320
+** umov w1, v1\.h\[0\]
+** ins v31\.h\[1\], v0\.h\[0\]
+** bfi w0, w1, 16, 16
+** dup v31\.2s, v31\.s\[0\]
+** dup v0\.2s, w0
+** zip1 v0\.8h, v31\.8h, v0\.8h
+** ret
+*/
+
+/*
+** test_float16_8:
+** fcvt h1, s1
+** fcvt h0, s0
+** movi v31\.2s, 0x3c, lsl 24
+** ins v0\.h\[1\], v1\.h\[0\]
+** dup v0\.2s, v0\.s\[0\]
+** zip1 v0\.8h, v31\.8h, v0\.8h
+** ret
+*/
+
+/*
+** test_int16_1:
+** dup v0\.8h, w0
+** ret
+*/
+
+/*
+** test_int16_2:
+** bfi w0, w1, 16, 16
+** dup v0\.4s, w0
+** ret
+*/
+
+/*
+** test_int16_3:
+** bfi w0, w2, 16, 16
+** bfi w1, w3, 16, 16
+** dup v31\.2s, w0
+** dup v0\.2s, w1
+** zip1 v0\.8h, v31\.8h, v0\.8h
+** ret
+*/
+
+/*
+** test_int16_4:
+** mov w1, 0
+** bfi w1, w0, 0, 16
+** dup v0\.4s, w1
+** ret
+*/
+
+/*
+** test_int16_5:
+** mov w1, 0
+** bfi w1, w0, 16, 16
+** dup v0\.4s, w1
+** ret
+*/
+
+/*
+** test_int16_6:
+** mov w2, 0
+** bfi w2, w0, 0, 16
+** mov w0, 65537
+** bfi w0, w1, 0, 16
+** dup v31\.2s, w2
+** dup v0\.2s, w0
+** zip1 v0\.8h, v31\.8h, v0\.8h
+** ret
+*/
+
+/*
+** test_int16_7:
+** mov w2, 0
+** bfi w2, w0, 16, 16
+** mov w0, 65537
+** bfi w0, w1, 16, 16
+** dup v31\.2s, w2
+** dup v0\.2s, w0
+** zip1 v0\.8h, v31\.8h, v0\.8h
+** ret
+*/
+
+/*
+** test_int16_8:
+** bfi w0, w1, 16, 16
+** movi v0\.2s, 0x1, lsl 16
+** dup v31\.2s, w0
+** zip1 v0\.8h, v0\.8h, v31\.8h
+** ret
+*/
+
+/*
+** test_float32_1:
+** dup v0\.4s, v0\.s\[0\]
+** ret
+*/
+
+/*
+** test_float32_2:
+** uzp1 v0\.2s, v0\.2s, v1\.2s
+** dup v0\.2d, v0\.d\[0\]
+** ret
+*/
+
+/*
+** test_float32_3:
+** movi v31\.2s, 0
+** dup v0\.2s, v0\.s\[0\]
+** zip1 v0\.4s, v0\.4s, v31\.4s
+** ret
+*/
+
+/*
+** test_float32_4:
+** movi v31\.2s, 0
+** dup v0\.2s, v0\.s\[0\]
+** zip1 v0\.4s, v31\.4s, v0\.4s
+** ret
+*/
+
+/*
+** test_int32_1:
+** dup v0\.4s, w0
+** ret
+*/
+
+/*
+** test_int32_2:
+** fmov s0, w0
+** ins v0\.s\[1\], w1
+** dup v0\.2d, v0\.d\[0\]
+** ret
+*/
+
+/*
+** test_int32_3:
+** dup v31\.2s, w0
+** movi v0\.2s, 0
+** zip1 v0\.4s, v31\.4s, v0\.4s
+** ret
+*/
+
+/*
+** test_int32_4:
+** dup v31\.2s, w0
+** movi v0\.2s, 0
+** zip1 v0\.4s, v0\.4s, v31\.4s
+** ret
+*/
+
+/*
+** test_float64_1:
+** dup v0\.2d, v0\.d\[0\]
+** ret
+*/
+
+/*
+** test_int64_1:
+** dup v0\.2d, x0
+** ret
+*/