VECTOR_MODES (FLOAT, 16); /* V4SF V2DF. */
VECTOR_MODE (INT, DI, 1); /* V1DI. */
VECTOR_MODE (FLOAT, DF, 1); /* V1DF. */
-VECTOR_MODE (FLOAT, HF, 2); /* V2HF. */
+VECTOR_MODES (INT, 2); /* V2QI. */
+VECTOR_MODES (INT, 4); /* V4QI V2HI. */
+VECTOR_MODES (FLOAT, 4); /* V2BF V2HF. */
/* Integer vector modes used to represent intermediate widened values in some
instructions. Not intended to be moved to and from registers or memory. */
int aarch64_branch_cost (bool, bool);
enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx);
bool aarch64_advsimd_struct_mode_p (machine_mode mode);
+bool aarch64_advsimd_sub_dword_mode_p (machine_mode mode);
opt_machine_mode aarch64_v64_mode (scalar_mode);
opt_machine_mode aarch64_v128_mode (scalar_mode);
opt_machine_mode aarch64_full_sve_mode (scalar_mode);
(define_subst_attr "vczbe" "add_vec_concat_subst_be" "" "_vec_concatz_be")
(define_expand "mov<mode>"
- [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
- (match_operand:VALL_F16 1 "general_operand"))]
+ [(set (match_operand:VALL_F16_SUB64 0 "nonimmediate_operand")
+ (match_operand:VALL_F16_SUB64 1 "general_operand"))]
"TARGET_FLOAT"
"
/* Force the operand into a register if it is not an
aarch64_expand_vector_init (operands[0], operands[1]);
DONE;
}
- else if (!aarch64_simd_imm_zero (operands[1], <MODE>mode)
+ else if (!aarch64_advsimd_sub_dword_mode_p (<MODE>mode)
+ && !aarch64_simd_imm_zero (operands[1], <MODE>mode)
&& !aarch64_simd_special_constant_p (operands[1], <MODE>mode)
&& !aarch64_simd_valid_mov_imm (operands[1])
&& !aarch64_const_vec_fmov_p (operands[1]))
}
)
+;; Move pattern for the sub-64-bit vector modes in VSUB64.  The insn is only
+;; valid when the destination is a register, or the source is accepted by
+;; aarch64_simd_reg_or_zero, or the source is a CONST_VECTOR.  Alternatives
+;; whose template is "#" have no direct assembly form and rely on the split
+;; below, which runs after reload when operand 0 is a REG.
+(define_insn_and_split "*aarch64_simd_mov<mode>"
+ [(set (match_operand:VSUB64 0 "nonimmediate_operand")
+ (match_operand:VSUB64 1 "general_operand"))]
+ "TARGET_FLOAT
+ && (register_operand (operands[0], <MODE>mode)
+ || aarch64_simd_reg_or_zero (operands[1], <MODE>mode)
+ || CONST_VECTOR_P (operands[1]))"
+ {@ [cons: =0, 1; attrs: type, arch]
+ [r , Dz ; mov_imm , * ] mov\t%w0, 0
+ [r , rZ ; mov_reg , * ] mov\t%w0, %w1
+ [r , Da ; mov_imm , * ] #
+ [r , w ; mov_reg , simd ] #
+ [r , m ; load_4 , * ] ldr<size>\t%w0, %1
+ [w , w ; neon_logic , simd ] mov\t%0.8b, %1.8b
+ [w , m ; neon_load1_1reg , simd ] ldr\t%<vstype>0, %1
+ [w , Dz ; neon_move , simd ] movi\t%0.2d, #0
+ [m , rZ ; store_4 , * ] str<size>\t%w1, %0
+ [m , w ; neon_store1_1reg , simd ] str\t%<vstype>1, %0
+ }
+ "&& reload_completed
+ && REG_P (operands[0])"
+ [(const_int 0)]
+ {
+ /* Constant vector source: pack all elements into one integer and load
+ it into the destination register as an SImode immediate.  */
+ if (CONST_VECTOR_P (operands[1]))
+ {
+ int elt_bitsize
+ = GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (operands[1])));
+ int n_elts = CONST_VECTOR_NUNITS (operands[1]).to_constant ();
+ int val = 0;
+ /* Element 0 decides whether the elements are read as CONST_INTs or
+ go through aarch64_reinterpret_float_as_int.  */
+ bool int_vector_p = CONST_INT_P (CONST_VECTOR_ELT (operands[1], 0));
+ unsigned HOST_WIDE_INT eltval;
+ rtx elt;
+ /* The element visited first ends up in the most significant bits of
+ VAL.  For little-endian that is the highest-numbered element, so
+ element 0 lands in the least significant bits.  */
+ for (int i = 0; i < n_elts; i++)
+ {
+ elt = CONST_VECTOR_ELT (operands[1], BYTES_BIG_ENDIAN
+ ? i
+ : n_elts - 1 - i);
+ if (int_vector_p)
+ eltval = INTVAL (elt);
+ else
+ {
+ bool res = aarch64_reinterpret_float_as_int (elt, &eltval);
+ gcc_assert (res);
+ }
+
+ /* Mask to the element width so that a sign-extended INTVAL does not
+ spill into the neighbouring element.  */
+ val = (val << elt_bitsize) + (eltval & ((1 << elt_bitsize) - 1));
+ }
+ emit_move_insn (gen_rtx_REG (SImode, REGNO (operands[0])),
+ GEN_INT (val));
+ }
+ /* Register source: emit the move in the scalar mode given by VSC.  */
+ else if (REG_P (operands[1]))
+ aarch64_simd_emit_reg_reg_move (operands, <VSC>mode, 1);
+ /* NOTE(review): a source that is neither a CONST_VECTOR nor a REG emits
+ no insns before DONE; confirm this is intended.  */
+ DONE;
+ }
+ [(set_attr "type" "mov_reg")]
+)
+
;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes.
case E_V4x2DFmode:
return (TARGET_FLOAT || any_target_p) ? VEC_ADVSIMD | VEC_STRUCT : 0;
+ /* 16-bit Advanced SIMD vectors. */
+ case E_V2QImode:
+ /* 32-bit Advanced SIMD vectors. */
+ case E_V2HFmode:
+ case E_V2BFmode:
+ case E_V2HImode:
+ case E_V4QImode:
/* 64-bit Advanced SIMD vectors. */
case E_V8QImode:
case E_V4HImode:
return (aarch64_classify_vector_mode (mode) == (VEC_ADVSIMD | VEC_STRUCT));
}
+/* Return true if MODE is classified as a plain Advanced SIMD vector mode
+   and is known to be narrower than 64 bits.  */
+bool
+aarch64_advsimd_sub_dword_mode_p (machine_mode mode)
+{
+  if (aarch64_classify_vector_mode (mode) != VEC_ADVSIMD)
+    return false;
+
+  return known_lt (GET_MODE_BITSIZE (mode), 64);
+}
+
/* Return true if MODE is any of the data vector modes, including
structure modes. */
static bool
{
struct expand_vec_perm_d d;
+ if (aarch64_advsimd_sub_dword_mode_p (op_mode))
+ return false;
+
/* Check whether the mask can be applied to a single vector. */
if (sel.ninputs () == 1
|| (op0 && rtx_equal_p (op0, op1)))
(and (match_code "const_int")
(match_test "aarch64_simd_scalar_immediate_valid_for_move (op,
QImode)")))
+;; "Da" accepts only CONST_VECTOR operands, and only when their mode
+;; satisfies aarch64_advsimd_sub_dword_mode_p.
+(define_constraint "Da"
+ "@internal
+ A constraint that matches all sub-64-bit AdvSIMD vectors."
+ (and (match_code "const_vector")
+ (match_test "aarch64_advsimd_sub_dword_mode_p (GET_MODE (op))")))
(define_constraint "Dt"
"@internal
;; All Advanced SIMD integer modes
(define_mode_iterator VALLI [VDQ_BHSI V2DI])
+;; All sub-64-bit vector modes: the 16-bit V2QI and the 32-bit V4QI, V2HI,
+;; V2HF and V2BF.
+(define_mode_iterator VSUB64 [V2QI V4QI V2HI V2HF V2BF])
+
;; All Advanced SIMD modes suitable for moving, loading, and storing.
(define_mode_iterator VALL_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
V4HF V8HF V4BF V8BF V2SF V4SF V2DF])
+;; All Advanced SIMD modes suitable for moving, loading, and storing
+;; (VALL_F16), plus all sub-64-bit vector modes (VSUB64).
+(define_mode_iterator VALL_F16_SUB64 [VALL_F16 VSUB64])
+
;; The VALL_F16 modes except the 128-bit 2-element ones.
(define_mode_iterator VALL_F16_NO_V2Q [V8QI V16QI V4HI V8HI V2SI V4SI
V4HF V8HF V2SF V4SF])
(define_mode_attr s [(HF "h") (SF "s") (DF "d") (SI "s") (DI "d")])
;; Give the length suffix letter for a sign- or zero-extension.
-(define_mode_attr size [(QI "b") (HI "h") (SI "w")])
+;; The vector-mode entries supply the suffix for ldr<size>/str<size>:
+;; V2QI takes "h", and V4QI, V2HI, V2HF and V2BF take no suffix.
+(define_mode_attr size [(QI "b") (HI "h") (SI "w") (HF "") (BF "") (SF "")
+ (V2QI "h") (V4QI "") (V2HI "")
+ (V2HF "") (V2BF "")])
;; Give the number of bits in the mode
(define_mode_attr sizen [(QI "8") (HI "16") (SI "32") (DI "64")])
(VNx4SI "v2si") (VNx4SF "v2sf")
(VNx2DI "di") (VNx2DF "df")])
+;; Sub-64-bit vector mode to a scalar mode: V2QI maps to HI, the other
+;; integer-element modes to SI, and the HF/BF-element modes to SF.
+(define_mode_attr VSC [(V4QI "SI") (V2QI "HI")
+ (V2HI "SI") (V2HF "SF") (V2BF "SF")])
+
(define_mode_attr vnx [(V4SI "vnx4si") (V2DI "vnx2di")])
;; 64-bit container modes the inner or scalar source mode.
(V2SI "q") (V2SF "q")
(DI "q") (DF "q")])
+;; Operand-modifier letter for a sub-64-bit vector mode, so that
+;; %<vstype>0 expands to %h0 for V2QI and %s0 for the other modes.
+(define_mode_attr vstype [(V4QI "s") (V2QI "h")
+ (V2HI "s") (V2BF "s") (V2HF "s")])
+
;; Define corresponding core/FP element mode for each vector mode.
(define_mode_attr vw [(V8QI "w") (V16QI "w")
(V4HI "w") (V8HI "w")
/* { dg-final { scan-tree-dump "add new stmt: \[^\n\r]*COMPLEX_ADD_ROT270" "slp1" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump "add new stmt: \[^\n\r]*COMPLEX_ADD_ROT90" "slp1" { xfail *-*-* } } } */
+
+/* { dg-final { scan-tree-dump "Found COMPLEX_ADD_ROT90" "slp1" { xfail arm*-*-* } } } */
+/* { dg-final { scan-tree-dump "Found COMPLEX_ADD_ROT270" "slp1" { xfail arm*-*-* } } } */
#define N 16
#include "complex-mla-template.c"
+/* { dg-final { scan-tree-dump-times "add new stmt:\[^\n\r]*COMPLEX_FMA" 1 "slp1" { xfail *-*-* } } } */
+
/* { dg-final { scan-tree-dump "Found COMPLEX_FMA_CONJ" "slp1" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump "Found COMPLEX_FMA" "slp1" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "Found COMPLEX_FMA" "slp1" { xfail arm*-*-* } } } */
#define N 16
#include "complex-mul-template.c"
-/* { dg-final { scan-tree-dump "Found COMPLEX_MUL_CONJ" "slp1" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump "Found COMPLEX_MUL" "slp1" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "add new stmt:\[^\n\r]*COMPLEX_MUL_CONJ" 1 "slp1" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "add new stmt:\[^\n\r]*COMPLEX_MUL" 1 "slp1" { xfail *-*-* } } } */
+
+/* { dg-final { scan-tree-dump "Found COMPLEX_MUL_CONJ" "slp1" { xfail arm*-*-* } } } */
+/* { dg-final { scan-tree-dump "Found COMPLEX_MUL" "slp1" { xfail arm*-*-* } } } */
TEST_ALL (VEC_PERM)
/* We should use one DUP for each of the 8-, 16- and 32-bit types,
- although we currently use LD1RW for _Float16. We should use two
+ although for now both _Float16 elements are inserted with umov + ins. We should use two
DUPs for each of the three 64-bit types. */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, [hw]} 2 } } */
-/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, [sw]} 2 } } */
-/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, [sw]} 3 } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, [dx]} 9 } } */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h} 2 } } */
+/* { dg-final { scan-assembler-times {\tins\tv[0-9]+\.h\[0\], w[0-9]+} 3 } } */
+/* { dg-final { scan-assembler-times {\tins\tv[0-9]+\.h\[1\], w[0-9]+} 3 } } */
/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
/* { dg-final { scan-assembler-not {\tzip2\t} } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
/* { dg-final { scan-assembler-not {\tldr} } } */
-/* { dg-final { scan-assembler-times {\tstr} 2 } } */
-/* { dg-final { scan-assembler-times {\tstr\th[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {\tstr} } } */
/* { dg-final { scan-assembler-not {\tuqdec} } } */