char *aarch64_output_simd_orr_imm (rtx, unsigned);
char *aarch64_output_simd_and_imm (rtx, unsigned);
char *aarch64_output_simd_xor_imm (rtx, unsigned);
+char *aarch64_output_fmov (rtx);
char *aarch64_output_sve_mov_immediate (rtx);
char *aarch64_output_sve_ptrues (rtx);
bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
bool aarch64_sve_ptrue_svpattern_p (rtx, struct simd_immediate_info *);
bool aarch64_simd_valid_and_imm (rtx);
+bool aarch64_simd_valid_and_imm_fmov (rtx, unsigned int * = NULL);
bool aarch64_simd_valid_mov_imm (rtx);
bool aarch64_simd_valid_orr_imm (rtx);
bool aarch64_simd_valid_xor_imm (rtx);
[(set_attr "type" "neon_fp_abd_<stype><q>")]
)
-;; For AND (vector, register) and BIC (vector, immediate)
+;; For AND (vector, register), BIC (vector, immediate) and FMOV (register)
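+;; The FMOV alternative applies when the immediate masks in exactly the low
+;; 16 (with +fp16), 32 or 64 bits, since FMOV (register) zeroes the upper
+;; bits of the destination.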
(define_insn "and<mode>3<vczle><vczbe>"
[(set (match_operand:VDQ_I 0 "register_operand")
(and:VDQ_I (match_operand:VDQ_I 1 "register_operand")
(match_operand:VDQ_I 2 "aarch64_reg_or_and_imm")))]
"TARGET_SIMD"
- {@ [ cons: =0 , 1 , 2 ]
- [ w , w , w ] and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
- [ w , 0 , Db ] << aarch64_output_simd_and_imm (operands[2], <bitsize>);
+ {@ [ cons: =0 , 1 , 2 ; attrs: type ]
+ [ w , w , w ; neon_logic<q> ] and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
+ [ w , w , Df ; fmov ] << aarch64_output_fmov (operands[2]);
+ [ w , 0 , Db ; neon_logic<q> ] << aarch64_output_simd_and_imm (operands[2], <bitsize>);
}
- [(set_attr "type" "neon_logic<q>")]
)
;; For ORR (vector, register) and ORR (vector, immediate)
return aarch64_simd_valid_imm (op, NULL, AARCH64_CHECK_AND);
}
+/* Return true if OP is a valid SIMD AND immediate which allows the AND to
+   be optimized as an FMOV.  If ELT_BITSIZE is nonnull, use it to return the
+   number of bits to move.  */
+bool
+aarch64_simd_valid_and_imm_fmov (rtx op, unsigned int *elt_bitsize)
+{
+ machine_mode mode = GET_MODE (op);
+ gcc_assert (!aarch64_sve_mode_p (mode));
+
+ auto_vec<target_unit, 16> buffer;
+ unsigned int n_bytes = GET_MODE_SIZE (mode).to_constant ();
+ buffer.reserve (n_bytes);
+
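+  /* Encode the constant in its target memory layout, then read it back as
+     a single integer mask.  */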
+ bool ok = native_encode_rtx (mode, op, buffer, 0, n_bytes);
+ gcc_assert (ok);
+
+ auto mask = native_decode_int (buffer, 0, n_bytes, n_bytes * BITS_PER_UNIT);
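+  /* MASK must set exactly the low 16 (FP16 only), 32 or 64 bits: MASK + 1
+     is then a power of two whose log2 gives the number of bits to keep.  */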
+ int set_bit = wi::exact_log2 (mask + 1);
+ if ((set_bit == 16 && TARGET_SIMD_F16INST)
+ || set_bit == 32
+ || set_bit == 64)
+ {
+ if (elt_bitsize)
+ *elt_bitsize = set_bit;
+ return true;
+ }
+
+ return false;
+}
+
/* Return true if OP is a valid SIMD xor immediate for SVE. */
bool
aarch64_simd_valid_xor_imm (rtx op)
return aarch64_real_float_const_representable_p (r);
}
+/* Returns the string with the FMOV instruction which is equivalent to an
+   AND instruction with the SIMD immediate CONST_VECTOR.  */
+char*
+aarch64_output_fmov (rtx const_vector)
+{
+ bool is_valid;
+ static char templ[40];
+ char element_char;
+ unsigned int elt_bitsize;
+
+ is_valid = aarch64_simd_valid_and_imm_fmov (const_vector, &elt_bitsize);
+ gcc_assert (is_valid);
+
+ element_char = sizetochar (elt_bitsize);
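+  /* For example, a 32-bit mask gives "fmov\t%s0, %s1", which copies the low
+     32 bits of operand 1 and zeroes the upper bits of operand 0.  */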
+ snprintf (templ, sizeof (templ), "fmov\t%%%c0, %%%c1", element_char,
+ element_char);
+
+ return templ;
+}
+
/* Returns the string with the instruction for the SIMD immediate
* CONST_VECTOR of MODE and WIDTH. WHICH selects a move, and(bic) or orr. */
char*
(and (match_code "const_vector")
(match_test "aarch64_simd_valid_orr_imm (op)")))
+(define_constraint "Df"
+ "@internal
+ A constraint that matches a vector of immediates for and which can be
+ optimized as fmov."
+ (and (match_code "const_vector")
+ (match_test "aarch64_simd_valid_and_imm_fmov (op)")))
+
(define_constraint "Db"
"@internal
A constraint that matches vector of immediates for and/bic."
(define_predicate "aarch64_reg_or_and_imm"
(ior (match_operand 0 "register_operand")
(and (match_code "const_vector")
- (match_test "aarch64_simd_valid_and_imm (op)"))))
+ (ior (match_test "aarch64_simd_valid_and_imm (op)")
+ (match_test "aarch64_simd_valid_and_imm_fmov (op)")))))
(define_predicate "aarch64_reg_or_xor_imm"
(ior (match_operand 0 "register_operand")
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbig-endian" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC target ("arch=armv8-a")
+
+typedef int v2si __attribute__ ((vector_size (8)));
+typedef float v2sf __attribute__ ((vector_size (8)));
+typedef short v4hi __attribute__ ((vector_size (8)));
+typedef char v8qi __attribute__ ((vector_size (8)));
+typedef long v2di __attribute__ ((vector_size (16)));
+typedef double v2df __attribute__ ((vector_size (16)));
+typedef int v4si __attribute__ ((vector_size (16)));
+typedef float v4sf __attribute__ ((vector_size (16)));
+typedef short v8hi __attribute__ ((vector_size (16)));
+typedef char v16qi __attribute__ ((vector_size (16)));
+
+/*
+** f_v4hi:
+** fmov s0, s0
+** ret
+*/
+v4hi
+f_v4hi (v4hi x)
+{
+ return x & (v4hi){ 0, 0, 0xffff, 0xffff };
+}
+
+/*
+** g_v4hi:
+** movi d([0-9]+), 0xffff00000000ffff
+** and v0.8b, (?:v0.8b, v\1.8b|v\1.8b, v0.8b)
+** ret
+*/
+v4hi
+g_v4hi (v4hi x)
+{
+ return x & (v4hi){ 0xffff, 0, 0, 0xffff };
+}
+
+/*
+** f_v8hi:
+** fmov s0, s0
+** ret
+*/
+v8hi
+f_v8hi (v8hi x)
+{
+ return x & (v8hi){ 0, 0, 0, 0, 0, 0, 0xffff, 0xffff };
+}
+
+/*
+** g_v8hi:
+** fmov d0, d0
+** ret
+*/
+v8hi
+g_v8hi (v8hi x)
+{
+ return x & (v8hi){ 0, 0, 0, 0, 0xffff, 0xffff, 0xffff, 0xffff };
+}
+
+/*
+** f_v2si:
+** fmov s0, s0
+** ret
+*/
+v2si
+f_v2si (v2si x)
+{
+ return x & (v2si){ 0, 0xffffffff };
+}
+
+/*
+** f_v2di:
+** fmov d0, d0
+** ret
+*/
+v2di
+f_v2di (v2di x)
+{
+ return x & (v2di){ 0, 0xffffffffffffffff };
+}
+
+/*
+** g_v2di:
+** fmov s0, s0
+** ret
+*/
+v2di
+g_v2di (v2di x)
+{
+ return x & (v2di){ 0, 0xffffffff };
+}
+
+/*
+** f_v4si:
+** fmov s0, s0
+** ret
+*/
+v4si
+f_v4si (v4si x)
+{
+ return x & (v4si){ 0, 0, 0, 0xffffffff };
+}
+
+/*
+** h_v4si:
+** fmov d0, d0
+** ret
+*/
+v4si
+h_v4si (v4si x)
+{
+ return x & (v4si){ 0, 0, 0xffffffff, 0xffffffff };
+}
+
+/*
+** f_v8qi:
+** fmov s0, s0
+** ret
+*/
+v8qi
+f_v8qi (v8qi x)
+{
+ return x & (v8qi){ 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff };
+}
+
+/*
+** f_v16qi:
+** fmov d0, d0
+** ret
+*/
+v16qi
+f_v16qi (v16qi x)
+{
+ return x & (v16qi){ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+}
+
+/*
+** g_v16qi:
+** fmov s0, s0
+** ret
+*/
+v16qi
+g_v16qi (v16qi x)
+{
+ return x & (v16qi){ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff };
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mlittle-endian" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC target ("arch=armv8-a")
+
+typedef int v2si __attribute__ ((vector_size (8)));
+typedef float v2sf __attribute__ ((vector_size (8)));
+typedef short v4hi __attribute__ ((vector_size (8)));
+typedef char v8qi __attribute__ ((vector_size (8)));
+typedef long v2di __attribute__ ((vector_size (16)));
+typedef double v2df __attribute__ ((vector_size (16)));
+typedef int v4si __attribute__ ((vector_size (16)));
+typedef float v4sf __attribute__ ((vector_size (16)));
+typedef short v8hi __attribute__ ((vector_size (16)));
+typedef char v16qi __attribute__ ((vector_size (16)));
+
+/*
+** f_v4hi:
+** fmov s0, s0
+** ret
+*/
+v4hi
+f_v4hi (v4hi x)
+{
+ return x & (v4hi){ 0xffff, 0xffff, 0, 0 };
+}
+
+/*
+** g_v4hi:
+** movi d([0-9]+), 0xffff00000000ffff
+** and v0.8b, (?:v0.8b, v\1.8b|v\1.8b, v0.8b)
+** ret
+*/
+v4hi
+g_v4hi (v4hi x)
+{
+ return x & (v4hi){ 0xffff, 0, 0, 0xffff };
+}
+
+/*
+** f_v8hi:
+** fmov s0, s0
+** ret
+*/
+v8hi
+f_v8hi (v8hi x)
+{
+ return x & (v8hi){ 0xffff, 0xffff, 0, 0, 0, 0, 0, 0 };
+}
+
+/*
+** g_v8hi:
+** fmov d0, d0
+** ret
+*/
+v8hi
+g_v8hi (v8hi x)
+{
+ return x & (v8hi){ 0xffff, 0xffff, 0xffff, 0xffff, 0, 0, 0, 0 };
+}
+
+/*
+** f_v2si:
+** fmov s0, s0
+** ret
+*/
+v2si
+f_v2si (v2si x)
+{
+ return x & (v2si){ 0xffffffff, 0 };
+}
+
+/*
+** f_v2di:
+** fmov d0, d0
+** ret
+*/
+v2di
+f_v2di (v2di x)
+{
+ return x & (v2di){ 0xffffffffffffffff, 0 };
+}
+
+/*
+** g_v2di:
+** fmov s0, s0
+** ret
+*/
+v2di
+g_v2di (v2di x)
+{
+ return x & (v2di){ 0xffffffff, 0 };
+}
+
+/*
+** f_v4si:
+** fmov s0, s0
+** ret
+*/
+v4si
+f_v4si (v4si x)
+{
+ return x & (v4si){ 0xffffffff, 0, 0, 0 };
+}
+
+/*
+** h_v4si:
+** fmov d0, d0
+** ret
+*/
+v4si
+h_v4si (v4si x)
+{
+ return x & (v4si){ 0xffffffff, 0xffffffff, 0, 0 };
+}
+
+/*
+** f_v8qi:
+** fmov s0, s0
+** ret
+*/
+v8qi
+f_v8qi (v8qi x)
+{
+ return x & (v8qi){ 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0 };
+}
+
+/*
+** f_v16qi:
+** fmov d0, d0
+** ret
+*/
+v16qi
+f_v16qi (v16qi x)
+{
+ return x & (v16qi){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0, 0, 0, 0, 0, 0, 0, 0 };
+}
+
+/*
+** g_v16qi:
+** fmov s0, s0
+** ret
+*/
+v16qi
+g_v16qi (v16qi x)
+{
+ return x & (v16qi){ 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0 };
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbig-endian" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC target ("arch=armv8.2-a+fp16")
+
+typedef int v2si __attribute__ ((vector_size (8)));
+typedef short v4hi __attribute__ ((vector_size (8)));
+typedef char v8qi __attribute__ ((vector_size (8)));
+typedef long v2di __attribute__ ((vector_size (16)));
+typedef int v4si __attribute__ ((vector_size (16)));
+typedef short v8hi __attribute__ ((vector_size (16)));
+typedef char v16qi __attribute__ ((vector_size (16)));
+
+/*
+** f_v2di:
+** fmov h0, h0
+** ret
+*/
+v2di
+f_v2di (v2di x)
+{
+ return x & (v2di){ 0, 0xffff };
+}
+
+/*
+** f_v4si:
+** fmov h0, h0
+** ret
+*/
+v4si
+f_v4si (v4si x)
+{
+ return x & (v4si){ 0, 0, 0, 0xffff };
+}
+
+/*
+** f_v2si:
+** fmov h0, h0
+** ret
+*/
+v2si
+f_v2si (v2si x)
+{
+ return x & (v2si){ 0, 0xffff };
+}
+
+/*
+** f_v8hi:
+** fmov h0, h0
+** ret
+*/
+v8hi
+f_v8hi (v8hi x)
+{
+ return x & (v8hi){ 0, 0, 0, 0, 0, 0, 0, 0xffff };
+}
+
+/*
+** f_v4hi:
+** fmov h0, h0
+** ret
+*/
+v4hi
+f_v4hi (v4hi x)
+{
+ return x & (v4hi){ 0, 0, 0, 0xffff };
+}
+
+/*
+** f_v16qi:
+** fmov h0, h0
+** ret
+*/
+v16qi
+f_v16qi (v16qi x)
+{
+ return x & (v16qi){ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff };
+}
+
+/*
+** f_v8qi:
+** fmov h0, h0
+** ret
+*/
+v8qi
+f_v8qi (v8qi x)
+{
+ return x & (v8qi){ 0, 0, 0, 0, 0, 0, 0xff, 0xff };
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mlittle-endian" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC target ("arch=armv8.2-a+fp16")
+
+typedef int v2si __attribute__ ((vector_size (8)));
+typedef short v4hi __attribute__ ((vector_size (8)));
+typedef char v8qi __attribute__ ((vector_size (8)));
+typedef long v2di __attribute__ ((vector_size (16)));
+typedef int v4si __attribute__ ((vector_size (16)));
+typedef short v8hi __attribute__ ((vector_size (16)));
+typedef char v16qi __attribute__ ((vector_size (16)));
+
+/*
+** f_v2di:
+** fmov h0, h0
+** ret
+*/
+v2di
+f_v2di (v2di x)
+{
+ return x & (v2di){ 0xffff, 0 };
+}
+
+/*
+** f_v4si:
+** fmov h0, h0
+** ret
+*/
+v4si
+f_v4si (v4si x)
+{
+ return x & (v4si){ 0xffff, 0, 0, 0 };
+}
+
+/*
+** f_v2si:
+** fmov h0, h0
+** ret
+*/
+v2si
+f_v2si (v2si x)
+{
+ return x & (v2si){ 0xffff, 0 };
+}
+
+/*
+** f_v8hi:
+** fmov h0, h0
+** ret
+*/
+v8hi
+f_v8hi (v8hi x)
+{
+ return x & (v8hi){ 0xffff, 0, 0, 0, 0, 0, 0, 0 };
+}
+
+/*
+** f_v4hi:
+** fmov h0, h0
+** ret
+*/
+v4hi
+f_v4hi (v4hi x)
+{
+ return x & (v4hi){ 0xffff, 0, 0, 0 };
+}
+
+/*
+** f_v16qi:
+** fmov h0, h0
+** ret
+*/
+v16qi
+f_v16qi (v16qi x)
+{
+ return x & (v16qi){ 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+}
+
+/*
+** f_v8qi:
+** fmov h0, h0
+** ret
+*/
+v8qi
+f_v8qi (v8qi x)
+{
+ return x & (v8qi){ 0xff, 0xff, 0, 0, 0, 0, 0, 0 };
+}