rtx
expand (function_expander &e) const override
{
- insn_code icode = code_for_aarch64_sve (m_unspec, e.vector_mode (0));
+ auto mode = e.vector_mode (0); /* Vector mode 0 of the call; keys both lookups below.  */
+ insn_code icode = (e.type_suffix (0).bool_p /* If type suffix 0 has bool_p set...  */
+ ? code_for_aarch64_sve_acle (m_unspec, mode) /* ...use the "_acle" pattern for M_UNSPEC;  */
+ : code_for_aarch64_sve (m_unspec, mode)); /* ...otherwise use the plain pattern.  */
return e.use_exact_insn (icode);
}
"<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
-;; Special purpose permute used by the predicate generation instructions.
-;; Unlike the normal permute patterns, these instructions operate on VNx16BI
-;; regardless of the element size, so that all input and output bits are
-;; well-defined. Operand 3 then indicates the size of the permute.
-(define_insn "@aarch64_sve_trn1_conv<mode>"
+;; Special-purpose permutes used by the ACLE intrinsics and predicate
+;; generation instructions. Unlike the normal permute patterns, these
+;; instructions operate on VNx16BI regardless of the element size, so that
+;; all input and output bits are well-defined. Operand 3 then indicates
+;; the size of the permute.
+;;
+;; To make generation easier, this pattern embeds the permute type as the
+;; fourth operand to the unspec. On the one hand, this avoids overloading
+;; unspecs like UNSPEC_ZIP1 to represent two different operations. On the
+;; other hand, it avoids having a separate unspec for each variant, and
+;; having to map from one kind of unspec to the other.
+(define_expand "@aarch64_sve_<perm_insn><mode>_acle" ;; Expander: callers pass only operands 0-2.
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand")
+ (match_operand:VNx16BI 2 "register_operand")
+ (match_dup:PRED_ALL 3) ;; Not a caller operand; set by the C block below.
+ (const_int PERMUTE)] ;; Permute type embedded in the unspec (see comment above).
+ UNSPEC_PERMUTE_PRED))]
+ "TARGET_SVE" ;; The block below sets operand 3 to the zero constant of <MODE>mode.
+ {
+ operands[3] = CONST0_RTX (<MODE>mode);
+ }
+)
+
+(define_insn "*aarch64_sve_<perm_insn><mode>_acle" ;; Matcher for the RTL built by the expander of the same base name.
 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
 (unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand" "Upa")
 (match_operand:VNx16BI 2 "register_operand" "Upa")
- (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero")]
- UNSPEC_TRN1_CONV))]
+ (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero") ;; Zero constant; only its PRED_ALL mode matters.
+ (const_int PERMUTE)] ;; Selects which permute this is; drives <perm_insn> below.
+ UNSPEC_PERMUTE_PRED))]
 "TARGET_SVE"
- "trn1\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>"
+ "<perm_insn>\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>" ;; Element suffix comes from operand 3's mode.
 )
;; =========================================================================
operands but permutes them as though they had mode MODE. */
machine_mode mode = aarch64_sve_pred_mode (permute_size).require ();
target = aarch64_target_reg (target, GET_MODE (a));
- rtx type_reg = CONST0_RTX (mode);
- emit_insn (gen_aarch64_sve_trn1_conv (mode, target, a, b, type_reg));
+ emit_insn (gen_aarch64_sve_acle (UNSPEC_TRN1, mode, target, a, b));
return target;
}
UNSPEC_UZP2Q ; Used in aarch64-sve.md.
UNSPEC_ZIP1Q ; Used in aarch64-sve.md.
UNSPEC_ZIP2Q ; Used in aarch64-sve.md.
- UNSPEC_TRN1_CONV ; Used in aarch64-sve.md.
UNSPEC_COND_CMPEQ_WIDE ; Used in aarch64-sve.md.
UNSPEC_COND_CMPGE_WIDE ; Used in aarch64-sve.md.
UNSPEC_COND_CMPGT_WIDE ; Used in aarch64-sve.md.
UNSPEC_LUTI2 ; Used in aarch64-simd.md.
UNSPEC_LUTI4 ; Used in aarch64-simd.md.
+ ;; All used in aarch64-sve.md
+ UNSPEC_PERMUTE_PRED
+
;; All used in aarch64-sve2.md
UNSPEC_ADDQV
UNSPEC_ANDQV
--- /dev/null
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_sve.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** test1:
+** ...
+** ptrue (p[0-3])\.h, all
+** ...
+** trn1 p0\.h, p[0-3]\.h, \1\.h
+** ret
+*/
+svbool_t
+test1 ()
+{
+ return svtrn1_b16 (svptrue_b8 (), svptrue_b16 ()); /* b16 ptrue second: \1 is the last source.  */
+}
+
+/*
+** test2:
+** ...
+** ptrue (p[0-3])\.h, all
+** ...
+** trn1 p0\.h, \1\.h, p[0-3]\.h
+** ret
+*/
+svbool_t
+test2 ()
+{
+ return svtrn1_b16 (svptrue_b16 (), svptrue_b8 ()); /* b16 ptrue first: \1 is the first source.  */
+}
+
+/*
+** test3:
+** ...
+** ptrue (p[0-3])\.s, all
+** ...
+** trn1 p0\.s, p[0-3]\.s, \1\.s
+** ret
+*/
+svbool_t
+test3 ()
+{
+ return svtrn1_b32 (svptrue_b8 (), svptrue_b32 ()); /* b32 ptrue second: \1 is the last source.  */
+}
+
+/*
+** test4:
+** ...
+** ptrue (p[0-3])\.s, all
+** ...
+** trn1 p0\.s, \1\.s, p[0-3]\.s
+** ret
+*/
+svbool_t
+test4 ()
+{
+ return svtrn1_b32 (svptrue_b32 (), svptrue_b8 ()); /* b32 ptrue first: \1 is the first source.  */
+}
+
+/*
+** test5:
+** ...
+** ptrue (p[0-3])\.d, all
+** ...
+** trn1 p0\.d, p[0-3]\.d, \1\.d
+** ret
+*/
+svbool_t
+test5 ()
+{
+ return svtrn1_b64 (svptrue_b8 (), svptrue_b64 ()); /* b64 ptrue second: \1 is the last source.  */
+}
+
+/*
+** test6:
+** ...
+** ptrue (p[0-3])\.d, all
+** ...
+** trn1 p0\.d, \1\.d, p[0-3]\.d
+** ret
+*/
+svbool_t
+test6 ()
+{
+ return svtrn1_b64 (svptrue_b64 (), svptrue_b8 ()); /* b64 ptrue first: \1 is the first source.  */
+}
+
+#ifdef __cplusplus
+}
+#endif
--- /dev/null
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_sve.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** test1:
+** ...
+** ptrue (p[0-3])\.h, all
+** ...
+** trn2 p0\.h, p[0-3]\.h, \1\.h
+** ret
+*/
+svbool_t
+test1 ()
+{
+ return svtrn2_b16 (svptrue_b8 (), svptrue_b16 ()); /* b16 ptrue second: \1 is the last source.  */
+}
+
+/*
+** test2:
+** ...
+** ptrue (p[0-3])\.h, all
+** ...
+** trn2 p0\.h, \1\.h, p[0-3]\.h
+** ret
+*/
+svbool_t
+test2 ()
+{
+ return svtrn2_b16 (svptrue_b16 (), svptrue_b8 ()); /* b16 ptrue first: \1 is the first source.  */
+}
+
+/*
+** test3:
+** ...
+** ptrue (p[0-3])\.s, all
+** ...
+** trn2 p0\.s, p[0-3]\.s, \1\.s
+** ret
+*/
+svbool_t
+test3 ()
+{
+ return svtrn2_b32 (svptrue_b8 (), svptrue_b32 ()); /* b32 ptrue second: \1 is the last source.  */
+}
+
+/*
+** test4:
+** ...
+** ptrue (p[0-3])\.s, all
+** ...
+** trn2 p0\.s, \1\.s, p[0-3]\.s
+** ret
+*/
+svbool_t
+test4 ()
+{
+ return svtrn2_b32 (svptrue_b32 (), svptrue_b8 ()); /* b32 ptrue first: \1 is the first source.  */
+}
+
+/*
+** test5:
+** ...
+** ptrue (p[0-3])\.d, all
+** ...
+** trn2 p0\.d, p[0-3]\.d, \1\.d
+** ret
+*/
+svbool_t
+test5 ()
+{
+ return svtrn2_b64 (svptrue_b8 (), svptrue_b64 ()); /* b64 ptrue second: \1 is the last source.  */
+}
+
+/*
+** test6:
+** ...
+** ptrue (p[0-3])\.d, all
+** ...
+** trn2 p0\.d, \1\.d, p[0-3]\.d
+** ret
+*/
+svbool_t
+test6 ()
+{
+ return svtrn2_b64 (svptrue_b64 (), svptrue_b8 ()); /* b64 ptrue first: \1 is the first source.  */
+}
+
+#ifdef __cplusplus
+}
+#endif
--- /dev/null
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_sve.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** test1:
+** ...
+** ptrue (p[0-3])\.h, all
+** ...
+** zip1 p0\.h, p[0-3]\.h, \1\.h
+** ret
+*/
+svbool_t
+test1 ()
+{
+ return svzip1_b16 (svptrue_b8 (), svptrue_b16 ()); /* b16 ptrue second: \1 is the last source.  */
+}
+
+/*
+** test2:
+** ...
+** ptrue (p[0-3])\.h, all
+** ...
+** zip1 p0\.h, \1\.h, p[0-3]\.h
+** ret
+*/
+svbool_t
+test2 ()
+{
+ return svzip1_b16 (svptrue_b16 (), svptrue_b8 ()); /* b16 ptrue first: \1 is the first source.  */
+}
+
+/*
+** test3:
+** ...
+** ptrue (p[0-3])\.s, all
+** ...
+** zip1 p0\.s, p[0-3]\.s, \1\.s
+** ret
+*/
+svbool_t
+test3 ()
+{
+ return svzip1_b32 (svptrue_b8 (), svptrue_b32 ()); /* b32 ptrue second: \1 is the last source.  */
+}
+
+/*
+** test4:
+** ...
+** ptrue (p[0-3])\.s, all
+** ...
+** zip1 p0\.s, \1\.s, p[0-3]\.s
+** ret
+*/
+svbool_t
+test4 ()
+{
+ return svzip1_b32 (svptrue_b32 (), svptrue_b8 ()); /* b32 ptrue first: \1 is the first source.  */
+}
+
+/*
+** test5:
+** ...
+** ptrue (p[0-3])\.d, all
+** ...
+** zip1 p0\.d, p[0-3]\.d, \1\.d
+** ret
+*/
+svbool_t
+test5 ()
+{
+ return svzip1_b64 (svptrue_b8 (), svptrue_b64 ()); /* b64 ptrue second: \1 is the last source.  */
+}
+
+/*
+** test6:
+** ...
+** ptrue (p[0-3])\.d, all
+** ...
+** zip1 p0\.d, \1\.d, p[0-3]\.d
+** ret
+*/
+svbool_t
+test6 ()
+{
+ return svzip1_b64 (svptrue_b64 (), svptrue_b8 ()); /* b64 ptrue first: \1 is the first source.  */
+}
+
+#ifdef __cplusplus
+}
+#endif
--- /dev/null
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_sve.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** test1:
+** ...
+** ptrue (p[0-3])\.h, all
+** ...
+** zip2 p0\.h, p[0-3]\.h, \1\.h
+** ret
+*/
+svbool_t
+test1 ()
+{
+ return svzip2_b16 (svptrue_b8 (), svptrue_b16 ()); /* b16 ptrue second: \1 is the last source.  */
+}
+
+/*
+** test2:
+** ...
+** ptrue (p[0-3])\.h, all
+** ...
+** zip2 p0\.h, \1\.h, p[0-3]\.h
+** ret
+*/
+svbool_t
+test2 ()
+{
+ return svzip2_b16 (svptrue_b16 (), svptrue_b8 ()); /* b16 ptrue first: \1 is the first source.  */
+}
+
+/*
+** test3:
+** ...
+** ptrue (p[0-3])\.s, all
+** ...
+** zip2 p0\.s, p[0-3]\.s, \1\.s
+** ret
+*/
+svbool_t
+test3 ()
+{
+ return svzip2_b32 (svptrue_b8 (), svptrue_b32 ()); /* b32 ptrue second: \1 is the last source.  */
+}
+
+/*
+** test4:
+** ...
+** ptrue (p[0-3])\.s, all
+** ...
+** zip2 p0\.s, \1\.s, p[0-3]\.s
+** ret
+*/
+svbool_t
+test4 ()
+{
+ return svzip2_b32 (svptrue_b32 (), svptrue_b8 ()); /* b32 ptrue first: \1 is the first source.  */
+}
+
+/*
+** test5:
+** ...
+** ptrue (p[0-3])\.d, all
+** ...
+** zip2 p0\.d, p[0-3]\.d, \1\.d
+** ret
+*/
+svbool_t
+test5 ()
+{
+ return svzip2_b64 (svptrue_b8 (), svptrue_b64 ()); /* b64 ptrue second: \1 is the last source.  */
+}
+
+/*
+** test6:
+** ...
+** ptrue (p[0-3])\.d, all
+** ...
+** zip2 p0\.d, \1\.d, p[0-3]\.d
+** ret
+*/
+svbool_t
+test6 ()
+{
+ return svzip2_b64 (svptrue_b64 (), svptrue_b8 ()); /* b64 ptrue first: \1 is the first source.  */
+}
+
+#ifdef __cplusplus
+}
+#endif
--- /dev/null
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_sve.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** test1:
+** ...
+** ptrue (p[0-3])\.h, all
+** ...
+** uzp1 p0\.h, p[0-3]\.h, \1\.h
+** ret
+*/
+svbool_t
+test1 ()
+{
+ return svuzp1_b16 (svptrue_b8 (), svptrue_b16 ()); /* b16 ptrue second: \1 is the last source.  */
+}
+
+/*
+** test2:
+** ...
+** ptrue (p[0-3])\.h, all
+** ...
+** uzp1 p0\.h, \1\.h, p[0-3]\.h
+** ret
+*/
+svbool_t
+test2 ()
+{
+ return svuzp1_b16 (svptrue_b16 (), svptrue_b8 ()); /* b16 ptrue first: \1 is the first source.  */
+}
+
+/*
+** test3:
+** ...
+** ptrue (p[0-3])\.s, all
+** ...
+** uzp1 p0\.s, p[0-3]\.s, \1\.s
+** ret
+*/
+svbool_t
+test3 ()
+{
+ return svuzp1_b32 (svptrue_b8 (), svptrue_b32 ()); /* b32 ptrue second: \1 is the last source.  */
+}
+
+/*
+** test4:
+** ...
+** ptrue (p[0-3])\.s, all
+** ...
+** uzp1 p0\.s, \1\.s, p[0-3]\.s
+** ret
+*/
+svbool_t
+test4 ()
+{
+ return svuzp1_b32 (svptrue_b32 (), svptrue_b8 ()); /* b32 ptrue first: \1 is the first source.  */
+}
+
+/*
+** test5:
+** ...
+** ptrue (p[0-3])\.d, all
+** ...
+** uzp1 p0\.d, p[0-3]\.d, \1\.d
+** ret
+*/
+svbool_t
+test5 ()
+{
+ return svuzp1_b64 (svptrue_b8 (), svptrue_b64 ()); /* b64 ptrue second: \1 is the last source.  */
+}
+
+/*
+** test6:
+** ...
+** ptrue (p[0-3])\.d, all
+** ...
+** uzp1 p0\.d, \1\.d, p[0-3]\.d
+** ret
+*/
+svbool_t
+test6 ()
+{
+ return svuzp1_b64 (svptrue_b64 (), svptrue_b8 ()); /* b64 ptrue first: \1 is the first source.  */
+}
+
+#ifdef __cplusplus
+}
+#endif
--- /dev/null
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_sve.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** test1:
+** ...
+** ptrue (p[0-3])\.h, all
+** ...
+** uzp2 p0\.h, p[0-3]\.h, \1\.h
+** ret
+*/
+svbool_t
+test1 ()
+{
+ return svuzp2_b16 (svptrue_b8 (), svptrue_b16 ()); /* b16 ptrue second: \1 is the last source.  */
+}
+
+/*
+** test2:
+** ...
+** ptrue (p[0-3])\.h, all
+** ...
+** uzp2 p0\.h, \1\.h, p[0-3]\.h
+** ret
+*/
+svbool_t
+test2 ()
+{
+ return svuzp2_b16 (svptrue_b16 (), svptrue_b8 ()); /* b16 ptrue first: \1 is the first source.  */
+}
+
+/*
+** test3:
+** ...
+** ptrue (p[0-3])\.s, all
+** ...
+** uzp2 p0\.s, p[0-3]\.s, \1\.s
+** ret
+*/
+svbool_t
+test3 ()
+{
+ return svuzp2_b32 (svptrue_b8 (), svptrue_b32 ()); /* b32 ptrue second: \1 is the last source.  */
+}
+
+/*
+** test4:
+** ...
+** ptrue (p[0-3])\.s, all
+** ...
+** uzp2 p0\.s, \1\.s, p[0-3]\.s
+** ret
+*/
+svbool_t
+test4 ()
+{
+ return svuzp2_b32 (svptrue_b32 (), svptrue_b8 ()); /* b32 ptrue first: \1 is the first source.  */
+}
+
+/*
+** test5:
+** ...
+** ptrue (p[0-3])\.d, all
+** ...
+** uzp2 p0\.d, p[0-3]\.d, \1\.d
+** ret
+*/
+svbool_t
+test5 ()
+{
+ return svuzp2_b64 (svptrue_b8 (), svptrue_b64 ()); /* b64 ptrue second: \1 is the last source.  */
+}
+
+/*
+** test6:
+** ...
+** ptrue (p[0-3])\.d, all
+** ...
+** uzp2 p0\.d, \1\.d, p[0-3]\.d
+** ret
+*/
+svbool_t
+test6 ()
+{
+ return svuzp2_b64 (svptrue_b64 (), svptrue_b8 ()); /* b64 ptrue first: \1 is the first source.  */
+}
+
+#ifdef __cplusplus
+}
+#endif