"<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
+;; Special purpose permute used by the predicate generation instructions.
+;; Unlike the normal permute patterns, these instructions operate on VNx16BI
+;; regardless of the element size, so that all input and output bits are
+;; well-defined. Operand 3 then indicates the size of the permute.
+(define_insn "@aarch64_sve_trn1_conv<mode>"
+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand" "Upa")
+ (match_operand:VNx16BI 2 "register_operand" "Upa")
+ (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero")]
+ UNSPEC_TRN1_CONV))]
+ "TARGET_SVE"
+ "trn1\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>"
+)
+
;; =========================================================================
;; == Conversions
;; =========================================================================
}
}
- /* Emit the TRN1 itself. */
+ /* Emit the TRN1 itself. We emit a TRN that operates on VNx16BI
+ operands but permutes them as though they had mode MODE. */
machine_mode mode = aarch64_sve_pred_mode (permute_size).require ();
- target = aarch64_target_reg (target, mode);
- emit_insn (gen_aarch64_sve (UNSPEC_TRN1, mode, target,
- gen_lowpart (mode, a),
- gen_lowpart (mode, b)));
+ target = aarch64_target_reg (target, GET_MODE (a));
+ rtx type_reg = CONST0_RTX (mode);
+ emit_insn (gen_aarch64_sve_trn1_conv (mode, target, a, b, type_reg));
return target;
}
UNSPEC_UZP2Q ; Used in aarch64-sve.md.
UNSPEC_ZIP1Q ; Used in aarch64-sve.md.
UNSPEC_ZIP2Q ; Used in aarch64-sve.md.
+ UNSPEC_TRN1_CONV ; Used in aarch64-sve.md.
UNSPEC_COND_CMPEQ_WIDE ; Used in aarch64-sve.md.
UNSPEC_COND_CMPGE_WIDE ; Used in aarch64-sve.md.
UNSPEC_COND_CMPGT_WIDE ; Used in aarch64-sve.md.
--- /dev/null
+/* { dg-additional-options "-O2 -fno-schedule-insns" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "arm_sve.h"
+
+/*
+** foo:
+** ptrue (p[0-7])\.d, all
+** pfalse (p[0-7])\.b
+** ptrue (p[0-7])\.s, all
+** trn1 (p[0-7])\.d, \2\.d, \3\.d
+** trn1 \2\.d, \1\.d, \3\.d
+** faddv (h[0-31]), \4\, (z[0-31]).h
+** faddv (h[0-31]), \2\, \6\.h
+** str \5, [x0]
+** str \7, [x0, 2]
+** ret
+*/
+void foo(svfloat16_t in, float16_t *dst) {
+ const svbool_t pg_q0 = svdupq_n_b16(1, 0, 1, 0, 0, 0, 0, 0);
+ const svbool_t pg_f0 = svdupq_n_b16(1, 0, 0, 0, 0, 0, 0, 0);
+ dst[0] = svaddv_f16(pg_f0, in);
+ dst[1] = svaddv_f16(pg_q0, in);
+}
+