;; Integer unary arithmetic predicated with a PTRUE.
(define_insn "@aarch64_pred_<optab><mode>"
- [(set (match_operand:SVE_I 0 "register_operand")
- (unspec:SVE_I
+ [(set (match_operand:SVE_VDQ_I 0 "register_operand")
+ (unspec:SVE_VDQ_I
[(match_operand:<VPRED> 1 "register_operand")
- (SVE_INT_UNARY:SVE_I
- (match_operand:SVE_I 2 "register_operand"))]
+ (SVE_INT_UNARY:SVE_VDQ_I
+ (match_operand:SVE_VDQ_I 2 "register_operand"))]
UNSPEC_PRED_X))]
"TARGET_SVE"
{@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
- [ w , Upl , 0 ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
- [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
+ [ w , Upl , 0 ; * ] <sve_int_op>\t%Z0.<Vetype>, %1/m, %Z2.<Vetype>
+ [ ?&w , Upl , w ; yes ] movprfx\t%Z0, %Z2\;<sve_int_op>\t%Z0.<Vetype>, %1/m, %Z2.<Vetype>
}
)
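Advanced SIMD and SVE share a vector register file: v<n> is the low 128 bits
of z<n>.  That overlap is what the rewritten templates rely on; the %Z operand
modifier prints a vector operand as a z register, so the same SVE instruction
works whether the operand is a full SVE vector or a 64-bit/128-bit Advanced
SIMD vector.  As an illustrative sketch (the register allocation is invented
for the example), the first alternative for a V8HI popcount with the data in
v1 and an all-true predicate in p0 assembles to:

	cnt	z1.h, p0/m, z1.h	// z1 aliases v1; only the low 128 bits are consumed
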
;; -------------------------------------------------------------------------
;; ---- [INT] General unary arithmetic corresponding to unspecs
;; -------------------------------------------------------------------------

;; All SVE integer vector modes and Advanced SIMD 64-bit vector
;; element modes.
(define_mode_iterator SVE_I_SIMD_DI [SVE_I V2DI])

+;; All SVE and Advanced SIMD integer vector modes.
+(define_mode_iterator SVE_VDQ_I [SVE_I VDQ_I])
+
;; SVE integer vector modes whose elements are 16 bits or wider.
(define_mode_iterator SVE_HSDI [VNx8HI VNx4HI VNx2HI
VNx4SI VNx2SI
				VNx2DI])

			 (VNx32BF "VNx8BI")
(VNx16SI "VNx4BI") (VNx16SF "VNx4BI")
(VNx8DI "VNx2BI") (VNx8DF "VNx2BI")
+ (V8QI "VNx8BI") (V16QI "VNx16BI")
+ (V4HI "VNx4BI") (V8HI "VNx8BI") (V2SI "VNx2BI")
(V4SI "VNx4BI") (V2DI "VNx2BI")])
;; ...and again in lower case.
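
The new VPRED entries give each Advanced SIMD integer mode the SVE predicate
mode that governs its lanes (V8HI -> VNx8BI, V2DI -> VNx2BI, and so on), which
is what <VPRED> in the pattern above resolves to for those modes.  Below is a
minimal sketch of how an expander can route an Advanced SIMD popcount through
the predicated pattern.  It is illustrative only, not one of the hunks shown
here; it assumes the existing VDQHSD iterator (V4HI V8HI V2SI V4SI V2DI) and
the existing aarch64_ptrue_reg helper for materializing an all-true predicate,
and it relies on gen_aarch64_pred_popcount<mode> being generated from the
"@aarch64_pred_<optab><mode>" pattern above:

(define_expand "popcount<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand")
	(popcount:VDQHSD (match_operand:VDQHSD 1 "register_operand")))]
  "TARGET_SVE"
  {
    /* Predicate every lane; <VPRED> picks the matching predicate mode.  */
    rtx pred = aarch64_ptrue_reg (<VPRED>mode);
    emit_insn (gen_aarch64_pred_popcount<mode> (operands[0], pred,
						operands[1]));
    DONE;
  }
)
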
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8.2-a+sve -fno-vect-cost-model -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+** f_v4hi:
+** ptrue (p[0-7]).b, all
+** ldr d([0-9]+), \[x0\]
+** cnt z\2.h, \1/m, z\2.h
+** str d\2, \[x1\]
+** ret
+*/
+void
+f_v4hi (unsigned short *__restrict b, unsigned short *__restrict d)
+{
+ d[0] = __builtin_popcount (b[0]);
+ d[1] = __builtin_popcount (b[1]);
+ d[2] = __builtin_popcount (b[2]);
+ d[3] = __builtin_popcount (b[3]);
+}
+
+/*
+** f_v8hi:
+** ptrue (p[0-7]).b, all
+** ldr q([0-9]+), \[x0\]
+** cnt z\2.h, \1/m, z\2.h
+** str q\2, \[x1\]
+** ret
+*/
+void
+f_v8hi (unsigned short *__restrict b, unsigned short *__restrict d)
+{
+ d[0] = __builtin_popcount (b[0]);
+ d[1] = __builtin_popcount (b[1]);
+ d[2] = __builtin_popcount (b[2]);
+ d[3] = __builtin_popcount (b[3]);
+ d[4] = __builtin_popcount (b[4]);
+ d[5] = __builtin_popcount (b[5]);
+ d[6] = __builtin_popcount (b[6]);
+ d[7] = __builtin_popcount (b[7]);
+}
+
+/*
+** f_v2si:
+** ptrue (p[0-7]).b, all
+** ldr d([0-9]+), \[x0\]
+** cnt z\2.s, \1/m, z\2.s
+** str d\2, \[x1\]
+** ret
+*/
+void
+f_v2si (unsigned int *__restrict b, unsigned int *__restrict d)
+{
+ d[0] = __builtin_popcount (b[0]);
+ d[1] = __builtin_popcount (b[1]);
+}
+
+/*
+** f_v4si:
+** ptrue (p[0-7]).b, all
+** ldr q([0-9]+), \[x0\]
+** cnt z\2.s, \1/m, z\2.s
+** str q\2, \[x1\]
+** ret
+*/
+void
+f_v4si (unsigned int *__restrict b, unsigned int *__restrict d)
+{
+ d[0] = __builtin_popcount (b[0]);
+ d[1] = __builtin_popcount (b[1]);
+ d[2] = __builtin_popcount (b[2]);
+ d[3] = __builtin_popcount (b[3]);
+}
+
+/*
+** f_v2di:
+** ptrue (p[0-7]).b, all
+** ldr q([0-9]+), \[x0\]
+** cnt z\2.d, \1/m, z\2.d
+** str q\2, \[x1\]
+** ret
+*/
+void
+f_v2di (unsigned long *__restrict b, unsigned long *__restrict d)
+{
+ d[0] = __builtin_popcountll (b[0]);
+ d[1] = __builtin_popcountll (b[1]);
+}