DEF_HELPER_FLAGS_4(sve2p1_uminqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve2p1_uminqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve2p1_uminqv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(pext, TCG_CALL_NO_RWG, void, ptr, i32, i32)
%rn_ax2 6:4 !function=times_2
%pnd 0:3 !function=plus_8
+%pnn 5:3 !function=plus_8
###########################################################################
# Named attribute sets. These are used to make nice(er) names
WHILE_gt_cnt2 00100101 .. 1 ..... 0100 . 0 ..... 1 . ... @while_cnt
WHILE_gt_cnt4 00100101 .. 1 ..... 0110 . 0 ..... 1 . ... @while_cnt
+# SVE2.1 extract mask predicate from predicate-as-counter
+&pext rd rn esz imm
+PEXT_1 00100101 esz:2 1 00000 0111 00 imm:2 ... 1 rd:4 &pext rn=%pnn
+PEXT_2 00100101 esz:2 1 00000 0111 010 imm:1 ... 1 rd:4 &pext rn=%pnn
+
### SVE Integer Wide Immediate - Unpredicated Group
# SVE broadcast floating-point immediate (unpredicated)
#undef DO_FCVTLT
#undef DO_FCVTNT
+
+void HELPER(pext)(void *vd, uint32_t png, uint32_t desc)
+{
+ int pl = FIELD_EX32(desc, PREDDESC, OPRSZ);
+ int vl = pl * 8;
+ unsigned v_esz = FIELD_EX32(desc, PREDDESC, ESZ);
+ int part = FIELD_EX32(desc, PREDDESC, DATA);
+ DecodeCounter p = decode_counter(png, vl, v_esz);
+ uint64_t mask = pred_esz_masks[v_esz + p.lg2_stride];
+ ARMPredicateReg *d = vd;
+
+ /*
+ * Convert from element count to byte count and adjust
+ * for the portion of the 4*VL counter to be extracted.
+ */
+ int b_count = (p.count << v_esz) - vl * part;
+
+ memset(d, 0, sizeof(*d));
+ if (p.invert) {
+ if (b_count <= 0) {
+ do_whilel(vd, mask, vl, vl);
+ } else if (b_count < vl) {
+ do_whileg(vd, mask, vl - b_count, vl);
+ }
+ } else if (b_count > 0) {
+ do_whilel(vd, mask, MIN(b_count, vl), vl);
+ }
+}
return true;
}
+static bool do_pext(DisasContext *s, arg_pext *a, int n)
+{
+ TCGv_i32 t_png;
+ TCGv_ptr t_pd;
+ int pl;
+
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ t_png = tcg_temp_new_i32();
+ tcg_gen_ld16u_i32(t_png, tcg_env,
+ pred_full_reg_offset(s, a->rn) ^
+ (HOST_BIG_ENDIAN ? 6 : 0));
+
+ t_pd = tcg_temp_new_ptr();
+ pl = pred_full_reg_size(s);
+
+ for (int i = 0; i < n; ++i) {
+ int rd = (a->rd + i) % 16;
+ int part = a->imm * n + i;
+ unsigned desc = 0;
+
+ desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pl);
+ desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
+ desc = FIELD_DP32(desc, PREDDESC, DATA, part);
+
+ tcg_gen_addi_ptr(t_pd, tcg_env, pred_full_reg_offset(s, rd));
+ gen_helper_pext(t_pd, t_png, tcg_constant_i32(desc));
+ }
+ return true;
+}
+
+TRANS_FEAT(PEXT_1, aa64_sme2_or_sve2p1, do_pext, a, 1)
+TRANS_FEAT(PEXT_2, aa64_sme2_or_sve2p1, do_pext, a, 2)
+
/*
*** SVE Integer Wide Immediate - Unpredicated Group
*/
float32 sve_f16_to_f32(float16 f, float_status *fpst);
float16 sve_f32_to_f16(float32 f, float_status *fpst);
+/*
+ * Decode helper functions for predicate as counter.
+ */
+
+typedef struct {
+ unsigned count;
+ unsigned lg2_stride;
+ bool invert;
+} DecodeCounter;
+
+static inline DecodeCounter
+decode_counter(unsigned png, unsigned vl, unsigned v_esz)
+{
+ DecodeCounter ret = { };
+
+ /* C.f. Arm pseudocode CounterToPredicate. */
+ if (likely(png & 0xf)) {
+ unsigned p_esz = ctz32(png);
+
+ /*
+ * maxbit = log2(pl(bits) * 4)
+ * = log2(vl(bytes) * 4)
+ * = log2(vl) + 2
+ * maxbit_mask = ones<maxbit:0>
+ * = (1 << (maxbit + 1)) - 1
+ * = (1 << (log2(vl) + 2 + 1)) - 1
+ * = (1 << (log2(vl) + 3)) - 1
+ * = (pow2ceil(vl) << 3) - 1
+ */
+ ret.count = png & (((unsigned)pow2ceil(vl) << 3) - 1);
+ ret.count >>= p_esz + 1;
+
+ ret.invert = (png >> 15) & 1;
+
+ /*
+ * The Arm pseudocode for CounterToPredicate expands the count to
+ * a set of bits, and then the operation proceeds as for the original
+ * interpretation of predicates as a set of bits.
+ *
+ * We can avoid the expansion by adjusting the count and supplying
+ * an element stride.
+ */
+ if (unlikely(p_esz != v_esz)) {
+ if (p_esz < v_esz) {
+ /*
+ * For predicate esz < vector esz, the expanded predicate
+ * will have more bits set than will be consumed.
+ * Adjust the count down, rounding up.
+ * Consider p_esz = MO_8, v_esz = MO_64, count 14:
+ * The expanded predicate would be
+ * 0011 1111 1111 1111
+ * The significant bits are
+ * ...1 ...1 ...1 ...1
+ */
+ unsigned shift = v_esz - p_esz;
+ unsigned trunc = ret.count >> shift;
+ ret.count = trunc + (ret.count != (trunc << shift));
+ } else {
+ /*
+ * For predicate esz > vector esz, the expanded predicate
+ * will have bits set only at power-of-two multiples of
+ * the vector esz. Bits at other multiples will all be
+ * false. Adjust the count up, and supply the caller
+ * with a stride of elements to skip.
+ */
+ unsigned shift = p_esz - v_esz;
+ ret.count <<= shift;
+ ret.lg2_stride = shift;
+ }
+ }
+ }
+ return ret;
+}
+
#endif /* TARGET_ARM_VEC_INTERNAL_H */