TRANS_FEAT(LDUMIN, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false)
TRANS_FEAT(SWP, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false)
+typedef void Atomic128ThreeOpFn(TCGv_i128, TCGv_i64, TCGv_i128, TCGArg, MemOp);
+
+static bool do_atomic128_ld(DisasContext *s, arg_atomic128 *a,
+                            Atomic128ThreeOpFn *fn, bool invert)
+{
+    MemOp mop;
+    int rlo, rhi;
+    TCGv_i64 clean_addr, tlo, thi;
+    TCGv_i128 t16;
+
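+    /* The pair must name two distinct registers, neither of which is XZR. */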
+    if (a->rt == 31 || a->rt2 == 31 || a->rt == a->rt2) {
+        return false;
+    }
+    if (a->rn == 31) {
+        gen_check_sp_alignment(s);
+    }
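+    /*
+     * Determine the MemOp, including the alignment required for a 128-bit
+     * atomic access, and perform the MTE check to get the clean address.
+     */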
+    mop = check_atomic_align(s, a->rn, MO_128);
+    clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
+                                a->rn != 31, mop);
+
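+    /*
+     * Map Rt/Rt2 onto the low and high 64-bit halves of the 128-bit
+     * memory value according to the target data endianness.
+     */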
+    rlo = (s->be_data == MO_LE ? a->rt : a->rt2);
+    rhi = (s->be_data == MO_LE ? a->rt2 : a->rt);
+
+    tlo = read_cpu_reg(s, rlo, true);
+    thi = read_cpu_reg(s, rhi, true);
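+    /*
+     * LDCLRP is an atomic bit-clear, so complement the operand and
+     * implement it as an atomic fetch-and.
+     */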
+    if (invert) {
+        tcg_gen_not_i64(tlo, tlo);
+        tcg_gen_not_i64(thi, thi);
+    }
+    /*
+     * The tcg atomic primitives are all full barriers. Therefore we
+     * can ignore the Acquire and Release bits of this instruction.
+     */
+    t16 = tcg_temp_new_i128();
+    tcg_gen_concat_i64_i128(t16, tlo, thi);
+
+    fn(t16, clean_addr, t16, get_mem_index(s), mop);
+
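+    /* Write the original memory value back to the register pair. */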
+    tcg_gen_extr_i128_i64(cpu_reg(s, rlo), cpu_reg(s, rhi), t16);
+    return true;
+}
+
+TRANS_FEAT(LDCLRP, aa64_lse128, do_atomic128_ld,
+           a, tcg_gen_atomic_fetch_and_i128, true)
+TRANS_FEAT(LDSETP, aa64_lse128, do_atomic128_ld,
+           a, tcg_gen_atomic_fetch_or_i128, false)
+TRANS_FEAT(SWPP, aa64_lse128, do_atomic128_ld,
+           a, tcg_gen_atomic_xchg_i128, false)
+
static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a)
{
    bool iss_sf = ldst_iss_sf(a->sz, false, false);