TRANS_FEAT(LDUMIN, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false)
TRANS_FEAT(SWP, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false)
+typedef void Atomic128ThreeOpFn(TCGv_i128, TCGv_i64, TCGv_i128, TCGArg, MemOp);
+
+static bool do_atomic128_ld(DisasContext *s, arg_atomic128 *a,
+                            Atomic128ThreeOpFn *fn, bool invert)
+{
+    MemOp mop;
+    int rlo, rhi;
+    TCGv_i64 clean_addr, tlo, thi;
+    TCGv_i128 t16;
+
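+    /* The pair must name two distinct registers, neither of which is XZR. */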
+    if (a->rt == 31 || a->rt2 == 31 || a->rt == a->rt2) {
+        return false;
+    }
+    if (a->rn == 31) {
+        gen_check_sp_alignment(s);
+    }
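+    /*
+     * Determine the MemOp, including the alignment required for a 128-bit
+     * atomic access, and perform the MTE check to get the clean address.
+     */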
+    mop = check_atomic_align(s, a->rn, MO_128);
+    clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
+                                a->rn != 31, mop);
+
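+    /*
+     * Map Rt/Rt2 onto the low and high 64-bit halves of the 128-bit
+     * memory value according to the target data endianness.
+     */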
+    rlo = (s->be_data == MO_LE ? a->rt : a->rt2);
+    rhi = (s->be_data == MO_LE ? a->rt2 : a->rt);
+
+    tlo = read_cpu_reg(s, rlo, true);
+    thi = read_cpu_reg(s, rhi, true);
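+    /*
+     * LDCLRP is an atomic bit-clear, so complement the operand and
+     * implement it as an atomic fetch-and.
+     */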
+    if (invert) {
+        tcg_gen_not_i64(tlo, tlo);
+        tcg_gen_not_i64(thi, thi);
+    }
+    /*
+     * The tcg atomic primitives are all full barriers. Therefore we
+     * can ignore the Acquire and Release bits of this instruction.
+     */
+    t16 = tcg_temp_new_i128();
+    tcg_gen_concat_i64_i128(t16, tlo, thi);
+
+    fn(t16, clean_addr, t16, get_mem_index(s), mop);
+
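+    /* Write the original memory value back to the register pair. */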
+    tcg_gen_extr_i128_i64(cpu_reg(s, rlo), cpu_reg(s, rhi), t16);
+    return true;
+}
+
+TRANS_FEAT(LDCLRP, aa64_lse128, do_atomic128_ld,
+           a, tcg_gen_atomic_fetch_and_i128, true)
+TRANS_FEAT(LDSETP, aa64_lse128, do_atomic128_ld,
+           a, tcg_gen_atomic_fetch_or_i128, false)
+TRANS_FEAT(SWPP, aa64_lse128, do_atomic128_ld,
+           a, tcg_gen_atomic_xchg_i128, false)
+
static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a)
{
    bool iss_sf = ldst_iss_sf(a->sz, false, false);