git.ipfire.org Git - thirdparty/valgrind.git/commitdiff
Bug 444399 - disInstr(arm64): unhandled instruction 0xC87F2D89 (LD{,A}XP and ST{,L}XP)
author Julian Seward <jseward@acm.org>
Fri, 12 Nov 2021 11:13:45 +0000 (12:13 +0100)
committer Julian Seward <jseward@acm.org>
Fri, 12 Nov 2021 11:13:45 +0000 (12:13 +0100)
This is unfortunately a big and complex patch, to implement LD{,A}XP and
ST{,L}XP.  These were omitted from the original AArch64 v8.0 implementation
for unknown reasons.

(Background) the patch is made significantly more complex because for AArch64
we actually have two implementations of the underlying
Load-Linked/Store-Conditional (LL/SC) machinery: a "primary" implementation,
which translates LL/SC more or less directly into IR and re-emits them at the
back end, and a "fallback" implementation that implements LL/SC "manually", by
taking advantage of the fact that V serialises thread execution, so we can
"implement" LL/SC by simulating a reservation using fields LLSC_* in the guest
state, and invalidating the reservation at every thread switch.

(Background) the fallback scheme is needed because the primary scheme is in
violation of the ARMv8 semantics in that it can (easily) introduce extra
memory references between the LL and SC, hence on some hardware causing the
reservation to always fail and so the simulated program to wind up looping
forever.

For these instructions, big picture:

* for the primary implementation, we take advantage of the fact that
  IRStmt_LLSC allows I128 bit transactions to be represented.  Hence we bundle
  up the two 64-bit data elements into an I128 (or vice versa) and present a
  single I128-typed IRStmt_LLSC in the IR.  In the backend, those are
  re-emitted as LDXP/STXP respectively.  For LL/SC on 32-bit register pairs,
  that bundling produces a single 64-bit item, and so the existing LL/SC
  backend machinery handles it.  The effect is that a doubleword 32-bit LL/SC
  in the front end translates into a single 64-bit LL/SC in the back end.
  Overall, though, the implementation is straightforward.

* for the fallback implementation, it is necessary to extend the guest state
  field `guest_LLSC_DATA` to represent a 128-bit transaction, by splitting it
  into _DATA_LO64 and _DATA_HI64.  Then, the implementation is an exact
  analogue of the fallback implementation for single-word LL/SC.  It takes
  advantage of the fact that the backend already supports 128-bit CAS, as
  fixed in bug 445354.  As with the primary implementation, doubleword 32-bit
  LL/SC is bundled into a single 64-bit transaction.

Detailed changes:

* new arm64 guest state fields LLSC_DATA_LO64/LLSC_DATA_HI64 to replace
  guest_LLSC_DATA

* (ridealong fix) arm64 front end: a fix to a minor and harmless decoding bug
  for the single-word LDX/STX case.

* arm64 front end: IR generation for LD{,A}XP/ST{,L}XP: tedious and
  longwinded, but per comments above, an exact(ish) analogue of the singleword
  case

* arm64 backend: new insns ARM64Instr_LdrEXP / ARM64Instr_StrEXP to wrap up 2
  x 64 exclusive loads/stores.  Per comments above, there's no need to handle
  the 2 x 32 case.

* arm64 isel: translate I128-typed IRStmt_LLSC into the above two insns

* arm64 isel: some auxiliary bits and pieces needed to handle I128 values;
  this is standard doubleword isel stuff

* arm64 isel: (ridealong fix): Ist_CAS: check for endianness of the CAS!

* arm64 isel: (ridealong) a couple of formatting fixes

* IR infrastructure: add support for I128 constants, done the same as V128
  constants

* memcheck: handle shadow loads and stores for I128 values

* testcase: memcheck/tests/atomic_incs.c: on arm64, also test 128-bit atomic
  addition, to check we really have atomicity right

* testcase: new test none/tests/arm64/ldxp_stxp.c, tests operation but not
  atomicity.  (Smoke test).

22 files changed:
VEX/priv/guest_arm64_toIR.c
VEX/priv/host_arm64_defs.c
VEX/priv/host_arm64_defs.h
VEX/priv/host_arm64_isel.c
VEX/priv/ir_defs.c
VEX/pub/libvex_guest_arm64.h
VEX/pub/libvex_ir.h
memcheck/mc_machine.c
memcheck/mc_translate.c
memcheck/tests/Makefile.am
memcheck/tests/atomic_incs.c
memcheck/tests/atomic_incs.stdout.exp-32bit
memcheck/tests/atomic_incs.stdout.exp-64bit
memcheck/tests/atomic_incs.stdout.exp-64bit-and-128bit [new file with mode: 0644]
none/tests/arm64/Makefile.am
none/tests/arm64/ldxp_stxp.c [new file with mode: 0644]
none/tests/arm64/ldxp_stxp_basisimpl.stderr.exp [new file with mode: 0644]
none/tests/arm64/ldxp_stxp_basisimpl.stdout.exp [new file with mode: 0644]
none/tests/arm64/ldxp_stxp_basisimpl.vgtest [new file with mode: 0644]
none/tests/arm64/ldxp_stxp_fallbackimpl.stderr.exp [new file with mode: 0644]
none/tests/arm64/ldxp_stxp_fallbackimpl.stdout.exp [new file with mode: 0644]
none/tests/arm64/ldxp_stxp_fallbackimpl.vgtest [new file with mode: 0644]

index 12a1c5978af9f4bccada62c2acec4d9ebe1f3a37..ee018c6a9fceb51b99e2e60f0978be5dd652ae1e 100644 (file)
@@ -1184,9 +1184,10 @@ static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
 #define OFFB_CMSTART  offsetof(VexGuestARM64State,guest_CMSTART)
 #define OFFB_CMLEN    offsetof(VexGuestARM64State,guest_CMLEN)
 
-#define OFFB_LLSC_SIZE offsetof(VexGuestARM64State,guest_LLSC_SIZE)
-#define OFFB_LLSC_ADDR offsetof(VexGuestARM64State,guest_LLSC_ADDR)
-#define OFFB_LLSC_DATA offsetof(VexGuestARM64State,guest_LLSC_DATA)
+#define OFFB_LLSC_SIZE      offsetof(VexGuestARM64State,guest_LLSC_SIZE)
+#define OFFB_LLSC_ADDR      offsetof(VexGuestARM64State,guest_LLSC_ADDR)
+#define OFFB_LLSC_DATA_LO64 offsetof(VexGuestARM64State,guest_LLSC_DATA_LO64)
+#define OFFB_LLSC_DATA_HI64 offsetof(VexGuestARM64State,guest_LLSC_DATA_HI64)
 
 
 /* ---------------- Integer registers ---------------- */
@@ -6652,7 +6653,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
         (coregrind/m_scheduler/scheduler.c, run_thread_for_a_while()
          has to do this bit)
    */   
-   if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
+   if (INSN(29,24) == BITS6(0,0,1,0,0,0)
        && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
        && INSN(14,10) == BITS5(1,1,1,1,1)) {
       UInt szBlg2     = INSN(31,30);
@@ -6678,7 +6679,8 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
             // if it faults.
             IRTemp loaded_data64 = newTemp(Ity_I64);
             assign(loaded_data64, widenUto64(ty, loadLE(ty, mkexpr(ea))));
-            stmt( IRStmt_Put( OFFB_LLSC_DATA, mkexpr(loaded_data64) ));
+            stmt( IRStmt_Put( OFFB_LLSC_DATA_LO64, mkexpr(loaded_data64) ));
+            stmt( IRStmt_Put( OFFB_LLSC_DATA_HI64, mkU64(0) ));
             stmt( IRStmt_Put( OFFB_LLSC_ADDR, mkexpr(ea) ));
             stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(szB) ));
             putIReg64orZR(tt, mkexpr(loaded_data64));
@@ -6729,7 +6731,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
             ));
             // Fail if the data doesn't match the LL data
             IRTemp llsc_data64 = newTemp(Ity_I64);
-            assign(llsc_data64, IRExpr_Get(OFFB_LLSC_DATA, Ity_I64));
+            assign(llsc_data64, IRExpr_Get(OFFB_LLSC_DATA_LO64, Ity_I64));
             stmt( IRStmt_Exit(
                       binop(Iop_CmpNE64, widenUto64(ty, loadLE(ty, mkexpr(ea))),
                                          mkexpr(llsc_data64)),
@@ -6771,6 +6773,257 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
       /* else fall through */
    }
 
+   /* -------------------- LD{,A}XP -------------------- */
+   /* -------------------- ST{,L}XP -------------------- */
+   /* 31 30 29     23  20    15 14  9  4
+       1 sz 001000 011 11111 0  t2  n  t1   LDXP  Rt1, Rt2, [Xn|SP]
+       1 sz 001000 011 11111 1  t2  n  t1   LDAXP Rt1, Rt2, [Xn|SP]
+       1 sz 001000 001 s     0  t2  n  t1   STXP  Ws, Rt1, Rt2, [Xn|SP]
+       1 sz 001000 001 s     1  t2  n  t1   STLXP Ws, Rt1, Rt2, [Xn|SP]
+   */
+   /* See just above, "LD{,A}X{R,RH,RB} / ST{,L}X{R,RH,RB}", for detailed
+      comments about this implementation.  Note the 'sz' field here is only 1
+      bit; above, it is 2 bits, and has a different encoding.
+   */
+   if (INSN(31,31) == 1
+       && INSN(29,24) == BITS6(0,0,1,0,0,0)
+       && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,1)) {
+      Bool elemIs64   = INSN(30,30) == 1;
+      Bool isLD       = INSN(22,22) == 1;
+      Bool isAcqOrRel = INSN(15,15) == 1;
+      UInt ss         = INSN(20,16);
+      UInt tt2        = INSN(14,10);
+      UInt nn         = INSN(9,5);
+      UInt tt1        = INSN(4,0);
+
+      UInt   elemSzB = elemIs64 ? 8 : 4;
+      UInt   fullSzB = 2 * elemSzB;
+      IRType elemTy  = integerIRTypeOfSize(elemSzB);
+      IRType fullTy  = integerIRTypeOfSize(fullSzB);
+
+      IRTemp ea = newTemp(Ity_I64);
+      assign(ea, getIReg64orSP(nn));
+      /* FIXME generate check that ea is 2*elemSzB-aligned */
+
+      if (isLD && ss == BITS5(1,1,1,1,1)) {
+         if (abiinfo->guest__use_fallback_LLSC) {
+            // Fallback implementation of LL.
+            // Do the load first so we don't update any guest state if it
+            // faults.  Assumes little-endian guest.
+            if (fullTy == Ity_I64) {
+               vassert(elemSzB == 4);
+               IRTemp loaded_data64 = newTemp(Ity_I64);
+               assign(loaded_data64, loadLE(fullTy, mkexpr(ea)));
+               stmt( IRStmt_Put( OFFB_LLSC_DATA_LO64, mkexpr(loaded_data64) ));
+               stmt( IRStmt_Put( OFFB_LLSC_DATA_HI64, mkU64(0) ));
+               stmt( IRStmt_Put( OFFB_LLSC_ADDR, mkexpr(ea) ));
+               stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(8) ));
+               putIReg64orZR(tt1, unop(Iop_32Uto64,
+                                       unop(Iop_64to32,
+                                            mkexpr(loaded_data64))));
+               putIReg64orZR(tt2, unop(Iop_32Uto64,
+                                       unop(Iop_64HIto32,
+                                            mkexpr(loaded_data64))));
+            } else {
+               vassert(elemSzB == 8 && fullTy == Ity_I128);
+               IRTemp loaded_data128 = newTemp(Ity_I128);
+               // Hack: do the load as V128 rather than I128 so as to avoid
+               // having to implement I128 loads in the arm64 back end.
+               assign(loaded_data128, unop(Iop_ReinterpV128asI128,
+                                           loadLE(Ity_V128, mkexpr(ea))));
+               IRTemp loaded_data_lo64 = newTemp(Ity_I64);
+               IRTemp loaded_data_hi64 = newTemp(Ity_I64);
+               assign(loaded_data_lo64, unop(Iop_128to64,
+                                             mkexpr(loaded_data128)));
+               assign(loaded_data_hi64, unop(Iop_128HIto64,
+                                             mkexpr(loaded_data128)));
+               stmt( IRStmt_Put( OFFB_LLSC_DATA_LO64,
+                                 mkexpr(loaded_data_lo64) ));
+               stmt( IRStmt_Put( OFFB_LLSC_DATA_HI64,
+                                 mkexpr(loaded_data_hi64) ));
+               stmt( IRStmt_Put( OFFB_LLSC_ADDR, mkexpr(ea) ));
+               stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(16) ));
+               putIReg64orZR(tt1, mkexpr(loaded_data_lo64));
+               putIReg64orZR(tt2, mkexpr(loaded_data_hi64));
+            }
+         } else {
+            // Non-fallback implementation of LL.
+            IRTemp res = newTemp(fullTy); // I64 or I128
+            stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
+            // Assuming a little-endian guest here.  Rt1 goes at the lower
+            // address, so it must live in the least significant half of `res`.
+            IROp opGetLO = fullTy == Ity_I128 ? Iop_128to64   : Iop_64to32;
+            IROp opGetHI = fullTy == Ity_I128 ? Iop_128HIto64 : Iop_64HIto32;
+            putIReg64orZR(tt1, widenUto64(elemTy, unop(opGetLO, mkexpr(res))));
+            putIReg64orZR(tt2, widenUto64(elemTy, unop(opGetHI, mkexpr(res))));
+         }
+         if (isAcqOrRel) {
+            stmt(IRStmt_MBE(Imbe_Fence));
+         }
+         DIP("ld%sxp %s, %s, [%s] %s\n",
+             isAcqOrRel ? (isLD ? "a" : "l") : "",
+             nameIRegOrZR(elemSzB == 8, tt1),
+             nameIRegOrZR(elemSzB == 8, tt2),
+             nameIReg64orSP(nn),
+             abiinfo->guest__use_fallback_LLSC
+                ? "(fallback implementation)" : "");
+         return True;
+      }
+      if (!isLD) {
+         if (isAcqOrRel) {
+            stmt(IRStmt_MBE(Imbe_Fence));
+         }
+         if (abiinfo->guest__use_fallback_LLSC) {
+            // Fallback implementation of SC.
+            // This is really ugly, since we don't have any way to do
+            // proper if-then-else.  First, set up as if the SC failed,
+            // and jump forwards if it really has failed.
+
+            // Continuation address
+            IRConst* nia = IRConst_U64(guest_PC_curr_instr + 4);
+
+            // "the SC failed".  Any non-zero value means failure.
+            putIReg64orZR(ss, mkU64(1));
+
+            IRTemp tmp_LLsize = newTemp(Ity_I64);
+            assign(tmp_LLsize, IRExpr_Get(OFFB_LLSC_SIZE, Ity_I64));
+            stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(0) // "no transaction"
+            ));
+            // Fail if no or wrong-size transaction
+            vassert((fullSzB == 8 && fullTy == Ity_I64)
+                    || (fullSzB == 16 && fullTy == Ity_I128));
+            stmt( IRStmt_Exit(
+                     binop(Iop_CmpNE64, mkexpr(tmp_LLsize), mkU64(fullSzB)),
+                     Ijk_Boring, nia, OFFB_PC
+            ));
+            // Fail if the address doesn't match the LL address
+            stmt( IRStmt_Exit(
+                      binop(Iop_CmpNE64, mkexpr(ea),
+                                         IRExpr_Get(OFFB_LLSC_ADDR, Ity_I64)),
+                      Ijk_Boring, nia, OFFB_PC
+            ));
+            // The data to be stored.
+            IRTemp store_data = newTemp(fullTy);
+            if (fullTy == Ity_I64) {
+               assign(store_data,
+                      binop(Iop_32HLto64,
+                            narrowFrom64(Ity_I32, getIReg64orZR(tt2)),
+                            narrowFrom64(Ity_I32, getIReg64orZR(tt1))));
+            } else {
+               assign(store_data,
+                      binop(Iop_64HLto128,
+                            getIReg64orZR(tt2), getIReg64orZR(tt1)));
+            }
+
+            if (fullTy == Ity_I64) {
+               // 64 bit (2x32 bit) path
+               // Fail if the data in memory doesn't match the data stashed by
+               // the LL.
+               IRTemp llsc_data_lo64 = newTemp(Ity_I64);
+               assign(llsc_data_lo64,
+                      IRExpr_Get(OFFB_LLSC_DATA_LO64, Ity_I64));
+               stmt( IRStmt_Exit(
+                         binop(Iop_CmpNE64, loadLE(Ity_I64, mkexpr(ea)),
+                                            mkexpr(llsc_data_lo64)),
+                      Ijk_Boring, nia, OFFB_PC
+               ));
+               // Try to CAS the new value in.
+               IRTemp old = newTemp(Ity_I64);
+               IRTemp expd = newTemp(Ity_I64);
+               assign(expd, mkexpr(llsc_data_lo64));
+               stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old,
+                                        Iend_LE, mkexpr(ea),
+                                        /*expdHi*/NULL, mkexpr(expd),
+                                        /*dataHi*/NULL, mkexpr(store_data)
+               )));
+               // Fail if the CAS failed (viz, old != expd)
+               stmt( IRStmt_Exit(
+                         binop(Iop_CmpNE64, mkexpr(old), mkexpr(expd)),
+                         Ijk_Boring, nia, OFFB_PC
+               ));
+            } else {
+               // 128 bit (2x64 bit) path
+               // Fail if the data in memory doesn't match the data stashed by
+               // the LL.
+               IRTemp llsc_data_lo64 = newTemp(Ity_I64);
+               assign(llsc_data_lo64,
+                      IRExpr_Get(OFFB_LLSC_DATA_LO64, Ity_I64));
+               IRTemp llsc_data_hi64 = newTemp(Ity_I64);
+               assign(llsc_data_hi64,
+                      IRExpr_Get(OFFB_LLSC_DATA_HI64, Ity_I64));
+               IRTemp data_at_ea = newTemp(Ity_I128);
+               assign(data_at_ea,
+                      unop(Iop_ReinterpV128asI128,
+                           loadLE(Ity_V128, mkexpr(ea))));
+               stmt( IRStmt_Exit(
+                        binop(Iop_CmpNE64,
+                              unop(Iop_128to64, mkexpr(data_at_ea)),
+                              mkexpr(llsc_data_lo64)),
+                        Ijk_Boring, nia, OFFB_PC
+               ));
+               stmt( IRStmt_Exit(
+                        binop(Iop_CmpNE64,
+                              unop(Iop_128HIto64, mkexpr(data_at_ea)),
+                              mkexpr(llsc_data_hi64)),
+                        Ijk_Boring, nia, OFFB_PC
+               ));
+               // Try to CAS the new value in.
+               IRTemp old_lo64 = newTemp(Ity_I64);
+               IRTemp old_hi64 = newTemp(Ity_I64);
+               IRTemp expd_lo64 = newTemp(Ity_I64);
+               IRTemp expd_hi64 = newTemp(Ity_I64);
+               IRTemp store_data_lo64 = newTemp(Ity_I64);
+               IRTemp store_data_hi64 = newTemp(Ity_I64);
+               assign(expd_lo64, mkexpr(llsc_data_lo64));
+               assign(expd_hi64, mkexpr(llsc_data_hi64));
+               assign(store_data_lo64, unop(Iop_128to64, mkexpr(store_data)));
+               assign(store_data_hi64, unop(Iop_128HIto64, mkexpr(store_data)));
+               stmt( IRStmt_CAS(mkIRCAS(old_hi64, old_lo64,
+                                        Iend_LE, mkexpr(ea),
+                                        mkexpr(expd_hi64), mkexpr(expd_lo64),
+                                        mkexpr(store_data_hi64),
+                                        mkexpr(store_data_lo64)
+               )));
+               // Fail if the CAS failed (viz, old != expd)
+               stmt( IRStmt_Exit(
+                        binop(Iop_CmpNE64, mkexpr(old_lo64), mkexpr(expd_lo64)),
+                        Ijk_Boring, nia, OFFB_PC
+               ));
+               stmt( IRStmt_Exit(
+                        binop(Iop_CmpNE64, mkexpr(old_hi64), mkexpr(expd_hi64)),
+                        Ijk_Boring, nia, OFFB_PC
+               ));
+            }
+            // Otherwise we succeeded (!)
+            putIReg64orZR(ss, mkU64(0));
+         } else {
+            // Non-fallback implementation of SC.
+            IRTemp  res     = newTemp(Ity_I1);
+            IRExpr* dataLO  = narrowFrom64(elemTy, getIReg64orZR(tt1));
+            IRExpr* dataHI  = narrowFrom64(elemTy, getIReg64orZR(tt2));
+            IROp    opMerge = fullTy == Ity_I128 ? Iop_64HLto128 : Iop_32HLto64;
+            IRExpr* data    = binop(opMerge, dataHI, dataLO);
+            // Assuming a little-endian guest here.  Rt1 goes at the lower
+            // address, so it must live in the least significant half of `data`.
+            stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
+            /* IR semantics: res is 1 if store succeeds, 0 if it fails.
+               Need to set rS to 1 on failure, 0 on success. */
+            putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
+                                               mkU64(1)));
+         }
+         DIP("st%sxp %s, %s, %s, [%s] %s\n",
+             isAcqOrRel ? (isLD ? "a" : "l") : "",
+             nameIRegOrZR(False, ss),
+             nameIRegOrZR(elemSzB == 8, tt1),
+             nameIRegOrZR(elemSzB == 8, tt2),
+             nameIReg64orSP(nn),
+             abiinfo->guest__use_fallback_LLSC
+                ? "(fallback implementation)" : "");
+         return True;
+      }
+      /* else fall through */
+   }
+
    /* ------------------ LDA{R,RH,RB} ------------------ */
    /* ------------------ STL{R,RH,RB} ------------------ */
    /* 31 29     23  20      14    9 4
index 5657bcab962459357d386cfae667c06445b731ee..b65e27db4d421c3484e718d2f10964475c7dd208 100644 (file)
@@ -1059,6 +1059,16 @@ ARM64Instr* ARM64Instr_StrEX ( Int szB ) {
    vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
    return i;
 }
+ARM64Instr* ARM64Instr_LdrEXP ( void ) {
+   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
+   i->tag        = ARM64in_LdrEXP;
+   return i;
+}
+ARM64Instr* ARM64Instr_StrEXP ( void ) {
+   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
+   i->tag        = ARM64in_StrEXP;
+   return i;
+}
 ARM64Instr* ARM64Instr_CAS ( Int szB ) {
    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    i->tag             = ARM64in_CAS;
@@ -1699,12 +1709,19 @@ void ppARM64Instr ( const ARM64Instr* i ) {
                     sz, i->ARM64in.StrEX.szB == 8 ? 'x' : 'w');
          return;
       }
+      case ARM64in_LdrEXP:
+         vex_printf("ldxp   x2, x3, [x4]");
+         return;
+      case ARM64in_StrEXP:
+         vex_printf("stxp   w0, x2, x3, [x4]");
+         return;
       case ARM64in_CAS: {
          vex_printf("x1 = cas(%dbit)(x3, x5 -> x7)", 8 * i->ARM64in.CAS.szB);
          return;
       }
       case ARM64in_CASP: {
-         vex_printf("x0,x1 = casp(%dbit)(x2, x4,x5 -> x6,x7)", 8 * i->ARM64in.CASP.szB);
+         vex_printf("x0,x1 = casp(2x%dbit)(x2, x4,x5 -> x6,x7)",
+                    8 * i->ARM64in.CASP.szB);
          return;
       }
       case ARM64in_MFence:
@@ -2253,6 +2270,17 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
          addHRegUse(u, HRmWrite, hregARM64_X0());
          addHRegUse(u, HRmRead, hregARM64_X2());
          return;
+      case ARM64in_LdrEXP:
+         addHRegUse(u, HRmRead, hregARM64_X4());
+         addHRegUse(u, HRmWrite, hregARM64_X2());
+         addHRegUse(u, HRmWrite, hregARM64_X3());
+         return;
+      case ARM64in_StrEXP:
+         addHRegUse(u, HRmRead, hregARM64_X4());
+         addHRegUse(u, HRmWrite, hregARM64_X0());
+         addHRegUse(u, HRmRead, hregARM64_X2());
+         addHRegUse(u, HRmRead, hregARM64_X3());
+         return;
       case ARM64in_CAS:
          addHRegUse(u, HRmRead, hregARM64_X3());
          addHRegUse(u, HRmRead, hregARM64_X5());
@@ -2571,6 +2599,10 @@ void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
          return;
       case ARM64in_StrEX:
          return;
+      case ARM64in_LdrEXP:
+         return;
+      case ARM64in_StrEXP:
+         return;
       case ARM64in_CAS:
          return;
       case ARM64in_CASP:
@@ -4167,6 +4199,16 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
          }
          goto bad;
       }
+      case ARM64in_LdrEXP: {
+         // 820C7FC8   ldxp x2, x3, [x4]
+         *p++ = 0xC87F0C82;
+         goto done;
+      }
+      case ARM64in_StrEXP: {
+         // 820C20C8   stxp w0, x2, x3, [x4]
+         *p++ = 0xC8200C82;
+         goto done;
+      }
       case ARM64in_CAS: {
          /* This isn't simple.  For an explanation see the comment in
             host_arm64_defs.h on the definition of ARM64Instr case CAS.
index 01fb5708e006fd79a5a8833cd443051d3314af0f..dc686dff7fc397579105d7c3ddadaf120c2594ad 100644 (file)
@@ -509,8 +509,10 @@ typedef
       ARM64in_AddToSP,     /* move SP by small, signed constant */
       ARM64in_FromSP,      /* move SP to integer register */
       ARM64in_Mul,
-      ARM64in_LdrEX,
-      ARM64in_StrEX,
+      ARM64in_LdrEX,       /* load exclusive, single register */
+      ARM64in_StrEX,       /* store exclusive, single register */
+      ARM64in_LdrEXP,      /* load exclusive, register pair, 2x64-bit only */
+      ARM64in_StrEXP,      /* store exclusive, register pair, 2x64-bit only */
       ARM64in_CAS,
       ARM64in_CASP,
       ARM64in_MFence,
@@ -719,6 +721,12 @@ typedef
          struct {
             Int  szB; /* 1, 2, 4 or 8 */
          } StrEX;
+         /* LDXP x2, x3, [x4].  This is 2x64-bit only. */
+         struct {
+         } LdrEXP;
+         /* STXP w0, x2, x3, [x4].  This is 2x64-bit only. */
+         struct {
+         } StrEXP;
          /* x1 = CAS(x3(addr), x5(expected) -> x7(new)),
             and trashes x8
             where x1[8*szB-1 : 0] == x5[8*szB-1 : 0] indicates success,
@@ -1037,6 +1045,8 @@ extern ARM64Instr* ARM64Instr_Mul     ( HReg dst, HReg argL, HReg argR,
                                         ARM64MulOp op );
 extern ARM64Instr* ARM64Instr_LdrEX   ( Int szB );
 extern ARM64Instr* ARM64Instr_StrEX   ( Int szB );
+extern ARM64Instr* ARM64Instr_LdrEXP  ( void );
+extern ARM64Instr* ARM64Instr_StrEXP  ( void );
 extern ARM64Instr* ARM64Instr_CAS     ( Int szB );
 extern ARM64Instr* ARM64Instr_CASP    ( Int szB );
 extern ARM64Instr* ARM64Instr_MFence  ( void );
index 4b1d8c8469e047609a079bdd469f8a2fb9260f3c..094e7e74b48dee3fa14d4a13743d8909c71518cf 100644 (file)
@@ -196,9 +196,9 @@ static HReg        iselCondCode_R        ( ISelEnv* env, IRExpr* e );
 static HReg        iselIntExpr_R_wrk     ( ISelEnv* env, IRExpr* e );
 static HReg        iselIntExpr_R         ( ISelEnv* env, IRExpr* e );
 
-static void        iselInt128Expr_wrk    ( /*OUT*/HReg* rHi, HReg* rLo, 
+static void        iselInt128Expr_wrk    ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
                                            ISelEnv* env, IRExpr* e );
-static void        iselInt128Expr        ( /*OUT*/HReg* rHi, HReg* rLo, 
+static void        iselInt128Expr        ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
                                            ISelEnv* env, IRExpr* e );
 
 static HReg        iselDblExpr_wrk        ( ISelEnv* env, IRExpr* e );
@@ -1759,9 +1759,12 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
 
       /* AND/OR/XOR(e1, e2) (for any e1, e2) */
       switch (e->Iex.Binop.op) {
-         case Iop_And64: case Iop_And32: lop = ARM64lo_AND; goto log_binop;
-         case Iop_Or64:  case Iop_Or32:  case Iop_Or16: lop = ARM64lo_OR;  goto log_binop;
-         case Iop_Xor64: case Iop_Xor32: lop = ARM64lo_XOR; goto log_binop;
+         case Iop_And64: case Iop_And32:
+            lop = ARM64lo_AND; goto log_binop;
+         case Iop_Or64:  case Iop_Or32:  case Iop_Or16:
+            lop = ARM64lo_OR;  goto log_binop;
+         case Iop_Xor64: case Iop_Xor32:
+            lop = ARM64lo_XOR; goto log_binop;
          log_binop: {
             HReg      dst  = newVRegI(env);
             HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
@@ -2013,6 +2016,11 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
             iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
             return rHi; /* and abandon rLo */
          }
+         case Iop_128to64: {
+            HReg rHi, rLo;
+            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
+            return rLo; /* and abandon rHi */
+         }
          case Iop_8Sto32: case Iop_8Sto64: {
             IRExpr* arg = e->Iex.Unop.arg;
             HReg    src = iselIntExpr_R(env, arg);
@@ -2185,13 +2193,19 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
             }
             return dst;
          }
+         case Iop_64HIto32: {
+            HReg dst = newVRegI(env);
+            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+            addInstr(env, ARM64Instr_Shift(dst, src, ARM64RI6_I6(32),
+                                           ARM64sh_SHR));
+            return dst;
+         }
          case Iop_64to32:
          case Iop_64to16:
          case Iop_64to8:
          case Iop_32to16:
             /* These are no-ops. */
             return iselIntExpr_R(env, e->Iex.Unop.arg);
-
          default:
             break;
       }
@@ -2335,6 +2349,43 @@ static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
    vassert(e);
    vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
 
+   /* --------- TEMP --------- */
+   if (e->tag == Iex_RdTmp) {
+      lookupIRTempPair(rHi, rLo, env, e->Iex.RdTmp.tmp);
+      return;
+   }
+
+   /* --------- CONST --------- */
+   if (e->tag == Iex_Const) {
+      IRConst* c = e->Iex.Const.con;
+      vassert(c->tag == Ico_U128);
+      if (c->Ico.U128 == 0) {
+         // The only case we need to handle (so far)
+         HReg zero = newVRegI(env);
+         addInstr(env, ARM64Instr_Imm64(zero, 0));
+         *rHi = *rLo = zero;
+         return;
+      }
+   }
+
+   /* --------- UNARY ops --------- */
+   if (e->tag == Iex_Unop) {
+      switch (e->Iex.Unop.op) {
+         case Iop_ReinterpV128asI128: {
+            HReg dstHi = newVRegI(env);
+            HReg dstLo = newVRegI(env);
+            HReg src    = iselV128Expr(env, e->Iex.Unop.arg);
+            addInstr(env, ARM64Instr_VXfromQ(dstHi, src, 1));
+            addInstr(env, ARM64Instr_VXfromQ(dstLo, src, 0));
+            *rHi = dstHi;
+            *rLo = dstLo;
+            return;
+         }
+         default:
+            break;
+      }
+   }
+
    /* --------- BINARY ops --------- */
    if (e->tag == Iex_Binop) {
       switch (e->Iex.Binop.op) {
@@ -4086,6 +4137,14 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt )
          addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src));
          return;
       }
+      if (ty == Ity_I128) {
+         HReg rHi, rLo, dstHi, dstLo;
+         iselInt128Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
+         lookupIRTempPair( &dstHi, &dstLo, env, tmp);
+         addInstr(env, ARM64Instr_MovI(dstHi, rHi));
+         addInstr(env, ARM64Instr_MovI(dstLo, rLo));
+         return;
+      }
       if (ty == Ity_V128) {
          HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data);
          HReg dst = lookupIRTemp(env, tmp);
@@ -4183,42 +4242,67 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt )
          /* LL */
          IRTemp res = stmt->Ist.LLSC.result;
          IRType ty  = typeOfIRTemp(env->type_env, res);
-         if (ty == Ity_I64 || ty == Ity_I32 
+         if (ty == Ity_I128 || ty == Ity_I64 || ty == Ity_I32
              || ty == Ity_I16 || ty == Ity_I8) {
             Int  szB   = 0;
-            HReg r_dst = lookupIRTemp(env, res);
             HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
             switch (ty) {
-               case Ity_I8:  szB = 1; break;
-               case Ity_I16: szB = 2; break;
-               case Ity_I32: szB = 4; break;
-               case Ity_I64: szB = 8; break;
-               default:      vassert(0);
+               case Ity_I8:   szB = 1;  break;
+               case Ity_I16:  szB = 2;  break;
+               case Ity_I32:  szB = 4;  break;
+               case Ity_I64:  szB = 8;  break;
+               case Ity_I128: szB = 16; break;
+               default:       vassert(0);
+            }
+            if (szB == 16) {
+               HReg r_dstMSword = INVALID_HREG;
+               HReg r_dstLSword = INVALID_HREG;
+               lookupIRTempPair(&r_dstMSword, &r_dstLSword, env, res);
+               addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr));
+               addInstr(env, ARM64Instr_LdrEXP());
+               addInstr(env, ARM64Instr_MovI(r_dstLSword, hregARM64_X2()));
+               addInstr(env, ARM64Instr_MovI(r_dstMSword, hregARM64_X3()));
+            } else {
+               vassert(szB != 0);
+               HReg r_dst = lookupIRTemp(env, res);
+               addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr));
+               addInstr(env, ARM64Instr_LdrEX(szB));
+               addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2()));
             }
-            addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr));
-            addInstr(env, ARM64Instr_LdrEX(szB));
-            addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2()));
             return;
          }
          goto stmt_fail;
       } else {
          /* SC */
          IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
-         if (tyd == Ity_I64 || tyd == Ity_I32
+         if (tyd == Ity_I128 || tyd == Ity_I64 || tyd == Ity_I32
              || tyd == Ity_I16 || tyd == Ity_I8) {
             Int  szB = 0;
-            HReg rD  = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
             HReg rA  = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
             switch (tyd) {
-               case Ity_I8:  szB = 1; break;
-               case Ity_I16: szB = 2; break;
-               case Ity_I32: szB = 4; break;
-               case Ity_I64: szB = 8; break;
-               default:      vassert(0);
+               case Ity_I8:   szB = 1; break;
+               case Ity_I16:  szB = 2; break;
+               case Ity_I32:  szB = 4; break;
+               case Ity_I64:  szB = 8; break;
+               case Ity_I128: szB = 16; break;
+               default:       vassert(0);
+            }
+            if (szB == 16) {
+               HReg rD_MSword = INVALID_HREG;
+               HReg rD_LSword = INVALID_HREG;
+               iselInt128Expr(&rD_MSword,
+                              &rD_LSword, env, stmt->Ist.LLSC.storedata);
+               addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD_LSword));
+               addInstr(env, ARM64Instr_MovI(hregARM64_X3(), rD_MSword));
+               addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA));
+               addInstr(env, ARM64Instr_StrEXP());
+            } else {
+               vassert(szB != 0);
+               HReg rD  = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
+               addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD));
+               addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA));
+               addInstr(env, ARM64Instr_StrEX(szB));
             }
-            addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD));
-            addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA));
-            addInstr(env, ARM64Instr_StrEX(szB));
          } else {
             goto stmt_fail;
          }
@@ -4243,10 +4327,10 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt )
 
    /* --------- ACAS --------- */
    case Ist_CAS: {
-      if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
+      IRCAS* cas = stmt->Ist.CAS.details;
+      if (cas->oldHi == IRTemp_INVALID && cas->end == Iend_LE) {
          /* "normal" singleton CAS */
          UChar  sz;
-         IRCAS* cas = stmt->Ist.CAS.details;
          IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
          switch (ty) { 
             case Ity_I64: sz = 8; break;
@@ -4281,10 +4365,9 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt )
          addInstr(env, ARM64Instr_MovI(rOld, rResult));
          return;
       }
-      else {
+      if (cas->oldHi != IRTemp_INVALID && cas->end == Iend_LE) {
          /* Paired register CAS, i.e. CASP */
          UChar  sz;
-         IRCAS* cas = stmt->Ist.CAS.details;
          IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
          switch (ty) {
             case Ity_I64: sz = 8; break;
index 25566c41cc4ba159cf452fd6749940ecd5fd77e4..2d82c41a1a85d9b38b46e12341830787e928f675 100644 (file)
@@ -76,6 +76,7 @@ void ppIRConst ( const IRConst* con )
       case Ico_U16:  vex_printf( "0x%x:I16",     (UInt)(con->Ico.U16)); break;
       case Ico_U32:  vex_printf( "0x%x:I32",     (UInt)(con->Ico.U32)); break;
       case Ico_U64:  vex_printf( "0x%llx:I64",   (ULong)(con->Ico.U64)); break;
+      case Ico_U128: vex_printf( "I128{0x%04x}", (UInt)(con->Ico.U128)); break;
       case Ico_F32:  u.f32 = con->Ico.F32;
                      vex_printf( "F32{0x%x}",   u.i32);
                      break;
@@ -2266,6 +2267,13 @@ IRConst* IRConst_U64 ( ULong u64 )
    c->Ico.U64 = u64;
    return c;
 }
+IRConst* IRConst_U128 ( UShort con )  /* Allocate a new IRConst tagged Ico_U128 holding 'con'. */
+{
+   IRConst* c  = LibVEX_Alloc_inline(sizeof(IRConst));
+   c->tag      = Ico_U128;
+   c->Ico.U128 = con;  /* kept as a UShort; Ico_U128 is documented as "same encoding scheme as V128" */
+   return c;
+}
 IRConst* IRConst_F32 ( Float f32 )
 {
    IRConst* c = LibVEX_Alloc_inline(sizeof(IRConst));
@@ -4230,6 +4238,7 @@ IRType typeOfIRConst ( const IRConst* con )
       case Ico_U16:   return Ity_I16;
       case Ico_U32:   return Ity_I32;
       case Ico_U64:   return Ity_I64;
+      case Ico_U128:  return Ity_I128;
       case Ico_F32:   return Ity_F32;
       case Ico_F32i:  return Ity_F32;
       case Ico_F64:   return Ity_F64;
@@ -5129,7 +5138,7 @@ void tcStmt ( const IRSB* bb, const IRStmt* stmt, IRType gWordTy )
          tyRes = typeOfIRTemp(tyenv, stmt->Ist.LLSC.result);
          if (stmt->Ist.LLSC.storedata == NULL) {
             /* it's a LL */
-            if (tyRes != Ity_I64 && tyRes != Ity_I32
+            if (tyRes != Ity_I128 && tyRes != Ity_I64 && tyRes != Ity_I32
                 && tyRes != Ity_I16 && tyRes != Ity_I8)
                sanityCheckFail(bb,stmt,"Ist.LLSC(LL).result :: bogus");
          } else {
@@ -5137,7 +5146,7 @@ void tcStmt ( const IRSB* bb, const IRStmt* stmt, IRType gWordTy )
             if (tyRes != Ity_I1)
                sanityCheckFail(bb,stmt,"Ist.LLSC(SC).result: not :: Ity_I1");
             tyData = typeOfIRExpr(tyenv, stmt->Ist.LLSC.storedata);
-            if (tyData != Ity_I64 && tyData != Ity_I32
+            if (tyData != Ity_I128 && tyData != Ity_I64 && tyData != Ity_I32
                 && tyData != Ity_I16 && tyData != Ity_I8)
                sanityCheckFail(bb,stmt,
                                "Ist.LLSC(SC).result :: storedata bogus");
@@ -5385,6 +5394,7 @@ Int sizeofIRType ( IRType ty )
 IRType integerIRTypeOfSize ( Int szB )
 {
    switch (szB) {
+      case 16: return Ity_I128;
       case 8: return Ity_I64;
       case 4: return Ity_I32;
       case 2: return Ity_I16;
index 39b6ecdc2e2427d8ec7539042d04cebf8ed7ebd2..91d06bd754facd84e6455c8984638b64880ce0fd 100644 (file)
@@ -157,14 +157,18 @@ typedef
          note of bits 23 and 22. */
       UInt  guest_FPCR;
 
-      /* Fallback LL/SC support.  See bugs 344524 and 369459. */
-      ULong guest_LLSC_SIZE; // 0==no current transaction, else 1,2,4 or 8.
+      /* Fallback LL/SC support.  See bugs 344524 and 369459.  _LO64 and _HI64
+         contain the original contents of _ADDR+0 .. _ADDR+15, but only _SIZE
+         number of bytes of it.  The remaining 16-_SIZE bytes of them must be
+         zero. */
+      ULong guest_LLSC_SIZE; // 0==no current transaction, else 1,2,4,8 or 16.
       ULong guest_LLSC_ADDR; // Address of transaction.
-      ULong guest_LLSC_DATA; // Original value at _ADDR, zero-extended.
+      ULong guest_LLSC_DATA_LO64; // Original value at _ADDR+0.
+      ULong guest_LLSC_DATA_HI64; // Original value at _ADDR+8.
 
       /* Padding to make it have an 16-aligned size */
       /* UInt  pad_end_0; */
-      ULong pad_end_1;
+      /* ULong pad_end_1; */
    }
    VexGuestARM64State;
 
index deaa044c1341534ed2f8470ef0694bb9e244dc55..85805bb69b8b447d0d5cd362b1fd3e5b9b181c28 100644 (file)
@@ -269,6 +269,8 @@ typedef
       Ico_U16, 
       Ico_U32, 
       Ico_U64,
+      Ico_U128,  /* 128-bit restricted integer constant,
+                    same encoding scheme as V128 */
       Ico_F32,   /* 32-bit IEEE754 floating */
       Ico_F32i,  /* 32-bit unsigned int to be interpreted literally
                     as a IEEE754 single value. */
@@ -295,6 +297,7 @@ typedef
          UShort U16;
          UInt   U32;
          ULong  U64;
+         UShort U128;
          Float  F32;
          UInt   F32i;
          Double F64;
@@ -311,6 +314,7 @@ extern IRConst* IRConst_U8   ( UChar );
 extern IRConst* IRConst_U16  ( UShort );
 extern IRConst* IRConst_U32  ( UInt );
 extern IRConst* IRConst_U64  ( ULong );
+extern IRConst* IRConst_U128 ( UShort );
 extern IRConst* IRConst_F32  ( Float );
 extern IRConst* IRConst_F32i ( UInt );
 extern IRConst* IRConst_F64  ( Double );
index 919c7fae88132988efcf054c015bc6b06b272ed5..176c8e5cb914bf8340aede5f16f86054bfa3a547 100644 (file)
@@ -1115,9 +1115,10 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
    if (o == GOF(CMSTART) && sz == 8) return -1; // untracked
    if (o == GOF(CMLEN)   && sz == 8) return -1; // untracked
 
-   if (o == GOF(LLSC_SIZE) && sz == 8) return -1; // untracked
-   if (o == GOF(LLSC_ADDR) && sz == 8) return o;
-   if (o == GOF(LLSC_DATA) && sz == 8) return o;
+   if (o == GOF(LLSC_SIZE)      && sz == 8) return -1; // untracked
+   if (o == GOF(LLSC_ADDR)      && sz == 8) return o;
+   if (o == GOF(LLSC_DATA_LO64) && sz == 8) return o;
+   if (o == GOF(LLSC_DATA_HI64) && sz == 8) return o;
 
    VG_(printf)("MC_(get_otrack_shadow_offset)(arm64)(off=%d,sz=%d)\n",
                offset,szB);
index c6fd2653f47f5f3f3ba3702e20dcc439fe003c5a..72ccb3c8c6b5f4b3bf2a8dcd7182f655538b7afa 100644 (file)
@@ -5497,8 +5497,11 @@ IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
       the address (shadow) to 'defined' following the test. */
    complainIfUndefined( mce, addr, guard );
 
-   /* Now cook up a call to the relevant helper function, to read the
-      data V bits from shadow memory. */
+   /* Now cook up a call to the relevant helper function, to read the data V
+      bits from shadow memory.  Note that I128 loads are done by pretending
+      we're doing a V128 load, and then converting the resulting V128 vbits
+      word to an I128, right at the end of this function -- see `castedToI128`
+      below.  (It's only a minor hack :-) This pertains to bug 444399. */
    ty = shadowTypeV(ty);
 
    void*        helper           = NULL;
@@ -5511,6 +5514,7 @@ IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
                         hname = "MC_(helperc_LOADV256le)";
                         ret_via_outparam = True;
                         break;
+         case Ity_I128: // fallthrough.  See comment above.
          case Ity_V128: helper = &MC_(helperc_LOADV128le);
                         hname = "MC_(helperc_LOADV128le)";
                         ret_via_outparam = True;
@@ -5576,7 +5580,7 @@ IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
 
    /* We need to have a place to park the V bits we're just about to
       read. */
-   IRTemp datavbits = newTemp(mce, ty, VSh);
+   IRTemp datavbits = newTemp(mce, ty == Ity_I128 ? Ity_V128 : ty, VSh);
 
    /* Here's the call. */
    IRDirty* di;
@@ -5603,7 +5607,14 @@ IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
    }
    stmt( 'V', mce, IRStmt_Dirty(di) );
 
-   return mkexpr(datavbits);
+   if (ty == Ity_I128) {
+      IRAtom* castedToI128
+         = assignNew('V', mce, Ity_I128,
+                     unop(Iop_ReinterpV128asI128, mkexpr(datavbits)));
+      return castedToI128;
+   } else {
+      return mkexpr(datavbits);
+   }
 }
 
 
@@ -5631,6 +5642,7 @@ IRAtom* expr2vbits_Load ( MCEnv* mce,
       case Ity_I16:
       case Ity_I32:
       case Ity_I64:
+      case Ity_I128:
       case Ity_V128:
       case Ity_V256:
          return expr2vbits_Load_WRK(mce, end, ty, addr, bias, guard);
@@ -5928,6 +5940,7 @@ void do_shadow_Store ( MCEnv* mce,
                         c = IRConst_V256(V_BITS32_DEFINED); break;
          case Ity_V128: // V128 weirdness -- used twice
                         c = IRConst_V128(V_BITS16_DEFINED); break;
+         case Ity_I128: c = IRConst_U128(V_BITS16_DEFINED); break;
          case Ity_I64:  c = IRConst_U64 (V_BITS64_DEFINED); break;
          case Ity_I32:  c = IRConst_U32 (V_BITS32_DEFINED); break;
          case Ity_I16:  c = IRConst_U16 (V_BITS16_DEFINED); break;
@@ -5948,6 +5961,7 @@ void do_shadow_Store ( MCEnv* mce,
       switch (ty) {
          case Ity_V256: /* we'll use the helper four times */
          case Ity_V128: /* we'll use the helper twice */
+         case Ity_I128: /* we'll use the helper twice */
          case Ity_I64: helper = &MC_(helperc_STOREV64le);
                        hname = "MC_(helperc_STOREV64le)";
                        break;
@@ -6051,9 +6065,9 @@ void do_shadow_Store ( MCEnv* mce,
       stmt( 'V', mce, IRStmt_Dirty(diQ3) );
 
    } 
-   else if (UNLIKELY(ty == Ity_V128)) {
+   else if (UNLIKELY(ty == Ity_V128 || ty == Ity_I128)) {
 
-      /* V128-bit case */
+      /* V128/I128-bit case */
       /* See comment in next clause re 64-bit regparms */
       /* also, need to be careful about endianness */
 
@@ -6062,6 +6076,7 @@ void do_shadow_Store ( MCEnv* mce,
       IRAtom  *addrLo64, *addrHi64;
       IRAtom  *vdataLo64, *vdataHi64;
       IRAtom  *eBiasLo64, *eBiasHi64;
+      IROp    opGetLO64,  opGetHI64;
 
       if (end == Iend_LE) {
          offLo64 = 0;
@@ -6071,9 +6086,17 @@ void do_shadow_Store ( MCEnv* mce,
          offHi64 = 0;
       }
 
+      if (ty == Ity_V128) {
+         opGetLO64 = Iop_V128to64;
+         opGetHI64 = Iop_V128HIto64;
+      } else {
+         opGetLO64 = Iop_128to64;
+         opGetHI64 = Iop_128HIto64;
+      }
+
       eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
       addrLo64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
-      vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
+      vdataLo64 = assignNew('V', mce, Ity_I64, unop(opGetLO64, vdata));
       diLo64    = unsafeIRDirty_0_N( 
                      1/*regparms*/, 
                      hname, VG_(fnptr_to_fnentry)( helper ), 
@@ -6081,7 +6104,7 @@ void do_shadow_Store ( MCEnv* mce,
                   );
       eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
       addrHi64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
-      vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
+      vdataHi64 = assignNew('V', mce, Ity_I64, unop(opGetHI64, vdata));
       diHi64    = unsafeIRDirty_0_N( 
                      1/*regparms*/, 
                      hname, VG_(fnptr_to_fnentry)( helper ), 
@@ -6888,7 +6911,7 @@ static void do_shadow_LLSC ( MCEnv*    mce,
       /* Just treat this as a normal load, followed by an assignment of
          the value to .result. */
       /* Stay sane */
-      tl_assert(resTy == Ity_I64 || resTy == Ity_I32
+      tl_assert(resTy == Ity_I128 || resTy == Ity_I64 || resTy == Ity_I32
                 || resTy == Ity_I16 || resTy == Ity_I8);
       assign( 'V', mce, resTmp,
                    expr2vbits_Load(
@@ -6899,7 +6922,7 @@ static void do_shadow_LLSC ( MCEnv*    mce,
       /* Stay sane */
       IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
                                    stStoredata);
-      tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
+      tl_assert(dataTy == Ity_I128 || dataTy == Ity_I64 || dataTy == Ity_I32
                 || dataTy == Ity_I16 || dataTy == Ity_I8);
       do_shadow_Store( mce, stEnd,
                             stAddr, 0/* addr bias */,
@@ -7684,7 +7707,7 @@ static void schemeS ( MCEnv* mce, IRStmt* st )
                = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
             IRExpr* vanillaLoad
                = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
-            tl_assert(resTy == Ity_I64 || resTy == Ity_I32
+            tl_assert(resTy == Ity_I128 || resTy == Ity_I64 || resTy == Ity_I32
                       || resTy == Ity_I16 || resTy == Ity_I8);
             assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                               schemeE(mce, vanillaLoad));
index 449710020aac3d1e8ea857a0e4183f2c72635178..2b43ef7d759023998e8d7594c61792c87702bfbb 100644 (file)
@@ -90,6 +90,7 @@ EXTRA_DIST = \
        addressable.stderr.exp addressable.stdout.exp addressable.vgtest \
        atomic_incs.stderr.exp atomic_incs.vgtest \
        atomic_incs.stdout.exp-32bit atomic_incs.stdout.exp-64bit \
+       atomic_incs.stdout.exp-64bit-and-128bit \
        badaddrvalue.stderr.exp \
        badaddrvalue.stdout.exp badaddrvalue.vgtest \
         exit_on_first_error.stderr.exp \
index f931750f474221de692b9eb1017bff20f43ee359..1c738c530d0563ae051f4e4e7156b02d2c7828ac 100644 (file)
 #define NNN 3456987
 
 #define IS_8_ALIGNED(_ptr)   (0 == (((unsigned long)(_ptr)) & 7))
+#define IS_16_ALIGNED(_ptr)  (0 == (((unsigned long)(_ptr)) & 15))
+
+// U128 from libvex_basictypes.h is a 4-x-UInt array, which is a bit
+// inconvenient, hence:
+typedef
+   struct {
+      // assuming little-endianness
+      unsigned long long int lo64;  // first member, so at the lower address
+      unsigned long long int hi64;  // second member; main() asserts sizeof(MyU128) == 16
+   }
+   MyU128;
 
 
 __attribute__((noinline)) void atomic_add_8bit ( char* p, int n ) 
@@ -712,6 +723,40 @@ __attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n )
 #endif
 }
 
+__attribute__((noinline)) void atomic_add_128bit ( MyU128* p,
+                                                   unsigned long long int n )
+{  // Add n to the 128-bit value at *p using an LDXP/STXP retry loop; only arm64 has an implementation.
+#if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_mips32) \
+    || defined (VGA_nanomips) || defined(VGA_mips64) \
+    || defined(VGA_amd64) \
+    || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
+    || defined(VGA_arm) \
+    || defined(VGA_s390x)
+   /* do nothing; is not supported */
+#elif defined(VGA_arm64)
+   unsigned long long int block[3]  // [0] = p, [1] = n, [2] = store-exclusive status
+      = { (unsigned long long int)p, (unsigned long long int)n,
+          0xFFFFFFFFFFFFFFFFULL};  // status preset nonzero; the asm overwrites it on every pass
+   do {
+      __asm__ __volatile__(
+         "mov   x5, %0"             "\n\t" // &block[0]
+         "ldr   x9, [x5, #0]"       "\n\t" // p
+         "ldr   x10, [x5, #8]"      "\n\t" // n
+         "ldxp  x7, x8, [x9]"       "\n\t" // exclusive load: x7 = doubleword at p, x8 = doubleword at p+8
+         "adds  x7, x7, x10"        "\n\t" // low half += n, setting the carry flag
+         "adc   x8, x8, xzr"        "\n\t" // high half += carry
+         "stxp  w4, x7, x8, [x9]"   "\n\t" // exclusive store of the pair; w4 = status (0 means it succeeded)
+         "str   x4, [x5, #16]"      "\n\t" // block[2] = status
+         : /*out*/
+         : /*in*/ "r"(&block[0])
+         : /*trash*/ "memory", "cc", "x5", "x7", "x8", "x9", "x10", "x4"
+      );
+   } while (block[2] != 0);  // retry until the store-exclusive reports success
+#else
+# error "Unsupported arch"
+#endif
+}
+
 int main ( int argc, char** argv )
 {
    int    i, status;
@@ -720,8 +765,12 @@ int main ( int argc, char** argv )
    short* p16;
    int*   p32;
    long long int* p64;
+   MyU128*  p128;
    pid_t  child, p2;
 
+   assert(sizeof(MyU128) == 16);
+   assert(sysconf(_SC_PAGESIZE) >= 4096);
+
    printf("parent, pre-fork\n");
 
    page = mmap( 0, sysconf(_SC_PAGESIZE),
@@ -736,11 +785,13 @@ int main ( int argc, char** argv )
    p16 = (short*)(page+256);
    p32 = (int*)(page+512);
    p64 = (long long int*)(page+768);
+   p128 = (MyU128*)(page+1024);
 
    assert( IS_8_ALIGNED(p8) );
    assert( IS_8_ALIGNED(p16) );
    assert( IS_8_ALIGNED(p32) );
    assert( IS_8_ALIGNED(p64) );
+   assert( IS_16_ALIGNED(p128) );
 
    memset(page, 0, 1024);
 
@@ -748,6 +799,7 @@ int main ( int argc, char** argv )
    *p16 = 0;
    *p32 = 0;
    *p64 = 0;
+   p128->lo64 = p128->hi64 = 0;
 
    child = fork();
    if (child == -1) {
@@ -763,6 +815,7 @@ int main ( int argc, char** argv )
          atomic_add_16bit(p16, 1);
          atomic_add_32bit(p32, 1);
          atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
+         atomic_add_128bit(p128, 0x1000000013374771ULL); // ditto re upper 64
       }
       return 1;
       /* NOTREACHED */
@@ -778,6 +831,7 @@ int main ( int argc, char** argv )
       atomic_add_16bit(p16, 1);
       atomic_add_32bit(p32, 1);
       atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
+      atomic_add_128bit(p128, 0x1000000013374771ULL); // ditto re upper 64
    }
 
    p2 = waitpid(child, &status, 0);
@@ -788,11 +842,17 @@ int main ( int argc, char** argv )
 
    printf("FINAL VALUES:  8 bit %d,  16 bit %d,  32 bit %d,  64 bit %lld\n",
           (int)(*(signed char*)p8), (int)(*p16), *p32, *p64 );
+   printf("               128 bit 0x%016llx:0x%016llx\n",
+          p128->hi64, p128->lo64);
 
    if (-74 == (int)(*(signed char*)p8) 
        && 32694 == (int)(*p16) 
        && 6913974 == *p32
-       && (0LL == *p64 || 682858642110LL == *p64)) {
+       && (0LL == *p64 || 682858642110LL == *p64)
+       && ((0 == p128->hi64 && 0 == p128->lo64)
+           || (0x00000000000697fb == p128->hi64
+               && 0x6007eb426316d956ULL == p128->lo64))
+      ) {
       printf("PASS\n");
    } else {
       printf("FAIL -- see source code for expected values\n");
index c5b8781e55f8067bde1463751188793d3673186e..55e5044b55f88bc40c772dd10f7224ac4c60ea13 100644 (file)
@@ -3,5 +3,6 @@ child
 parent, pre-fork
 parent
 FINAL VALUES:  8 bit -74,  16 bit 32694,  32 bit 6913974,  64 bit 0
+               128 bit 0x0000000000000000:0x0000000000000000
 PASS
 parent exits
index 82405c52096ed7b60628c551e13975af8b5425d1..ca2f4fc9700cccd16caaba7d8d0ec86b23dbb557 100644 (file)
@@ -3,5 +3,6 @@ child
 parent, pre-fork
 parent
 FINAL VALUES:  8 bit -74,  16 bit 32694,  32 bit 6913974,  64 bit 682858642110
+               128 bit 0x0000000000000000:0x0000000000000000
 PASS
 parent exits
diff --git a/memcheck/tests/atomic_incs.stdout.exp-64bit-and-128bit b/memcheck/tests/atomic_incs.stdout.exp-64bit-and-128bit
new file mode 100644 (file)
index 0000000..ef65809
--- /dev/null
@@ -0,0 +1,8 @@
+parent, pre-fork
+child
+parent, pre-fork
+parent
+FINAL VALUES:  8 bit -74,  16 bit 32694,  32 bit 6913974,  64 bit 682858642110
+               128 bit 0x00000000000697fb:0x6007eb426316d956
+PASS
+parent exits
index 00cbfa52c1ee9a4f86b3c9507a9e2264038e0443..9efb49b276cd4e00d8b4b74e1f7e82d1507f2c99 100644 (file)
@@ -12,7 +12,10 @@ EXTRA_DIST = \
        atomics_v81.stdout.exp atomics_v81.stderr.exp atomics_v81.vgtest \
        simd_v81.stdout.exp simd_v81.stderr.exp simd_v81.vgtest \
         fmadd_sub.stdout.exp fmadd_sub.stderr.exp fmadd_sub.vgtest \
-       fp_and_simd_v82.stdout.exp fp_and_simd_v82.stderr.exp fp_and_simd_v82.vgtest
+       fp_and_simd_v82.stdout.exp fp_and_simd_v82.stderr.exp \
+       fp_and_simd_v82.vgtest \
+       ldxp_stxp.stdout.exp ldxp_stxp.stderr.exp \
+       ldxp_stxp_basisimpl.vgtest ldxp_stxp_fallbackimpl.vgtest
 
 check_PROGRAMS = \
        allexec \
@@ -20,7 +23,8 @@ check_PROGRAMS = \
        fp_and_simd \
        integer \
        memory \
-       fmadd_sub
+       fmadd_sub \
+       ldxp_stxp
 
 if BUILD_ARMV8_CRC_TESTS
   check_PROGRAMS += crc32
diff --git a/none/tests/arm64/ldxp_stxp.c b/none/tests/arm64/ldxp_stxp.c
new file mode 100644 (file)
index 0000000..b5f6ea1
--- /dev/null
@@ -0,0 +1,93 @@
+
+/* Note, this is only a basic smoke test of LD{A}XP and ST{L}XP.  Their
+   atomicity properties are tested by memcheck/tests/atomic_incs.c. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <malloc.h>
+#include <assert.h>
+
+typedef  unsigned int            UInt;
+typedef  unsigned long long int  ULong;
+
+
+void initBlock ( ULong* block )  // Fill block[0..5] with a fixed, recognisable pattern.
+{
+   block[0] = 0x0001020304050607ULL;  // every byte of word i has i as its high nibble,
+   block[1] = 0x1011121314151617ULL;  // so each word can be told apart in the printed output
+   block[2] = 0x2021222324252627ULL;
+   block[3] = 0x3031323334353637ULL;
+   block[4] = 0x4041424344454647ULL;
+   block[5] = 0x5051525354555657ULL;
+}
+
+void printBlock ( const char* who,
+                  ULong* block, ULong rt1contents, ULong rt2contents,
+                  UInt zeroIfSuccess )  // Print a labelled dump of the block and the two register values.
+{
+   printf("Block %s (%s)\n", who, zeroIfSuccess == 0 ? "success" : "FAILURE" );
+   for (int i = 0; i < 6; i++) {  // 6 == number of words written by initBlock
+      printf("0x%016llx\n", block[i]);
+   }
+   printf("0x%016llx rt1contents\n", rt1contents);
+   printf("0x%016llx rt2contents\n", rt2contents);
+   printf("\n");
+}
+
+int main ( void )  // Run one ldxp/stxp pair with x-registers and one with w-registers, printing the results.
+{
+   ULong* block = memalign(16, 6 * sizeof(ULong));  // 16-byte-aligned buffer of 6 ULongs
+   assert(block);
+
+   ULong rt1in, rt2in, rt1out, rt2out;
+   UInt scRes;
+
+   // Do ldxp then stxp with x-registers
+   initBlock(block);
+   rt1in  = 0x5555666677778888ULL;
+   rt2in  = 0xAAAA9999BBBB0000ULL;
+   rt1out = 0x1111222233334444ULL;  // placeholder values; the ldxp below overwrites rt1out/rt2out
+   rt2out = 0xFFFFEEEEDDDDCCCCULL;
+   scRes  = 0x55555555;  // nonzero placeholder; the stxp below writes its status here
+   __asm__ __volatile__(
+      "ldxp %1, %2, [%5]"       "\n\t"  // rt1out, rt2out <- block[2], block[3]
+      "stxp %w0, %3, %4, [%5]"  "\n\t"  // block[2], block[3] <- rt1in, rt2in; status -> scRes
+      : /*OUT*/
+        "=&r"(scRes),  // %0
+        "=&r"(rt1out), // %1
+        "=&r"(rt2out)  // %2
+      : /*IN*/
+        "r"(rt1in),    // %3
+        "r"(rt2in),    // %4
+        "r"(&block[2]) // %5
+      : /*TRASH*/
+        "memory","cc"
+   );
+   printBlock("after ldxp/stxp 2x64-bit", block, rt1out, rt2out, scRes);
+
+   // Do ldxp then stxp with w-registers
+   initBlock(block);
+   rt1in  = 0x5555666677778888ULL;
+   rt2in  = 0xAAAA9999BBBB0000ULL;
+   rt1out = 0x1111222233334444ULL;
+   rt2out = 0xFFFFEEEEDDDDCCCCULL;
+   scRes  = 0x55555555;
+   __asm__ __volatile__(
+      "ldxp %w1, %w2, [%5]"       "\n\t"  // 32-bit pair: the expected output shows the two halves of block[2]
+      "stxp %w0, %w3, %w4, [%5]"  "\n\t"  // 32-bit pair: the expected output shows only block[2] changing
+      : /*OUT*/
+        "=&r"(scRes),  // %0
+        "=&r"(rt1out), // %1
+        "=&r"(rt2out)  // %2
+      : /*IN*/
+        "r"(rt1in),    // %3
+        "r"(rt2in),    // %4
+        "r"(&block[2]) // %5
+      : /*TRASH*/
+        "memory","cc"
+   );
+   printBlock("after ldxp/stxp 2x32-bit", block, rt1out, rt2out, scRes);
+
+   free(block);
+   return 0;
+}
diff --git a/none/tests/arm64/ldxp_stxp_basisimpl.stderr.exp b/none/tests/arm64/ldxp_stxp_basisimpl.stderr.exp
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/none/tests/arm64/ldxp_stxp_basisimpl.stdout.exp b/none/tests/arm64/ldxp_stxp_basisimpl.stdout.exp
new file mode 100644 (file)
index 0000000..f269ecd
--- /dev/null
@@ -0,0 +1,20 @@
+Block after ldxp/stxp 2x64-bit (success)
+0x0001020304050607
+0x1011121314151617
+0x5555666677778888
+0xaaaa9999bbbb0000
+0x4041424344454647
+0x5051525354555657
+0x2021222324252627 rt1contents
+0x3031323334353637 rt2contents
+
+Block after ldxp/stxp 2x32-bit (success)
+0x0001020304050607
+0x1011121314151617
+0xbbbb000077778888
+0x3031323334353637
+0x4041424344454647
+0x5051525354555657
+0x0000000024252627 rt1contents
+0x0000000020212223 rt2contents
+
diff --git a/none/tests/arm64/ldxp_stxp_basisimpl.vgtest b/none/tests/arm64/ldxp_stxp_basisimpl.vgtest
new file mode 100644 (file)
index 0000000..2913372
--- /dev/null
@@ -0,0 +1,2 @@
+prog: ldxp_stxp
+vgopts: -q
diff --git a/none/tests/arm64/ldxp_stxp_fallbackimpl.stderr.exp b/none/tests/arm64/ldxp_stxp_fallbackimpl.stderr.exp
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/none/tests/arm64/ldxp_stxp_fallbackimpl.stdout.exp b/none/tests/arm64/ldxp_stxp_fallbackimpl.stdout.exp
new file mode 100644 (file)
index 0000000..f269ecd
--- /dev/null
@@ -0,0 +1,20 @@
+Block after ldxp/stxp 2x64-bit (success)
+0x0001020304050607
+0x1011121314151617
+0x5555666677778888
+0xaaaa9999bbbb0000
+0x4041424344454647
+0x5051525354555657
+0x2021222324252627 rt1contents
+0x3031323334353637 rt2contents
+
+Block after ldxp/stxp 2x32-bit (success)
+0x0001020304050607
+0x1011121314151617
+0xbbbb000077778888
+0x3031323334353637
+0x4041424344454647
+0x5051525354555657
+0x0000000024252627 rt1contents
+0x0000000020212223 rt2contents
+
diff --git a/none/tests/arm64/ldxp_stxp_fallbackimpl.vgtest b/none/tests/arm64/ldxp_stxp_fallbackimpl.vgtest
new file mode 100644 (file)
index 0000000..474282a
--- /dev/null
@@ -0,0 +1,2 @@
+prog: ldxp_stxp
+vgopts: -q --sim-hints=fallback-llsc