git.ipfire.org Git - thirdparty/valgrind.git/commitdiff
Bug 418702 - ARMv8.1 Paired register compare-and-swap instructions are not supported.
author Julian Seward <jseward@acm.org>
Wed, 11 Mar 2020 14:05:02 +0000 (15:05 +0100)
committer Julian Seward <jseward@acm.org>
Wed, 11 Mar 2020 14:05:02 +0000 (15:05 +0100)
Implementation only; tests to follow.
Patch by Assad Hashmi <assad.hashmi@linaro.org>.

VEX/priv/guest_arm64_toIR.c
VEX/priv/host_arm64_defs.c
VEX/priv/host_arm64_defs.h
VEX/priv/host_arm64_isel.c

index c8bfd1888fef43dc936d0d8bb8225dbd27c6a4bf..fe80e593c4ec84676efe465b435f77061b6fda7f 100644 (file)
@@ -6907,6 +6907,53 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
       return True;
    }
 
+   /* ---------------- ARMv8.1-LSE: Compare-and-Swap Pair --------------- */
+   /* 31 30 29      22 21 20 15 14    9 4
+      0  sz 0010000 A  1  s  R  11111 n t CASP{,A}{,L} <Rs>, <Rt>, [<Xn|SP>]
+   */
+   if (INSN(31,31) == 0
+       && INSN(29,23) == BITS7(0,0,1,0,0,0,0)
+       && INSN(21,21) == 1
+       && INSN(14,10) == BITS5(1,1,1,1,1)) {
+      UInt wide    = INSN(30,30);      /* sz bit: 1 selects 64-bit values */
+      Bool acquire = INSN(22,22) == 1; /* the A bit */
+      Bool release = INSN(15,15) == 1; /* the R bit */
+      UInt rs      = INSN(20,16);
+      UInt rn      = INSN(9,5);
+      UInt rt      = INSN(4,0);
+
+      /* An odd Rs or Rt is undefined: translate nothing and fall through. */
+      if (((rs | rt) & 0x1) == 0) {
+         IRType  ty       = wide ? Ity_I64 : Ity_I32;
+         IRExpr* expectLo = getIRegOrZR(wide, rs);
+         IRExpr* expectHi = getIRegOrZR(wide, rs + 1);
+         IRExpr* storeLo  = getIRegOrZR(wide, rt);
+         IRExpr* storeHi  = getIRegOrZR(wide, rt + 1);
+         IRTemp  seenLo   = newTemp(ty);
+         IRTemp  seenHi   = newTemp(ty);
+
+         if (acquire)
+            stmt(IRStmt_MBE(Imbe_Fence));
+
+         stmt( IRStmt_CAS(mkIRCAS(seenHi, seenLo, Iend_LE,
+                                  getIReg64orSP(rn),
+                                  expectHi, expectLo,
+                                  storeHi, storeLo)) );
+
+         if (release)
+            stmt(IRStmt_MBE(Imbe_Fence));
+
+         putIRegOrZR(wide, rs, mkexpr(seenLo));
+         putIRegOrZR(wide, rs + 1, mkexpr(seenHi));
+         DIP("casp%s%s %s, %s, %s, %s, [%s]\n",
+             acquire ? "a" : "", release ? "l" : "",
+             nameIRegOrZR(wide, rs), nameIRegOrZR(wide, rs + 1),
+             nameIRegOrZR(wide, rt), nameIRegOrZR(wide, rt + 1),
+             nameIReg64orSP(rn));
+         return True;
+      }
+   }
+
    if (sigill_diag) {
       vex_printf("ARM64 front end: load_store\n");
    }
index 13a61b0bd4c98a5b7bb4d12418a03a484c3b1991..e4ef5698683df260efccd97e53050c3f7fbf5cbf 100644 (file)
@@ -1020,6 +1020,13 @@ ARM64Instr* ARM64Instr_CAS ( Int szB ) {
    vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
    return i;
 }
+ARM64Instr* ARM64Instr_CASP ( Int szB ) {
+   ARM64Instr* insn = LibVEX_Alloc_inline(sizeof(*insn));
+   insn->tag = ARM64in_CASP;       /* paired compare-and-swap */
+   insn->ARM64in.CASP.szB = szB;   /* only 4 and 8 are accepted */
+   vassert(szB == 4 || szB == 8);
+   return insn;
+}
 ARM64Instr* ARM64Instr_MFence ( void ) {
    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    i->tag        = ARM64in_MFence;
@@ -1593,6 +1600,10 @@ void ppARM64Instr ( const ARM64Instr* i ) {
          vex_printf("x1 = cas(%dbit)(x3, x5 -> x7)", 8 * i->ARM64in.CAS.szB);
          return;
       }
+      case ARM64in_CASP: { /* register names are hard-wired; only the width varies */
+         vex_printf("x0,x1 = casp(%dbit)(x2, x4,x5 -> x6,x7)", 8 * i->ARM64in.CASP.szB);
+         return;
+      }
       case ARM64in_MFence:
          vex_printf("(mfence) dsb sy; dmb sy; isb");
          return;
@@ -2102,6 +2113,17 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
          /* Pointless to state this since X8 is not available to RA. */
          addHRegUse(u, HRmWrite, hregARM64_X8());
          break;
+      case ARM64in_CASP: /* fixed real registers, as used by the emitted sequence */
+         addHRegUse(u, HRmRead, hregARM64_X2());  /* memory address */
+         addHRegUse(u, HRmRead, hregARM64_X4());  /* expected value, lo */
+         addHRegUse(u, HRmRead, hregARM64_X5());  /* expected value, hi */
+         addHRegUse(u, HRmRead, hregARM64_X6());  /* new value, lo */
+         addHRegUse(u, HRmRead, hregARM64_X7());  /* new value, hi */
+         addHRegUse(u, HRmWrite, hregARM64_X0()); /* value loaded, lo */
+         addHRegUse(u, HRmWrite, hregARM64_X1()); /* value loaded, hi */
+         addHRegUse(u, HRmWrite, hregARM64_X9()); /* scratch */
+         addHRegUse(u, HRmWrite, hregARM64_X8()); /* scratch */
+         break;
       case ARM64in_MFence:
          return;
       case ARM64in_ClrEX:
@@ -2372,6 +2394,8 @@ void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
          return;
       case ARM64in_CAS:
          return;
+      case ARM64in_CASP: /* carries only szB, so there is no register to remap */
+         return;
       case ARM64in_MFence:
          return;
       case ARM64in_ClrEX:
@@ -3910,6 +3934,68 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
          *p++ = 0x35FFFF68;
          goto done;
       }
+      case ARM64in_CASP: {
+         /* Generate:
+            CASP <Xs>, <X(s+1)>, <Xt>, <X(t+1)>, [<Xn|SP>{,#0}]
+
+            Register allocation (see ARM64in_CASP in getRegUsage_ARM64Instr):
+            Xn:         memory address -> X2 (INPUT)
+            Xs, X(s+1): values to be compared with value read from address
+                        -> X4,X5 (INPUTS)
+                        -> X0,X1 (OUTPUTS) loaded from memory and compared with
+                           scratch registers X8,X9 (CLOBBERED) which contain
+                           contents of X4,X5
+            Xt, X(t+1): values to be stored to memory if X0,X1==X8,X9
+                        -> X6,X7 (INPUT)
+            STXP status goes in W8 (dead after the compares), never in X0/X1.
+
+            loop:
+              -- two of:
+              mov     x8, x4                 // AA0403E8
+              mov     x9, x5                 // AA0503E9
+              and     x8, x4, #0xFFFFFFFF    // 92407C88
+              and     x9, x5, #0xFFFFFFFF    // 92407CA9
+
+              -- one of:
+              ldxp    x0,x1, [x2]            // C87F0440
+              ldxp    w0,w1, [x2]            // 887F0440
+
+              -- always:
+              cmp     x0, x8                 // EB08001F
+              bne     out                    // 540000A1 (b.ne #20 <out>)
+              cmp     x1, x9                 // EB09003F
+              bne     out                    // 54000061 (b.ne #12 <out>)
+
+              -- one of:
+              stxp    w8, x6, x7, [x2]       // C8281C46
+              stxp    w8, w6, w7, [x2]       // 88281C46
+
+              -- always:
+              cbnz    w8, loop               // 35FFFF08 (cbnz w8, #-32 <loop>)
+            out:
+         */
+         switch (i->ARM64in.CASP.szB) {
+            case 8:  *p++ = 0xAA0403E8; *p++ = 0xAA0503E9; break;
+            case 4:  *p++ = 0x92407C88; *p++ = 0x92407CA9; break;
+            default: vassert(0);
+         }
+         switch (i->ARM64in.CASP.szB) {
+            case 8:  *p++ = 0xC87F0440; break;
+            case 4:  *p++ = 0x887F0440; break;
+            default: vassert(0);
+         }
+         *p++ = 0xEB08001F;
+         *p++ = 0x540000A1; /* 5 insns forward: lands exactly on 'out' */
+         *p++ = 0xEB09003F;
+         *p++ = 0x54000061; /* 3 insns forward: lands exactly on 'out' */
+         switch (i->ARM64in.CASP.szB) {
+            case 8:  *p++ = 0xC8281C46; break; /* status in w8, so x1 survives */
+            case 4:  *p++ = 0x88281C46; break; /* status in w8, so x1 survives */
+            default: vassert(0);
+         }
+         *p++ = 0x35FFFF08; /* 8 insns back: retry from 'loop' (the first mov/and) */
+         goto done;
+      }
       case ARM64in_MFence: {
          *p++ = 0xD5033F9F; /* DSB sy */
          *p++ = 0xD5033FBF; /* DMB sy */
index 63cf2bb7909a37e1210791e480e38ef270f8781d..05dba7ab8b2662747d3947d2f197480c40a349d6 100644 (file)
@@ -481,6 +481,7 @@ typedef
       ARM64in_LdrEX,
       ARM64in_StrEX,
       ARM64in_CAS,
+      ARM64in_CASP,
       ARM64in_MFence,
       ARM64in_ClrEX,
       /* ARM64in_V*: scalar ops involving vector registers */
@@ -700,6 +701,9 @@ typedef
          struct {
             Int szB; /* 1, 2, 4 or 8 */
          } CAS;
+         struct {
+            Int szB; /* 4 or 8 */
+         } CASP;
          /* Mem fence.  An insn which fences all loads and stores as
             much as possible before continuing.  On ARM64 we emit the
             sequence "dsb sy ; dmb sy ; isb sy", which is probably
@@ -946,6 +950,7 @@ extern ARM64Instr* ARM64Instr_Mul     ( HReg dst, HReg argL, HReg argR,
 extern ARM64Instr* ARM64Instr_LdrEX   ( Int szB );
 extern ARM64Instr* ARM64Instr_StrEX   ( Int szB );
 extern ARM64Instr* ARM64Instr_CAS     ( Int szB );
+extern ARM64Instr* ARM64Instr_CASP    ( Int szB );
 extern ARM64Instr* ARM64Instr_MFence  ( void );
 extern ARM64Instr* ARM64Instr_ClrEX   ( void );
 extern ARM64Instr* ARM64Instr_VLdStH  ( Bool isLoad, HReg sD, HReg rN,
index d19b19fcf8bfd1d53f6a5d0189ebe36f9b639f4b..2f19eab8146d8f3dae3afb61da02cae8850d8849 100644 (file)
@@ -4003,6 +4003,51 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt )
          addInstr(env, ARM64Instr_MovI(rOld, rResult));
          return;
       }
+      else {
+         /* Paired register CAS, i.e. CASP */
+         IRCAS* pair   = stmt->Ist.CAS.details;
+         IRType elemTy = typeOfIRExpr(env->type_env, pair->dataLo);
+         if (elemTy != Ity_I64 && elemTy != Ity_I32)
+            goto unhandled_cas;
+         UChar nBytes = (elemTy == Ity_I64) ? 8 : 4;
+
+         /* Compute the operands into registers: the address, then the
+            expected pair, then the replacement pair. */
+         HReg addrReg  = iselIntExpr_R(env, pair->addr);
+         HReg expLoReg = iselIntExpr_R(env, pair->expdLo);
+         vassert(pair->expdHi != NULL);
+         HReg expHiReg = iselIntExpr_R(env, pair->expdHi);
+         HReg newLoReg = iselIntExpr_R(env, pair->dataLo);
+         vassert(pair->dataHi != NULL);
+         HReg newHiReg = iselIntExpr_R(env, pair->dataHi);
+
+         /* ARM64in_CASP names no operands, so park everything in the fixed
+            real registers: X2 address, X4,X5 expected, X6,X7 new. */
+         addInstr(env, ARM64Instr_MovI(hregARM64_X2(), addrReg));
+         addInstr(env, ARM64Instr_MovI(hregARM64_X4(), expLoReg));
+         addInstr(env, ARM64Instr_MovI(hregARM64_X5(), expHiReg));
+         addInstr(env, ARM64Instr_MovI(hregARM64_X6(), newLoReg));
+         addInstr(env, ARM64Instr_MovI(hregARM64_X7(), newHiReg));
+         addInstr(env, ARM64Instr_CASP(nBytes));
+
+         /* The results are picked up from X0 (lo) and X1 (hi); the 4-byte
+            case passes each one through widen_z_32_to_64 first. */
+         HReg gotLo = hregARM64_X0();
+         HReg gotHi = hregARM64_X1();
+         if (nBytes == 4) {
+            gotLo = widen_z_32_to_64(env, gotLo);
+            gotHi = widen_z_32_to_64(env, gotHi);
+         } else {
+            vassert(nBytes == 8);
+         }
+
+         /* Copy both halves into the registers backing oldLo and oldHi. */
+         HReg dstLo = lookupIRTemp(env, pair->oldLo);
+         HReg dstHi = lookupIRTemp(env, pair->oldHi);
+         addInstr(env, ARM64Instr_MovI(dstLo, gotLo));
+         addInstr(env, ARM64Instr_MovI(dstHi, gotHi));
+         return;
+      }
       unhandled_cas:
       break;
    }