git.ipfire.org Git - thirdparty/valgrind.git/commitdiff
Bug 415136 - ARMv8.1 Compare-and-Swap instructions are not supported.
author Julian Seward <jseward@acm.org>
Mon, 9 Mar 2020 08:18:09 +0000 (09:18 +0100)
committer Julian Seward <jseward@acm.org>
Mon, 9 Mar 2020 08:20:27 +0000 (09:20 +0100)
This commit implements ARMv8.1 CAS instructions.  It does not contain
test cases; those will be in a subsequent commit.
Patch by Assad Hashmi <assad.hashmi@linaro.org>.

VEX/priv/guest_arm64_toIR.c
VEX/priv/host_arm64_defs.c
VEX/priv/host_arm64_defs.h
VEX/priv/host_arm64_isel.c

index aae3c6f52b2139a8cabf96a79fc2154a90e26a67..c8bfd1888fef43dc936d0d8bb8225dbd27c6a4bf 100644 (file)
@@ -6864,6 +6864,49 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
       return True;
    }
 
+   /* ------------------ ARMv8.1-LSE: Compare-and-Swap ------------------ */
+   /* 31 29      22 21 20 15 14    9 4
+      sz 0010001 A  1  s  R  11111 n t CAS{,A}{,L}<sz> <Rs>, <Rt>, [<Xn|SP>]
+   */
+   if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
+       && INSN(21,21) == 1
+       && INSN(14,10) == BITS5(1,1,1,1,1)) {
+      UInt szBlg2 = INSN(31,30);
+      Bool isAcq = INSN(22,22) == 1;
+      Bool isRel = INSN(15,15) == 1;
+      UInt ss  = INSN(20,16);
+      UInt nn  = INSN(9,5);
+      UInt tt  = INSN(4,0);
+
+      const HChar* suffix[4] = { "b", "h", "", "" };
+
+      UInt  szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
+      IRType ty = integerIRTypeOfSize(szB);
+      Bool is64 = szB == 8;
+
+      IRExpr *exp = narrowFrom64(ty, getIReg64orZR(ss));
+      IRExpr *new = narrowFrom64(ty, getIReg64orZR(tt));
+
+      if (isAcq)
+         stmt(IRStmt_MBE(Imbe_Fence));
+
+      // Store the result back if LHS remains unchanged in memory.
+      IRTemp old = newTemp(ty);
+      stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old,
+                               Iend_LE, getIReg64orSP(nn),
+                               /*expdHi*/NULL, exp,
+                               /*dataHi*/NULL, new)) );
+
+      if (isRel)
+         stmt(IRStmt_MBE(Imbe_Fence));
+
+      putIReg64orZR(ss, widenUto64(ty, mkexpr(old)));
+      DIP("cas%s%s%s %s, %s, [%s]\n",
+          isAcq ? "a" : "", isRel ? "l" : "", suffix[szBlg2],
+          nameIRegOrZR(is64, ss), nameIRegOrZR(is64, tt), nameIReg64orSP(nn));
+      return True;
+   }
+
    if (sigill_diag) {
       vex_printf("ARM64 front end: load_store\n");
    }
index 33acae946244eab472e70a419d0c8ff7a2fcddde..13a61b0bd4c98a5b7bb4d12418a03a484c3b1991 100644 (file)
@@ -3852,9 +3852,14 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
       }
       case ARM64in_CAS: {
          /* This isn't simple.  For an explanation see the comment in
-            host_arm64_defs.h on the the definition of ARM64Instr case
-            CAS. */
+            host_arm64_defs.h on the definition of ARM64Instr case CAS.
+
+            NOTE: We could place "loop:" after mov/and but then we need
+                  an additional scratch register.
+         */
          /* Generate:
+
+            loop:
               -- one of:
               mov     x8, x5                 // AA0503E8
               and     x8, x5, #0xFFFFFFFF    // 92407CA8
@@ -3872,13 +3877,13 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
               bne     out                    // 54000061
 
               -- one of:
-              stxr    w1, x7, [x3]           // C8017C67
-              stxr    w1, w7, [x3]           // 88017C67
-              stxrh   w1, w7, [x3]           // 48017C67
-              stxrb   w1, w7, [x3]           // 08017C67
+              stxr    w8, x7, [x3]           // C8087C67
+              stxr    w8, w7, [x3]           // 88087C67
+              stxrh   w8, w7, [x3]           // 48087C67
+              stxrb   w8, w7, [x3]           // 08087C67
 
               -- always:
-              eor     x1, x5, x1             // CA0100A1
+              cbnz    w8, loop               // 35FFFF68
             out:
          */
          switch (i->ARM64in.CAS.szB) {
@@ -3897,12 +3902,12 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
          *p++ = 0xEB08003F;
          *p++ = 0x54000061;
          switch (i->ARM64in.CAS.szB) {
-            case 8:  *p++ = 0xC8017C67; break;
-            case 4:  *p++ = 0x88017C67; break;
-            case 2:  *p++ = 0x48017C67; break;
-            case 1:  *p++ = 0x08017C67; break;
+            case 8:  *p++ = 0xC8087C67; break;
+            case 4:  *p++ = 0x88087C67; break;
+            case 2:  *p++ = 0x48087C67; break;
+            case 1:  *p++ = 0x08087C67; break;
          }
-         *p++ = 0xCA0100A1;
+         *p++ = 0x35FFFF68;
          goto done;
       }
       case ARM64in_MFence: {
index db500565d63f4de24685cbd79152918ba099f5d6..63cf2bb7909a37e1210791e480e38ef270f8781d 100644 (file)
@@ -679,6 +679,7 @@ typedef
             Uses x8 as scratch (but that's not allocatable).
             Hence: RD x3, x5, x7; WR x1
 
+           loop:
             (szB=8)  mov  x8, x5
             (szB=4)  and  x8, x5, #0xFFFFFFFF
             (szB=2)  and  x8, x5, #0xFFFF
@@ -690,10 +691,10 @@ typedef
             bne     after
             -- if branch taken, failure; x1[8*szB-1 : 0] holds old value
             -- attempt to store
-            stxr    w1, x7, [x3]
+            stxr    w8, x7, [x3]
             -- if store successful, w8==0; fall through, x1 holds the old value
-            -- if store failed,     x1==1, so the eor makes x1 != x5
-            eor     x1, x5, x1
+            -- if store failed,     branch back and try again.
+            cbnz    w8, loop
            after:
          */
          struct {
index 4fa50b981929342739507da33d5b7c193de55ee6..d19b19fcf8bfd1d53f6a5d0189ebe36f9b639f4b 100644 (file)
@@ -3988,14 +3988,7 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt )
          addInstr(env, ARM64Instr_CAS(sz));
          /* Now we have the lowest szB bytes of x1 are either equal to
             the lowest szB bytes of x5, indicating success, or they
-            aren't, indicating failure.  The IR semantics actually
-            require us to return the old value at the location,
-            regardless of success or failure, but in the case of
-            failure it's not clear how to do this, since
-            ARM64Instr_CAS can't provide that.  Instead we'll just
-            return the relevant bit of x1, since that's at least
-            guaranteed to be different from the lowest bits of x5 on
-            failure. */
+            aren't, indicating failure. */
          HReg rResult = hregARM64_X1();
          switch (sz) {
             case 8:  break;