More arm64 isel tuning: create {and,orr,eor,add,sub} reg,reg,reg-shifted-by-imm
author     Julian Seward <jseward@acm.org>
           Sat, 2 Jan 2021 16:18:53 +0000 (17:18 +0100)
committer  Julian Seward <jseward@acm.org>
           Sat, 2 Jan 2021 16:18:53 +0000 (17:18 +0100)
Thus far the arm64 isel can't generate instructions of the form

   {and,orr,eor,add,sub} reg,reg,reg-shifted-by-imm

and hence sometimes winds up generating pairs like

   lsl x2, x1, #13 ; orr x4, x3, x2

when instead it could just have generated

   orr x4, x3, x1, lsl #13

This commit fixes that, although only for the 64-bit case, not the 32-bit
case.  Specifically, it can transform the IR forms

  {Add,Sub,And,Or,Xor}(E1, {Shl,Shr,Sar}(E2, immediate))   and
  {Add,And,Or,Xor}({Shl,Shr,Sar}(E1, immediate), E2)

into a single arm64 instruction.  Note that `Sub` is not included in the
second line, because shifting the first operand requires inverting the arg
order in the arm64 instruction, which isn't allowable with `Sub`, since it's
not commutative and arm64 doesn't offer us a reverse-subtract instruction to
use instead.

This gives a 1.1% reduction in generated code size when running
/usr/bin/date on Memcheck.
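
As a sanity check on the new encoding path (not part of the patch): the
instruction word layout used by the new X_8_2_1_5_6_5_5 helper is
opcode(8) | shift-type(2) | N(1) | Rm(5) | imm6(6) | Rn(5) | Rd(5).  A
minimal standalone C sketch of that packing, using the ORR opcode byte
(X10101010) taken from the patch:

   #include <assert.h>
   #include <stdint.h>
   #include <stdio.h>

   /* Re-statement of the patch's X_8_2_1_5_6_5_5 field packer. */
   static uint32_t pack_8_2_1_5_6_5_5(uint32_t f1, uint32_t f2, uint32_t f3,
                                      uint32_t f4, uint32_t f5, uint32_t f6,
                                      uint32_t f7)
   {
      assert(f1 < (1u<<8) && f2 < (1u<<2) && f3 < (1u<<1));
      assert(f4 < (1u<<5) && f5 < (1u<<6) && f6 < (1u<<5) && f7 < (1u<<5));
      uint32_t w = 0;
      w = (w << 8) | f1;   /* opcode byte, e.g. 10101010 for 64-bit ORR */
      w = (w << 2) | f2;   /* shift type: 00=LSL, 01=LSR, 10=ASR */
      w = (w << 1) | f3;   /* N bit; 0 for the non-inverted forms used here */
      w = (w << 5) | f4;   /* Rm: the register that gets shifted */
      w = (w << 6) | f5;   /* imm6: shift amount, 1 .. 63 */
      w = (w << 5) | f6;   /* Rn: the unshifted register */
      w = (w << 5) | f7;   /* Rd: destination */
      return w;
   }

   int main(void)
   {
      /* orr x4, x3, x1, lsl #13 */
      printf("0x%08x\n", pack_8_2_1_5_6_5_5(0xAA, 0, 0, 1, 13, 3, 4));
      /* prints 0xaa013464 */
      return 0;
   }

The field names and opcode constant are those of the patch; the resulting
word can be cross-checked with any AArch64 disassembler.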

VEX/priv/host_arm64_defs.c
VEX/priv/host_arm64_defs.h
VEX/priv/host_arm64_isel.c

diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c
index 6ea67ef3193031859de24db994ec3c8a8fae31ef..67dd06c78af9766aefc9f8b53498bafa206e64e7 100644
@@ -498,6 +498,17 @@ static const HChar* showARM64ShiftOp ( ARM64ShiftOp op ) {
    }
 }
 
+static const HChar* showARM64RRSOp ( ARM64RRSOp op ) {
+   switch (op) {
+      case ARM64rrs_ADD: return "add";
+      case ARM64rrs_SUB: return "sub";
+      case ARM64rrs_AND: return "and";
+      case ARM64rrs_OR:  return "orr";
+      case ARM64rrs_XOR: return "eor";
+      default: vpanic("showARM64RRSOp");
+   }
+}
+
 static const HChar* showARM64UnaryOp ( ARM64UnaryOp op ) {
    switch (op) {
       case ARM64un_NEG: return "neg";
@@ -858,6 +869,20 @@ ARM64Instr* ARM64Instr_Logic ( HReg dst,
    i->ARM64in.Logic.op    = op;
    return i;
 }
+ARM64Instr* ARM64Instr_RRS ( HReg dst, HReg argL, HReg argR,
+                             ARM64ShiftOp shiftOp, UChar amt,
+                             ARM64RRSOp mainOp ) {
+   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
+   i->tag                 = ARM64in_RRS;
+   i->ARM64in.RRS.dst     = dst;
+   i->ARM64in.RRS.argL    = argL;
+   i->ARM64in.RRS.argR    = argR;
+   i->ARM64in.RRS.shiftOp = shiftOp;
+   i->ARM64in.RRS.amt     = amt;
+   i->ARM64in.RRS.mainOp  = mainOp;
+   vassert(amt >= 1 && amt <= 63);
+   return i;
+}
 ARM64Instr* ARM64Instr_Test ( HReg argL, ARM64RIL* argR ) {
    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    i->tag               = ARM64in_Test;
@@ -1446,6 +1471,16 @@ void ppARM64Instr ( const ARM64Instr* i ) {
          vex_printf(", ");
          ppARM64RIL(i->ARM64in.Logic.argR);
          return;
+      case ARM64in_RRS:
+         vex_printf("%s    ", showARM64RRSOp(i->ARM64in.RRS.mainOp));
+         ppHRegARM64(i->ARM64in.RRS.dst);
+         vex_printf(", ");
+         ppHRegARM64(i->ARM64in.RRS.argL);
+         vex_printf(", ");
+         ppHRegARM64(i->ARM64in.RRS.argR);
+         vex_printf(", %s #%u", showARM64ShiftOp(i->ARM64in.RRS.shiftOp),
+                    i->ARM64in.RRS.amt);
+         return;
       case ARM64in_Test:
          vex_printf("tst    ");
          ppHRegARM64(i->ARM64in.Test.argL);
@@ -2018,6 +2053,11 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
          addHRegUse(u, HRmRead, i->ARM64in.Logic.argL);
          addRegUsage_ARM64RIL(u, i->ARM64in.Logic.argR);
          return;
+      case ARM64in_RRS:
+         addHRegUse(u, HRmWrite, i->ARM64in.RRS.dst);
+         addHRegUse(u, HRmRead, i->ARM64in.RRS.argL);
+         addHRegUse(u, HRmRead, i->ARM64in.RRS.argR);
+         return;
       case ARM64in_Test:
          addHRegUse(u, HRmRead, i->ARM64in.Test.argL);
          addRegUsage_ARM64RIL(u, i->ARM64in.Test.argR);
@@ -2386,6 +2426,11 @@ void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
          i->ARM64in.Logic.argL = lookupHRegRemap(m, i->ARM64in.Logic.argL);
          mapRegs_ARM64RIL(m, i->ARM64in.Logic.argR);
          return;
+      case ARM64in_RRS:
+         i->ARM64in.RRS.dst = lookupHRegRemap(m, i->ARM64in.RRS.dst);
+         i->ARM64in.RRS.argL = lookupHRegRemap(m, i->ARM64in.RRS.argL);
+         i->ARM64in.RRS.argR = lookupHRegRemap(m, i->ARM64in.RRS.argR);
+         return;
       case ARM64in_Test:
          i->ARM64in.Test.argL = lookupHRegRemap(m, i->ARM64in.Test.argL);
         mapRegs_ARM64RIL(m, i->ARM64in.Test.argR);
@@ -2892,8 +2937,13 @@ static inline UInt qregEnc ( HReg r )
 #define X01110101  BITS8(0,1,1,1,0,1,0,1)
 #define X01110110  BITS8(0,1,1,1,0,1,1,0)
 #define X01110111  BITS8(0,1,1,1,0,1,1,1)
+#define X10001010  BITS8(1,0,0,0,1,0,1,0)
+#define X10001011  BITS8(1,0,0,0,1,0,1,1)
+#define X10101010  BITS8(1,0,1,0,1,0,1,0)
 #define X11000001  BITS8(1,1,0,0,0,0,0,1)
 #define X11000011  BITS8(1,1,0,0,0,0,1,1)
+#define X11001010  BITS8(1,1,0,0,1,0,1,0)
+#define X11001011  BITS8(1,1,0,0,1,0,1,1)
 #define X11010100  BITS8(1,1,0,1,0,1,0,0)
 #define X11010110  BITS8(1,1,0,1,0,1,1,0)
 #define X11011000  BITS8(1,1,0,1,1,0,0,0)
@@ -3064,7 +3114,6 @@ static inline UInt X_3_6_1_6_6_5_5 ( UInt f1, UInt f2, UInt f3,
    return w;
 }
 
-
 static inline UInt X_3_8_5_1_5_5_5 ( UInt f1, UInt f2, UInt f3, UInt f4,
                                      UInt f5, UInt f6, UInt f7 ) {
    vassert(3+8+5+1+5+5+5 == 32);
@@ -3086,6 +3135,27 @@ static inline UInt X_3_8_5_1_5_5_5 ( UInt f1, UInt f2, UInt f3, UInt f4,
    return w;
 }
 
+static inline UInt X_8_2_1_5_6_5_5 ( UInt f1, UInt f2, UInt f3, UInt f4,
+                                     UInt f5, UInt f6, UInt f7 ) {
+   vassert(8+2+1+5+6+5+5 == 32);
+   vassert(f1 < (1<<8));
+   vassert(f2 < (1<<2));
+   vassert(f3 < (1<<1));
+   vassert(f4 < (1<<5));
+   vassert(f5 < (1<<6));
+   vassert(f6 < (1<<5));
+   vassert(f7 < (1<<5));
+   UInt w = 0;
+   w = (w << 8) | f1;
+   w = (w << 2) | f2;
+   w = (w << 1) | f3;
+   w = (w << 5) | f4;
+   w = (w << 6) | f5;
+   w = (w << 5) | f6;
+   w = (w << 5) | f7;
+   return w;
+}
+
 //ZZ #define X0000  BITS4(0,0,0,0)
 //ZZ #define X0001  BITS4(0,0,0,1)
 //ZZ #define X0010  BITS4(0,0,1,0)
@@ -3543,6 +3613,31 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
          }
          goto done;
       }
+      case ARM64in_RRS: {
+         UInt top8 = 0;
+         switch (i->ARM64in.RRS.mainOp) {
+            case ARM64rrs_ADD: top8 = X10001011; break;
+            case ARM64rrs_SUB: top8 = X11001011; break;
+            case ARM64rrs_AND: top8 = X10001010; break;
+            case ARM64rrs_XOR: top8 = X11001010; break;
+            case ARM64rrs_OR:  top8 = X10101010; break;
+            default: vassert(0); /*NOTREACHED*/
+         }
+         UInt sh = 0;
+         switch (i->ARM64in.RRS.shiftOp) {
+            case ARM64sh_SHL: sh = X00; break;
+            case ARM64sh_SHR: sh = X01; break;
+            case ARM64sh_SAR: sh = X10; break;
+            default: vassert(0); /*NOTREACHED*/
+         }
+         UInt amt = i->ARM64in.RRS.amt;
+         vassert(amt >= 1 && amt <= 63);
+         *p++ = X_8_2_1_5_6_5_5(top8, sh, 0,
+                                iregEnc(i->ARM64in.RRS.argR), amt,
+                                iregEnc(i->ARM64in.RRS.argL),
+                                iregEnc(i->ARM64in.RRS.dst));
+         goto done;
+      }
       case ARM64in_Test: {
          UInt      rD   = 31; /* XZR, we are going to dump the result */
          UInt      rN   = iregEnc(i->ARM64in.Test.argL);
diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h
index 24da64e22bfa7b0da3372af98aaf56fb871475d2..105d7ce84314722718ad41e3b244d566d52f88db 100644
@@ -254,6 +254,17 @@ typedef
    }
    ARM64ShiftOp;
 
+typedef
+   enum {
+      ARM64rrs_ADD=54,
+      ARM64rrs_SUB,
+      ARM64rrs_AND,
+      ARM64rrs_OR,
+      ARM64rrs_XOR,
+      ARM64rrs_INVALID
+   }
+   ARM64RRSOp;
+
 typedef
    enum {
       ARM64un_NEG=60,
@@ -475,6 +486,7 @@ typedef
       ARM64in_Arith=1220,
       ARM64in_Cmp,
       ARM64in_Logic,
+      ARM64in_RRS,
       ARM64in_Test,
       ARM64in_Shift,
       ARM64in_Unary,
@@ -567,6 +579,15 @@ typedef
             ARM64RIL*    argR;
             ARM64LogicOp op;
          } Logic;
+         /* 64 bit AND/OR/XOR/ADD/SUB, reg, reg-with-imm-shift */
+         struct {
+            HReg         dst;
+            HReg         argL;
+            HReg         argR;
+            ARM64ShiftOp shiftOp;
+            UChar        amt; /* 1 to 63 only */
+            ARM64RRSOp   mainOp;
+         } RRS;
          /* 64 bit TST reg, reg or bimm (AND and set flags) */
          struct {
             HReg      argL;
@@ -956,6 +977,8 @@ typedef
 extern ARM64Instr* ARM64Instr_Arith   ( HReg, HReg, ARM64RIA*, Bool isAdd );
 extern ARM64Instr* ARM64Instr_Cmp     ( HReg, ARM64RIA*, Bool is64 );
 extern ARM64Instr* ARM64Instr_Logic   ( HReg, HReg, ARM64RIL*, ARM64LogicOp );
+extern ARM64Instr* ARM64Instr_RRS     ( HReg, HReg, HReg, ARM64ShiftOp,
+                                        UChar amt, ARM64RRSOp mainOp );
 extern ARM64Instr* ARM64Instr_Test    ( HReg, ARM64RIL* );
 extern ARM64Instr* ARM64Instr_Shift   ( HReg, HReg, ARM64RI6*, ARM64ShiftOp );
 extern ARM64Instr* ARM64Instr_Unary   ( HReg, HReg, ARM64UnaryOp );
diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c
index 517b7b15b8777f9521aba30313a376f4525e5893..689cdba969724d268788f2ea1dc46f12eb148842 100644
@@ -791,6 +791,94 @@ Bool doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
    partial values if necessary.
 */
 
+/* ---------------- RRS matching helper ---------------- */
+
+/* This helper matches 64-bit integer expressions of the form
+      {Add,Sub,And,Or,Xor}(E1, {Shl,Shr,Sar}(E2, immediate))
+   and
+      {Add,And,Or,Xor}({Shl,Shr,Sar}(E1, immediate), E2)
+   which is a useful thing to do because AArch64 can compute those in
+   a single instruction.
+ */
+static Bool matchesRegRegShift(/*OUT*/ARM64RRSOp* mainOp,
+                               /*OUT*/ARM64ShiftOp* shiftOp,
+                               /*OUT*/UChar* amt,
+                               /*OUT*/IRExpr** argUnshifted,
+                               /*OUT*/IRExpr** argToBeShifted,
+                               IRExpr* e)
+{
+   *mainOp         = (ARM64RRSOp)0;
+   *shiftOp        = (ARM64ShiftOp)0;
+   *amt            = 0;
+   *argUnshifted   = NULL;
+   *argToBeShifted = NULL;
+   if (e->tag != Iex_Binop) {
+      return False;
+   }
+   const IROp irMainOp = e->Iex.Binop.op;
+   Bool canSwap = True;
+   switch (irMainOp) {
+      case Iop_And64: *mainOp = ARM64rrs_AND; break;
+      case Iop_Or64:  *mainOp = ARM64rrs_OR;  break;
+      case Iop_Xor64: *mainOp = ARM64rrs_XOR; break;
+      case Iop_Add64: *mainOp = ARM64rrs_ADD; break;
+      case Iop_Sub64: *mainOp = ARM64rrs_SUB; canSwap = False; break;
+      default: return False;
+   }
+   /* The root node is OK.  Now check the right (2nd) arg. */
+   IRExpr* argL = e->Iex.Binop.arg1;
+   IRExpr* argR = e->Iex.Binop.arg2;
+
+   // This loop runs either one or two iterations.  In the first iteration, we
+   // check for a shiftable right (second) arg.  If that fails, at the end of
+   // the first iteration, the args are swapped, if that is valid, and we go
+   // round again, hence checking for a shiftable left (first) arg.
+   UInt iterNo = 1;
+   while (True) {
+      vassert(iterNo == 1 || iterNo == 2);
+      if (argR->tag == Iex_Binop) {
+         const IROp irShiftOp = argR->Iex.Binop.op;
+         if (irShiftOp == Iop_Shl64
+             || irShiftOp == Iop_Shr64 || irShiftOp == Iop_Sar64) {
+            IRExpr* argRL = argR->Iex.Binop.arg1;
+            const IRExpr* argRR = argR->Iex.Binop.arg2;
+            if (argRR->tag == Iex_Const) {
+               const IRConst* argRRconst = argRR->Iex.Const.con;
+               vassert(argRRconst->tag == Ico_U8); // due to typecheck rules
+               const UChar amount = argRRconst->Ico.U8;
+               if (amount >= 1 && amount <= 63) {
+                  // We got a match \o/
+                  // *mainOp is already set
+                  switch (irShiftOp) {
+                     case Iop_Shl64: *shiftOp = ARM64sh_SHL; break;
+                     case Iop_Shr64: *shiftOp = ARM64sh_SHR; break;
+                     case Iop_Sar64: *shiftOp = ARM64sh_SAR; break;
+                     default: vassert(0); // guarded above
+                  }
+                  *amt = amount;
+                  *argUnshifted = argL;
+                  *argToBeShifted = argRL;
+                  return True;
+               }
+            }
+         }
+      }
+      // We failed to get a match in the first iteration.  So, provided the
+      // root node isn't SUB, swap the arguments and make one further
+      // iteration.  If that doesn't succeed, we must give up.
+      if (iterNo == 1 && canSwap) {
+         IRExpr* tmp = argL;
+         argL = argR;
+         argR = tmp;
+         iterNo = 2;
+         continue;
+      }
+      // Give up.
+      return False;
+   }
+   /*NOTREACHED*/
+}
+
 /* --------------------- AMode --------------------- */
 
 /* Return an AMode which computes the value of the specified
@@ -1577,7 +1665,34 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
             break;
       }
 
-      /* ADD/SUB */
+      /* AND64/OR64/XOR64/ADD64/SUB64(e1, e2 shifted by imm)
+         AND64/OR64/XOR64/ADD64(e1 shifted by imm, e2)
+      */
+      {
+         switch (e->Iex.Binop.op) {
+            case Iop_And64: case Iop_Or64: case Iop_Xor64:
+            case Iop_Add64: case Iop_Sub64: {
+               ARM64RRSOp mainOp = ARM64rrs_INVALID;
+               ARM64ShiftOp shiftOp = (ARM64ShiftOp)0; // Invalid
+               IRExpr* argUnshifted = NULL;
+               IRExpr* argToBeShifted = NULL;
+               UChar amt = 0;
+               if (matchesRegRegShift(&mainOp, &shiftOp, &amt, &argUnshifted,
+                                      &argToBeShifted, e)) {
+                  HReg rDst = newVRegI(env);
+                  HReg rUnshifted = iselIntExpr_R(env, argUnshifted);
+                  HReg rToBeShifted = iselIntExpr_R(env, argToBeShifted);
+                  addInstr(env, ARM64Instr_RRS(rDst, rUnshifted, rToBeShifted,
+                                               shiftOp, amt, mainOp));
+                  return rDst;
+               }
+            }
+            default:
+               break;
+         }
+      }
+
+      /* ADD/SUB(e1, e2) (for any e1, e2) */
       switch (e->Iex.Binop.op) {
          case Iop_Add64: case Iop_Add32:
          case Iop_Sub64: case Iop_Sub32: {
@@ -1593,7 +1708,7 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
             break;
       }
 
-      /* AND/OR/XOR */
+      /* AND/OR/XOR(e1, e2) (for any e1, e2) */
       switch (e->Iex.Binop.op) {
          case Iop_And64: case Iop_And32: lop = ARM64lo_AND; goto log_binop;
          case Iop_Or64:  case Iop_Or32:  lop = ARM64lo_OR;  goto log_binop;