From f719470439e03b757382f769d758f6aa73e22947 Mon Sep 17 00:00:00 2001 From: Julian Seward Date: Sat, 2 Jan 2021 16:15:03 +0100 Subject: [PATCH] A bit of tuning of the arm64 isel: do PUT(..) = 0x0:I64 in a single insn. When running Memcheck, most blocks will do one and often two of `PUT(..) = 0x0:I64`, as a result of the way the front end models arm64 condition codes. The arm64 isel would generate `mov xN, #0 ; str xN, [xBaseblock, #imm]`, which is pretty stupid. This patch changes it to a single insn: `str xzr, [xBaseblock, #imm]`. This is a special-case for `PUT(..) = 0x0:I64`. General-case integer stores of 0x0:I64 are unchanged. This gives a 1.9% reduction in generated code size when running /usr/bin/date on Memcheck. --- VEX/priv/host_arm64_defs.c | 37 ++++++++++++++++++++++++++++--------- VEX/priv/host_arm64_defs.h | 7 +++++++ VEX/priv/host_arm64_isel.c | 8 +++++++- 3 files changed, 42 insertions(+), 10 deletions(-) diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c index 13b497f600..6ea67ef319 100644 --- a/VEX/priv/host_arm64_defs.c +++ b/VEX/priv/host_arm64_defs.c @@ -118,9 +118,13 @@ const RRegUniverse* getRRegUniverse_ARM64 ( void ) // x8 is used as a ProfInc temporary // x9 is used as a spill/reload/chaining/call temporary // x30 as LR - // x31 because dealing with the SP-vs-ZR overloading is too - // confusing, and we don't need to do so, so let's just avoid - // the problem + // + // x31 is mentionable, but not allocatable, and is dangerous to use + // because of SP-vs-ZR overloading. Here, we call it `XZR_XSP`. Whether + // it denotes the zero register or the stack pointer depends both on what + // kind of instruction it appears in and even on the position within an + // instruction that it appears. So be careful. There's absolutely + // nothing to prevent shooting oneself in the foot. 
// // Currently, we have 15 allocatable integer registers: // 0 1 2 3 4 5 6 7 22 23 24 25 26 27 28 @@ -137,6 +141,7 @@ const RRegUniverse* getRRegUniverse_ARM64 ( void ) ru->regs[ru->size++] = hregARM64_X8(); ru->regs[ru->size++] = hregARM64_X9(); ru->regs[ru->size++] = hregARM64_X21(); + ru->regs[ru->size++] = hregARM64_XZR_XSP(); rRegUniverse_ARM64_initted = True; @@ -155,8 +160,8 @@ UInt ppHRegARM64 ( HReg reg ) { switch (hregClass(reg)) { case HRcInt64: r = hregEncoding(reg); - vassert(r >= 0 && r < 31); - return vex_printf("x%d", r); + vassert(r >= 0 && r <= 31); + return r ==31 ? vex_printf("xzr/xsp") : vex_printf("x%d", r); case HRcFlt64: r = hregEncoding(reg); vassert(r >= 0 && r < 32); @@ -2746,6 +2751,19 @@ static inline UInt iregEnc ( HReg r ) return n; } +static inline UInt iregEncOr31 ( HReg r ) +{ + // This is the same as iregEnc() except that we're allowed to use the + // "special" encoding number 31, which means, depending on the context, + // either XZR/WZR or SP. + UInt n; + vassert(hregClass(r) == HRcInt64); + vassert(!hregIsVirtual(r)); + n = hregEncoding(r); + vassert(n <= 31); + return n; +} + static inline UInt dregEnc ( HReg r ) { UInt n; @@ -3360,13 +3378,14 @@ static UInt* do_load_or_store32 ( UInt* p, } -/* Generate a 64 bit load or store to/from xD, using the given amode +/* Generate a 64 bit integer load or store to/from xD, using the given amode for the address. */ static UInt* do_load_or_store64 ( UInt* p, Bool isLoad, UInt xD, ARM64AMode* am ) { - /* In all these cases, Rn can't be 31 since that means SP. */ - vassert(xD <= 30); + /* In all these cases, Rn can't be 31 since that means SP. But Rd can be + 31, meaning XZR/WZR. 
*/ + vassert(xD <= 31); if (am->tag == ARM64am_RI9) { /* STUR Xd, [Xn|SP + simm9]: 11 111000 000 simm9 00 n d LDUR Xd, [Xn|SP + simm9]: 11 111000 010 simm9 00 n d @@ -3646,7 +3665,7 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, } case ARM64in_LdSt64: { p = do_load_or_store64( p, i->ARM64in.LdSt64.isLoad, - iregEnc(i->ARM64in.LdSt64.rD), + iregEncOr31(i->ARM64in.LdSt64.rD), i->ARM64in.LdSt64.amode ); goto done; } diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h index 5a82564ce6..24da64e22b 100644 --- a/VEX/priv/host_arm64_defs.h +++ b/VEX/priv/host_arm64_defs.h @@ -70,6 +70,13 @@ ST_IN HReg hregARM64_D13 ( void ) { return mkHReg(False, HRcFlt64, 13, 25); } ST_IN HReg hregARM64_X8 ( void ) { return mkHReg(False, HRcInt64, 8, 26); } ST_IN HReg hregARM64_X9 ( void ) { return mkHReg(False, HRcInt64, 9, 27); } ST_IN HReg hregARM64_X21 ( void ) { return mkHReg(False, HRcInt64, 21, 28); } + +// This is the integer register with encoding 31. Be *very* careful how you +// use it, since its meaning depends on the instruction and indeed even on +// the position within the instruction at which it appears. It denotes either +// the zero register or the stack pointer. +ST_IN HReg hregARM64_XZR_XSP ( void ) { return mkHReg(False, + HRcInt64, 31, 29); } #undef ST_IN extern UInt ppHRegARM64 ( HReg ); diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c index da1218715e..517b7b15b8 100644 --- a/VEX/priv/host_arm64_isel.c +++ b/VEX/priv/host_arm64_isel.c @@ -3745,7 +3745,13 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data); UInt offs = (UInt)stmt->Ist.Put.offset; if (tyd == Ity_I64 && 0 == (offs & 7) && offs < (8<<12)) { - HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data); + HReg rD = INVALID_HREG; + if (isZeroU64(stmt->Ist.Put.data)) { + // In this context, XZR_XSP denotes the zero register. 
+ rD = hregARM64_XZR_XSP(); + } else { + rD = iselIntExpr_R(env, stmt->Ist.Put.data); + } ARM64AMode* am = mk_baseblock_64bit_access_amode(offs); addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am)); return; -- 2.47.3