From: Julian Seward Date: Mon, 4 Jul 2011 16:58:40 +0000 (+0000) Subject: Thumb2 front end: improved analysis of IT instructions that might X-Git-Tag: svn/VALGRIND_3_7_0^2~58 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=62b45f00458d53438ca70b1f3c88f91a5367ed58;p=thirdparty%2Fvalgrind.git Thumb2 front end: improved analysis of IT instructions that might guard the one being translated, with the goal of proving this isn't the case more of the time. Reduces the amount of generated code by about 10% with --tool=none, and performance improvements (also with --tool=none) of up to 25% have been observed. git-svn-id: svn://svn.valgrind.org/vex/trunk@2166 --- diff --git a/VEX/priv/guest_arm_toIR.c b/VEX/priv/guest_arm_toIR.c index b52ed4acca..cde8c9a09f 100644 --- a/VEX/priv/guest_arm_toIR.c +++ b/VEX/priv/guest_arm_toIR.c @@ -14078,6 +14078,8 @@ DisResult disInstr_ARM_WRK ( /*--- Disassemble a single Thumb2 instruction ---*/ /*------------------------------------------------------------*/ +static const UChar it_length_table[256]; /* fwds */ + /* NB: in Thumb mode we do fetches of regs with getIRegT, which automagically adds 4 to fetches of r15. However, writes to regs are done with putIRegT, which disallows writes to r15. Hence any @@ -14263,8 +14265,8 @@ DisResult disInstr_THUMB_WRK ( if (pageoff >= 18) { /* It's safe to poke about in the 9 halfwords preceding this insn. So, have a look at them. */ - guaranteedUnconditional = True; /* assume no 'it' insn found, till we do */ - + guaranteedUnconditional = True; /* assume no 'it' insn found, + till we do */ UShort* hwp = (UShort*)(HWord)pc; Int i; for (i = -1; i >= -9; i--) { @@ -14275,10 +14277,25 @@ DisResult disInstr_THUMB_WRK ( == ( pc & 0xFFFFF000 ) ); */ /* All valid IT instructions must have the form 0xBFxy, - where x can be anything, but y must be nonzero. */ - if ((hwp[i] & 0xFF00) == 0xBF00 && (hwp[i] & 0xF) != 0) { - /* might be an 'it' insn. Play safe. */ - guaranteedUnconditional = False; + where x can be anything, but y must be nonzero. Find + the number of insns covered by it (1 .. 4) and check to + see if it can possibly reach up to the instruction in + question. Some (x,y) combinations mean UNPREDICTABLE, + and the table is constructed to be conservative by + returning 4 for those cases, so the analysis is safe + even if the code uses unpredictable IT instructions (in + which case its authors are nuts, but hey.) */ + UShort hwp_i = hwp[i]; + if (UNLIKELY((hwp_i & 0xFF00) == 0xBF00 && (hwp_i & 0xF) != 0)) { + /* might be an 'it' insn. */ + /* # guarded insns */ + Int n_guarded = (Int)it_length_table[hwp_i & 0xFF]; + vassert(n_guarded >= 1 && n_guarded <= 4); + if (n_guarded * 2 /* # guarded HWs, worst case */ + > (-(i+1))) /* -(i+1): # remaining HWs after the IT */ + /* -(i+0) also seems to work, even though I think + it's wrong. I don't understand that. */ + guaranteedUnconditional = False; break; } } @@ -17929,6 +17946,85 @@ DisResult disInstr_THUMB_WRK ( #undef DIS +/* Helper table for figuring out how many insns an IT insn + conditionalises. + + An ITxyz instruction of the format "1011 1111 firstcond mask" + conditionalises some number of instructions, as indicated by the + following table. A value of zero indicates the instruction is + invalid in some way. + + mask = 0 means this isn't an IT instruction + fc = 15 (NV) means unpredictable + + The line fc = 14 (AL) is different from the others; there are + additional constraints in this case. + + mask(0 .. 15) + +-------------------------------- + fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + .. | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + | 0 4 3 0 2 0 0 0 1 0 0 0 0 0 0 0 + 15) | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + + To be conservative with the analysis, let's rule out the mask = 0 + case, since that isn't an IT insn at all. But for all the other + cases where the table contains zero, that means unpredictable, so + let's say 4 to be conservative. Hence we have a safe value for any + IT (mask,fc) pair that the CPU would actually identify as an IT + instruction. The final table is + + mask(0 .. 15) + +-------------------------------- + fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + .. | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 + | 0 4 3 4 2 4 4 4 1 4 4 4 4 4 4 4 + 15) | 0 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 +*/ +static const UChar it_length_table[256] + = { 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4, + 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4, + 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4, + 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4, + 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4, + 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4, + 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4, + 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4, + 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4, + 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4, + 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4, + 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4, + 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4, + 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4, + 0, 4, 3, 4, 2, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4, + 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 + }; + + /*------------------------------------------------------------*/ /*--- Top-level fn ---*/ /*------------------------------------------------------------*/