From: Julian Seward
Date: Mon, 2 May 2011 07:21:04 +0000 (+0000)
Subject: Split up armg_calculate_flags_nzcv into four functions that compute
X-Git-Tag: svn/VALGRIND_3_7_0^2~85
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3fc2e35e3b7eb0204dec32dba8f8bdeb3e7146c2;p=thirdparty%2Fvalgrind.git

Split up armg_calculate_flags_nzcv into four functions that compute
the flags individually.  This seems to be a net performance win,
because often only one or two of the flags computed by
armg_calculate_flags_nzcv are needed, so time was wasted computing
the other ones.

git-svn-id: svn://svn.valgrind.org/vex/trunk@2139
---

diff --git a/VEX/priv/guest_arm_helpers.c b/VEX/priv/guest_arm_helpers.c
index 96d5824377..061b22bf97 100644
--- a/VEX/priv/guest_arm_helpers.c
+++ b/VEX/priv/guest_arm_helpers.c
@@ -51,59 +51,109 @@
 */
 
-
-/* generalised left-shifter */
-static inline UInt lshift ( UInt x, Int n )
+/* Calculate the N flag from the supplied thunk components, in the
+   least significant bit of the word.  Returned bits 31:1 are zero. */
+static
+UInt armg_calculate_flag_n ( UInt cc_op, UInt cc_dep1,
+                             UInt cc_dep2, UInt cc_dep3 )
 {
-   if (n >= 0)
-      return x << n;
-   else
-      return x >> (-n);
+   switch (cc_op) {
+      case ARMG_CC_OP_COPY: {
+         /* (nzcv:28x0, unused, unused) */
+         UInt nf   = (cc_dep1 >> ARMG_CC_SHIFT_N) & 1;
+         return nf;
+      }
+      case ARMG_CC_OP_ADD: {
+         /* (argL, argR, unused) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt res  = argL + argR;
+         UInt nf   = res >> 31;
+         return nf;
+      }
+      case ARMG_CC_OP_SUB: {
+         /* (argL, argR, unused) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt res  = argL - argR;
+         UInt nf   = res >> 31;
+         return nf;
+      }
+      case ARMG_CC_OP_ADC: {
+         /* (argL, argR, oldC) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt oldC = cc_dep3;
+         vassert((oldC & ~1) == 0);
+         UInt res  = argL + argR + oldC;
+         UInt nf   = res >> 31;
+         return nf;
+      }
+      case ARMG_CC_OP_SBB: {
+         /* (argL, argR, oldC) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt oldC = cc_dep3;
+         vassert((oldC & ~1) == 0);
+         UInt res  = argL - argR - (oldC ^ 1);
+         UInt nf   = res >> 31;
+         return nf;
+      }
+      case ARMG_CC_OP_LOGIC: {
+         /* (res, shco, oldV) */
+         UInt res  = cc_dep1;
+         UInt nf   = res >> 31;
+         return nf;
+      }
+      case ARMG_CC_OP_MUL: {
+         /* (res, unused, oldC:oldV) */
+         UInt res  = cc_dep1;
+         UInt nf   = res >> 31;
+         return nf;
+      }
+      case ARMG_CC_OP_MULL: {
+         /* (resLo32, resHi32, oldC:oldV) */
+         UInt resHi32 = cc_dep2;
+         UInt nf      = resHi32 >> 31;
+         return nf;
+      }
+      default:
+         /* shouldn't really make these calls from generated code */
+         vex_printf("armg_calculate_flag_n"
+                    "( op=%u, dep1=0x%x, dep2=0x%x, dep3=0x%x )\n",
+                    cc_op, cc_dep1, cc_dep2, cc_dep3 );
+         vpanic("armg_calculate_flags_n");
+   }
 }
 
 
-/* CALLED FROM GENERATED CODE: CLEAN HELPER */
-/* Calculate NZCV from the supplied thunk components, in the positions
-   they appear in the CPSR, viz bits 31:28 for N Z C V respectively.
-   Returned bits 27:0 are zero. */
-UInt armg_calculate_flags_nzcv ( UInt cc_op, UInt cc_dep1,
-                                 UInt cc_dep2, UInt cc_dep3 )
+/* Calculate the Z flag from the supplied thunk components, in the
+   least significant bit of the word.  Returned bits 31:1 are zero. */
+static
+UInt armg_calculate_flag_z ( UInt cc_op, UInt cc_dep1,
+                             UInt cc_dep2, UInt cc_dep3 )
 {
    switch (cc_op) {
-      case ARMG_CC_OP_COPY:
-         /* (nzcv, unused, unused) */
-         return cc_dep1;
+      case ARMG_CC_OP_COPY: {
+         /* (nzcv:28x0, unused, unused) */
+         UInt zf   = (cc_dep1 >> ARMG_CC_SHIFT_Z) & 1;
+         return zf;
+      }
       case ARMG_CC_OP_ADD: {
          /* (argL, argR, unused) */
          UInt argL = cc_dep1;
          UInt argR = cc_dep2;
          UInt res  = argL + argR;
-         UInt nf   = lshift( res & (1<<31), ARMG_CC_SHIFT_N - 31 );
-         UInt zf   = lshift( res == 0, ARMG_CC_SHIFT_Z );
-         // CF and VF need verification
-         UInt cf   = lshift( res < argL, ARMG_CC_SHIFT_C );
-         UInt vf   = lshift( (res ^ argL) & (res ^ argR),
-                             ARMG_CC_SHIFT_V + 1 - 32 )
-                     & ARMG_CC_MASK_V;
-         //vex_printf("%08x %08x -> n %x z %x c %x v %x\n",
-         //           argL, argR, nf, zf, cf, vf);
-         return nf | zf | cf | vf;
+         UInt zf   = res == 0;
+         return zf;
       }
       case ARMG_CC_OP_SUB: {
          /* (argL, argR, unused) */
          UInt argL = cc_dep1;
          UInt argR = cc_dep2;
          UInt res  = argL - argR;
-         UInt nf   = lshift( res & (1<<31), ARMG_CC_SHIFT_N - 31 );
-         UInt zf   = lshift( res == 0, ARMG_CC_SHIFT_Z );
-         // XXX cf is inverted relative to normal sense
-         UInt cf   = lshift( argL >= argR, ARMG_CC_SHIFT_C );
-         UInt vf   = lshift( (argL ^ argR) & (argL ^ res),
-                             ARMG_CC_SHIFT_V + 1 - 32 )
-                     & ARMG_CC_MASK_V;
-         //vex_printf("%08x %08x -> n %x z %x c %x v %x\n",
-         //           argL, argR, nf, zf, cf, vf);
-         return nf | zf | cf | vf;
+         UInt zf   = res == 0;
+         return zf;
       }
       case ARMG_CC_OP_ADC: {
          /* (argL, argR, oldC) */
@@ -111,17 +161,9 @@ UInt armg_calculate_flags_nzcv ( UInt cc_op, UInt cc_dep1,
          UInt argR = cc_dep2;
          UInt oldC = cc_dep3;
          vassert((oldC & ~1) == 0);
-         UInt res  = (argL + argR) + oldC;
-         UInt nf   = lshift( res & (1<<31), ARMG_CC_SHIFT_N - 31 );
-         UInt zf   = lshift( res == 0, ARMG_CC_SHIFT_Z );
-         UInt cf   = oldC ? lshift( res <= argL, ARMG_CC_SHIFT_C )
-                          : lshift( res < argL, ARMG_CC_SHIFT_C );
-         UInt vf   = lshift( (res ^ argL) & (res ^ argR),
-                             ARMG_CC_SHIFT_V + 1 - 32 )
-                     & ARMG_CC_MASK_V;
-         //vex_printf("%08x %08x -> n %x z %x c %x v %x\n",
-         //           argL, argR, nf, zf, cf, vf);
-         return nf | zf | cf | vf;
+         UInt res  = argL + argR + oldC;
+         UInt zf   = res == 0;
+         return zf;
       }
       case ARMG_CC_OP_SBB: {
          /* (argL, argR, oldC) */
@@ -130,86 +172,215 @@ UInt armg_calculate_flags_nzcv ( UInt cc_op, UInt cc_dep1,
          UInt oldC = cc_dep3;
          vassert((oldC & ~1) == 0);
          UInt res  = argL - argR - (oldC ^ 1);
-         UInt nf   = lshift( res & (1<<31), ARMG_CC_SHIFT_N - 31 );
-         UInt zf   = lshift( res == 0, ARMG_CC_SHIFT_Z );
-         UInt cf   = oldC ? lshift( argL >= argR, ARMG_CC_SHIFT_C )
-                          : lshift( argL > argR,  ARMG_CC_SHIFT_C );
-         UInt vf   = lshift( (argL ^ argR) & (argL ^ res),
-                             ARMG_CC_SHIFT_V + 1 - 32 )
-                     & ARMG_CC_MASK_V;
-         //vex_printf("%08x %08x -> n %x z %x c %x v %x\n",
-         //           argL, argR, nf, zf, cf, vf);
-         return nf | zf | cf | vf;
+         UInt zf   = res == 0;
+         return zf;
       }
       case ARMG_CC_OP_LOGIC: {
          /* (res, shco, oldV) */
          UInt res  = cc_dep1;
-         UInt shco = cc_dep2;
-         vassert((shco & ~1) == 0);
-         UInt oldV = cc_dep3;
-         vassert((oldV & ~1) == 0);
-         UInt nf   = lshift( res & (1<<31), ARMG_CC_SHIFT_N - 31 );
-         UInt zf   = lshift( res == 0, ARMG_CC_SHIFT_Z );
-         UInt cf   = lshift( shco, ARMG_CC_SHIFT_C );
-         UInt vf   = lshift( oldV, ARMG_CC_SHIFT_V );
-         return nf | zf | cf | vf;
+         UInt zf   = res == 0;
+         return zf;
       }
       case ARMG_CC_OP_MUL: {
         /* (res, unused, oldC:oldV) */
         UInt res  = cc_dep1;
-        UInt oldC = (cc_dep3 >> 1) & 1;
-        UInt oldV = (cc_dep3 >> 0) & 1;
-        vassert((cc_dep3 & ~3) == 0);
-        UInt nf   = lshift( res & (1<<31), ARMG_CC_SHIFT_N - 31 );
-        UInt zf   = lshift( res == 0, ARMG_CC_SHIFT_Z );
-        UInt cf   = lshift( oldC, ARMG_CC_SHIFT_C );
-        UInt vf   = lshift( oldV, ARMG_CC_SHIFT_V );
-        return nf | zf | cf | vf;
+        UInt zf   = res == 0;
+        return zf;
      }
      case ARMG_CC_OP_MULL: {
         /* (resLo32, resHi32, oldC:oldV) */
         UInt resLo32 = cc_dep1;
         UInt resHi32 = cc_dep2;
-        UInt oldC    = (cc_dep3 >> 1) & 1;
-        UInt oldV    = (cc_dep3 >> 0) & 1;
-        vassert((cc_dep3 & ~3) == 0);
-        UInt nf      = lshift( resHi32 & (1<<31), ARMG_CC_SHIFT_N - 31 );
-        UInt zf      = lshift( (resHi32|resLo32) == 0, ARMG_CC_SHIFT_Z );
-        UInt cf      = lshift( oldC, ARMG_CC_SHIFT_C );
-        UInt vf      = lshift( oldV, ARMG_CC_SHIFT_V );
-        return nf | zf | cf | vf;
+        UInt zf      = (resHi32|resLo32) == 0;
+        return zf;
      }
      default:
         /* shouldn't really make these calls from generated code */
-        vex_printf("armg_calculate_flags_nzcv"
+        vex_printf("armg_calculate_flags_z"
                    "( op=%u, dep1=0x%x, dep2=0x%x, dep3=0x%x )\n",
                    cc_op, cc_dep1, cc_dep2, cc_dep3 );
-        vpanic("armg_calculate_flags_nzcv");
+        vpanic("armg_calculate_flags_z");
   }
 }
 
 
 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
-/* Calculate the C flag from the thunk components, in the lowest bit
-   of the word (bit 0). Bits 31:1 of the returned value are zero. */
+/* Calculate the C flag from the supplied thunk components, in the
+   least significant bit of the word.  Returned bits 31:1 are zero. */
 UInt armg_calculate_flag_c ( UInt cc_op, UInt cc_dep1,
                              UInt cc_dep2, UInt cc_dep3 )
 {
-   UInt r = armg_calculate_flags_nzcv(cc_op, cc_dep1, cc_dep2, cc_dep3);
-   return (r >> ARMG_CC_SHIFT_C) & 1;
+   switch (cc_op) {
+      case ARMG_CC_OP_COPY: {
+         /* (nzcv:28x0, unused, unused) */
+         UInt cf   = (cc_dep1 >> ARMG_CC_SHIFT_C) & 1;
+         return cf;
+      }
+      case ARMG_CC_OP_ADD: {
+         /* (argL, argR, unused) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt res  = argL + argR;
+         UInt cf   = res < argL;
+         return cf;
+      }
+      case ARMG_CC_OP_SUB: {
+         /* (argL, argR, unused) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt cf   = argL >= argR;
+         return cf;
+      }
+      case ARMG_CC_OP_ADC: {
+         /* (argL, argR, oldC) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt oldC = cc_dep3;
+         vassert((oldC & ~1) == 0);
+         UInt res  = argL + argR + oldC;
+         UInt cf   = oldC ? (res <= argL) : (res < argL);
+         return cf;
+      }
+      case ARMG_CC_OP_SBB: {
+         /* (argL, argR, oldC) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt oldC = cc_dep3;
+         vassert((oldC & ~1) == 0);
+         UInt cf   = oldC ? (argL >= argR) : (argL > argR);
+         return cf;
+      }
+      case ARMG_CC_OP_LOGIC: {
+         /* (res, shco, oldV) */
+         UInt shco = cc_dep2;
+         vassert((shco & ~1) == 0);
+         UInt cf   = shco;
+         return cf;
+      }
+      case ARMG_CC_OP_MUL: {
+         /* (res, unused, oldC:oldV) */
+         UInt oldC = (cc_dep3 >> 1) & 1;
+         vassert((cc_dep3 & ~3) == 0);
+         UInt cf   = oldC;
+         return cf;
+      }
+      case ARMG_CC_OP_MULL: {
+         /* (resLo32, resHi32, oldC:oldV) */
+         UInt oldC = (cc_dep3 >> 1) & 1;
+         vassert((cc_dep3 & ~3) == 0);
+         UInt cf   = oldC;
+         return cf;
+      }
+      default:
+         /* shouldn't really make these calls from generated code */
+         vex_printf("armg_calculate_flag_c"
+                    "( op=%u, dep1=0x%x, dep2=0x%x, dep3=0x%x )\n",
+                    cc_op, cc_dep1, cc_dep2, cc_dep3 );
+         vpanic("armg_calculate_flag_c");
+   }
 }
 
 
 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
-/* Calculate the V flag from the thunk components, in the lowest bit
-   of the word (bit 0). Bits 31:1 of the returned value are zero. */
+/* Calculate the V flag from the supplied thunk components, in the
+   least significant bit of the word.  Returned bits 31:1 are zero. */
 UInt armg_calculate_flag_v ( UInt cc_op, UInt cc_dep1,
                              UInt cc_dep2, UInt cc_dep3 )
 {
-   UInt r = armg_calculate_flags_nzcv(cc_op, cc_dep1, cc_dep2, cc_dep3);
-   return (r >> ARMG_CC_SHIFT_V) & 1;
+   switch (cc_op) {
+      case ARMG_CC_OP_COPY: {
+         /* (nzcv:28x0, unused, unused) */
+         UInt vf   = (cc_dep1 >> ARMG_CC_SHIFT_V) & 1;
+         return vf;
+      }
+      case ARMG_CC_OP_ADD: {
+         /* (argL, argR, unused) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt res  = argL + argR;
+         UInt vf   = ((res ^ argL) & (res ^ argR)) >> 31;
+         return vf;
+      }
+      case ARMG_CC_OP_SUB: {
+         /* (argL, argR, unused) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt res  = argL - argR;
+         UInt vf   = ((argL ^ argR) & (argL ^ res)) >> 31;
+         return vf;
+      }
+      case ARMG_CC_OP_ADC: {
+         /* (argL, argR, oldC) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt oldC = cc_dep3;
+         vassert((oldC & ~1) == 0);
+         UInt res  = argL + argR + oldC;
+         UInt vf   = ((res ^ argL) & (res ^ argR)) >> 31;
+         return vf;
+      }
+      case ARMG_CC_OP_SBB: {
+         /* (argL, argR, oldC) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt oldC = cc_dep3;
+         vassert((oldC & ~1) == 0);
+         UInt res  = argL - argR - (oldC ^ 1);
+         UInt vf   = ((argL ^ argR) & (argL ^ res)) >> 31;
+         return vf;
+      }
+      case ARMG_CC_OP_LOGIC: {
+         /* (res, shco, oldV) */
+         UInt oldV = cc_dep3;
+         vassert((oldV & ~1) == 0);
+         UInt vf   = oldV;
+         return vf;
+      }
+      case ARMG_CC_OP_MUL: {
+         /* (res, unused, oldC:oldV) */
+         UInt oldV = (cc_dep3 >> 0) & 1;
+         vassert((cc_dep3 & ~3) == 0);
+         UInt vf   = oldV;
+         return vf;
+      }
+      case ARMG_CC_OP_MULL: {
+         /* (resLo32, resHi32, oldC:oldV) */
+         UInt oldV = (cc_dep3 >> 0) & 1;
+         vassert((cc_dep3 & ~3) == 0);
+         UInt vf   = oldV;
+         return vf;
+      }
+      default:
+         /* shouldn't really make these calls from generated code */
+         vex_printf("armg_calculate_flag_v"
+                    "( op=%u, dep1=0x%x, dep2=0x%x, dep3=0x%x )\n",
+                    cc_op, cc_dep1, cc_dep2, cc_dep3 );
+         vpanic("armg_calculate_flag_v");
+   }
 }
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* Calculate NZCV from the supplied thunk components, in the positions
+   they appear in the CPSR, viz bits 31:28 for N Z C V respectively.
+   Returned bits 27:0 are zero. */
+UInt armg_calculate_flags_nzcv ( UInt cc_op, UInt cc_dep1,
+                                 UInt cc_dep2, UInt cc_dep3 )
+{
+   UInt f;
+   UInt res = 0;
+   f = armg_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
+   res |= (f << ARMG_CC_SHIFT_N);
+   f = armg_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
+   res |= (f << ARMG_CC_SHIFT_Z);
+   f = armg_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
+   res |= (f << ARMG_CC_SHIFT_C);
+   f = armg_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
+   res |= (f << ARMG_CC_SHIFT_V);
+   return res;
+}
+
+
 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
 /* Calculate the QC flag from the arguments, in the lowest bit
    of the word (bit 0).  Urr, having this out of line is bizarre.
@@ -225,15 +396,14 @@ UInt armg_calculate_flag_qc ( UInt resL1, UInt resL2,
 
 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
 /* Calculate the specified condition from the thunk components, in the
-   lowest bit of the word (bit 0). */
-extern
+   lowest bit of the word (bit 0).  Returned bits 31:1 are zero. */
 UInt armg_calculate_condition ( UInt cond_n_op /* (ARMCondcode << 4) | cc_op */,
                                 UInt cc_dep1,
                                 UInt cc_dep2, UInt cc_dep3 )
 {
    UInt cond  = cond_n_op >> 4;
    UInt cc_op = cond_n_op & 0xF;
-   UInt nf, zf, vf, cf, nzcv, inv;
+   UInt nf, zf, vf, cf, inv;
 
    // vex_printf("XXXXXXXX %x %x %x %x\n",
    //            cond_n_op, cc_dep1, cc_dep2, cc_dep3);
@@ -241,47 +411,46 @@ UInt armg_calculate_condition ( UInt cond_n_op /* (ARMCondcode << 4) | cc_op */,
    if (cond == ARMCondAL) return 1;
 
    inv  = cond & 1;
-   nzcv = armg_calculate_flags_nzcv(cc_op, cc_dep1, cc_dep2, cc_dep3);
 
    switch (cond) {
      case ARMCondEQ:    // Z=1         => z
     case ARMCondNE:    // Z=0
-        zf = nzcv >> ARMG_CC_SHIFT_Z;
-        return 1 & (inv ^ zf);
+        zf = armg_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
+        return inv ^ zf;
 
     case ARMCondHS:    // C=1         => c
     case ARMCondLO:    // C=0
-        cf = nzcv >> ARMG_CC_SHIFT_C;
-        return 1 & (inv ^ cf);
+        cf = armg_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
+        return inv ^ cf;
 
     case ARMCondMI:    // N=1         => n
     case ARMCondPL:    // N=0
-        nf = nzcv >> ARMG_CC_SHIFT_N;
-        return 1 & (inv ^ nf);
+        nf = armg_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
+        return inv ^ nf;
 
     case ARMCondVS:    // V=1         => v
     case ARMCondVC:    // V=0
-        vf = nzcv >> ARMG_CC_SHIFT_V;
-        return 1 & (inv ^ vf);
+        vf = armg_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
+        return inv ^ vf;
 
     case ARMCondHI:    // C=1 && Z=0   => c & ~z
     case ARMCondLS:    // C=0 || Z=1
-        cf = nzcv >> ARMG_CC_SHIFT_C;
-        zf = nzcv >> ARMG_CC_SHIFT_Z;
-        return 1 & (inv ^ (cf & ~zf));
+        cf = armg_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
+        zf = armg_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
+        return inv ^ (cf & ~zf);
 
     case ARMCondGE:    // N=V          => ~(n^v)
     case ARMCondLT:    // N!=V
-        nf = nzcv >> ARMG_CC_SHIFT_N;
-        vf = nzcv >> ARMG_CC_SHIFT_V;
-        return 1 & (inv ^ ~(nf ^ vf));
+        nf = armg_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
+        vf = armg_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
+        return inv ^ (1 & ~(nf ^ vf));
 
     case ARMCondGT:    // Z=0 && N=V   => ~z & ~(n^v)  =>  ~(z | (n^v))
     case ARMCondLE:    // Z=1 || N!=V
-        nf = nzcv >> ARMG_CC_SHIFT_N;
-        vf = nzcv >> ARMG_CC_SHIFT_V;
-        zf = nzcv >> ARMG_CC_SHIFT_Z;
-        return 1 & (inv ^ ~(zf | (nf ^ vf)));
+        nf = armg_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
+        vf = armg_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
+        zf = armg_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
+        return inv ^ (1 & ~(zf | (nf ^ vf)));
 
     case ARMCondAL: // handled above
     case ARMCondNV: // should never get here: Illegal instr
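
As an illustration of the rationale above (not part of the patch): a caller that only
needs one flag no longer pays for all four.  The standalone C sketch below mimics the
ARMG_CC_OP_SUB case; the types and shift constants are simplified stand-ins for the
real VEX UInt and ARMG_CC_SHIFT_* definitions, and the two helper names are invented
for the sketch only.

/* Illustrative sketch only.  Contrast a combined NZCV computation with a
   per-flag one for the SUB thunk: an EQ/NE test needs only Z. */
#include <stdio.h>

typedef unsigned int UInt;

/* Stand-ins for ARMG_CC_SHIFT_N/Z/C/V (same bit positions as the CPSR). */
enum { SHIFT_N = 31, SHIFT_Z = 30, SHIFT_C = 29, SHIFT_V = 28 };

/* Combined helper: computes all four flags for argL - argR, the way the
   old armg_calculate_flags_nzcv did for ARMG_CC_OP_SUB. */
static UInt calc_flags_nzcv_sub ( UInt argL, UInt argR )
{
   UInt res = argL - argR;
   UInt nf  = res >> 31;
   UInt zf  = res == 0;
   UInt cf  = argL >= argR;                         /* carry = no borrow */
   UInt vf  = ((argL ^ argR) & (argL ^ res)) >> 31; /* signed overflow */
   return (nf << SHIFT_N) | (zf << SHIFT_Z)
        | (cf << SHIFT_C) | (vf << SHIFT_V);
}

/* Per-flag helper: computes only Z, the way armg_calculate_flag_z does
   for ARMG_CC_OP_SUB. */
static UInt calc_flag_z_sub ( UInt argL, UInt argR )
{
   return (argL - argR) == 0;
}

int main ( void )
{
   UInt argL = 7, argR = 7;
   /* An EQ/NE condition only inspects Z; the per-flag helper skips the
      N, C and V work entirely, which is the performance win claimed in
      the commit message. */
   UInt zf_fast = calc_flag_z_sub(argL, argR);
   UInt zf_full = (calc_flags_nzcv_sub(argL, argR) >> SHIFT_Z) & 1;
   printf("zf_fast=%u zf_full=%u\n", zf_fast, zf_full);
   return 0;
}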