From: Florian Krohm Date: Sun, 22 Apr 2012 17:38:46 +0000 (+0000) Subject: tchain optimisation for s390 (VEX bits) X-Git-Tag: svn/VALGRIND_3_8_1^2~170 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=eb392668b8cd57b4fa42bbe017e2413cb8cfc38c;p=thirdparty%2Fvalgrind.git tchain optimisation for s390 (VEX bits) Loading a 64-bit immediate into a register requires 4 insns on a z900 machine, the oldest model supported. Depending on hardware capabilities, newer machines can do the same using 2 insns. Naturally, we want to take advantage of that. However, currently, in disp_cp_chain_me_to_slowEP/fastEP we assume that the length of loading a 64-bit immediate is a compile time constant: S390_TCHAIN_LOAD64_LEN For what we want to do this constant needs to be a runtime constant. So in this patch we move this address arithmetic out of the dispatch code. The general idea being that the value in %r1 does not need to be adjusted to recover the place to patch. Upon reaching disp_cp_chain_me_to_slowEP/fastEP %r1 contains the correct address. git-svn-id: svn://svn.valgrind.org/vex/trunk@2308 --- diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c index 1420e703e1..f76e10f513 100644 --- a/VEX/priv/host_s390_defs.c +++ b/VEX/priv/host_s390_defs.c @@ -7230,11 +7230,10 @@ s390_insn_is_BR(const UChar *p, UChar reg) return p[0] == 0x07 && p[1] == (0xF0 | reg); /* BCR 15,reg */ } -static __inline__ Bool -s390_insn_is_BASR(const UChar *p, UChar link_reg, UChar other_reg) -{ - return p[0] == 0x0D && p[1] == ((link_reg << 4) | other_reg); -} + +/* The length of the BASR insn */ +#define S390_BASR_LEN 2 + /* Load the 64-bit VALUE into REG. Note that this function must NOT optimise the generated code by looking at the value. I.e. 
using @@ -7262,7 +7261,7 @@ s390_tchain_load64(UChar *buf, UChar regno, ULong value) static UInt s390_tchain_load64_len(void) { - return S390_TCHAIN_LOAD64_LEN; + return 4 + 4 + 4 + 4; /* IIHH + IIHL + IILH + IILL */ } /* Verify that CODE is the code sequence generated by s390_tchain_load64 @@ -7367,24 +7366,23 @@ s390_insn_xdirect_emit(UChar *buf, const s390_insn *insn, buf = s390_emit_STG(buf, R0, 0, b, DISP20(d)); - /* --- FIRST PATCHABLE BYTE follows --- */ - /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling - to) backs up the return address, so as to find the address of - the first patchable byte. So: don't change the length of the - two instructions below. */ - /* Load the chosen entry point into the scratch reg */ void *disp_cp_chain_me; disp_cp_chain_me = insn->variant.xdirect.to_fast_entry ? disp_cp_chain_me_to_fastEP : disp_cp_chain_me_to_slowEP; + /* Get the address of the beginning of the load64 code sequence into %r1. + Do not change the register! This is part of the protocol with the + dispatcher. 
*/ + buf = s390_emit_BASR(buf, 1, R0); + /* --- FIRST PATCHABLE BYTE follows (must not modify %r1) --- */ ULong addr = Ptr_to_ULong(disp_cp_chain_me); buf = s390_tchain_load64(buf, S390_REGNO_TCHAIN_SCRATCH, addr); - /* call *tchain_scratch */ - buf = s390_emit_BASR(buf, 1, S390_REGNO_TCHAIN_SCRATCH); + /* goto *tchain_scratch */ + buf = s390_emit_BCR(buf, S390_CC_ALWAYS, S390_REGNO_TCHAIN_SCRATCH); /* --- END of PATCHABLE BYTES --- */ @@ -7404,7 +7402,7 @@ s390_insn_xdirect_emit(UChar *buf, const s390_insn *insn, static UInt s390_xdirect_patchable_len(void) { - return s390_tchain_load64_len() + S390_TCHAIN_CALL_LEN; + return s390_tchain_load64_len() + S390_BASR_LEN; } @@ -7452,7 +7450,7 @@ s390_insn_xindir_emit(UChar *buf, const s390_insn *insn, void *disp_cp_xindir) /* load tchain_scratch, #disp_indir */ buf = s390_tchain_load64(buf, S390_REGNO_TCHAIN_SCRATCH, Ptr_to_ULong(disp_cp_xindir)); - /* BR *tchain_direct */ + /* goto *tchain_scratch */ buf = s390_emit_BCR(buf, S390_CC_ALWAYS, S390_REGNO_TCHAIN_SCRATCH); /* Fix up the conditional jump, if there was one. */ @@ -7533,7 +7531,7 @@ s390_insn_xassisted_emit(UChar *buf, const s390_insn *insn, buf = s390_tchain_load64(buf, S390_REGNO_TCHAIN_SCRATCH, Ptr_to_ULong(disp_cp_xassisted)); - /* BR *tchain_direct */ + /* goto *tchain_scratch */ buf = s390_emit_BCR(buf, S390_CC_ALWAYS, S390_REGNO_TCHAIN_SCRATCH); /* Fix up the conditional jump, if there was one. 
*/ @@ -7808,21 +7806,21 @@ chainXDirect_S390(void *place_to_chain, void *disp_cp_chain_me_EXPECTED, void *place_to_jump_to) { - /* What we're expecting to see @ PLACE_TI_CHAIN is: + /* What we're expecting to see @ PLACE_TO_CHAIN is: - load tchain-scratch, #disp_cp_chain_me_EXPECTED - BASR 1,S390_REGNO_TCHAIN_SCRATCH + load tchain_scratch, #disp_cp_chain_me_EXPECTED + goto *tchain_scratch */ const UChar *next; next = s390_tchain_verify_load64(place_to_chain, S390_REGNO_TCHAIN_SCRATCH, Ptr_to_ULong(disp_cp_chain_me_EXPECTED)); - vassert(s390_insn_is_BASR(next, 1, S390_REGNO_TCHAIN_SCRATCH)); + vassert(s390_insn_is_BR(next, S390_REGNO_TCHAIN_SCRATCH)); /* And what we want to change it to is either: (general case): - load tchain_scratch, #place_to_jump_to - BR *tchain_scratch + load tchain_scratch, #place_to_jump_to + goto *tchain_scratch ---OR--- @@ -7872,12 +7870,12 @@ chainXDirect_S390(void *place_to_chain, p[i] = 0x00; } else { /* - load tchain_scratch, #place_to_jump_to - BR *tchain_scratch + load tchain_scratch, #place_to_jump_to + goto *tchain_scratch */ ULong addr = Ptr_to_ULong(place_to_jump_to); p = s390_tchain_load64(p, S390_REGNO_TCHAIN_SCRATCH, addr); - s390_emit_BCR(p, S390_CC_ALWAYS, S390_REGNO_TCHAIN_SCRATCH); + /* There is no need to emit a BCR here, as it is already there. 
*/ } VexInvalRange vir = {0, 0}; @@ -7894,8 +7892,8 @@ unchainXDirect_S390(void *place_to_unchain, { /* What we're expecting to see @ PLACE_TO_UNCHAIN: - load tchain_scratch, #place_to_jump_to_EXPECTED - BR *tchain_scratch + load tchain_scratch, #place_to_jump_to_EXPECTED + goto *tchain_scratch ---OR--- in the case where the displacement falls within 32 bits @@ -7905,6 +7903,8 @@ unchainXDirect_S390(void *place_to_unchain, */ UChar *p = place_to_unchain; + Bool uses_short_form = False; + if (s390_insn_is_BRCL(p, S390_CC_ALWAYS)) { /* Looks like the short form */ Int num_hw = *(Int *)&p[2]; @@ -7915,6 +7915,7 @@ unchainXDirect_S390(void *place_to_unchain, Int i; for (i = 0; i < s390_xdirect_patchable_len() - 6; ++i) vassert(p[6+i] == 0x00); + uses_short_form = True; } else { /* Should be the long form */ const UChar *next; @@ -7928,11 +7929,24 @@ unchainXDirect_S390(void *place_to_unchain, /* And what we want to change it to is: load tchain_scratch, #disp_cp_chain_me - call *tchain_scratch + goto *tchain_scratch */ + + /* Get the address of the beginning of the load64 code sequence into %r1. + Do not change the register! This is part of the protocol with the + dispatcher. + Note: the incoming argument PLACE_TO_UNCHAIN points to the beginning of the + load64 insn sequence. That sequence is prefixed with a BASR to get its + address (see s390_insn_xdirect_emit). */ + p = s390_emit_BASR(p - S390_BASR_LEN, 1, R0); + ULong addr = Ptr_to_ULong(disp_cp_chain_me); p = s390_tchain_load64(p, S390_REGNO_TCHAIN_SCRATCH, addr); - s390_emit_BASR(p, 1, S390_REGNO_TCHAIN_SCRATCH); + + /* Emit the BCR in case the short form was used. In case of the long + form, the BCR is already there. 
*/ + if (uses_short_form) + s390_emit_BCR(p, S390_CC_ALWAYS, S390_REGNO_TCHAIN_SCRATCH); VexInvalRange vir = {0, 0}; return vir; diff --git a/VEX/pub/libvex_s390x_common.h b/VEX/pub/libvex_s390x_common.h index 6d8ef5c3f8..f25c45e691 100644 --- a/VEX/pub/libvex_s390x_common.h +++ b/VEX/pub/libvex_s390x_common.h @@ -87,12 +87,6 @@ /* Number of double words needed to store all facility bits. */ #define S390_NUM_FACILITY_DW 2 -/* The length of the instructions issued by s390_tchain_load64 */ -#define S390_TCHAIN_LOAD64_LEN 16 - -/* The length of the call insn (BASR) used in translation chaining */ -#define S390_TCHAIN_CALL_LEN 2 - #endif /* __LIBVEX_PUB_S390X_H */ /*--------------------------------------------------------------------*/