From 08b043f53a9e3f37b10b5af6abc2e218678b9e2f Mon Sep 17 00:00:00 2001 From: Julian Seward Date: Mon, 2 Oct 2017 18:43:22 +0200 Subject: [PATCH] libvex_BackEnd: lift the assembler out into its own function, for tidyness. No functional change. --- VEX/priv/main_main.c | 1342 +++++++++++++++++++++--------------------- 1 file changed, 681 insertions(+), 661 deletions(-) diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c index 8caaca26e2..0629c15598 100644 --- a/VEX/priv/main_main.c +++ b/VEX/priv/main_main.c @@ -891,760 +891,779 @@ AssemblyBufferOffset emitSimpleInsn ( /*MB_MOD*/Int* offs_profInc, } -/* ---- The back end proper ---- */ +/* ---- The assembler ---- */ -/* Back end of the compilation pipeline. Is not exported. */ +/* Assemble RCODE, writing the resulting machine code into the buffer + specified by VTA->host_bytes of size VTA->host_bytes_size. When done, + store the number of bytes written at the location specified by + VTA->host_bytes_used. RES->offs_profInc may be modified as a result. No + other fields of RES are changed. -static void libvex_BackEnd ( const VexTranslateArgs *vta, - /*MOD*/ VexTranslateResult* res, - /*MOD*/ IRSB* irsb, - VexRegisterUpdates pxControl ) + Returns True for OK, False for 'ran out of buffer space'. +*/ +static +Bool theAssembler ( /*MOD*/VexTranslateResult* res, + const VexTranslateArgs* vta, + HInstrIfThenElse* (*isIfThenElse)( const HInstr* ), + const Bool mode64, + const HInstrSB* rcode ) { - /* This the bundle of functions we need to do the back-end stuff - (insn selection, reg-alloc, assembly) whilst being insulated - from the target instruction set. */ - Bool (*isMove) ( const HInstr*, HReg*, HReg* ); - void (*getRegUsage) ( HRegUsage*, const HInstr*, Bool ); - void (*mapRegs) ( HRegRemap*, HInstr*, Bool ); - HInstrIfThenElse* (*isIfThenElse)( const HInstr* ); - void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ); - void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ); - HInstr* (*genMove) ( HReg, HReg, Bool ); - HInstr* (*genHInstrITE) ( HInstrIfThenElse* ); - HInstr* (*directReload) ( HInstr*, HReg, Short ); - void (*ppInstr) ( const HInstr*, Bool ); - void (*ppCondCode) ( HCondCode ); - UInt (*ppReg) ( HReg ); - HInstrSB* (*iselSB) ( const IRSB*, VexArch, const VexArchInfo*, - const VexAbiInfo*, Int, Int, Bool, Bool, - Addr ); - Int (*emit) ( /*MB_MOD*/Bool*, - UChar*, Int, const HInstr*, Bool, VexEndness, - const void*, const void*, const void*, - const void* ); - Bool (*preciseMemExnsFn) ( Int, Int, VexRegisterUpdates ); - - const RRegUniverse* rRegUniv = NULL; + // QElem are work Queue elements. The work Queue is the top level data + // structure for the emitter. It is initialised with the HInstrVec* of + // the overall HInstrSB. Every OOL HInstrVec* in the tree will at some + // point be present in the Queue. IL HInstrVec*s are never present in + // the Queue because the inner emitter loop processes them in-line, using + // a Stack (see below) to keep track of its nesting level. + // + // The Stack (see below) is empty before and after every Queue element is + // processed. In other words, the Stack only holds state needed during + // the processing of a single Queue element. + // + // The ordering of elements in the Queue is irrelevant -- correct code + // will be emitted even with set semantics (arbitrary order). 
However, + // the FIFOness of the queue is believed to generate code in which + // colder and colder code (more deeply nested OOLs) is placed further + // and further from the start of the emitted machine code, which sounds + // like a layout which should minimise icache misses. + // + // QElems also contain two pieces of jump-fixup information. When we + // finally come to process a QElem, we need to know: + // + // * |jumpToOOLpoint|: the place which wants to jump to the start of the + // emitted insns for this QElem. We must have already emitted that, + // since it will be the conditional jump that leads to this QElem (OOL + // block). + // + // * |resumePoint|: the place we should jump back to after the QElem is + // finished (the "resume point"), which is the emitted code of the + // HInstr immediately following the HInstrIfThenElse that has this + // QElem as its OOL block. + // + // When the QElem is processed, we know both the |jumpToOOLpoint| and + // the |resumePoint|, and so the first can be patched, and the second + // we generate an instruction to jump to. + // + // There are three complications with patching: + // + // (1) per comments on Stack elems, we do not know the |resumePoint| when + // creating a QElem. That will only be known when processing of the + // corresponding IL block is completed. + // + // (2) The top level HInstrVec* has neither a |jumpToOOLpoint| nor a + // |resumePoint|. + // + // (3) Non-top-level OOLs may not have a valid |resumePoint| if they do + // an unconditional IR-level Exit. We can generate the resume point + // branch, but it will be never be used. + typedef + struct { + // The HInstrs for this OOL. + HInstrVec* oolVec; + // Where we should patch to jump to the OOL ("how do we get here?") + Bool jumpToOOLpoint_valid; + Relocation jumpToOOLpoint; + // Resume point offset, in bytes from start of output buffer + // ("where do we go after this block is completed?") + Bool resumePoint_valid; + AssemblyBufferOffset resumePoint; + } + QElem; - Bool mode64, chainingAllowed; - Int out_used; - Int guest_sizeB; - Int offB_HOST_EvC_COUNTER; - Int offB_HOST_EvC_FAILADDR; - Addr max_ga; - HInstrSB* vcode; - HInstrSB* rcode; - isMove = NULL; - getRegUsage = NULL; - mapRegs = NULL; - isIfThenElse = NULL; - genSpill = NULL; - genReload = NULL; - genMove = NULL; - genHInstrITE = NULL; - directReload = NULL; - ppInstr = NULL; - ppCondCode = NULL; - ppReg = NULL; - iselSB = NULL; - emit = NULL; + // SElem are stack elements. When we suspend processing a HInstrVec* in + // order to process an IL path in an IfThenElse, we push the HInstrVec* + // and the next index to process on the stack, so that we know where to + // resume when the nested IL sequence is completed. |vec| and |vec_next| + // record the resume HInstr. + // + // A second effect of processing a nested IL sequence is that we will + // have to (later) process the corresponding OOL sequence. And that OOL + // sequence will have to finish with a jump back to the "resume point" + // (the emitted instruction immediately following the IfThenElse). We + // only know the offset of the resume point instruction in the output + // buffer when we actually resume emitted from there -- that is, when the + // entry we pushed, is popped. So, when we pop, we must mark the + // corresponding OOL entry in the Queue to record there the resume point + // offset. For this reason we also carry |ool_qindex|, which is the + // index of the corresponding OOL entry in the Queue. 
+ typedef + struct { + HInstrVec* vec; // resume point HInstr vector + UInt vec_next; // resume point HInstr vector index + Int ool_qindex; // index in Queue of OOL to mark when we resume + } + SElem; - mode64 = False; - chainingAllowed = False; - guest_sizeB = 0; - offB_HOST_EvC_COUNTER = 0; - offB_HOST_EvC_FAILADDR = 0; - preciseMemExnsFn = NULL; + // The Stack. The stack depth is bounded by maximum number of nested + // hot (IL) sections, so in practice it is going to be very small. + const Int nSTACK = 4; - vassert(vex_initdone); - vassert(vta->disp_cp_xassisted != NULL); + SElem stack[nSTACK]; + Int stackPtr; // points to most recently pushed entry <=> "-1 means empty" - vex_traceflags = vta->traceflags; + // The Queue. The queue size is bounded by the number of cold (OOL) + // sections in the entire HInstrSB, so it's also going to be pretty + // small. + const Int nQUEUE = 8; - /* Both the chainers and the indir are either NULL or non-NULL. */ - if (vta->disp_cp_chain_me_to_slowEP != NULL) { - vassert(vta->disp_cp_chain_me_to_fastEP != NULL); - vassert(vta->disp_cp_xindir != NULL); - chainingAllowed = True; - } else { - vassert(vta->disp_cp_chain_me_to_fastEP == NULL); - vassert(vta->disp_cp_xindir == NULL); - } + QElem queue[nQUEUE]; + Int queueOldest; // index of oldest entry, initially 0 + Int queueNewest; // index of newest entry, + // initially -1, otherwise must be >= queueOldest - switch (vta->arch_guest) { + /////////////////////////////////////////////////////// - case VexArchX86: - preciseMemExnsFn - = X86FN(guest_x86_state_requires_precise_mem_exns); - guest_sizeB = sizeof(VexGuestX86State); - offB_HOST_EvC_COUNTER = offsetof(VexGuestX86State,host_EvC_COUNTER); - offB_HOST_EvC_FAILADDR = offsetof(VexGuestX86State,host_EvC_FAILADDR); - break; + const Bool verbose_asm = (vex_traceflags & VEX_TRACE_ASM) != 0; - case VexArchAMD64: - preciseMemExnsFn - = AMD64FN(guest_amd64_state_requires_precise_mem_exns); - guest_sizeB = sizeof(VexGuestAMD64State); - offB_HOST_EvC_COUNTER = offsetof(VexGuestAMD64State,host_EvC_COUNTER); - offB_HOST_EvC_FAILADDR = offsetof(VexGuestAMD64State,host_EvC_FAILADDR); - break; + const EmitConstants emitConsts + = { .mode64 = mode64, + .endness_host = vta->archinfo_host.endness, + .disp_cp_chain_me_to_slowEP = vta->disp_cp_chain_me_to_slowEP, + .disp_cp_chain_me_to_fastEP = vta->disp_cp_chain_me_to_fastEP, + .disp_cp_xindir = vta->disp_cp_xindir, + .disp_cp_xassisted = vta->disp_cp_xassisted }; - case VexArchPPC32: - preciseMemExnsFn - = PPC32FN(guest_ppc32_state_requires_precise_mem_exns); - guest_sizeB = sizeof(VexGuestPPC32State); - offB_HOST_EvC_COUNTER = offsetof(VexGuestPPC32State,host_EvC_COUNTER); - offB_HOST_EvC_FAILADDR = offsetof(VexGuestPPC32State,host_EvC_FAILADDR); - break; + AssemblyBufferOffset cursor = 0; + AssemblyBufferOffset cursor_limit = vta->host_bytes_size; - case VexArchPPC64: - preciseMemExnsFn - = PPC64FN(guest_ppc64_state_requires_precise_mem_exns); - guest_sizeB = sizeof(VexGuestPPC64State); - offB_HOST_EvC_COUNTER = offsetof(VexGuestPPC64State,host_EvC_COUNTER); - offB_HOST_EvC_FAILADDR = offsetof(VexGuestPPC64State,host_EvC_FAILADDR); - break; + *(vta->host_bytes_used) = 0; - case VexArchS390X: - preciseMemExnsFn - = S390FN(guest_s390x_state_requires_precise_mem_exns); - guest_sizeB = sizeof(VexGuestS390XState); - offB_HOST_EvC_COUNTER = offsetof(VexGuestS390XState,host_EvC_COUNTER); - offB_HOST_EvC_FAILADDR = offsetof(VexGuestS390XState,host_EvC_FAILADDR); - break; + queueOldest = 0; + queueNewest = -1; - case 
VexArchARM: - preciseMemExnsFn - = ARMFN(guest_arm_state_requires_precise_mem_exns); - guest_sizeB = sizeof(VexGuestARMState); - offB_HOST_EvC_COUNTER = offsetof(VexGuestARMState,host_EvC_COUNTER); - offB_HOST_EvC_FAILADDR = offsetof(VexGuestARMState,host_EvC_FAILADDR); - break; + vassert(queueNewest < nQUEUE); + queueNewest++; + { + QElem* qe = &queue[queueNewest]; + vex_bzero(qe, sizeof(*qe)); + qe->oolVec = rcode->insns; + qe->jumpToOOLpoint_valid = False; + qe->resumePoint_valid = False; + } + vassert(queueNewest == 0); - case VexArchARM64: - preciseMemExnsFn - = ARM64FN(guest_arm64_state_requires_precise_mem_exns); - guest_sizeB = sizeof(VexGuestARM64State); - offB_HOST_EvC_COUNTER = offsetof(VexGuestARM64State,host_EvC_COUNTER); - offB_HOST_EvC_FAILADDR = offsetof(VexGuestARM64State,host_EvC_FAILADDR); - break; + /* Main loop, processing Queue entries, until there are no more. */ + while (queueOldest <= queueNewest) { - case VexArchMIPS32: - preciseMemExnsFn - = MIPS32FN(guest_mips32_state_requires_precise_mem_exns); - guest_sizeB = sizeof(VexGuestMIPS32State); - offB_HOST_EvC_COUNTER = offsetof(VexGuestMIPS32State,host_EvC_COUNTER); - offB_HOST_EvC_FAILADDR = offsetof(VexGuestMIPS32State,host_EvC_FAILADDR); - break; + Int qCur = queueOldest; + if (UNLIKELY(verbose_asm)) + vex_printf("BEGIN queue[%d]\n", qCur); - case VexArchMIPS64: - preciseMemExnsFn - = MIPS64FN(guest_mips64_state_requires_precise_mem_exns); - guest_sizeB = sizeof(VexGuestMIPS64State); - offB_HOST_EvC_COUNTER = offsetof(VexGuestMIPS64State,host_EvC_COUNTER); - offB_HOST_EvC_FAILADDR = offsetof(VexGuestMIPS64State,host_EvC_FAILADDR); - break; + // Take the oldest entry in the queue + QElem* qe = &queue[queueOldest]; + queueOldest++; - default: - vpanic("LibVEX_Codegen: unsupported guest insn set"); - } + // Stay sane. Only the top level block has no branch to it and no + // resume point. + if (qe->oolVec == rcode->insns) { + // This is the top level block + vassert(!qe->jumpToOOLpoint_valid); + vassert(!qe->resumePoint_valid); + } else { + vassert(qe->jumpToOOLpoint_valid); + vassert(qe->resumePoint_valid); + // In the future, we might be able to allow the resume point to be + // invalid for non-top-level blocks, if the block contains an + // unconditional exit. Currently the IR can't represent that, so + // the assertion is valid. + } + // Processing |qe| + if (qe->jumpToOOLpoint_valid) { + // patch qe->jmpToOOLpoint to jump to |here| + if (UNLIKELY(verbose_asm)) { + vex_printf(" -- APPLY "); + ppRelocation(qe->jumpToOOLpoint); + vex_printf("\n"); + } + applyRelocation(qe->jumpToOOLpoint, &vta->host_bytes[0], + cursor, cursor, vta->archinfo_host.endness, + verbose_asm); + } - switch (vta->arch_host) { + // Initialise the stack, for processing of |qe|. 
+ stackPtr = 0; // "contains one element" - case VexArchX86: - mode64 = False; - rRegUniv = X86FN(getRRegUniverse_X86()); - isMove = CAST_TO_TYPEOF(isMove) X86FN(isMove_X86Instr); - getRegUsage - = CAST_TO_TYPEOF(getRegUsage) X86FN(getRegUsage_X86Instr); - mapRegs = CAST_TO_TYPEOF(mapRegs) X86FN(mapRegs_X86Instr); - isIfThenElse = CAST_TO_TYPEOF(isIfThenElse) X86FN(isIfThenElse_X86Instr); - genSpill = CAST_TO_TYPEOF(genSpill) X86FN(genSpill_X86); - genReload = CAST_TO_TYPEOF(genReload) X86FN(genReload_X86); - genMove = CAST_TO_TYPEOF(genMove) X86FN(genMove_X86); - genHInstrITE = CAST_TO_TYPEOF(genHInstrITE) X86FN(X86Instr_IfThenElse); - directReload = CAST_TO_TYPEOF(directReload) X86FN(directReload_X86); - ppInstr = CAST_TO_TYPEOF(ppInstr) X86FN(ppX86Instr); - ppCondCode = CAST_TO_TYPEOF(ppCondCode) X86FN(ppX86CondCode); - ppReg = CAST_TO_TYPEOF(ppReg) X86FN(ppHRegX86); - iselSB = X86FN(iselSB_X86); - emit = CAST_TO_TYPEOF(emit) X86FN(emit_X86Instr); - vassert(vta->archinfo_host.endness == VexEndnessLE); - break; + stack[stackPtr].vec = qe->oolVec; + stack[stackPtr].vec_next = 0; + stack[stackPtr].ool_qindex = -1; // INVALID - case VexArchAMD64: - mode64 = True; - rRegUniv = AMD64FN(getRRegUniverse_AMD64()); - isMove = CAST_TO_TYPEOF(isMove) AMD64FN(isMove_AMD64Instr); - getRegUsage - = CAST_TO_TYPEOF(getRegUsage) AMD64FN(getRegUsage_AMD64Instr); - mapRegs = CAST_TO_TYPEOF(mapRegs) AMD64FN(mapRegs_AMD64Instr); - genSpill = CAST_TO_TYPEOF(genSpill) AMD64FN(genSpill_AMD64); - genReload = CAST_TO_TYPEOF(genReload) AMD64FN(genReload_AMD64); - genMove = CAST_TO_TYPEOF(genMove) AMD64FN(genMove_AMD64); - directReload = CAST_TO_TYPEOF(directReload) AMD64FN(directReload_AMD64); - ppInstr = CAST_TO_TYPEOF(ppInstr) AMD64FN(ppAMD64Instr); - ppReg = CAST_TO_TYPEOF(ppReg) AMD64FN(ppHRegAMD64); - iselSB = AMD64FN(iselSB_AMD64); - emit = CAST_TO_TYPEOF(emit) AMD64FN(emit_AMD64Instr); - vassert(vta->archinfo_host.endness == VexEndnessLE); - break; + // Iterate till the stack is empty. This effectively does a + // depth-first traversal of the hot-path (IL) tree reachable from + // here, and at the same time adds any encountered cold-path (OOL) + // blocks to the Queue for later processing. This is the heart of the + // flattening algorithm. 
+ while (stackPtr >= 0) { - case VexArchPPC32: - mode64 = False; - rRegUniv = PPC32FN(getRRegUniverse_PPC(mode64)); - isMove = CAST_TO_TYPEOF(isMove) PPC32FN(isMove_PPCInstr); - getRegUsage - = CAST_TO_TYPEOF(getRegUsage) PPC32FN(getRegUsage_PPCInstr); - mapRegs = CAST_TO_TYPEOF(mapRegs) PPC32FN(mapRegs_PPCInstr); - genSpill = CAST_TO_TYPEOF(genSpill) PPC32FN(genSpill_PPC); - genReload = CAST_TO_TYPEOF(genReload) PPC32FN(genReload_PPC); - genMove = CAST_TO_TYPEOF(genMove) PPC32FN(genMove_PPC); - ppInstr = CAST_TO_TYPEOF(ppInstr) PPC32FN(ppPPCInstr); - ppReg = CAST_TO_TYPEOF(ppReg) PPC32FN(ppHRegPPC); - iselSB = PPC32FN(iselSB_PPC); - emit = CAST_TO_TYPEOF(emit) PPC32FN(emit_PPCInstr); - vassert(vta->archinfo_host.endness == VexEndnessBE); - break; + if (UNLIKELY(verbose_asm)) + vex_printf(" -- CONSIDER stack[%d]\n", stackPtr); - case VexArchPPC64: - mode64 = True; - rRegUniv = PPC64FN(getRRegUniverse_PPC(mode64)); - isMove = CAST_TO_TYPEOF(isMove) PPC64FN(isMove_PPCInstr); - getRegUsage - = CAST_TO_TYPEOF(getRegUsage) PPC64FN(getRegUsage_PPCInstr); - mapRegs = CAST_TO_TYPEOF(mapRegs) PPC64FN(mapRegs_PPCInstr); - genSpill = CAST_TO_TYPEOF(genSpill) PPC64FN(genSpill_PPC); - genReload = CAST_TO_TYPEOF(genReload) PPC64FN(genReload_PPC); - genMove = CAST_TO_TYPEOF(genMove) PPC64FN(genMove_PPC); - ppInstr = CAST_TO_TYPEOF(ppInstr) PPC64FN(ppPPCInstr); - ppReg = CAST_TO_TYPEOF(ppReg) PPC64FN(ppHRegPPC); - iselSB = PPC64FN(iselSB_PPC); - emit = CAST_TO_TYPEOF(emit) PPC64FN(emit_PPCInstr); - vassert(vta->archinfo_host.endness == VexEndnessBE || - vta->archinfo_host.endness == VexEndnessLE ); - break; + HInstrVec* vec = stack[stackPtr].vec; + UInt vec_next = stack[stackPtr].vec_next; + Int ool_qindex = stack[stackPtr].ool_qindex; + stackPtr--; - case VexArchS390X: - mode64 = True; - rRegUniv = S390FN(getRRegUniverse_S390()); - isMove = CAST_TO_TYPEOF(isMove) S390FN(isMove_S390Instr); - getRegUsage - = CAST_TO_TYPEOF(getRegUsage) S390FN(getRegUsage_S390Instr); - mapRegs = CAST_TO_TYPEOF(mapRegs) S390FN(mapRegs_S390Instr); - genSpill = CAST_TO_TYPEOF(genSpill) S390FN(genSpill_S390); - genReload = CAST_TO_TYPEOF(genReload) S390FN(genReload_S390); - genMove = CAST_TO_TYPEOF(genMove) S390FN(genMove_S390); - // fixs390: consider implementing directReload_S390 - ppInstr = CAST_TO_TYPEOF(ppInstr) S390FN(ppS390Instr); - ppReg = CAST_TO_TYPEOF(ppReg) S390FN(ppHRegS390); - iselSB = S390FN(iselSB_S390); - emit = CAST_TO_TYPEOF(emit) S390FN(emit_S390Instr); - vassert(vta->archinfo_host.endness == VexEndnessBE); - break; + if (vec_next > 0) { + // We're resuming the current IL block having just finished + // processing a nested IL. The OOL counterpart to the nested IL + // we just finished processing will have to jump back to here. + // So we'll need to mark its Queue entry to record that fact. 
- case VexArchARM: - mode64 = False; - rRegUniv = ARMFN(getRRegUniverse_ARM()); - isMove = CAST_TO_TYPEOF(isMove) ARMFN(isMove_ARMInstr); - getRegUsage - = CAST_TO_TYPEOF(getRegUsage) ARMFN(getRegUsage_ARMInstr); - mapRegs = CAST_TO_TYPEOF(mapRegs) ARMFN(mapRegs_ARMInstr); - genSpill = CAST_TO_TYPEOF(genSpill) ARMFN(genSpill_ARM); - genReload = CAST_TO_TYPEOF(genReload) ARMFN(genReload_ARM); - genMove = CAST_TO_TYPEOF(genMove) ARMFN(genMove_ARM); - ppInstr = CAST_TO_TYPEOF(ppInstr) ARMFN(ppARMInstr); - ppReg = CAST_TO_TYPEOF(ppReg) ARMFN(ppHRegARM); - iselSB = ARMFN(iselSB_ARM); - emit = CAST_TO_TYPEOF(emit) ARMFN(emit_ARMInstr); - vassert(vta->archinfo_host.endness == VexEndnessLE); - break; + // First assert that the OOL actually *is* in the Queue (it + // must be, since we can't have processed it yet). + vassert(queueOldest <= queueNewest); // "at least 1 entry in Q" + vassert(queueOldest <= ool_qindex && ool_qindex <= queueNewest); - case VexArchARM64: - mode64 = True; - rRegUniv = ARM64FN(getRRegUniverse_ARM64()); - isMove = CAST_TO_TYPEOF(isMove) ARM64FN(isMove_ARM64Instr); - getRegUsage - = CAST_TO_TYPEOF(getRegUsage) ARM64FN(getRegUsage_ARM64Instr); - mapRegs = CAST_TO_TYPEOF(mapRegs) ARM64FN(mapRegs_ARM64Instr); - genSpill = CAST_TO_TYPEOF(genSpill) ARM64FN(genSpill_ARM64); - genReload = CAST_TO_TYPEOF(genReload) ARM64FN(genReload_ARM64); - genMove = CAST_TO_TYPEOF(genMove) ARM64FN(genMove_ARM64); - ppInstr = CAST_TO_TYPEOF(ppInstr) ARM64FN(ppARM64Instr); - ppReg = CAST_TO_TYPEOF(ppReg) ARM64FN(ppHRegARM64); - iselSB = ARM64FN(iselSB_ARM64); - emit = CAST_TO_TYPEOF(emit) ARM64FN(emit_ARM64Instr); - vassert(vta->archinfo_host.endness == VexEndnessLE); - break; + vassert(!queue[ool_qindex].resumePoint_valid); + queue[ool_qindex].resumePoint = cursor; + queue[ool_qindex].resumePoint_valid = True; + if (UNLIKELY(verbose_asm)) + vex_printf(" -- RESUME previous IL\n"); + } else { + // We're starting a new IL. Due to the tail-recursive nature of + // entering ILs, this means we can actually only be starting the + // outermost (top level) block for this particular Queue entry. 
+ vassert(ool_qindex == -1); + vassert(vec == qe->oolVec); + if (UNLIKELY(verbose_asm)) + vex_printf(" -- START new IL\n"); + } - case VexArchMIPS32: - mode64 = False; - rRegUniv = MIPS32FN(getRRegUniverse_MIPS(mode64)); - isMove = CAST_TO_TYPEOF(isMove) MIPS32FN(isMove_MIPSInstr); - getRegUsage - = CAST_TO_TYPEOF(getRegUsage) MIPS32FN(getRegUsage_MIPSInstr); - mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS32FN(mapRegs_MIPSInstr); - genSpill = CAST_TO_TYPEOF(genSpill) MIPS32FN(genSpill_MIPS); - genReload = CAST_TO_TYPEOF(genReload) MIPS32FN(genReload_MIPS); - genMove = CAST_TO_TYPEOF(genMove) MIPS32FN(genMove_MIPS); - ppInstr = CAST_TO_TYPEOF(ppInstr) MIPS32FN(ppMIPSInstr); - ppReg = CAST_TO_TYPEOF(ppReg) MIPS32FN(ppHRegMIPS); - iselSB = MIPS32FN(iselSB_MIPS); - emit = CAST_TO_TYPEOF(emit) MIPS32FN(emit_MIPSInstr); - vassert(vta->archinfo_host.endness == VexEndnessLE - || vta->archinfo_host.endness == VexEndnessBE); - break; + // Repeatedly process "zero or more simple HInstrs followed by (an + // IfThenElse or end-of-block)" + while (True) { - case VexArchMIPS64: - mode64 = True; - rRegUniv = MIPS64FN(getRRegUniverse_MIPS(mode64)); - isMove = CAST_TO_TYPEOF(isMove) MIPS64FN(isMove_MIPSInstr); - getRegUsage - = CAST_TO_TYPEOF(getRegUsage) MIPS64FN(getRegUsage_MIPSInstr); - mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS64FN(mapRegs_MIPSInstr); - genSpill = CAST_TO_TYPEOF(genSpill) MIPS64FN(genSpill_MIPS); - genReload = CAST_TO_TYPEOF(genReload) MIPS64FN(genReload_MIPS); - genMove = CAST_TO_TYPEOF(genMove) MIPS64FN(genMove_MIPS); - ppInstr = CAST_TO_TYPEOF(ppInstr) MIPS64FN(ppMIPSInstr); - ppReg = CAST_TO_TYPEOF(ppReg) MIPS64FN(ppHRegMIPS); - iselSB = MIPS64FN(iselSB_MIPS); - emit = CAST_TO_TYPEOF(emit) MIPS64FN(emit_MIPSInstr); - vassert(vta->archinfo_host.endness == VexEndnessLE - || vta->archinfo_host.endness == VexEndnessBE); - break; + // Process "zero or more simple HInstrs" + while (vec_next < vec->insns_used + && !isIfThenElse(vec->insns[vec_next])) { + AssemblyBufferOffset cursor_next + = emitSimpleInsn( &(res->offs_profInc), &vta->host_bytes[0], + cursor, cursor_limit, vec->insns[vec_next], + &emitConsts, vta ); + if (UNLIKELY(cursor_next == cursor)) { + // We ran out of output space. Give up. + return False; + } + vec_next++; + cursor = cursor_next; + } + + // Now we've either got to the end of the hot path, or we have + // an IfThenElse. + if (vec_next >= vec->insns_used) + break; + + // So we have an IfThenElse. + HInstrIfThenElse* hite = isIfThenElse(vec->insns[vec_next]); + vassert(hite); + vassert(hite->n_phis == 0); // the regalloc will have removed them + + // Put |ite|'s OOL block in the Queue. We'll deal with it + // later. Also, generate the (skeleton) conditional branch to it, + // and collect enough information that we can create patch the + // branch later, once we know where the destination is. + vassert(queueNewest < nQUEUE-1); // else out of Queue space + queueNewest++; + queue[queueNewest].oolVec = hite->outOfLine; + queue[queueNewest].resumePoint_valid = False; // not yet known + queue[queueNewest].resumePoint = -1; // invalid + + HInstr* cond_branch + = X86Instr_JmpCond(hite->ccOOL, + queueNewest/*FOR DEBUG PRINTING ONLY*/); + AssemblyBufferOffset cursor_next + = emitSimpleInsn( &(res->offs_profInc), &vta->host_bytes[0], + cursor, cursor_limit, cond_branch, + &emitConsts, vta ); + if (UNLIKELY(cursor_next == cursor)) { + // We ran out of output space. Give up. 
+ return False; + } + queue[queueNewest].jumpToOOLpoint_valid = True; + queue[queueNewest].jumpToOOLpoint + = collectRelocInfo_X86(cursor, cond_branch); - default: - vpanic("LibVEX_Translate: unsupported host insn set"); - } + cursor = cursor_next; - // Are the host's hardware capabilities feasible. The function will - // not return if hwcaps are infeasible in some sense. - check_hwcaps(vta->arch_host, vta->archinfo_host.hwcaps); + // Now we descend into |ite's| IL block. So we need to save + // where we are in this block, so we can resume when the inner + // one is done. + vassert(stackPtr < nSTACK-1); // else out of Stack space + stackPtr++; + stack[stackPtr].vec = vec; + stack[stackPtr].vec_next = vec_next+1; + stack[stackPtr].ool_qindex = queueNewest; + // And now descend into the inner block. We could have just + // pushed its details on the stack and immediately pop it, but + // it seems simpler to update |vec| and |vec_next| and continue + // directly. + if (UNLIKELY(verbose_asm)) { + vex_printf(" -- START inner IL\n"); + } + vec = hite->fallThrough; + vec_next = 0; - /* Turn it into virtual-registerised code. Build trees -- this - also throws away any dead bindings. */ - max_ga = ado_treebuild_BB( irsb, preciseMemExnsFn, pxControl ); + // And continue with "Repeatedly process ..." + } - if (vta->finaltidy) { - irsb = vta->finaltidy(irsb); - } + // Getting here means we've completed an inner IL and now want to + // resume the parent IL. That is, pop a saved context off the + // stack. + } - vexAllocSanityCheck(); + // Hot path is complete. Now, probably, we have to add a jump + // back to the resume point. + if (qe->resumePoint_valid) { + if (0) + vex_printf(" // Generate jump to resume point [%03u]\n", + qe->resumePoint); + HInstr* jmp = X86Instr_Jmp(cursor, qe->resumePoint); + AssemblyBufferOffset cursor_next + = emitSimpleInsn( &(res->offs_profInc), &vta->host_bytes[0], + cursor, cursor_limit, jmp, + &emitConsts, vta ); + if (UNLIKELY(cursor_next == cursor)) { + // We ran out of output space. Give up. + return False; + } + cursor = cursor_next; + } - if (vex_traceflags & VEX_TRACE_TREES) { - vex_printf("\n------------------------" - " After tree-building " - "------------------------\n\n"); - ppIRSB ( irsb ); - vex_printf("\n"); + if (UNLIKELY(verbose_asm)) + vex_printf("END queue[%d]\n\n", qCur); + // Finished with this Queue entry. } + // Queue empty, all blocks processed - /* HACK */ - if (0) { - *(vta->host_bytes_used) = 0; - res->status = VexTransOK; return; - } - /* end HACK */ + *(vta->host_bytes_used) = cursor; - if (vex_traceflags & VEX_TRACE_VCODE) - vex_printf("\n------------------------" - " Instruction selection " - "------------------------\n"); + return True; // OK +} - /* No guest has its IP field at offset zero. If this fails it - means some transformation pass somewhere failed to update/copy - irsb->offsIP properly. */ - vassert(irsb->offsIP >= 16); - vcode = iselSB ( irsb, vta->arch_host, - &vta->archinfo_host, - &vta->abiinfo_both, - offB_HOST_EvC_COUNTER, - offB_HOST_EvC_FAILADDR, - chainingAllowed, - vta->addProfInc, - max_ga ); +/* ---- The back end proper ---- */ - vexAllocSanityCheck(); +/* Back end of the compilation pipeline. Is not exported. 
*/ - if (vex_traceflags & VEX_TRACE_VCODE) - vex_printf("\n"); +static void libvex_BackEnd ( const VexTranslateArgs* vta, + /*MOD*/ VexTranslateResult* res, + /*MOD*/ IRSB* irsb, + VexRegisterUpdates pxControl ) +{ + /* This the bundle of functions we need to do the back-end stuff + (insn selection, reg-alloc, assembly) whilst being insulated + from the target instruction set. */ + Bool (*isMove) ( const HInstr*, HReg*, HReg* ); + void (*getRegUsage) ( HRegUsage*, const HInstr*, Bool ); + void (*mapRegs) ( HRegRemap*, HInstr*, Bool ); + HInstrIfThenElse* (*isIfThenElse)( const HInstr* ); + void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ); + void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ); + HInstr* (*genMove) ( HReg, HReg, Bool ); + HInstr* (*genHInstrITE) ( HInstrIfThenElse* ); + HInstr* (*directReload) ( HInstr*, HReg, Short ); + void (*ppInstr) ( const HInstr*, Bool ); + void (*ppCondCode) ( HCondCode ); + UInt (*ppReg) ( HReg ); + HInstrSB* (*iselSB) ( const IRSB*, VexArch, const VexArchInfo*, + const VexAbiInfo*, Int, Int, Bool, Bool, + Addr ); + Int (*emit) ( /*MB_MOD*/Bool*, + UChar*, Int, const HInstr*, Bool, VexEndness, + const void*, const void*, const void*, + const void* ); + Bool (*preciseMemExnsFn) ( Int, Int, VexRegisterUpdates ); - if (vex_traceflags & VEX_TRACE_VCODE) { - ppHInstrSB(vcode, isIfThenElse, ppInstr, ppCondCode, mode64); - } + const RRegUniverse* rRegUniv = NULL; - /* Register allocate. */ - RegAllocControl con = { - .univ = rRegUniv, .isMove = isMove, .getRegUsage = getRegUsage, - .mapRegs = mapRegs, .isIfThenElse = isIfThenElse, .genSpill = genSpill, - .genReload = genReload, .genMove = genMove, .genHInstrITE = genHInstrITE, - .directReload = directReload, .guest_sizeB = guest_sizeB, - .ppInstr = ppInstr, .ppCondCode = ppCondCode, .ppReg = ppReg, - .mode64 = mode64}; - rcode = doRegisterAllocation(vcode, &con); + Bool mode64, chainingAllowed; + Int guest_sizeB; + Int offB_HOST_EvC_COUNTER; + Int offB_HOST_EvC_FAILADDR; + Addr max_ga; + HInstrSB* vcode; + HInstrSB* rcode; - vexAllocSanityCheck(); + isMove = NULL; + getRegUsage = NULL; + mapRegs = NULL; + isIfThenElse = NULL; + genSpill = NULL; + genReload = NULL; + genMove = NULL; + genHInstrITE = NULL; + directReload = NULL; + ppInstr = NULL; + ppCondCode = NULL; + ppReg = NULL; + iselSB = NULL; + emit = NULL; - if (vex_traceflags & VEX_TRACE_RCODE) { - vex_printf("\n------------------------" - " Register-allocated code " - "------------------------\n\n"); - ppHInstrSB(rcode, isIfThenElse, ppInstr, ppCondCode, mode64); - vex_printf("\n"); - } + mode64 = False; + chainingAllowed = False; + guest_sizeB = 0; + offB_HOST_EvC_COUNTER = 0; + offB_HOST_EvC_FAILADDR = 0; + preciseMemExnsFn = NULL; - /* HACK */ - if (0) { - *(vta->host_bytes_used) = 0; - res->status = VexTransOK; return; - } - /* end HACK */ + vassert(vex_initdone); + vassert(vta->disp_cp_xassisted != NULL); - /* Assemble */ - if (vex_traceflags & VEX_TRACE_ASM) { - vex_printf("\n------------------------" - " Assembly " - "------------------------\n\n"); + vex_traceflags = vta->traceflags; + + /* Both the chainers and the indir are either NULL or non-NULL. 
*/ + if (vta->disp_cp_chain_me_to_slowEP != NULL) { + vassert(vta->disp_cp_chain_me_to_fastEP != NULL); + vassert(vta->disp_cp_xindir != NULL); + chainingAllowed = True; + } else { + vassert(vta->disp_cp_chain_me_to_fastEP == NULL); + vassert(vta->disp_cp_xindir == NULL); } - //////////////////////////////////////////////////////// - //// BEGIN the assembler + switch (vta->arch_guest) { - // QElem are work Queue elements. The work Queue is the top level data - // structure for the emitter. It is initialised with the HInstrVec* of - // the overall HInstrSB. Every OOL HInstrVec* in the tree will at some - // point be present in the Queue. IL HInstrVec*s are never present in - // the Queue because the inner emitter loop processes them in-line, using - // a Stack (see below) to keep track of its nesting level. - // - // The Stack (see below) is empty before and after every Queue element is - // processed. In other words, the Stack only holds state needed during - // the processing of a single Queue element. - // - // The ordering of elements in the Queue is irrelevant -- correct code - // will be emitted even with set semantics (arbitrary order). However, - // the FIFOness of the queue is believed to generate code in which - // colder and colder code (more deeply nested OOLs) is placed further - // and further from the start of the emitted machine code, which sounds - // like a layout which should minimise icache misses. - // - // QElems also contain two pieces of jump-fixup information. When we - // finally come to process a QElem, we need to know: - // - // * |jumpToOOLpoint|: the place which wants to jump to the start of the - // emitted insns for this QElem. We must have already emitted that, - // since it will be the conditional jump that leads to this QElem (OOL - // block). - // - // * |resumePoint|: the place we should jump back to after the QElem is - // finished (the "resume point"), which is the emitted code of the - // HInstr immediately following the HInstrIfThenElse that has this - // QElem as its OOL block. - // - // When the QElem is processed, we know both the |jumpToOOLpoint| and - // the |resumePoint|, and so the first can be patched, and the second - // we generate an instruction to jump to. - // - // There are three complications with patching: - // - // (1) per comments on Stack elems, we do not know the |resumePoint| when - // creating a QElem. That will only be known when processing of the - // corresponding IL block is completed. - // - // (2) The top level HInstrVec* has neither a |jumpToOOLpoint| nor a - // |resumePoint|. - // - // (3) Non-top-level OOLs may not have a valid |resumePoint| if they do - // an unconditional IR-level Exit. We can generate the resume point - // branch, but it will be never be used. - typedef - struct { - // The HInstrs for this OOL. 
- HInstrVec* oolVec; - // Where we should patch to jump to the OOL ("how do we get here?") - Bool jumpToOOLpoint_valid; - Relocation jumpToOOLpoint; - // Resume point offset, in bytes from start of output buffer - // ("where do we go after this block is completed?") - Bool resumePoint_valid; - AssemblyBufferOffset resumePoint; - } - QElem; + case VexArchX86: + preciseMemExnsFn + = X86FN(guest_x86_state_requires_precise_mem_exns); + guest_sizeB = sizeof(VexGuestX86State); + offB_HOST_EvC_COUNTER = offsetof(VexGuestX86State,host_EvC_COUNTER); + offB_HOST_EvC_FAILADDR = offsetof(VexGuestX86State,host_EvC_FAILADDR); + break; + + case VexArchAMD64: + preciseMemExnsFn + = AMD64FN(guest_amd64_state_requires_precise_mem_exns); + guest_sizeB = sizeof(VexGuestAMD64State); + offB_HOST_EvC_COUNTER = offsetof(VexGuestAMD64State,host_EvC_COUNTER); + offB_HOST_EvC_FAILADDR = offsetof(VexGuestAMD64State,host_EvC_FAILADDR); + break; + case VexArchPPC32: + preciseMemExnsFn + = PPC32FN(guest_ppc32_state_requires_precise_mem_exns); + guest_sizeB = sizeof(VexGuestPPC32State); + offB_HOST_EvC_COUNTER = offsetof(VexGuestPPC32State,host_EvC_COUNTER); + offB_HOST_EvC_FAILADDR = offsetof(VexGuestPPC32State,host_EvC_FAILADDR); + break; - // SElem are stack elements. When we suspend processing a HInstrVec* in - // order to process an IL path in an IfThenElse, we push the HInstrVec* - // and the next index to process on the stack, so that we know where to - // resume when the nested IL sequence is completed. |vec| and |vec_next| - // record the resume HInstr. - // - // A second effect of processing a nested IL sequence is that we will - // have to (later) process the corresponding OOL sequence. And that OOL - // sequence will have to finish with a jump back to the "resume point" - // (the emitted instruction immediately following the IfThenElse). We - // only know the offset of the resume point instruction in the output - // buffer when we actually resume emitted from there -- that is, when the - // entry we pushed, is popped. So, when we pop, we must mark the - // corresponding OOL entry in the Queue to record there the resume point - // offset. For this reason we also carry |ool_qindex|, which is the - // index of the corresponding OOL entry in the Queue. - typedef - struct { - HInstrVec* vec; // resume point HInstr vector - UInt vec_next; // resume point HInstr vector index - Int ool_qindex; // index in Queue of OOL to mark when we resume - } - SElem; + case VexArchPPC64: + preciseMemExnsFn + = PPC64FN(guest_ppc64_state_requires_precise_mem_exns); + guest_sizeB = sizeof(VexGuestPPC64State); + offB_HOST_EvC_COUNTER = offsetof(VexGuestPPC64State,host_EvC_COUNTER); + offB_HOST_EvC_FAILADDR = offsetof(VexGuestPPC64State,host_EvC_FAILADDR); + break; - // The Stack. The stack depth is bounded by maximum number of nested - // hot (IL) sections, so in practice it is going to be very small. 
- const Int nSTACK = 4; + case VexArchS390X: + preciseMemExnsFn + = S390FN(guest_s390x_state_requires_precise_mem_exns); + guest_sizeB = sizeof(VexGuestS390XState); + offB_HOST_EvC_COUNTER = offsetof(VexGuestS390XState,host_EvC_COUNTER); + offB_HOST_EvC_FAILADDR = offsetof(VexGuestS390XState,host_EvC_FAILADDR); + break; - SElem stack[nSTACK]; - Int stackPtr; // points to most recently pushed entry <=> "-1 means empty" + case VexArchARM: + preciseMemExnsFn + = ARMFN(guest_arm_state_requires_precise_mem_exns); + guest_sizeB = sizeof(VexGuestARMState); + offB_HOST_EvC_COUNTER = offsetof(VexGuestARMState,host_EvC_COUNTER); + offB_HOST_EvC_FAILADDR = offsetof(VexGuestARMState,host_EvC_FAILADDR); + break; - // The Queue. The queue size is bounded by the number of cold (OOL) - // sections in the entire HInstrSB, so it's also going to be pretty - // small. - const Int nQUEUE = 8; + case VexArchARM64: + preciseMemExnsFn + = ARM64FN(guest_arm64_state_requires_precise_mem_exns); + guest_sizeB = sizeof(VexGuestARM64State); + offB_HOST_EvC_COUNTER = offsetof(VexGuestARM64State,host_EvC_COUNTER); + offB_HOST_EvC_FAILADDR = offsetof(VexGuestARM64State,host_EvC_FAILADDR); + break; - QElem queue[nQUEUE]; - Int queueOldest; // index of oldest entry, initially 0 - Int queueNewest; // index of newest entry, - // initially -1, otherwise must be >= queueOldest + case VexArchMIPS32: + preciseMemExnsFn + = MIPS32FN(guest_mips32_state_requires_precise_mem_exns); + guest_sizeB = sizeof(VexGuestMIPS32State); + offB_HOST_EvC_COUNTER = offsetof(VexGuestMIPS32State,host_EvC_COUNTER); + offB_HOST_EvC_FAILADDR = offsetof(VexGuestMIPS32State,host_EvC_FAILADDR); + break; - /////////////////////////////////////////////////////// + case VexArchMIPS64: + preciseMemExnsFn + = MIPS64FN(guest_mips64_state_requires_precise_mem_exns); + guest_sizeB = sizeof(VexGuestMIPS64State); + offB_HOST_EvC_COUNTER = offsetof(VexGuestMIPS64State,host_EvC_COUNTER); + offB_HOST_EvC_FAILADDR = offsetof(VexGuestMIPS64State,host_EvC_FAILADDR); + break; - const Bool verbose_asm = (vex_traceflags & VEX_TRACE_ASM) != 0; + default: + vpanic("LibVEX_Codegen: unsupported guest insn set"); + } - const EmitConstants emitConsts - = { .mode64 = mode64, - .endness_host = vta->archinfo_host.endness, - .disp_cp_chain_me_to_slowEP = vta->disp_cp_chain_me_to_slowEP, - .disp_cp_chain_me_to_fastEP = vta->disp_cp_chain_me_to_fastEP, - .disp_cp_xindir = vta->disp_cp_xindir, - .disp_cp_xassisted = vta->disp_cp_xassisted }; - AssemblyBufferOffset cursor = 0; - AssemblyBufferOffset cursor_limit = vta->host_bytes_size; + switch (vta->arch_host) { - queueOldest = 0; - queueNewest = -1; + case VexArchX86: + mode64 = False; + rRegUniv = X86FN(getRRegUniverse_X86()); + isMove = CAST_TO_TYPEOF(isMove) X86FN(isMove_X86Instr); + getRegUsage + = CAST_TO_TYPEOF(getRegUsage) X86FN(getRegUsage_X86Instr); + mapRegs = CAST_TO_TYPEOF(mapRegs) X86FN(mapRegs_X86Instr); + isIfThenElse = CAST_TO_TYPEOF(isIfThenElse) X86FN(isIfThenElse_X86Instr); + genSpill = CAST_TO_TYPEOF(genSpill) X86FN(genSpill_X86); + genReload = CAST_TO_TYPEOF(genReload) X86FN(genReload_X86); + genMove = CAST_TO_TYPEOF(genMove) X86FN(genMove_X86); + genHInstrITE = CAST_TO_TYPEOF(genHInstrITE) X86FN(X86Instr_IfThenElse); + directReload = CAST_TO_TYPEOF(directReload) X86FN(directReload_X86); + ppInstr = CAST_TO_TYPEOF(ppInstr) X86FN(ppX86Instr); + ppCondCode = CAST_TO_TYPEOF(ppCondCode) X86FN(ppX86CondCode); + ppReg = CAST_TO_TYPEOF(ppReg) X86FN(ppHRegX86); + iselSB = X86FN(iselSB_X86); + emit = CAST_TO_TYPEOF(emit) 
X86FN(emit_X86Instr); + vassert(vta->archinfo_host.endness == VexEndnessLE); + break; - vassert(queueNewest < nQUEUE); - queueNewest++; - { - QElem* qe = &queue[queueNewest]; - vex_bzero(qe, sizeof(*qe)); - qe->oolVec = rcode->insns; - qe->jumpToOOLpoint_valid = False; - qe->resumePoint_valid = False; - } - vassert(queueNewest == 0); + case VexArchAMD64: + mode64 = True; + rRegUniv = AMD64FN(getRRegUniverse_AMD64()); + isMove = CAST_TO_TYPEOF(isMove) AMD64FN(isMove_AMD64Instr); + getRegUsage + = CAST_TO_TYPEOF(getRegUsage) AMD64FN(getRegUsage_AMD64Instr); + mapRegs = CAST_TO_TYPEOF(mapRegs) AMD64FN(mapRegs_AMD64Instr); + genSpill = CAST_TO_TYPEOF(genSpill) AMD64FN(genSpill_AMD64); + genReload = CAST_TO_TYPEOF(genReload) AMD64FN(genReload_AMD64); + genMove = CAST_TO_TYPEOF(genMove) AMD64FN(genMove_AMD64); + directReload = CAST_TO_TYPEOF(directReload) AMD64FN(directReload_AMD64); + ppInstr = CAST_TO_TYPEOF(ppInstr) AMD64FN(ppAMD64Instr); + ppReg = CAST_TO_TYPEOF(ppReg) AMD64FN(ppHRegAMD64); + iselSB = AMD64FN(iselSB_AMD64); + emit = CAST_TO_TYPEOF(emit) AMD64FN(emit_AMD64Instr); + vassert(vta->archinfo_host.endness == VexEndnessLE); + break; - /* Main loop, processing Queue entries, until there are no more. */ - while (queueOldest <= queueNewest) { + case VexArchPPC32: + mode64 = False; + rRegUniv = PPC32FN(getRRegUniverse_PPC(mode64)); + isMove = CAST_TO_TYPEOF(isMove) PPC32FN(isMove_PPCInstr); + getRegUsage + = CAST_TO_TYPEOF(getRegUsage) PPC32FN(getRegUsage_PPCInstr); + mapRegs = CAST_TO_TYPEOF(mapRegs) PPC32FN(mapRegs_PPCInstr); + genSpill = CAST_TO_TYPEOF(genSpill) PPC32FN(genSpill_PPC); + genReload = CAST_TO_TYPEOF(genReload) PPC32FN(genReload_PPC); + genMove = CAST_TO_TYPEOF(genMove) PPC32FN(genMove_PPC); + ppInstr = CAST_TO_TYPEOF(ppInstr) PPC32FN(ppPPCInstr); + ppReg = CAST_TO_TYPEOF(ppReg) PPC32FN(ppHRegPPC); + iselSB = PPC32FN(iselSB_PPC); + emit = CAST_TO_TYPEOF(emit) PPC32FN(emit_PPCInstr); + vassert(vta->archinfo_host.endness == VexEndnessBE); + break; - Int qCur = queueOldest; - if (UNLIKELY(verbose_asm)) - vex_printf("BEGIN queue[%d]\n", qCur); + case VexArchPPC64: + mode64 = True; + rRegUniv = PPC64FN(getRRegUniverse_PPC(mode64)); + isMove = CAST_TO_TYPEOF(isMove) PPC64FN(isMove_PPCInstr); + getRegUsage + = CAST_TO_TYPEOF(getRegUsage) PPC64FN(getRegUsage_PPCInstr); + mapRegs = CAST_TO_TYPEOF(mapRegs) PPC64FN(mapRegs_PPCInstr); + genSpill = CAST_TO_TYPEOF(genSpill) PPC64FN(genSpill_PPC); + genReload = CAST_TO_TYPEOF(genReload) PPC64FN(genReload_PPC); + genMove = CAST_TO_TYPEOF(genMove) PPC64FN(genMove_PPC); + ppInstr = CAST_TO_TYPEOF(ppInstr) PPC64FN(ppPPCInstr); + ppReg = CAST_TO_TYPEOF(ppReg) PPC64FN(ppHRegPPC); + iselSB = PPC64FN(iselSB_PPC); + emit = CAST_TO_TYPEOF(emit) PPC64FN(emit_PPCInstr); + vassert(vta->archinfo_host.endness == VexEndnessBE || + vta->archinfo_host.endness == VexEndnessLE ); + break; - // Take the oldest entry in the queue - QElem* qe = &queue[queueOldest]; - queueOldest++; + case VexArchS390X: + mode64 = True; + rRegUniv = S390FN(getRRegUniverse_S390()); + isMove = CAST_TO_TYPEOF(isMove) S390FN(isMove_S390Instr); + getRegUsage + = CAST_TO_TYPEOF(getRegUsage) S390FN(getRegUsage_S390Instr); + mapRegs = CAST_TO_TYPEOF(mapRegs) S390FN(mapRegs_S390Instr); + genSpill = CAST_TO_TYPEOF(genSpill) S390FN(genSpill_S390); + genReload = CAST_TO_TYPEOF(genReload) S390FN(genReload_S390); + genMove = CAST_TO_TYPEOF(genMove) S390FN(genMove_S390); + // fixs390: consider implementing directReload_S390 + ppInstr = CAST_TO_TYPEOF(ppInstr) S390FN(ppS390Instr); + ppReg = 
CAST_TO_TYPEOF(ppReg) S390FN(ppHRegS390); + iselSB = S390FN(iselSB_S390); + emit = CAST_TO_TYPEOF(emit) S390FN(emit_S390Instr); + vassert(vta->archinfo_host.endness == VexEndnessBE); + break; - // Stay sane. Only the top level block has no branch to it and no - // resume point. - if (qe->oolVec == rcode->insns) { - // This is the top level block - vassert(!qe->jumpToOOLpoint_valid); - vassert(!qe->resumePoint_valid); - } else { - vassert(qe->jumpToOOLpoint_valid); - vassert(qe->resumePoint_valid); - // In the future, we might be able to allow the resume point to be - // invalid for non-top-level blocks, if the block contains an - // unconditional exit. Currently the IR can't represent that, so - // the assertion is valid. - } + case VexArchARM: + mode64 = False; + rRegUniv = ARMFN(getRRegUniverse_ARM()); + isMove = CAST_TO_TYPEOF(isMove) ARMFN(isMove_ARMInstr); + getRegUsage + = CAST_TO_TYPEOF(getRegUsage) ARMFN(getRegUsage_ARMInstr); + mapRegs = CAST_TO_TYPEOF(mapRegs) ARMFN(mapRegs_ARMInstr); + genSpill = CAST_TO_TYPEOF(genSpill) ARMFN(genSpill_ARM); + genReload = CAST_TO_TYPEOF(genReload) ARMFN(genReload_ARM); + genMove = CAST_TO_TYPEOF(genMove) ARMFN(genMove_ARM); + ppInstr = CAST_TO_TYPEOF(ppInstr) ARMFN(ppARMInstr); + ppReg = CAST_TO_TYPEOF(ppReg) ARMFN(ppHRegARM); + iselSB = ARMFN(iselSB_ARM); + emit = CAST_TO_TYPEOF(emit) ARMFN(emit_ARMInstr); + vassert(vta->archinfo_host.endness == VexEndnessLE); + break; - // Processing |qe| - if (qe->jumpToOOLpoint_valid) { - // patch qe->jmpToOOLpoint to jump to |here| - if (UNLIKELY(verbose_asm)) { - vex_printf(" -- APPLY "); - ppRelocation(qe->jumpToOOLpoint); - vex_printf("\n"); - } - applyRelocation(qe->jumpToOOLpoint, &vta->host_bytes[0], - cursor, cursor, vta->archinfo_host.endness, - verbose_asm); - } + case VexArchARM64: + mode64 = True; + rRegUniv = ARM64FN(getRRegUniverse_ARM64()); + isMove = CAST_TO_TYPEOF(isMove) ARM64FN(isMove_ARM64Instr); + getRegUsage + = CAST_TO_TYPEOF(getRegUsage) ARM64FN(getRegUsage_ARM64Instr); + mapRegs = CAST_TO_TYPEOF(mapRegs) ARM64FN(mapRegs_ARM64Instr); + genSpill = CAST_TO_TYPEOF(genSpill) ARM64FN(genSpill_ARM64); + genReload = CAST_TO_TYPEOF(genReload) ARM64FN(genReload_ARM64); + genMove = CAST_TO_TYPEOF(genMove) ARM64FN(genMove_ARM64); + ppInstr = CAST_TO_TYPEOF(ppInstr) ARM64FN(ppARM64Instr); + ppReg = CAST_TO_TYPEOF(ppReg) ARM64FN(ppHRegARM64); + iselSB = ARM64FN(iselSB_ARM64); + emit = CAST_TO_TYPEOF(emit) ARM64FN(emit_ARM64Instr); + vassert(vta->archinfo_host.endness == VexEndnessLE); + break; - // Initialise the stack, for processing of |qe|. 
- stackPtr = 0; // "contains one element" + case VexArchMIPS32: + mode64 = False; + rRegUniv = MIPS32FN(getRRegUniverse_MIPS(mode64)); + isMove = CAST_TO_TYPEOF(isMove) MIPS32FN(isMove_MIPSInstr); + getRegUsage + = CAST_TO_TYPEOF(getRegUsage) MIPS32FN(getRegUsage_MIPSInstr); + mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS32FN(mapRegs_MIPSInstr); + genSpill = CAST_TO_TYPEOF(genSpill) MIPS32FN(genSpill_MIPS); + genReload = CAST_TO_TYPEOF(genReload) MIPS32FN(genReload_MIPS); + genMove = CAST_TO_TYPEOF(genMove) MIPS32FN(genMove_MIPS); + ppInstr = CAST_TO_TYPEOF(ppInstr) MIPS32FN(ppMIPSInstr); + ppReg = CAST_TO_TYPEOF(ppReg) MIPS32FN(ppHRegMIPS); + iselSB = MIPS32FN(iselSB_MIPS); + emit = CAST_TO_TYPEOF(emit) MIPS32FN(emit_MIPSInstr); + vassert(vta->archinfo_host.endness == VexEndnessLE + || vta->archinfo_host.endness == VexEndnessBE); + break; - stack[stackPtr].vec = qe->oolVec; - stack[stackPtr].vec_next = 0; - stack[stackPtr].ool_qindex = -1; // INVALID + case VexArchMIPS64: + mode64 = True; + rRegUniv = MIPS64FN(getRRegUniverse_MIPS(mode64)); + isMove = CAST_TO_TYPEOF(isMove) MIPS64FN(isMove_MIPSInstr); + getRegUsage + = CAST_TO_TYPEOF(getRegUsage) MIPS64FN(getRegUsage_MIPSInstr); + mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS64FN(mapRegs_MIPSInstr); + genSpill = CAST_TO_TYPEOF(genSpill) MIPS64FN(genSpill_MIPS); + genReload = CAST_TO_TYPEOF(genReload) MIPS64FN(genReload_MIPS); + genMove = CAST_TO_TYPEOF(genMove) MIPS64FN(genMove_MIPS); + ppInstr = CAST_TO_TYPEOF(ppInstr) MIPS64FN(ppMIPSInstr); + ppReg = CAST_TO_TYPEOF(ppReg) MIPS64FN(ppHRegMIPS); + iselSB = MIPS64FN(iselSB_MIPS); + emit = CAST_TO_TYPEOF(emit) MIPS64FN(emit_MIPSInstr); + vassert(vta->archinfo_host.endness == VexEndnessLE + || vta->archinfo_host.endness == VexEndnessBE); + break; - // Iterate till the stack is empty. This effectively does a - // depth-first traversal of the hot-path (IL) tree reachable from - // here, and at the same time adds any encountered cold-path (OOL) - // blocks to the Queue for later processing. This is the heart of the - // flattening algorithm. - while (stackPtr >= 0) { + default: + vpanic("LibVEX_Translate: unsupported host insn set"); + } - if (UNLIKELY(verbose_asm)) - vex_printf(" -- CONSIDER stack[%d]\n", stackPtr); + // Are the host's hardware capabilities feasible. The function will + // not return if hwcaps are infeasible in some sense. + check_hwcaps(vta->arch_host, vta->archinfo_host.hwcaps); - HInstrVec* vec = stack[stackPtr].vec; - UInt vec_next = stack[stackPtr].vec_next; - Int ool_qindex = stack[stackPtr].ool_qindex; - stackPtr--; - if (vec_next > 0) { - // We're resuming the current IL block having just finished - // processing a nested IL. The OOL counterpart to the nested IL - // we just finished processing will have to jump back to here. - // So we'll need to mark its Queue entry to record that fact. + /* Turn it into virtual-registerised code. Build trees -- this + also throws away any dead bindings. */ + max_ga = ado_treebuild_BB( irsb, preciseMemExnsFn, pxControl ); - // First assert that the OOL actually *is* in the Queue (it - // must be, since we can't have processed it yet). 
- vassert(queueOldest <= queueNewest); // "at least 1 entry in Q" - vassert(queueOldest <= ool_qindex && ool_qindex <= queueNewest); + if (vta->finaltidy) { + irsb = vta->finaltidy(irsb); + } - vassert(!queue[ool_qindex].resumePoint_valid); - queue[ool_qindex].resumePoint = cursor; - queue[ool_qindex].resumePoint_valid = True; - if (UNLIKELY(verbose_asm)) - vex_printf(" -- RESUME previous IL\n"); - } else { - // We're starting a new IL. Due to the tail-recursive nature of - // entering ILs, this means we can actually only be starting the - // outermost (top level) block for this particular Queue entry. - vassert(ool_qindex == -1); - vassert(vec == qe->oolVec); - if (UNLIKELY(verbose_asm)) - vex_printf(" -- START new IL\n"); - } + vexAllocSanityCheck(); - // Repeatedly process "zero or more simple HInstrs followed by (an - // IfThenElse or end-of-block)" - while (True) { + if (vex_traceflags & VEX_TRACE_TREES) { + vex_printf("\n------------------------" + " After tree-building " + "------------------------\n\n"); + ppIRSB ( irsb ); + vex_printf("\n"); + } - // Process "zero or more simple HInstrs" - while (vec_next < vec->insns_used - && !isIfThenElse(vec->insns[vec_next])) { - AssemblyBufferOffset cursor_next - = emitSimpleInsn( &(res->offs_profInc), &vta->host_bytes[0], - cursor, cursor_limit, vec->insns[vec_next], - &emitConsts, vta ); - if (UNLIKELY(cursor_next == cursor)) { - // We ran out of output space. Give up. - goto out_of_buffer_space; - } - vec_next++; - cursor = cursor_next; - } + /* HACK */ + if (0) { + *(vta->host_bytes_used) = 0; + res->status = VexTransOK; return; + } + /* end HACK */ - // Now we've either got to the end of the hot path, or we have - // an IfThenElse. - if (vec_next >= vec->insns_used) - break; + if (vex_traceflags & VEX_TRACE_VCODE) + vex_printf("\n------------------------" + " Instruction selection " + "------------------------\n"); - // So we have an IfThenElse. - HInstrIfThenElse* hite = isIfThenElse(vec->insns[vec_next]); - vassert(hite); - vassert(hite->n_phis == 0); // the regalloc will have removed them + /* No guest has its IP field at offset zero. If this fails it + means some transformation pass somewhere failed to update/copy + irsb->offsIP properly. */ + vassert(irsb->offsIP >= 16); - // Put |ite|'s OOL block in the Queue. We'll deal with it - // later. Also, generate the (skeleton) conditional branch to it, - // and collect enough information that we can create patch the - // branch later, once we know where the destination is. - vassert(queueNewest < nQUEUE-1); // else out of Queue space - queueNewest++; - queue[queueNewest].oolVec = hite->outOfLine; - queue[queueNewest].resumePoint_valid = False; // not yet known - queue[queueNewest].resumePoint = -1; // invalid + vcode = iselSB ( irsb, vta->arch_host, + &vta->archinfo_host, + &vta->abiinfo_both, + offB_HOST_EvC_COUNTER, + offB_HOST_EvC_FAILADDR, + chainingAllowed, + vta->addProfInc, + max_ga ); - HInstr* cond_branch - = X86Instr_JmpCond(hite->ccOOL, - queueNewest/*FOR DEBUG PRINTING ONLY*/); - AssemblyBufferOffset cursor_next - = emitSimpleInsn( &(res->offs_profInc), &vta->host_bytes[0], - cursor, cursor_limit, cond_branch, - &emitConsts, vta ); - if (UNLIKELY(cursor_next == cursor)) { - // We ran out of output space. Give up. 
- goto out_of_buffer_space; - } - queue[queueNewest].jumpToOOLpoint_valid = True; - queue[queueNewest].jumpToOOLpoint - = collectRelocInfo_X86(cursor, cond_branch); + vexAllocSanityCheck(); - cursor = cursor_next; + if (vex_traceflags & VEX_TRACE_VCODE) + vex_printf("\n"); - // Now we descend into |ite's| IL block. So we need to save - // where we are in this block, so we can resume when the inner - // one is done. - vassert(stackPtr < nSTACK-1); // else out of Stack space - stackPtr++; - stack[stackPtr].vec = vec; - stack[stackPtr].vec_next = vec_next+1; - stack[stackPtr].ool_qindex = queueNewest; + if (vex_traceflags & VEX_TRACE_VCODE) { + ppHInstrSB(vcode, isIfThenElse, ppInstr, ppCondCode, mode64); + } - // And now descend into the inner block. We could have just - // pushed its details on the stack and immediately pop it, but - // it seems simpler to update |vec| and |vec_next| and continue - // directly. - if (UNLIKELY(verbose_asm)) { - vex_printf(" -- START inner IL\n"); - } - vec = hite->fallThrough; - vec_next = 0; + /* Register allocate. */ + RegAllocControl con = { + .univ = rRegUniv, .isMove = isMove, .getRegUsage = getRegUsage, + .mapRegs = mapRegs, .isIfThenElse = isIfThenElse, .genSpill = genSpill, + .genReload = genReload, .genMove = genMove, .genHInstrITE = genHInstrITE, + .directReload = directReload, .guest_sizeB = guest_sizeB, + .ppInstr = ppInstr, .ppCondCode = ppCondCode, .ppReg = ppReg, + .mode64 = mode64}; + rcode = doRegisterAllocation(vcode, &con); - // And continue with "Repeatedly process ..." - } + vexAllocSanityCheck(); - // Getting here means we've completed an inner IL and now want to - // resume the parent IL. That is, pop a saved context off the - // stack. - } + if (vex_traceflags & VEX_TRACE_RCODE) { + vex_printf("\n------------------------" + " Register-allocated code " + "------------------------\n\n"); + ppHInstrSB(rcode, isIfThenElse, ppInstr, ppCondCode, mode64); + vex_printf("\n"); + } - // Hot path is complete. Now, probably, we have to add a jump - // back to the resume point. - if (qe->resumePoint_valid) { - if (0) - vex_printf(" // Generate jump to resume point [%03u]\n", - qe->resumePoint); - HInstr* jmp = X86Instr_Jmp(cursor, qe->resumePoint); - AssemblyBufferOffset cursor_next - = emitSimpleInsn( &(res->offs_profInc), &vta->host_bytes[0], - cursor, cursor_limit, jmp, - &emitConsts, vta ); - if (UNLIKELY(cursor_next == cursor)) { - // We ran out of output space. Give up. - goto out_of_buffer_space; - } - cursor = cursor_next; - } + /* HACK */ + if (0) { + *(vta->host_bytes_used) = 0; + res->status = VexTransOK; return; + } + /* end HACK */ - if (UNLIKELY(verbose_asm)) - vex_printf("END queue[%d]\n\n", qCur); - // Finished with this Queue entry. + /* Assemble */ + if (vex_traceflags & VEX_TRACE_ASM) { + vex_printf("\n------------------------" + " Assembly " + "------------------------\n\n"); } - // Queue empty, all blocks processed - *(vta->host_bytes_used) = cursor; - out_used = cursor; - //// - //// END of the assembler - //////////////////////////////////////////////////////// + Bool assembly_ok = theAssembler( res, vta, isIfThenElse, mode64, rcode ); + if (!assembly_ok) + goto out_of_buffer_space; vexAllocSanityCheck(); @@ -1657,7 +1676,8 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, j += vta->guest_extents->len[i]; } if (1) vex_printf("VexExpansionRatio %d %d %d :10\n\n", - j, out_used, (10 * out_used) / (j == 0 ? 1 : j)); + j, *(vta->host_bytes_used), + (10 * *(vta->host_bytes_used)) / (j == 0 ? 
1 : j)); } vex_traceflags = 0; -- 2.47.2
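
As a rough, standalone sketch of the queue-plus-stack flattening that theAssembler's
comments describe: the code below is not VEX code; it uses hypothetical simplified
types (Insn, Vec, QElem/SElem with reduced fields) and prints a layout trace instead
of emitting machine bytes or applying relocations, but it follows the same traversal:
emit the hot (IL) path depth-first via the stack, push each cold (OOL) leg onto a
FIFO work queue, and lay the cold legs out afterwards.

   /* flatten_sketch.c -- illustrative only; assumes nothing about the real
      VEX data structures beyond what the comments above describe. */
   #include <assert.h>
   #include <stdio.h>

   typedef struct Vec Vec;

   typedef struct {
      const char* name;        /* a "simple insn": just a name to emit      */
      Vec*        inline_vec;  /* non-NULL => this insn is an IfThenElse... */
      Vec*        ool_vec;     /* ...with a hot (IL) and a cold (OOL) leg   */
   } Insn;

   struct Vec {
      Insn* insns;
      int   n_insns;
   };

   /* Work-queue element: a cold (OOL) block waiting to be laid out.
      For the top-level block, |ool| simply holds the whole hot tree. */
   typedef struct { Vec* ool; int resume_known; } QElem;

   /* Stack element: where to resume the enclosing hot block. */
   typedef struct { Vec* vec; int next; int ool_qindex; } SElem;

   static void flatten ( Vec* top )
   {
      enum { nSTACK = 4, nQUEUE = 8 };
      SElem stack[nSTACK];  int sp = -1;
      QElem queue[nQUEUE];  int qOldest = 0, qNewest = -1;

      queue[++qNewest] = (QElem){ .ool = top, .resume_known = 0 };

      while (qOldest <= qNewest) {
         QElem* qe = &queue[qOldest++];
         printf("-- begin block (queue slot %d)\n", qOldest - 1);

         /* One-element stack: start at the beginning of this block. */
         stack[++sp] = (SElem){ .vec = qe->ool, .next = 0, .ool_qindex = -1 };

         while (sp >= 0) {
            Vec* vec  = stack[sp].vec;
            int  next = stack[sp].next;
            int  oqi  = stack[sp].ool_qindex;
            sp--;

            /* Resuming after a nested IL: its OOL's resume point is now known. */
            if (next > 0 && oqi >= 0)
               queue[oqi].resume_known = 1;

            while (1) {
               /* Emit simple insns until an IfThenElse or end of vector. */
               while (next < vec->n_insns && vec->insns[next].inline_vec == NULL)
                  printf("   %s\n", vec->insns[next++].name);
               if (next >= vec->n_insns)
                  break;

               /* IfThenElse: queue the cold leg, descend into the hot leg. */
               Insn* ite = &vec->insns[next];
               assert(qNewest < nQUEUE - 1);
               queue[++qNewest] = (QElem){ .ool = ite->ool_vec, .resume_known = 0 };
               printf("   jcc --> queue slot %d\n", qNewest);

               assert(sp < nSTACK - 1);
               stack[++sp] = (SElem){ .vec = vec, .next = next + 1,
                                      .ool_qindex = qNewest };
               vec  = ite->inline_vec;
               next = 0;
            }
         }

         /* Cold legs end with a jump back to their resume point. */
         if (qe->resume_known)
            printf("   jmp <resume point>\n");
         printf("-- end block\n");
      }
   }

   int main ( void )
   {
      /* One IfThenElse nested in the top block: IL leg {il1,il2}, OOL leg {cold1}. */
      Insn il_insns[]  = { { "il1", NULL, NULL }, { "il2", NULL, NULL } };
      Insn ool_insns[] = { { "cold1", NULL, NULL } };
      Vec  il  = { il_insns,  2 };
      Vec  ool = { ool_insns, 1 };

      Insn top_insns[] = { { "a", NULL, NULL },
                           { "if-then-else", &il, &ool },
                           { "b", NULL, NULL } };
      Vec  top = { top_insns, 3 };
      flatten(&top);
      return 0;
   }

Running the sketch prints the hot path first ("a", the conditional jump, "il1",
"il2", "b") and only then the cold block ("cold1" plus its jump back to the resume
point), which is the hot-code-first layout that the patch's comments argue should
minimise icache misses.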