}
-/* ---- The back end proper ---- */
+/* ---- The assembler ---- */
-/* Back end of the compilation pipeline. Is not exported. */
+/* Assemble RCODE, writing the resulting machine code into the buffer
+ specified by VTA->host_bytes of size VTA->host_bytes_size. When done,
+ store the number of bytes written at the location specified by
+ VTA->host_bytes_used. RES->offs_profInc may be modified as a result. No
+ other fields of RES are changed.
-static void libvex_BackEnd ( const VexTranslateArgs *vta,
- /*MOD*/ VexTranslateResult* res,
- /*MOD*/ IRSB* irsb,
- VexRegisterUpdates pxControl )
+ Returns True for OK, False for 'ran out of buffer space'.
+*/
+static
+Bool theAssembler ( /*MOD*/VexTranslateResult* res,
+ const VexTranslateArgs* vta,
+ HInstrIfThenElse* (*isIfThenElse)( const HInstr* ),
+ const Bool mode64,
+ const HInstrSB* rcode )
{
- /* This the bundle of functions we need to do the back-end stuff
- (insn selection, reg-alloc, assembly) whilst being insulated
- from the target instruction set. */
- Bool (*isMove) ( const HInstr*, HReg*, HReg* );
- void (*getRegUsage) ( HRegUsage*, const HInstr*, Bool );
- void (*mapRegs) ( HRegRemap*, HInstr*, Bool );
- HInstrIfThenElse* (*isIfThenElse)( const HInstr* );
- void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool );
- void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool );
- HInstr* (*genMove) ( HReg, HReg, Bool );
- HInstr* (*genHInstrITE) ( HInstrIfThenElse* );
- HInstr* (*directReload) ( HInstr*, HReg, Short );
- void (*ppInstr) ( const HInstr*, Bool );
- void (*ppCondCode) ( HCondCode );
- UInt (*ppReg) ( HReg );
- HInstrSB* (*iselSB) ( const IRSB*, VexArch, const VexArchInfo*,
- const VexAbiInfo*, Int, Int, Bool, Bool,
- Addr );
- Int (*emit) ( /*MB_MOD*/Bool*,
- UChar*, Int, const HInstr*, Bool, VexEndness,
- const void*, const void*, const void*,
- const void* );
- Bool (*preciseMemExnsFn) ( Int, Int, VexRegisterUpdates );
-
- const RRegUniverse* rRegUniv = NULL;
+ // QElems are work Queue elements. The work Queue is the top level data
+ // structure for the emitter. It is initialised with the HInstrVec* of
+ // the overall HInstrSB. Every OOL HInstrVec* in the tree will at some
+ // point be present in the Queue. IL HInstrVec*s are never present in
+ // the Queue because the inner emitter loop processes them in-line, using
+ // a Stack (see below) to keep track of its nesting level.
+ //
+ // The Stack (see below) is empty before and after every Queue element is
+ // processed. In other words, the Stack only holds state needed during
+ // the processing of a single Queue element.
+ //
+ // The ordering of elements in the Queue is irrelevant -- correct code
+ // will be emitted even with set semantics (arbitrary order). However,
+ // the FIFOness of the queue is believed to generate code in which
+ // colder and colder code (more deeply nested OOLs) is placed further
+ // and further from the start of the emitted machine code -- a layout
+ // which should help to minimise icache misses.
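+ //
+ // For example (purely illustrative): if the hot path contains an
+ // IfThenElse whose OOL block itself contains a further IfThenElse, the
+ // outer OOL is queued, and hence emitted, before the inner one, so the
+ // colder, more deeply nested code ends up further from the hot path.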
+ //
+ // QElems also contain two pieces of jump-fixup information. When we
+ // finally come to process a QElem, we need to know:
+ //
+ // * |jumpToOOLpoint|: the place which wants to jump to the start of the
+ // emitted insns for this QElem. We must have already emitted that,
+ // since it will be the conditional jump that leads to this QElem (OOL
+ // block).
+ //
+ // * |resumePoint|: the place we should jump back to after the QElem is
+ // finished (the "resume point"), which is the emitted code of the
+ // HInstr immediately following the HInstrIfThenElse that has this
+ // QElem as its OOL block.
+ //
+ // When the QElem is processed, we know both the |jumpToOOLpoint| and
+ // the |resumePoint|, and so the first can be patched, and the second
+ // we generate an instruction to jump to.
+ //
+ // There are three complications with patching:
+ //
+ // (1) Per the comments on Stack elems, we do not know the |resumePoint| when
+ // creating a QElem. That will only be known when processing of the
+ // corresponding IL block is completed.
+ //
+ // (2) The top level HInstrVec* has neither a |jumpToOOLpoint| nor a
+ // |resumePoint|.
+ //
+ // (3) Non-top-level OOLs may not have a valid |resumePoint| if they do
+ // an unconditional IR-level Exit. We can generate the resume point
+ // branch, but it will never be used.
+ typedef
+ struct {
+ // The HInstrs for this OOL.
+ HInstrVec* oolVec;
+ // Where we should patch to jump to the OOL ("how do we get here?")
+ Bool jumpToOOLpoint_valid;
+ Relocation jumpToOOLpoint;
+ // Resume point offset, in bytes from start of output buffer
+ // ("where do we go after this block is completed?")
+ Bool resumePoint_valid;
+ AssemblyBufferOffset resumePoint;
+ }
+ QElem;
- Bool mode64, chainingAllowed;
- Int out_used;
- Int guest_sizeB;
- Int offB_HOST_EvC_COUNTER;
- Int offB_HOST_EvC_FAILADDR;
- Addr max_ga;
- HInstrSB* vcode;
- HInstrSB* rcode;
- isMove = NULL;
- getRegUsage = NULL;
- mapRegs = NULL;
- isIfThenElse = NULL;
- genSpill = NULL;
- genReload = NULL;
- genMove = NULL;
- genHInstrITE = NULL;
- directReload = NULL;
- ppInstr = NULL;
- ppCondCode = NULL;
- ppReg = NULL;
- iselSB = NULL;
- emit = NULL;
+ // SElems are stack elements. When we suspend processing a HInstrVec* in
+ // order to process an IL path in an IfThenElse, we push the HInstrVec*
+ // and the next index to process on the stack, so that we know where to
+ // resume when the nested IL sequence is completed. |vec| and |vec_next|
+ // record the resume HInstr.
+ //
+ // A second effect of processing a nested IL sequence is that we will
+ // have to (later) process the corresponding OOL sequence. And that OOL
+ // sequence will have to finish with a jump back to the "resume point"
+ // (the emitted instruction immediately following the IfThenElse). We
+ // only know the offset of the resume point instruction in the output
+ // buffer when we actually resume emitting from there -- that is, when
+ // the entry we pushed is popped. So, when we pop, we must mark the
+ // corresponding OOL entry in the Queue to record there the resume point
+ // offset. For this reason we also carry |ool_qindex|, which is the
+ // index of the corresponding OOL entry in the Queue.
+ typedef
+ struct {
+ HInstrVec* vec; // resume point HInstr vector
+ UInt vec_next; // resume point HInstr vector index
+ Int ool_qindex; // index in Queue of OOL to mark when we resume
+ }
+ SElem;
- mode64 = False;
- chainingAllowed = False;
- guest_sizeB = 0;
- offB_HOST_EvC_COUNTER = 0;
- offB_HOST_EvC_FAILADDR = 0;
- preciseMemExnsFn = NULL;
+ // The Stack. The stack depth is bounded by the maximum number of
+ // nested hot (IL) sections, so in practice it is going to be very small.
+ const Int nSTACK = 4;
- vassert(vex_initdone);
- vassert(vta->disp_cp_xassisted != NULL);
+ SElem stack[nSTACK];
+ Int stackPtr; // points to most recently pushed entry <=> "-1 means empty"
- vex_traceflags = vta->traceflags;
+ // The Queue. The queue size is bounded by the number of cold (OOL)
+ // sections in the entire HInstrSB, so it's also going to be pretty
+ // small.
+ const Int nQUEUE = 8;
- /* Both the chainers and the indir are either NULL or non-NULL. */
- if (vta->disp_cp_chain_me_to_slowEP != NULL) {
- vassert(vta->disp_cp_chain_me_to_fastEP != NULL);
- vassert(vta->disp_cp_xindir != NULL);
- chainingAllowed = True;
- } else {
- vassert(vta->disp_cp_chain_me_to_fastEP == NULL);
- vassert(vta->disp_cp_xindir == NULL);
- }
+ QElem queue[nQUEUE];
+ Int queueOldest; // index of oldest entry, initially 0
+ Int queueNewest; // index of newest entry,
+ // initially -1, otherwise must be >= queueOldest
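+
+ // Note that the Queue is a simple append-only array, not a circular
+ // buffer: |queueNewest| only ever increases. Hence nQUEUE must be at
+ // least the number of OOL sections in the HInstrSB plus one (for the
+ // top level entry), not merely the number pending at any one moment.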
- switch (vta->arch_guest) {
+ ///////////////////////////////////////////////////////
- case VexArchX86:
- preciseMemExnsFn
- = X86FN(guest_x86_state_requires_precise_mem_exns);
- guest_sizeB = sizeof(VexGuestX86State);
- offB_HOST_EvC_COUNTER = offsetof(VexGuestX86State,host_EvC_COUNTER);
- offB_HOST_EvC_FAILADDR = offsetof(VexGuestX86State,host_EvC_FAILADDR);
- break;
+ const Bool verbose_asm = (vex_traceflags & VEX_TRACE_ASM) != 0;
- case VexArchAMD64:
- preciseMemExnsFn
- = AMD64FN(guest_amd64_state_requires_precise_mem_exns);
- guest_sizeB = sizeof(VexGuestAMD64State);
- offB_HOST_EvC_COUNTER = offsetof(VexGuestAMD64State,host_EvC_COUNTER);
- offB_HOST_EvC_FAILADDR = offsetof(VexGuestAMD64State,host_EvC_FAILADDR);
- break;
+ const EmitConstants emitConsts
+ = { .mode64 = mode64,
+ .endness_host = vta->archinfo_host.endness,
+ .disp_cp_chain_me_to_slowEP = vta->disp_cp_chain_me_to_slowEP,
+ .disp_cp_chain_me_to_fastEP = vta->disp_cp_chain_me_to_fastEP,
+ .disp_cp_xindir = vta->disp_cp_xindir,
+ .disp_cp_xassisted = vta->disp_cp_xassisted };
- case VexArchPPC32:
- preciseMemExnsFn
- = PPC32FN(guest_ppc32_state_requires_precise_mem_exns);
- guest_sizeB = sizeof(VexGuestPPC32State);
- offB_HOST_EvC_COUNTER = offsetof(VexGuestPPC32State,host_EvC_COUNTER);
- offB_HOST_EvC_FAILADDR = offsetof(VexGuestPPC32State,host_EvC_FAILADDR);
- break;
+ AssemblyBufferOffset cursor = 0;
+ AssemblyBufferOffset cursor_limit = vta->host_bytes_size;
- case VexArchPPC64:
- preciseMemExnsFn
- = PPC64FN(guest_ppc64_state_requires_precise_mem_exns);
- guest_sizeB = sizeof(VexGuestPPC64State);
- offB_HOST_EvC_COUNTER = offsetof(VexGuestPPC64State,host_EvC_COUNTER);
- offB_HOST_EvC_FAILADDR = offsetof(VexGuestPPC64State,host_EvC_FAILADDR);
- break;
+ *(vta->host_bytes_used) = 0;
- case VexArchS390X:
- preciseMemExnsFn
- = S390FN(guest_s390x_state_requires_precise_mem_exns);
- guest_sizeB = sizeof(VexGuestS390XState);
- offB_HOST_EvC_COUNTER = offsetof(VexGuestS390XState,host_EvC_COUNTER);
- offB_HOST_EvC_FAILADDR = offsetof(VexGuestS390XState,host_EvC_FAILADDR);
- break;
+ queueOldest = 0;
+ queueNewest = -1;
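+
+ // Prime the Queue with the top level HInstrVec of |rcode|. It is the
+ // only entry that has neither a jump-to point nor a resume point.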
- case VexArchARM:
- preciseMemExnsFn
- = ARMFN(guest_arm_state_requires_precise_mem_exns);
- guest_sizeB = sizeof(VexGuestARMState);
- offB_HOST_EvC_COUNTER = offsetof(VexGuestARMState,host_EvC_COUNTER);
- offB_HOST_EvC_FAILADDR = offsetof(VexGuestARMState,host_EvC_FAILADDR);
- break;
+ vassert(queueNewest < nQUEUE);
+ queueNewest++;
+ {
+ QElem* qe = &queue[queueNewest];
+ vex_bzero(qe, sizeof(*qe));
+ qe->oolVec = rcode->insns;
+ qe->jumpToOOLpoint_valid = False;
+ qe->resumePoint_valid = False;
+ }
+ vassert(queueNewest == 0);
- case VexArchARM64:
- preciseMemExnsFn
- = ARM64FN(guest_arm64_state_requires_precise_mem_exns);
- guest_sizeB = sizeof(VexGuestARM64State);
- offB_HOST_EvC_COUNTER = offsetof(VexGuestARM64State,host_EvC_COUNTER);
- offB_HOST_EvC_FAILADDR = offsetof(VexGuestARM64State,host_EvC_FAILADDR);
- break;
+ /* Main loop, processing Queue entries, until there are no more. */
+ while (queueOldest <= queueNewest) {
- case VexArchMIPS32:
- preciseMemExnsFn
- = MIPS32FN(guest_mips32_state_requires_precise_mem_exns);
- guest_sizeB = sizeof(VexGuestMIPS32State);
- offB_HOST_EvC_COUNTER = offsetof(VexGuestMIPS32State,host_EvC_COUNTER);
- offB_HOST_EvC_FAILADDR = offsetof(VexGuestMIPS32State,host_EvC_FAILADDR);
- break;
+ Int qCur = queueOldest;
+ if (UNLIKELY(verbose_asm))
+ vex_printf("BEGIN queue[%d]\n", qCur);
- case VexArchMIPS64:
- preciseMemExnsFn
- = MIPS64FN(guest_mips64_state_requires_precise_mem_exns);
- guest_sizeB = sizeof(VexGuestMIPS64State);
- offB_HOST_EvC_COUNTER = offsetof(VexGuestMIPS64State,host_EvC_COUNTER);
- offB_HOST_EvC_FAILADDR = offsetof(VexGuestMIPS64State,host_EvC_FAILADDR);
- break;
+ // Take the oldest entry in the queue
+ QElem* qe = &queue[queueOldest];
+ queueOldest++;
- default:
- vpanic("LibVEX_Codegen: unsupported guest insn set");
- }
+ // Stay sane. Only the top level block has no branch to it and no
+ // resume point.
+ if (qe->oolVec == rcode->insns) {
+ // This is the top level block
+ vassert(!qe->jumpToOOLpoint_valid);
+ vassert(!qe->resumePoint_valid);
+ } else {
+ vassert(qe->jumpToOOLpoint_valid);
+ vassert(qe->resumePoint_valid);
+ // In the future, we might be able to allow the resume point to be
+ // invalid for non-top-level blocks, if the block contains an
+ // unconditional exit. Currently the IR can't represent that, so
+ // the assertion is valid.
+ }
+ // Processing |qe|
+ if (qe->jumpToOOLpoint_valid) {
+ // Patch qe->jumpToOOLpoint to jump to |here| (the current cursor).
+ if (UNLIKELY(verbose_asm)) {
+ vex_printf(" -- APPLY ");
+ ppRelocation(qe->jumpToOOLpoint);
+ vex_printf("\n");
+ }
+ applyRelocation(qe->jumpToOOLpoint, &vta->host_bytes[0],
+ cursor, cursor, vta->archinfo_host.endness,
+ verbose_asm);
+ }
- switch (vta->arch_host) {
+ // Initialise the stack, for processing of |qe|.
+ stackPtr = 0; // "contains one element"
- case VexArchX86:
- mode64 = False;
- rRegUniv = X86FN(getRRegUniverse_X86());
- isMove = CAST_TO_TYPEOF(isMove) X86FN(isMove_X86Instr);
- getRegUsage
- = CAST_TO_TYPEOF(getRegUsage) X86FN(getRegUsage_X86Instr);
- mapRegs = CAST_TO_TYPEOF(mapRegs) X86FN(mapRegs_X86Instr);
- isIfThenElse = CAST_TO_TYPEOF(isIfThenElse) X86FN(isIfThenElse_X86Instr);
- genSpill = CAST_TO_TYPEOF(genSpill) X86FN(genSpill_X86);
- genReload = CAST_TO_TYPEOF(genReload) X86FN(genReload_X86);
- genMove = CAST_TO_TYPEOF(genMove) X86FN(genMove_X86);
- genHInstrITE = CAST_TO_TYPEOF(genHInstrITE) X86FN(X86Instr_IfThenElse);
- directReload = CAST_TO_TYPEOF(directReload) X86FN(directReload_X86);
- ppInstr = CAST_TO_TYPEOF(ppInstr) X86FN(ppX86Instr);
- ppCondCode = CAST_TO_TYPEOF(ppCondCode) X86FN(ppX86CondCode);
- ppReg = CAST_TO_TYPEOF(ppReg) X86FN(ppHRegX86);
- iselSB = X86FN(iselSB_X86);
- emit = CAST_TO_TYPEOF(emit) X86FN(emit_X86Instr);
- vassert(vta->archinfo_host.endness == VexEndnessLE);
- break;
+ stack[stackPtr].vec = qe->oolVec;
+ stack[stackPtr].vec_next = 0;
+ stack[stackPtr].ool_qindex = -1; // INVALID
- case VexArchAMD64:
- mode64 = True;
- rRegUniv = AMD64FN(getRRegUniverse_AMD64());
- isMove = CAST_TO_TYPEOF(isMove) AMD64FN(isMove_AMD64Instr);
- getRegUsage
- = CAST_TO_TYPEOF(getRegUsage) AMD64FN(getRegUsage_AMD64Instr);
- mapRegs = CAST_TO_TYPEOF(mapRegs) AMD64FN(mapRegs_AMD64Instr);
- genSpill = CAST_TO_TYPEOF(genSpill) AMD64FN(genSpill_AMD64);
- genReload = CAST_TO_TYPEOF(genReload) AMD64FN(genReload_AMD64);
- genMove = CAST_TO_TYPEOF(genMove) AMD64FN(genMove_AMD64);
- directReload = CAST_TO_TYPEOF(directReload) AMD64FN(directReload_AMD64);
- ppInstr = CAST_TO_TYPEOF(ppInstr) AMD64FN(ppAMD64Instr);
- ppReg = CAST_TO_TYPEOF(ppReg) AMD64FN(ppHRegAMD64);
- iselSB = AMD64FN(iselSB_AMD64);
- emit = CAST_TO_TYPEOF(emit) AMD64FN(emit_AMD64Instr);
- vassert(vta->archinfo_host.endness == VexEndnessLE);
- break;
+ // Iterate till the stack is empty. This effectively does a
+ // depth-first traversal of the hot-path (IL) tree reachable from
+ // here, and at the same time adds any encountered cold-path (OOL)
+ // blocks to the Queue for later processing. This is the heart of the
+ // flattening algorithm.
+ while (stackPtr >= 0) {
- case VexArchPPC32:
- mode64 = False;
- rRegUniv = PPC32FN(getRRegUniverse_PPC(mode64));
- isMove = CAST_TO_TYPEOF(isMove) PPC32FN(isMove_PPCInstr);
- getRegUsage
- = CAST_TO_TYPEOF(getRegUsage) PPC32FN(getRegUsage_PPCInstr);
- mapRegs = CAST_TO_TYPEOF(mapRegs) PPC32FN(mapRegs_PPCInstr);
- genSpill = CAST_TO_TYPEOF(genSpill) PPC32FN(genSpill_PPC);
- genReload = CAST_TO_TYPEOF(genReload) PPC32FN(genReload_PPC);
- genMove = CAST_TO_TYPEOF(genMove) PPC32FN(genMove_PPC);
- ppInstr = CAST_TO_TYPEOF(ppInstr) PPC32FN(ppPPCInstr);
- ppReg = CAST_TO_TYPEOF(ppReg) PPC32FN(ppHRegPPC);
- iselSB = PPC32FN(iselSB_PPC);
- emit = CAST_TO_TYPEOF(emit) PPC32FN(emit_PPCInstr);
- vassert(vta->archinfo_host.endness == VexEndnessBE);
- break;
+ if (UNLIKELY(verbose_asm))
+ vex_printf(" -- CONSIDER stack[%d]\n", stackPtr);
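+
+ // Pop the topmost Stack entry. Either we are resuming a partially
+ // processed IL (vec_next > 0) or starting this Queue element's top
+ // level IL (vec_next == 0); the if/else below distinguishes the two.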
- case VexArchPPC64:
- mode64 = True;
- rRegUniv = PPC64FN(getRRegUniverse_PPC(mode64));
- isMove = CAST_TO_TYPEOF(isMove) PPC64FN(isMove_PPCInstr);
- getRegUsage
- = CAST_TO_TYPEOF(getRegUsage) PPC64FN(getRegUsage_PPCInstr);
- mapRegs = CAST_TO_TYPEOF(mapRegs) PPC64FN(mapRegs_PPCInstr);
- genSpill = CAST_TO_TYPEOF(genSpill) PPC64FN(genSpill_PPC);
- genReload = CAST_TO_TYPEOF(genReload) PPC64FN(genReload_PPC);
- genMove = CAST_TO_TYPEOF(genMove) PPC64FN(genMove_PPC);
- ppInstr = CAST_TO_TYPEOF(ppInstr) PPC64FN(ppPPCInstr);
- ppReg = CAST_TO_TYPEOF(ppReg) PPC64FN(ppHRegPPC);
- iselSB = PPC64FN(iselSB_PPC);
- emit = CAST_TO_TYPEOF(emit) PPC64FN(emit_PPCInstr);
- vassert(vta->archinfo_host.endness == VexEndnessBE ||
- vta->archinfo_host.endness == VexEndnessLE );
- break;
+ HInstrVec* vec = stack[stackPtr].vec;
+ UInt vec_next = stack[stackPtr].vec_next;
+ Int ool_qindex = stack[stackPtr].ool_qindex;
+ stackPtr--;
- case VexArchS390X:
- mode64 = True;
- rRegUniv = S390FN(getRRegUniverse_S390());
- isMove = CAST_TO_TYPEOF(isMove) S390FN(isMove_S390Instr);
- getRegUsage
- = CAST_TO_TYPEOF(getRegUsage) S390FN(getRegUsage_S390Instr);
- mapRegs = CAST_TO_TYPEOF(mapRegs) S390FN(mapRegs_S390Instr);
- genSpill = CAST_TO_TYPEOF(genSpill) S390FN(genSpill_S390);
- genReload = CAST_TO_TYPEOF(genReload) S390FN(genReload_S390);
- genMove = CAST_TO_TYPEOF(genMove) S390FN(genMove_S390);
- // fixs390: consider implementing directReload_S390
- ppInstr = CAST_TO_TYPEOF(ppInstr) S390FN(ppS390Instr);
- ppReg = CAST_TO_TYPEOF(ppReg) S390FN(ppHRegS390);
- iselSB = S390FN(iselSB_S390);
- emit = CAST_TO_TYPEOF(emit) S390FN(emit_S390Instr);
- vassert(vta->archinfo_host.endness == VexEndnessBE);
- break;
+ if (vec_next > 0) {
+ // We're resuming the current IL block having just finished
+ // processing a nested IL. The OOL counterpart to the nested IL
+ // we just finished processing will have to jump back to here.
+ // So we'll need to mark its Queue entry to record that fact.
- case VexArchARM:
- mode64 = False;
- rRegUniv = ARMFN(getRRegUniverse_ARM());
- isMove = CAST_TO_TYPEOF(isMove) ARMFN(isMove_ARMInstr);
- getRegUsage
- = CAST_TO_TYPEOF(getRegUsage) ARMFN(getRegUsage_ARMInstr);
- mapRegs = CAST_TO_TYPEOF(mapRegs) ARMFN(mapRegs_ARMInstr);
- genSpill = CAST_TO_TYPEOF(genSpill) ARMFN(genSpill_ARM);
- genReload = CAST_TO_TYPEOF(genReload) ARMFN(genReload_ARM);
- genMove = CAST_TO_TYPEOF(genMove) ARMFN(genMove_ARM);
- ppInstr = CAST_TO_TYPEOF(ppInstr) ARMFN(ppARMInstr);
- ppReg = CAST_TO_TYPEOF(ppReg) ARMFN(ppHRegARM);
- iselSB = ARMFN(iselSB_ARM);
- emit = CAST_TO_TYPEOF(emit) ARMFN(emit_ARMInstr);
- vassert(vta->archinfo_host.endness == VexEndnessLE);
- break;
+ // First assert that the OOL actually *is* in the Queue (it
+ // must be, since we can't have processed it yet).
+ vassert(queueOldest <= queueNewest); // "at least 1 entry in Q"
+ vassert(queueOldest <= ool_qindex && ool_qindex <= queueNewest);
- case VexArchARM64:
- mode64 = True;
- rRegUniv = ARM64FN(getRRegUniverse_ARM64());
- isMove = CAST_TO_TYPEOF(isMove) ARM64FN(isMove_ARM64Instr);
- getRegUsage
- = CAST_TO_TYPEOF(getRegUsage) ARM64FN(getRegUsage_ARM64Instr);
- mapRegs = CAST_TO_TYPEOF(mapRegs) ARM64FN(mapRegs_ARM64Instr);
- genSpill = CAST_TO_TYPEOF(genSpill) ARM64FN(genSpill_ARM64);
- genReload = CAST_TO_TYPEOF(genReload) ARM64FN(genReload_ARM64);
- genMove = CAST_TO_TYPEOF(genMove) ARM64FN(genMove_ARM64);
- ppInstr = CAST_TO_TYPEOF(ppInstr) ARM64FN(ppARM64Instr);
- ppReg = CAST_TO_TYPEOF(ppReg) ARM64FN(ppHRegARM64);
- iselSB = ARM64FN(iselSB_ARM64);
- emit = CAST_TO_TYPEOF(emit) ARM64FN(emit_ARM64Instr);
- vassert(vta->archinfo_host.endness == VexEndnessLE);
- break;
+ vassert(!queue[ool_qindex].resumePoint_valid);
+ queue[ool_qindex].resumePoint = cursor;
+ queue[ool_qindex].resumePoint_valid = True;
+ if (UNLIKELY(verbose_asm))
+ vex_printf(" -- RESUME previous IL\n");
+ } else {
+ // We're starting a new IL. Due to the tail-recursive nature of
+ // entering ILs, this means we can actually only be starting the
+ // outermost (top level) block for this particular Queue entry.
+ vassert(ool_qindex == -1);
+ vassert(vec == qe->oolVec);
+ if (UNLIKELY(verbose_asm))
+ vex_printf(" -- START new IL\n");
+ }
- case VexArchMIPS32:
- mode64 = False;
- rRegUniv = MIPS32FN(getRRegUniverse_MIPS(mode64));
- isMove = CAST_TO_TYPEOF(isMove) MIPS32FN(isMove_MIPSInstr);
- getRegUsage
- = CAST_TO_TYPEOF(getRegUsage) MIPS32FN(getRegUsage_MIPSInstr);
- mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS32FN(mapRegs_MIPSInstr);
- genSpill = CAST_TO_TYPEOF(genSpill) MIPS32FN(genSpill_MIPS);
- genReload = CAST_TO_TYPEOF(genReload) MIPS32FN(genReload_MIPS);
- genMove = CAST_TO_TYPEOF(genMove) MIPS32FN(genMove_MIPS);
- ppInstr = CAST_TO_TYPEOF(ppInstr) MIPS32FN(ppMIPSInstr);
- ppReg = CAST_TO_TYPEOF(ppReg) MIPS32FN(ppHRegMIPS);
- iselSB = MIPS32FN(iselSB_MIPS);
- emit = CAST_TO_TYPEOF(emit) MIPS32FN(emit_MIPSInstr);
- vassert(vta->archinfo_host.endness == VexEndnessLE
- || vta->archinfo_host.endness == VexEndnessBE);
- break;
+ // Repeatedly process "zero or more simple HInstrs followed by (an
+ // IfThenElse or end-of-block)"
+ while (True) {
- case VexArchMIPS64:
- mode64 = True;
- rRegUniv = MIPS64FN(getRRegUniverse_MIPS(mode64));
- isMove = CAST_TO_TYPEOF(isMove) MIPS64FN(isMove_MIPSInstr);
- getRegUsage
- = CAST_TO_TYPEOF(getRegUsage) MIPS64FN(getRegUsage_MIPSInstr);
- mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS64FN(mapRegs_MIPSInstr);
- genSpill = CAST_TO_TYPEOF(genSpill) MIPS64FN(genSpill_MIPS);
- genReload = CAST_TO_TYPEOF(genReload) MIPS64FN(genReload_MIPS);
- genMove = CAST_TO_TYPEOF(genMove) MIPS64FN(genMove_MIPS);
- ppInstr = CAST_TO_TYPEOF(ppInstr) MIPS64FN(ppMIPSInstr);
- ppReg = CAST_TO_TYPEOF(ppReg) MIPS64FN(ppHRegMIPS);
- iselSB = MIPS64FN(iselSB_MIPS);
- emit = CAST_TO_TYPEOF(emit) MIPS64FN(emit_MIPSInstr);
- vassert(vta->archinfo_host.endness == VexEndnessLE
- || vta->archinfo_host.endness == VexEndnessBE);
- break;
+ // Process "zero or more simple HInstrs"
+ while (vec_next < vec->insns_used
+ && !isIfThenElse(vec->insns[vec_next])) {
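+ // Emit this simple (non-IfThenElse) insn. The check below relies
+ // on emitSimpleInsn returning the cursor unchanged to signal that
+ // the output buffer is full.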
+ AssemblyBufferOffset cursor_next
+ = emitSimpleInsn( &(res->offs_profInc), &vta->host_bytes[0],
+ cursor, cursor_limit, vec->insns[vec_next],
+ &emitConsts, vta );
+ if (UNLIKELY(cursor_next == cursor)) {
+ // We ran out of output space. Give up.
+ return False;
+ }
+ vec_next++;
+ cursor = cursor_next;
+ }
+
+ // Now we've either got to the end of the hot path, or we have
+ // an IfThenElse.
+ if (vec_next >= vec->insns_used)
+ break;
+
+ // So we have an IfThenElse.
+ HInstrIfThenElse* hite = isIfThenElse(vec->insns[vec_next]);
+ vassert(hite);
+ vassert(hite->n_phis == 0); // the regalloc will have removed them
+
+ // Put |hite|'s OOL block in the Queue. We'll deal with it
+ // later. Also, generate the (skeleton) conditional branch to it,
+ // and collect enough information that we can patch the branch
+ // later, once we know where the destination is.
+ vassert(queueNewest < nQUEUE-1); // else out of Queue space
+ queueNewest++;
+ queue[queueNewest].oolVec = hite->outOfLine;
+ queue[queueNewest].resumePoint_valid = False; // not yet known
+ queue[queueNewest].resumePoint = -1; // invalid
+
+ HInstr* cond_branch
+ = X86Instr_JmpCond(hite->ccOOL,
+ queueNewest/*FOR DEBUG PRINTING ONLY*/);
+ AssemblyBufferOffset cursor_next
+ = emitSimpleInsn( &(res->offs_profInc), &vta->host_bytes[0],
+ cursor, cursor_limit, cond_branch,
+ &emitConsts, vta );
+ if (UNLIKELY(cursor_next == cursor)) {
+ // We ran out of output space. Give up.
+ return False;
+ }
+ queue[queueNewest].jumpToOOLpoint_valid = True;
+ queue[queueNewest].jumpToOOLpoint
+ = collectRelocInfo_X86(cursor, cond_branch);
- default:
- vpanic("LibVEX_Translate: unsupported host insn set");
- }
+ cursor = cursor_next;
- // Are the host's hardware capabilities feasible. The function will
- // not return if hwcaps are infeasible in some sense.
- check_hwcaps(vta->arch_host, vta->archinfo_host.hwcaps);
+ // Now we descend into |hite|'s IL block. So we need to save
+ // where we are in this block, so we can resume when the inner
+ // one is done.
+ vassert(stackPtr < nSTACK-1); // else out of Stack space
+ stackPtr++;
+ stack[stackPtr].vec = vec;
+ stack[stackPtr].vec_next = vec_next+1;
+ stack[stackPtr].ool_qindex = queueNewest;
+ // And now descend into the inner block. We could have just
+ // pushed its details on the stack and immediately popped them, but
+ // it seems simpler to update |vec| and |vec_next| and continue
+ // directly.
+ if (UNLIKELY(verbose_asm)) {
+ vex_printf(" -- START inner IL\n");
+ }
+ vec = hite->fallThrough;
+ vec_next = 0;
- /* Turn it into virtual-registerised code. Build trees -- this
- also throws away any dead bindings. */
- max_ga = ado_treebuild_BB( irsb, preciseMemExnsFn, pxControl );
+ // And continue with "Repeatedly process ..."
+ }
- if (vta->finaltidy) {
- irsb = vta->finaltidy(irsb);
- }
+ // Getting here means we've completed an inner IL and now want to
+ // resume the parent IL. That is, pop a saved context off the
+ // stack.
+ }
- vexAllocSanityCheck();
+ // Hot path is complete. Now, probably, we have to add a jump
+ // back to the resume point.
+ if (qe->resumePoint_valid) {
+ if (0)
+ vex_printf(" // Generate jump to resume point [%03u]\n",
+ qe->resumePoint);
+ HInstr* jmp = X86Instr_Jmp(cursor, qe->resumePoint);
+ AssemblyBufferOffset cursor_next
+ = emitSimpleInsn( &(res->offs_profInc), &vta->host_bytes[0],
+ cursor, cursor_limit, jmp,
+ &emitConsts, vta );
+ if (UNLIKELY(cursor_next == cursor)) {
+ // We ran out of output space. Give up.
+ return False;
+ }
+ cursor = cursor_next;
+ }
- if (vex_traceflags & VEX_TRACE_TREES) {
- vex_printf("\n------------------------"
- " After tree-building "
- "------------------------\n\n");
- ppIRSB ( irsb );
- vex_printf("\n");
+ if (UNLIKELY(verbose_asm))
+ vex_printf("END queue[%d]\n\n", qCur);
+ // Finished with this Queue entry.
}
+ // Queue empty, all blocks processed
- /* HACK */
- if (0) {
- *(vta->host_bytes_used) = 0;
- res->status = VexTransOK; return;
- }
- /* end HACK */
+ *(vta->host_bytes_used) = cursor;
- if (vex_traceflags & VEX_TRACE_VCODE)
- vex_printf("\n------------------------"
- " Instruction selection "
- "------------------------\n");
+ return True; // OK
+}
- /* No guest has its IP field at offset zero. If this fails it
- means some transformation pass somewhere failed to update/copy
- irsb->offsIP properly. */
- vassert(irsb->offsIP >= 16);
- vcode = iselSB ( irsb, vta->arch_host,
- &vta->archinfo_host,
- &vta->abiinfo_both,
- offB_HOST_EvC_COUNTER,
- offB_HOST_EvC_FAILADDR,
- chainingAllowed,
- vta->addProfInc,
- max_ga );
+/* ---- The back end proper ---- */
- vexAllocSanityCheck();
+/* Back end of the compilation pipeline. Is not exported. */
- if (vex_traceflags & VEX_TRACE_VCODE)
- vex_printf("\n");
+static void libvex_BackEnd ( const VexTranslateArgs* vta,
+ /*MOD*/ VexTranslateResult* res,
+ /*MOD*/ IRSB* irsb,
+ VexRegisterUpdates pxControl )
+{
+ /* This is the bundle of functions we need to do the back-end stuff
+ (insn selection, reg-alloc, assembly) whilst being insulated
+ from the target instruction set. */
+ Bool (*isMove) ( const HInstr*, HReg*, HReg* );
+ void (*getRegUsage) ( HRegUsage*, const HInstr*, Bool );
+ void (*mapRegs) ( HRegRemap*, HInstr*, Bool );
+ HInstrIfThenElse* (*isIfThenElse)( const HInstr* );
+ void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool );
+ void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool );
+ HInstr* (*genMove) ( HReg, HReg, Bool );
+ HInstr* (*genHInstrITE) ( HInstrIfThenElse* );
+ HInstr* (*directReload) ( HInstr*, HReg, Short );
+ void (*ppInstr) ( const HInstr*, Bool );
+ void (*ppCondCode) ( HCondCode );
+ UInt (*ppReg) ( HReg );
+ HInstrSB* (*iselSB) ( const IRSB*, VexArch, const VexArchInfo*,
+ const VexAbiInfo*, Int, Int, Bool, Bool,
+ Addr );
+ Int (*emit) ( /*MB_MOD*/Bool*,
+ UChar*, Int, const HInstr*, Bool, VexEndness,
+ const void*, const void*, const void*,
+ const void* );
+ Bool (*preciseMemExnsFn) ( Int, Int, VexRegisterUpdates );
- if (vex_traceflags & VEX_TRACE_VCODE) {
- ppHInstrSB(vcode, isIfThenElse, ppInstr, ppCondCode, mode64);
- }
+ const RRegUniverse* rRegUniv = NULL;
- /* Register allocate. */
- RegAllocControl con = {
- .univ = rRegUniv, .isMove = isMove, .getRegUsage = getRegUsage,
- .mapRegs = mapRegs, .isIfThenElse = isIfThenElse, .genSpill = genSpill,
- .genReload = genReload, .genMove = genMove, .genHInstrITE = genHInstrITE,
- .directReload = directReload, .guest_sizeB = guest_sizeB,
- .ppInstr = ppInstr, .ppCondCode = ppCondCode, .ppReg = ppReg,
- .mode64 = mode64};
- rcode = doRegisterAllocation(vcode, &con);
+ Bool mode64, chainingAllowed;
+ Int guest_sizeB;
+ Int offB_HOST_EvC_COUNTER;
+ Int offB_HOST_EvC_FAILADDR;
+ Addr max_ga;
+ HInstrSB* vcode;
+ HInstrSB* rcode;
- vexAllocSanityCheck();
+ isMove = NULL;
+ getRegUsage = NULL;
+ mapRegs = NULL;
+ isIfThenElse = NULL;
+ genSpill = NULL;
+ genReload = NULL;
+ genMove = NULL;
+ genHInstrITE = NULL;
+ directReload = NULL;
+ ppInstr = NULL;
+ ppCondCode = NULL;
+ ppReg = NULL;
+ iselSB = NULL;
+ emit = NULL;
- if (vex_traceflags & VEX_TRACE_RCODE) {
- vex_printf("\n------------------------"
- " Register-allocated code "
- "------------------------\n\n");
- ppHInstrSB(rcode, isIfThenElse, ppInstr, ppCondCode, mode64);
- vex_printf("\n");
- }
+ mode64 = False;
+ chainingAllowed = False;
+ guest_sizeB = 0;
+ offB_HOST_EvC_COUNTER = 0;
+ offB_HOST_EvC_FAILADDR = 0;
+ preciseMemExnsFn = NULL;
- /* HACK */
- if (0) {
- *(vta->host_bytes_used) = 0;
- res->status = VexTransOK; return;
- }
- /* end HACK */
+ vassert(vex_initdone);
+ vassert(vta->disp_cp_xassisted != NULL);
- /* Assemble */
- if (vex_traceflags & VEX_TRACE_ASM) {
- vex_printf("\n------------------------"
- " Assembly "
- "------------------------\n\n");
+ vex_traceflags = vta->traceflags;
+
+ /* Both the chainers and the indir are either NULL or non-NULL. */
+ if (vta->disp_cp_chain_me_to_slowEP != NULL) {
+ vassert(vta->disp_cp_chain_me_to_fastEP != NULL);
+ vassert(vta->disp_cp_xindir != NULL);
+ chainingAllowed = True;
+ } else {
+ vassert(vta->disp_cp_chain_me_to_fastEP == NULL);
+ vassert(vta->disp_cp_xindir == NULL);
}
- ////////////////////////////////////////////////////////
- //// BEGIN the assembler
+ switch (vta->arch_guest) {
- // QElem are work Queue elements. The work Queue is the top level data
- // structure for the emitter. It is initialised with the HInstrVec* of
- // the overall HInstrSB. Every OOL HInstrVec* in the tree will at some
- // point be present in the Queue. IL HInstrVec*s are never present in
- // the Queue because the inner emitter loop processes them in-line, using
- // a Stack (see below) to keep track of its nesting level.
- //
- // The Stack (see below) is empty before and after every Queue element is
- // processed. In other words, the Stack only holds state needed during
- // the processing of a single Queue element.
- //
- // The ordering of elements in the Queue is irrelevant -- correct code
- // will be emitted even with set semantics (arbitrary order). However,
- // the FIFOness of the queue is believed to generate code in which
- // colder and colder code (more deeply nested OOLs) is placed further
- // and further from the start of the emitted machine code, which sounds
- // like a layout which should minimise icache misses.
- //
- // QElems also contain two pieces of jump-fixup information. When we
- // finally come to process a QElem, we need to know:
- //
- // * |jumpToOOLpoint|: the place which wants to jump to the start of the
- // emitted insns for this QElem. We must have already emitted that,
- // since it will be the conditional jump that leads to this QElem (OOL
- // block).
- //
- // * |resumePoint|: the place we should jump back to after the QElem is
- // finished (the "resume point"), which is the emitted code of the
- // HInstr immediately following the HInstrIfThenElse that has this
- // QElem as its OOL block.
- //
- // When the QElem is processed, we know both the |jumpToOOLpoint| and
- // the |resumePoint|, and so the first can be patched, and the second
- // we generate an instruction to jump to.
- //
- // There are three complications with patching:
- //
- // (1) per comments on Stack elems, we do not know the |resumePoint| when
- // creating a QElem. That will only be known when processing of the
- // corresponding IL block is completed.
- //
- // (2) The top level HInstrVec* has neither a |jumpToOOLpoint| nor a
- // |resumePoint|.
- //
- // (3) Non-top-level OOLs may not have a valid |resumePoint| if they do
- // an unconditional IR-level Exit. We can generate the resume point
- // branch, but it will be never be used.
- typedef
- struct {
- // The HInstrs for this OOL.
- HInstrVec* oolVec;
- // Where we should patch to jump to the OOL ("how do we get here?")
- Bool jumpToOOLpoint_valid;
- Relocation jumpToOOLpoint;
- // Resume point offset, in bytes from start of output buffer
- // ("where do we go after this block is completed?")
- Bool resumePoint_valid;
- AssemblyBufferOffset resumePoint;
- }
- QElem;
+ case VexArchX86:
+ preciseMemExnsFn
+ = X86FN(guest_x86_state_requires_precise_mem_exns);
+ guest_sizeB = sizeof(VexGuestX86State);
+ offB_HOST_EvC_COUNTER = offsetof(VexGuestX86State,host_EvC_COUNTER);
+ offB_HOST_EvC_FAILADDR = offsetof(VexGuestX86State,host_EvC_FAILADDR);
+ break;
+
+ case VexArchAMD64:
+ preciseMemExnsFn
+ = AMD64FN(guest_amd64_state_requires_precise_mem_exns);
+ guest_sizeB = sizeof(VexGuestAMD64State);
+ offB_HOST_EvC_COUNTER = offsetof(VexGuestAMD64State,host_EvC_COUNTER);
+ offB_HOST_EvC_FAILADDR = offsetof(VexGuestAMD64State,host_EvC_FAILADDR);
+ break;
+ case VexArchPPC32:
+ preciseMemExnsFn
+ = PPC32FN(guest_ppc32_state_requires_precise_mem_exns);
+ guest_sizeB = sizeof(VexGuestPPC32State);
+ offB_HOST_EvC_COUNTER = offsetof(VexGuestPPC32State,host_EvC_COUNTER);
+ offB_HOST_EvC_FAILADDR = offsetof(VexGuestPPC32State,host_EvC_FAILADDR);
+ break;
- // SElem are stack elements. When we suspend processing a HInstrVec* in
- // order to process an IL path in an IfThenElse, we push the HInstrVec*
- // and the next index to process on the stack, so that we know where to
- // resume when the nested IL sequence is completed. |vec| and |vec_next|
- // record the resume HInstr.
- //
- // A second effect of processing a nested IL sequence is that we will
- // have to (later) process the corresponding OOL sequence. And that OOL
- // sequence will have to finish with a jump back to the "resume point"
- // (the emitted instruction immediately following the IfThenElse). We
- // only know the offset of the resume point instruction in the output
- // buffer when we actually resume emitted from there -- that is, when the
- // entry we pushed, is popped. So, when we pop, we must mark the
- // corresponding OOL entry in the Queue to record there the resume point
- // offset. For this reason we also carry |ool_qindex|, which is the
- // index of the corresponding OOL entry in the Queue.
- typedef
- struct {
- HInstrVec* vec; // resume point HInstr vector
- UInt vec_next; // resume point HInstr vector index
- Int ool_qindex; // index in Queue of OOL to mark when we resume
- }
- SElem;
+ case VexArchPPC64:
+ preciseMemExnsFn
+ = PPC64FN(guest_ppc64_state_requires_precise_mem_exns);
+ guest_sizeB = sizeof(VexGuestPPC64State);
+ offB_HOST_EvC_COUNTER = offsetof(VexGuestPPC64State,host_EvC_COUNTER);
+ offB_HOST_EvC_FAILADDR = offsetof(VexGuestPPC64State,host_EvC_FAILADDR);
+ break;
- // The Stack. The stack depth is bounded by maximum number of nested
- // hot (IL) sections, so in practice it is going to be very small.
- const Int nSTACK = 4;
+ case VexArchS390X:
+ preciseMemExnsFn
+ = S390FN(guest_s390x_state_requires_precise_mem_exns);
+ guest_sizeB = sizeof(VexGuestS390XState);
+ offB_HOST_EvC_COUNTER = offsetof(VexGuestS390XState,host_EvC_COUNTER);
+ offB_HOST_EvC_FAILADDR = offsetof(VexGuestS390XState,host_EvC_FAILADDR);
+ break;
- SElem stack[nSTACK];
- Int stackPtr; // points to most recently pushed entry <=> "-1 means empty"
+ case VexArchARM:
+ preciseMemExnsFn
+ = ARMFN(guest_arm_state_requires_precise_mem_exns);
+ guest_sizeB = sizeof(VexGuestARMState);
+ offB_HOST_EvC_COUNTER = offsetof(VexGuestARMState,host_EvC_COUNTER);
+ offB_HOST_EvC_FAILADDR = offsetof(VexGuestARMState,host_EvC_FAILADDR);
+ break;
- // The Queue. The queue size is bounded by the number of cold (OOL)
- // sections in the entire HInstrSB, so it's also going to be pretty
- // small.
- const Int nQUEUE = 8;
+ case VexArchARM64:
+ preciseMemExnsFn
+ = ARM64FN(guest_arm64_state_requires_precise_mem_exns);
+ guest_sizeB = sizeof(VexGuestARM64State);
+ offB_HOST_EvC_COUNTER = offsetof(VexGuestARM64State,host_EvC_COUNTER);
+ offB_HOST_EvC_FAILADDR = offsetof(VexGuestARM64State,host_EvC_FAILADDR);
+ break;
- QElem queue[nQUEUE];
- Int queueOldest; // index of oldest entry, initially 0
- Int queueNewest; // index of newest entry,
- // initially -1, otherwise must be >= queueOldest
+ case VexArchMIPS32:
+ preciseMemExnsFn
+ = MIPS32FN(guest_mips32_state_requires_precise_mem_exns);
+ guest_sizeB = sizeof(VexGuestMIPS32State);
+ offB_HOST_EvC_COUNTER = offsetof(VexGuestMIPS32State,host_EvC_COUNTER);
+ offB_HOST_EvC_FAILADDR = offsetof(VexGuestMIPS32State,host_EvC_FAILADDR);
+ break;
- ///////////////////////////////////////////////////////
+ case VexArchMIPS64:
+ preciseMemExnsFn
+ = MIPS64FN(guest_mips64_state_requires_precise_mem_exns);
+ guest_sizeB = sizeof(VexGuestMIPS64State);
+ offB_HOST_EvC_COUNTER = offsetof(VexGuestMIPS64State,host_EvC_COUNTER);
+ offB_HOST_EvC_FAILADDR = offsetof(VexGuestMIPS64State,host_EvC_FAILADDR);
+ break;
- const Bool verbose_asm = (vex_traceflags & VEX_TRACE_ASM) != 0;
+ default:
+ vpanic("LibVEX_Codegen: unsupported guest insn set");
+ }
- const EmitConstants emitConsts
- = { .mode64 = mode64,
- .endness_host = vta->archinfo_host.endness,
- .disp_cp_chain_me_to_slowEP = vta->disp_cp_chain_me_to_slowEP,
- .disp_cp_chain_me_to_fastEP = vta->disp_cp_chain_me_to_fastEP,
- .disp_cp_xindir = vta->disp_cp_xindir,
- .disp_cp_xassisted = vta->disp_cp_xassisted };
- AssemblyBufferOffset cursor = 0;
- AssemblyBufferOffset cursor_limit = vta->host_bytes_size;
+ switch (vta->arch_host) {
- queueOldest = 0;
- queueNewest = -1;
+ case VexArchX86:
+ mode64 = False;
+ rRegUniv = X86FN(getRRegUniverse_X86());
+ isMove = CAST_TO_TYPEOF(isMove) X86FN(isMove_X86Instr);
+ getRegUsage
+ = CAST_TO_TYPEOF(getRegUsage) X86FN(getRegUsage_X86Instr);
+ mapRegs = CAST_TO_TYPEOF(mapRegs) X86FN(mapRegs_X86Instr);
+ isIfThenElse = CAST_TO_TYPEOF(isIfThenElse) X86FN(isIfThenElse_X86Instr);
+ genSpill = CAST_TO_TYPEOF(genSpill) X86FN(genSpill_X86);
+ genReload = CAST_TO_TYPEOF(genReload) X86FN(genReload_X86);
+ genMove = CAST_TO_TYPEOF(genMove) X86FN(genMove_X86);
+ genHInstrITE = CAST_TO_TYPEOF(genHInstrITE) X86FN(X86Instr_IfThenElse);
+ directReload = CAST_TO_TYPEOF(directReload) X86FN(directReload_X86);
+ ppInstr = CAST_TO_TYPEOF(ppInstr) X86FN(ppX86Instr);
+ ppCondCode = CAST_TO_TYPEOF(ppCondCode) X86FN(ppX86CondCode);
+ ppReg = CAST_TO_TYPEOF(ppReg) X86FN(ppHRegX86);
+ iselSB = X86FN(iselSB_X86);
+ emit = CAST_TO_TYPEOF(emit) X86FN(emit_X86Instr);
+ vassert(vta->archinfo_host.endness == VexEndnessLE);
+ break;
- vassert(queueNewest < nQUEUE);
- queueNewest++;
- {
- QElem* qe = &queue[queueNewest];
- vex_bzero(qe, sizeof(*qe));
- qe->oolVec = rcode->insns;
- qe->jumpToOOLpoint_valid = False;
- qe->resumePoint_valid = False;
- }
- vassert(queueNewest == 0);
+ case VexArchAMD64:
+ mode64 = True;
+ rRegUniv = AMD64FN(getRRegUniverse_AMD64());
+ isMove = CAST_TO_TYPEOF(isMove) AMD64FN(isMove_AMD64Instr);
+ getRegUsage
+ = CAST_TO_TYPEOF(getRegUsage) AMD64FN(getRegUsage_AMD64Instr);
+ mapRegs = CAST_TO_TYPEOF(mapRegs) AMD64FN(mapRegs_AMD64Instr);
+ genSpill = CAST_TO_TYPEOF(genSpill) AMD64FN(genSpill_AMD64);
+ genReload = CAST_TO_TYPEOF(genReload) AMD64FN(genReload_AMD64);
+ genMove = CAST_TO_TYPEOF(genMove) AMD64FN(genMove_AMD64);
+ directReload = CAST_TO_TYPEOF(directReload) AMD64FN(directReload_AMD64);
+ ppInstr = CAST_TO_TYPEOF(ppInstr) AMD64FN(ppAMD64Instr);
+ ppReg = CAST_TO_TYPEOF(ppReg) AMD64FN(ppHRegAMD64);
+ iselSB = AMD64FN(iselSB_AMD64);
+ emit = CAST_TO_TYPEOF(emit) AMD64FN(emit_AMD64Instr);
+ vassert(vta->archinfo_host.endness == VexEndnessLE);
+ break;
- /* Main loop, processing Queue entries, until there are no more. */
- while (queueOldest <= queueNewest) {
+ case VexArchPPC32:
+ mode64 = False;
+ rRegUniv = PPC32FN(getRRegUniverse_PPC(mode64));
+ isMove = CAST_TO_TYPEOF(isMove) PPC32FN(isMove_PPCInstr);
+ getRegUsage
+ = CAST_TO_TYPEOF(getRegUsage) PPC32FN(getRegUsage_PPCInstr);
+ mapRegs = CAST_TO_TYPEOF(mapRegs) PPC32FN(mapRegs_PPCInstr);
+ genSpill = CAST_TO_TYPEOF(genSpill) PPC32FN(genSpill_PPC);
+ genReload = CAST_TO_TYPEOF(genReload) PPC32FN(genReload_PPC);
+ genMove = CAST_TO_TYPEOF(genMove) PPC32FN(genMove_PPC);
+ ppInstr = CAST_TO_TYPEOF(ppInstr) PPC32FN(ppPPCInstr);
+ ppReg = CAST_TO_TYPEOF(ppReg) PPC32FN(ppHRegPPC);
+ iselSB = PPC32FN(iselSB_PPC);
+ emit = CAST_TO_TYPEOF(emit) PPC32FN(emit_PPCInstr);
+ vassert(vta->archinfo_host.endness == VexEndnessBE);
+ break;
- Int qCur = queueOldest;
- if (UNLIKELY(verbose_asm))
- vex_printf("BEGIN queue[%d]\n", qCur);
+ case VexArchPPC64:
+ mode64 = True;
+ rRegUniv = PPC64FN(getRRegUniverse_PPC(mode64));
+ isMove = CAST_TO_TYPEOF(isMove) PPC64FN(isMove_PPCInstr);
+ getRegUsage
+ = CAST_TO_TYPEOF(getRegUsage) PPC64FN(getRegUsage_PPCInstr);
+ mapRegs = CAST_TO_TYPEOF(mapRegs) PPC64FN(mapRegs_PPCInstr);
+ genSpill = CAST_TO_TYPEOF(genSpill) PPC64FN(genSpill_PPC);
+ genReload = CAST_TO_TYPEOF(genReload) PPC64FN(genReload_PPC);
+ genMove = CAST_TO_TYPEOF(genMove) PPC64FN(genMove_PPC);
+ ppInstr = CAST_TO_TYPEOF(ppInstr) PPC64FN(ppPPCInstr);
+ ppReg = CAST_TO_TYPEOF(ppReg) PPC64FN(ppHRegPPC);
+ iselSB = PPC64FN(iselSB_PPC);
+ emit = CAST_TO_TYPEOF(emit) PPC64FN(emit_PPCInstr);
+ vassert(vta->archinfo_host.endness == VexEndnessBE ||
+ vta->archinfo_host.endness == VexEndnessLE );
+ break;
- // Take the oldest entry in the queue
- QElem* qe = &queue[queueOldest];
- queueOldest++;
+ case VexArchS390X:
+ mode64 = True;
+ rRegUniv = S390FN(getRRegUniverse_S390());
+ isMove = CAST_TO_TYPEOF(isMove) S390FN(isMove_S390Instr);
+ getRegUsage
+ = CAST_TO_TYPEOF(getRegUsage) S390FN(getRegUsage_S390Instr);
+ mapRegs = CAST_TO_TYPEOF(mapRegs) S390FN(mapRegs_S390Instr);
+ genSpill = CAST_TO_TYPEOF(genSpill) S390FN(genSpill_S390);
+ genReload = CAST_TO_TYPEOF(genReload) S390FN(genReload_S390);
+ genMove = CAST_TO_TYPEOF(genMove) S390FN(genMove_S390);
+ // fixs390: consider implementing directReload_S390
+ ppInstr = CAST_TO_TYPEOF(ppInstr) S390FN(ppS390Instr);
+ ppReg = CAST_TO_TYPEOF(ppReg) S390FN(ppHRegS390);
+ iselSB = S390FN(iselSB_S390);
+ emit = CAST_TO_TYPEOF(emit) S390FN(emit_S390Instr);
+ vassert(vta->archinfo_host.endness == VexEndnessBE);
+ break;
- // Stay sane. Only the top level block has no branch to it and no
- // resume point.
- if (qe->oolVec == rcode->insns) {
- // This is the top level block
- vassert(!qe->jumpToOOLpoint_valid);
- vassert(!qe->resumePoint_valid);
- } else {
- vassert(qe->jumpToOOLpoint_valid);
- vassert(qe->resumePoint_valid);
- // In the future, we might be able to allow the resume point to be
- // invalid for non-top-level blocks, if the block contains an
- // unconditional exit. Currently the IR can't represent that, so
- // the assertion is valid.
- }
+ case VexArchARM:
+ mode64 = False;
+ rRegUniv = ARMFN(getRRegUniverse_ARM());
+ isMove = CAST_TO_TYPEOF(isMove) ARMFN(isMove_ARMInstr);
+ getRegUsage
+ = CAST_TO_TYPEOF(getRegUsage) ARMFN(getRegUsage_ARMInstr);
+ mapRegs = CAST_TO_TYPEOF(mapRegs) ARMFN(mapRegs_ARMInstr);
+ genSpill = CAST_TO_TYPEOF(genSpill) ARMFN(genSpill_ARM);
+ genReload = CAST_TO_TYPEOF(genReload) ARMFN(genReload_ARM);
+ genMove = CAST_TO_TYPEOF(genMove) ARMFN(genMove_ARM);
+ ppInstr = CAST_TO_TYPEOF(ppInstr) ARMFN(ppARMInstr);
+ ppReg = CAST_TO_TYPEOF(ppReg) ARMFN(ppHRegARM);
+ iselSB = ARMFN(iselSB_ARM);
+ emit = CAST_TO_TYPEOF(emit) ARMFN(emit_ARMInstr);
+ vassert(vta->archinfo_host.endness == VexEndnessLE);
+ break;
- // Processing |qe|
- if (qe->jumpToOOLpoint_valid) {
- // patch qe->jmpToOOLpoint to jump to |here|
- if (UNLIKELY(verbose_asm)) {
- vex_printf(" -- APPLY ");
- ppRelocation(qe->jumpToOOLpoint);
- vex_printf("\n");
- }
- applyRelocation(qe->jumpToOOLpoint, &vta->host_bytes[0],
- cursor, cursor, vta->archinfo_host.endness,
- verbose_asm);
- }
+ case VexArchARM64:
+ mode64 = True;
+ rRegUniv = ARM64FN(getRRegUniverse_ARM64());
+ isMove = CAST_TO_TYPEOF(isMove) ARM64FN(isMove_ARM64Instr);
+ getRegUsage
+ = CAST_TO_TYPEOF(getRegUsage) ARM64FN(getRegUsage_ARM64Instr);
+ mapRegs = CAST_TO_TYPEOF(mapRegs) ARM64FN(mapRegs_ARM64Instr);
+ genSpill = CAST_TO_TYPEOF(genSpill) ARM64FN(genSpill_ARM64);
+ genReload = CAST_TO_TYPEOF(genReload) ARM64FN(genReload_ARM64);
+ genMove = CAST_TO_TYPEOF(genMove) ARM64FN(genMove_ARM64);
+ ppInstr = CAST_TO_TYPEOF(ppInstr) ARM64FN(ppARM64Instr);
+ ppReg = CAST_TO_TYPEOF(ppReg) ARM64FN(ppHRegARM64);
+ iselSB = ARM64FN(iselSB_ARM64);
+ emit = CAST_TO_TYPEOF(emit) ARM64FN(emit_ARM64Instr);
+ vassert(vta->archinfo_host.endness == VexEndnessLE);
+ break;
- // Initialise the stack, for processing of |qe|.
- stackPtr = 0; // "contains one element"
+ case VexArchMIPS32:
+ mode64 = False;
+ rRegUniv = MIPS32FN(getRRegUniverse_MIPS(mode64));
+ isMove = CAST_TO_TYPEOF(isMove) MIPS32FN(isMove_MIPSInstr);
+ getRegUsage
+ = CAST_TO_TYPEOF(getRegUsage) MIPS32FN(getRegUsage_MIPSInstr);
+ mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS32FN(mapRegs_MIPSInstr);
+ genSpill = CAST_TO_TYPEOF(genSpill) MIPS32FN(genSpill_MIPS);
+ genReload = CAST_TO_TYPEOF(genReload) MIPS32FN(genReload_MIPS);
+ genMove = CAST_TO_TYPEOF(genMove) MIPS32FN(genMove_MIPS);
+ ppInstr = CAST_TO_TYPEOF(ppInstr) MIPS32FN(ppMIPSInstr);
+ ppReg = CAST_TO_TYPEOF(ppReg) MIPS32FN(ppHRegMIPS);
+ iselSB = MIPS32FN(iselSB_MIPS);
+ emit = CAST_TO_TYPEOF(emit) MIPS32FN(emit_MIPSInstr);
+ vassert(vta->archinfo_host.endness == VexEndnessLE
+ || vta->archinfo_host.endness == VexEndnessBE);
+ break;
- stack[stackPtr].vec = qe->oolVec;
- stack[stackPtr].vec_next = 0;
- stack[stackPtr].ool_qindex = -1; // INVALID
+ case VexArchMIPS64:
+ mode64 = True;
+ rRegUniv = MIPS64FN(getRRegUniverse_MIPS(mode64));
+ isMove = CAST_TO_TYPEOF(isMove) MIPS64FN(isMove_MIPSInstr);
+ getRegUsage
+ = CAST_TO_TYPEOF(getRegUsage) MIPS64FN(getRegUsage_MIPSInstr);
+ mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS64FN(mapRegs_MIPSInstr);
+ genSpill = CAST_TO_TYPEOF(genSpill) MIPS64FN(genSpill_MIPS);
+ genReload = CAST_TO_TYPEOF(genReload) MIPS64FN(genReload_MIPS);
+ genMove = CAST_TO_TYPEOF(genMove) MIPS64FN(genMove_MIPS);
+ ppInstr = CAST_TO_TYPEOF(ppInstr) MIPS64FN(ppMIPSInstr);
+ ppReg = CAST_TO_TYPEOF(ppReg) MIPS64FN(ppHRegMIPS);
+ iselSB = MIPS64FN(iselSB_MIPS);
+ emit = CAST_TO_TYPEOF(emit) MIPS64FN(emit_MIPSInstr);
+ vassert(vta->archinfo_host.endness == VexEndnessLE
+ || vta->archinfo_host.endness == VexEndnessBE);
+ break;
- // Iterate till the stack is empty. This effectively does a
- // depth-first traversal of the hot-path (IL) tree reachable from
- // here, and at the same time adds any encountered cold-path (OOL)
- // blocks to the Queue for later processing. This is the heart of the
- // flattening algorithm.
- while (stackPtr >= 0) {
+ default:
+ vpanic("LibVEX_Translate: unsupported host insn set");
+ }
- if (UNLIKELY(verbose_asm))
- vex_printf(" -- CONSIDER stack[%d]\n", stackPtr);
+ // Check that the host's hardware capabilities are feasible. The function
+ // will not return if the hwcaps are infeasible in some sense.
+ check_hwcaps(vta->arch_host, vta->archinfo_host.hwcaps);
- HInstrVec* vec = stack[stackPtr].vec;
- UInt vec_next = stack[stackPtr].vec_next;
- Int ool_qindex = stack[stackPtr].ool_qindex;
- stackPtr--;
- if (vec_next > 0) {
- // We're resuming the current IL block having just finished
- // processing a nested IL. The OOL counterpart to the nested IL
- // we just finished processing will have to jump back to here.
- // So we'll need to mark its Queue entry to record that fact.
+ /* Turn it into virtual-registerised code. Build trees -- this
+ also throws away any dead bindings. */
+ max_ga = ado_treebuild_BB( irsb, preciseMemExnsFn, pxControl );
- // First assert that the OOL actually *is* in the Queue (it
- // must be, since we can't have processed it yet).
- vassert(queueOldest <= queueNewest); // "at least 1 entry in Q"
- vassert(queueOldest <= ool_qindex && ool_qindex <= queueNewest);
+ if (vta->finaltidy) {
+ irsb = vta->finaltidy(irsb);
+ }
- vassert(!queue[ool_qindex].resumePoint_valid);
- queue[ool_qindex].resumePoint = cursor;
- queue[ool_qindex].resumePoint_valid = True;
- if (UNLIKELY(verbose_asm))
- vex_printf(" -- RESUME previous IL\n");
- } else {
- // We're starting a new IL. Due to the tail-recursive nature of
- // entering ILs, this means we can actually only be starting the
- // outermost (top level) block for this particular Queue entry.
- vassert(ool_qindex == -1);
- vassert(vec == qe->oolVec);
- if (UNLIKELY(verbose_asm))
- vex_printf(" -- START new IL\n");
- }
+ vexAllocSanityCheck();
- // Repeatedly process "zero or more simple HInstrs followed by (an
- // IfThenElse or end-of-block)"
- while (True) {
+ if (vex_traceflags & VEX_TRACE_TREES) {
+ vex_printf("\n------------------------"
+ " After tree-building "
+ "------------------------\n\n");
+ ppIRSB ( irsb );
+ vex_printf("\n");
+ }
- // Process "zero or more simple HInstrs"
- while (vec_next < vec->insns_used
- && !isIfThenElse(vec->insns[vec_next])) {
- AssemblyBufferOffset cursor_next
- = emitSimpleInsn( &(res->offs_profInc), &vta->host_bytes[0],
- cursor, cursor_limit, vec->insns[vec_next],
- &emitConsts, vta );
- if (UNLIKELY(cursor_next == cursor)) {
- // We ran out of output space. Give up.
- goto out_of_buffer_space;
- }
- vec_next++;
- cursor = cursor_next;
- }
+ /* HACK */
+ if (0) {
+ *(vta->host_bytes_used) = 0;
+ res->status = VexTransOK; return;
+ }
+ /* end HACK */
- // Now we've either got to the end of the hot path, or we have
- // an IfThenElse.
- if (vec_next >= vec->insns_used)
- break;
+ if (vex_traceflags & VEX_TRACE_VCODE)
+ vex_printf("\n------------------------"
+ " Instruction selection "
+ "------------------------\n");
- // So we have an IfThenElse.
- HInstrIfThenElse* hite = isIfThenElse(vec->insns[vec_next]);
- vassert(hite);
- vassert(hite->n_phis == 0); // the regalloc will have removed them
+ /* No guest has its IP field at offset zero. If this fails it
+ means some transformation pass somewhere failed to update/copy
+ irsb->offsIP properly. */
+ vassert(irsb->offsIP >= 16);
- // Put |ite|'s OOL block in the Queue. We'll deal with it
- // later. Also, generate the (skeleton) conditional branch to it,
- // and collect enough information that we can create patch the
- // branch later, once we know where the destination is.
- vassert(queueNewest < nQUEUE-1); // else out of Queue space
- queueNewest++;
- queue[queueNewest].oolVec = hite->outOfLine;
- queue[queueNewest].resumePoint_valid = False; // not yet known
- queue[queueNewest].resumePoint = -1; // invalid
+ vcode = iselSB ( irsb, vta->arch_host,
+ &vta->archinfo_host,
+ &vta->abiinfo_both,
+ offB_HOST_EvC_COUNTER,
+ offB_HOST_EvC_FAILADDR,
+ chainingAllowed,
+ vta->addProfInc,
+ max_ga );
- HInstr* cond_branch
- = X86Instr_JmpCond(hite->ccOOL,
- queueNewest/*FOR DEBUG PRINTING ONLY*/);
- AssemblyBufferOffset cursor_next
- = emitSimpleInsn( &(res->offs_profInc), &vta->host_bytes[0],
- cursor, cursor_limit, cond_branch,
- &emitConsts, vta );
- if (UNLIKELY(cursor_next == cursor)) {
- // We ran out of output space. Give up.
- goto out_of_buffer_space;
- }
- queue[queueNewest].jumpToOOLpoint_valid = True;
- queue[queueNewest].jumpToOOLpoint
- = collectRelocInfo_X86(cursor, cond_branch);
+ vexAllocSanityCheck();
- cursor = cursor_next;
+ if (vex_traceflags & VEX_TRACE_VCODE)
+ vex_printf("\n");
- // Now we descend into |ite's| IL block. So we need to save
- // where we are in this block, so we can resume when the inner
- // one is done.
- vassert(stackPtr < nSTACK-1); // else out of Stack space
- stackPtr++;
- stack[stackPtr].vec = vec;
- stack[stackPtr].vec_next = vec_next+1;
- stack[stackPtr].ool_qindex = queueNewest;
+ if (vex_traceflags & VEX_TRACE_VCODE) {
+ ppHInstrSB(vcode, isIfThenElse, ppInstr, ppCondCode, mode64);
+ }
- // And now descend into the inner block. We could have just
- // pushed its details on the stack and immediately pop it, but
- // it seems simpler to update |vec| and |vec_next| and continue
- // directly.
- if (UNLIKELY(verbose_asm)) {
- vex_printf(" -- START inner IL\n");
- }
- vec = hite->fallThrough;
- vec_next = 0;
+ /* Register allocate. */
+ RegAllocControl con = {
+ .univ = rRegUniv, .isMove = isMove, .getRegUsage = getRegUsage,
+ .mapRegs = mapRegs, .isIfThenElse = isIfThenElse, .genSpill = genSpill,
+ .genReload = genReload, .genMove = genMove, .genHInstrITE = genHInstrITE,
+ .directReload = directReload, .guest_sizeB = guest_sizeB,
+ .ppInstr = ppInstr, .ppCondCode = ppCondCode, .ppReg = ppReg,
+ .mode64 = mode64};
+ rcode = doRegisterAllocation(vcode, &con);
- // And continue with "Repeatedly process ..."
- }
+ vexAllocSanityCheck();
- // Getting here means we've completed an inner IL and now want to
- // resume the parent IL. That is, pop a saved context off the
- // stack.
- }
+ if (vex_traceflags & VEX_TRACE_RCODE) {
+ vex_printf("\n------------------------"
+ " Register-allocated code "
+ "------------------------\n\n");
+ ppHInstrSB(rcode, isIfThenElse, ppInstr, ppCondCode, mode64);
+ vex_printf("\n");
+ }
- // Hot path is complete. Now, probably, we have to add a jump
- // back to the resume point.
- if (qe->resumePoint_valid) {
- if (0)
- vex_printf(" // Generate jump to resume point [%03u]\n",
- qe->resumePoint);
- HInstr* jmp = X86Instr_Jmp(cursor, qe->resumePoint);
- AssemblyBufferOffset cursor_next
- = emitSimpleInsn( &(res->offs_profInc), &vta->host_bytes[0],
- cursor, cursor_limit, jmp,
- &emitConsts, vta );
- if (UNLIKELY(cursor_next == cursor)) {
- // We ran out of output space. Give up.
- goto out_of_buffer_space;
- }
- cursor = cursor_next;
- }
+ /* HACK */
+ if (0) {
+ *(vta->host_bytes_used) = 0;
+ res->status = VexTransOK; return;
+ }
+ /* end HACK */
- if (UNLIKELY(verbose_asm))
- vex_printf("END queue[%d]\n\n", qCur);
- // Finished with this Queue entry.
+ /* Assemble */
+ if (vex_traceflags & VEX_TRACE_ASM) {
+ vex_printf("\n------------------------"
+ " Assembly "
+ "------------------------\n\n");
}
- // Queue empty, all blocks processed
- *(vta->host_bytes_used) = cursor;
- out_used = cursor;
- ////
- //// END of the assembler
- ////////////////////////////////////////////////////////
+ Bool assembly_ok = theAssembler( res, vta, isIfThenElse, mode64, rcode );
+ if (!assembly_ok)
+ goto out_of_buffer_space;
vexAllocSanityCheck();
j += vta->guest_extents->len[i];
}
if (1) vex_printf("VexExpansionRatio %d %d %d :10\n\n",
- j, out_used, (10 * out_used) / (j == 0 ? 1 : j));
+ j, *(vta->host_bytes_used),
+ (10 * *(vta->host_bytes_used)) / (j == 0 ? 1 : j));
}
vex_traceflags = 0;