Copyright (C) 2004-2010 OpenWorks LLP
info@open-works.net
+ Copyright (C) 2010-2010 Dmitry Zhurikhin
+ zhur@ispras.ru
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
#include "host_generic_regs.h"
#include "host_arm_defs.h"
+UInt arm_hwcaps = 0;
/* --------- Registers. --------- */
return;
case HRcFlt64:
r = hregNumber(reg);
- vassert(r >= 0 && r < 16);
+ vassert(r >= 0 && r < 32);
vex_printf("d%d", r);
return;
case HRcFlt32:
vassert(r >= 0 && r < 32);
vex_printf("s%d", r);
return;
+ case HRcVec128:
+ r = hregNumber(reg);
+ vassert(r >= 0 && r < 16);
+ vex_printf("q%d", r);
+ return;
default:
vpanic("ppHRegARM");
}
HReg hregARM_S28 ( void ) { return mkHReg(28, HRcFlt32, False); }
HReg hregARM_S29 ( void ) { return mkHReg(29, HRcFlt32, False); }
HReg hregARM_S30 ( void ) { return mkHReg(30, HRcFlt32, False); }
+HReg hregARM_Q8 ( void ) { return mkHReg(8, HRcVec128, False); }
+HReg hregARM_Q9 ( void ) { return mkHReg(9, HRcVec128, False); }
+HReg hregARM_Q10 ( void ) { return mkHReg(10, HRcVec128, False); }
+HReg hregARM_Q11 ( void ) { return mkHReg(11, HRcVec128, False); }
+HReg hregARM_Q12 ( void ) { return mkHReg(12, HRcVec128, False); }
+HReg hregARM_Q13 ( void ) { return mkHReg(13, HRcVec128, False); }
+HReg hregARM_Q14 ( void ) { return mkHReg(14, HRcVec128, False); }
+HReg hregARM_Q15 ( void ) { return mkHReg(15, HRcVec128, False); }
void getAllocableRegs_ARM ( Int* nregs, HReg** arr )
{
Int i = 0;
- *nregs = 21;
+ *nregs = 29;
*arr = LibVEX_Alloc(*nregs * sizeof(HReg));
// callee saves ones are listed first, since we prefer them
// if they're available
(*arr)[i++] = hregARM_S28();
(*arr)[i++] = hregARM_S29();
(*arr)[i++] = hregARM_S30();
+
+ (*arr)[i++] = hregARM_Q8();
+ (*arr)[i++] = hregARM_Q9();
+ (*arr)[i++] = hregARM_Q10();
+ (*arr)[i++] = hregARM_Q11();
+ (*arr)[i++] = hregARM_Q12();
+ (*arr)[i++] = hregARM_Q13();
+ (*arr)[i++] = hregARM_Q14();
+ (*arr)[i++] = hregARM_Q15();
+
// unavail: r8 as GSP
// r12 'cos we're not sure what it's for
// r13 as SP
}
+/* --------- Mem AModes: Addressing Mode Neon ------- */
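+/* Two forms are supported: [rN] (plain register base) and [rN], rM
+   (base post-incremented by rM after the access), matching what the
+   VLD1/VST1 encodings accept. */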
+
+ARMAModeN *mkARMAModeN_RR ( HReg rN, HReg rM ) {
+ ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
+ am->tag = ARMamN_RR;
+ am->ARMamN.RR.rN = rN;
+ am->ARMamN.RR.rM = rM;
+ return am;
+}
+
+ARMAModeN *mkARMAModeN_R ( HReg rN ) {
+ ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
+ am->tag = ARMamN_R;
+ am->ARMamN.R.rN = rN;
+ return am;
+}
+
+static void addRegUsage_ARMAModeN ( HRegUsage* u, ARMAModeN* am ) {
+ if (am->tag == ARMamN_R) {
+ addHRegUse(u, HRmRead, am->ARMamN.R.rN);
+ } else {
+ addHRegUse(u, HRmRead, am->ARMamN.RR.rN);
+ addHRegUse(u, HRmRead, am->ARMamN.RR.rM);
+ }
+}
+
+static void mapRegs_ARMAModeN ( HRegRemap* m, ARMAModeN* am ) {
+ if (am->tag == ARMamN_R) {
+ am->ARMamN.R.rN = lookupHRegRemap(m, am->ARMamN.R.rN);
+ } else {
+ am->ARMamN.RR.rN = lookupHRegRemap(m, am->ARMamN.RR.rN);
+ am->ARMamN.RR.rM = lookupHRegRemap(m, am->ARMamN.RR.rM);
+ }
+}
+
+void ppARMAModeN ( ARMAModeN* am ) {
+ vex_printf("[");
+ if (am->tag == ARMamN_R) {
+ ppHRegARM(am->ARMamN.R.rN);
+ } else {
+ ppHRegARM(am->ARMamN.RR.rN);
+ }
+ vex_printf("]");
+ if (am->tag == ARMamN_RR) {
+ vex_printf(", ");
+ ppHRegARM(am->ARMamN.RR.rM);
+ }
+}
+
+
/* --------- Reg or imm-8x4 operands --------- */
static UInt ROR32 ( UInt x, UInt sh ) {
}
}
+/* --------- Neon Immediate operand --------- */
+
+ARMNImm* ARMNImm_TI ( UInt type, UInt imm8 ) {
+ ARMNImm* i = LibVEX_Alloc(sizeof(ARMNImm));
+ i->type = type;
+ i->imm8 = imm8;
+ return i;
+}
+
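+/* Expand a (type, imm8) pair into the 64-bit pattern it denotes:
+   types 0-3 put imm8 into one byte lane of each 32-bit word, types
+   4-6 into 16-bit lanes, types 7-8 are the "shifted ones" forms
+   (bytes below imm8 filled with 0xFF), type 9 replicates each bit of
+   imm8 into a whole byte, and type 10 is the float32 immediate
+   form. */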
+ULong ARMNImm_to_Imm64 ( ARMNImm* imm ) {
+ int i, j;
+ ULong y, x = imm->imm8;
+ switch (imm->type) {
+ case 3:
+ x = x << 8;
+ case 2:
+ x = x << 8;
+ case 1:
+ x = x << 8;
+ case 0:
+ return (x << 32) | x;
+ case 5:
+ case 6:
+ if (imm->type == 5)
+ x = x << 8;
+ else
+ x = (x << 8) | x;
+ case 4:
+ x = (x << 16) | x;
+ return (x << 32) | x;
+ case 8:
+ x = (x << 8) | 0xFF;
+ case 7:
+ x = (x << 8) | 0xFF;
+ return (x << 32) | x;
+ case 9:
+ x = 0;
+ for (i = 7; i >= 0; i--) {
+ y = ((ULong)imm->imm8 >> i) & 1;
+ for (j = 0; j < 8; j++) {
+ x = (x << 1) | y;
+ }
+ }
+ return x;
+ case 10:
+ x |= (x & 0x80) << 5;
+ x |= ~(x & 0x40) << 5;
+ x &= 0x187F; /* 0001 1000 0111 1111 */
+ x |= (x & 0x40) << 4;
+ x |= (x & 0x40) << 3;
+ x |= (x & 0x40) << 2;
+ x |= (x & 0x40) << 1;
+ x = x << 19;
+ x = (x << 32) | x;
+ return x;
+ default:
+ vpanic("ARMNImm_to_Imm64");
+ }
+}
+
+ARMNImm* Imm64_to_ARMNImm ( ULong x ) {
+ ARMNImm tmp;
+ if ((x & 0xFFFFFFFF) == (x >> 32)) {
+ if ((x & 0xFFFFFF00) == 0)
+ return ARMNImm_TI(0, x & 0xFF);
+ if ((x & 0xFFFF00FF) == 0)
+ return ARMNImm_TI(1, (x >> 8) & 0xFF);
+ if ((x & 0xFF00FFFF) == 0)
+ return ARMNImm_TI(2, (x >> 16) & 0xFF);
+ if ((x & 0x00FFFFFF) == 0)
+ return ARMNImm_TI(3, (x >> 24) & 0xFF);
+ if ((x & 0xFFFF00FF) == 0xFF)
+ return ARMNImm_TI(7, (x >> 8) & 0xFF);
+ if ((x & 0xFF00FFFF) == 0xFFFF)
+ return ARMNImm_TI(8, (x >> 16) & 0xFF);
+ if ((x & 0xFFFF) == ((x >> 16) & 0xFFFF)) {
+ if ((x & 0xFF00) == 0)
+ return ARMNImm_TI(4, x & 0xFF);
+ if ((x & 0x00FF) == 0)
+ return ARMNImm_TI(5, (x >> 8) & 0xFF);
+ if ((x & 0xFF) == ((x >> 8) & 0xFF))
+ return ARMNImm_TI(6, x & 0xFF);
+ }
+ if ((x & 0x7FFFF) == 0) {
+ tmp.type = 10;
+ tmp.imm8 = ((x >> 19) & 0x7F) | ((x >> 24) & 0x80);
+ if (ARMNImm_to_Imm64(&tmp) == x)
+ return ARMNImm_TI(tmp.type, tmp.imm8);
+ }
+ } else {
+ /* This can only be type 9. */
+ tmp.imm8 = (((x >> 56) & 1) << 7)
+ | (((x >> 48) & 1) << 6)
+ | (((x >> 40) & 1) << 5)
+ | (((x >> 32) & 1) << 4)
+ | (((x >> 24) & 1) << 3)
+ | (((x >> 16) & 1) << 2)
+ | (((x >> 8) & 1) << 1)
+ | (((x >> 0) & 1) << 0);
+ tmp.type = 9;
+ if (ARMNImm_to_Imm64 (&tmp) == x)
+ return ARMNImm_TI(tmp.type, tmp.imm8);
+ }
+ return NULL;
+}
+
+void ppARMNImm (ARMNImm* i) {
+ ULong x = ARMNImm_to_Imm64(i);
+ vex_printf("0x%llX%llX", x, x);
+}
+
+/* --------- Register or scalar operand --------- */
+
+ARMNRS* mkARMNRS(ARMNRS_tag tag, HReg reg, UInt index)
+{
+ ARMNRS *p = LibVEX_Alloc(sizeof(ARMNRS));
+ p->tag = tag;
+ p->reg = reg;
+ p->index = index;
+ return p;
+}
+
+void ppARMNRS(ARMNRS *p)
+{
+ ppHRegARM(p->reg);
+ if (p->tag == ARMNRS_Scalar) {
+ vex_printf("[%d]", p->index);
+ }
+}
/* --------- Instructions. --------- */
}
}
+HChar* showARMNeonBinOp ( ARMNeonBinOp op ) {
+ switch (op) {
+ case ARMneon_VAND: return "vand";
+ case ARMneon_VORR: return "vorr";
+ case ARMneon_VXOR: return "veor";
+ case ARMneon_VADD: return "vadd";
+ case ARMneon_VRHADDS: return "vrhadd";
+ case ARMneon_VRHADDU: return "vrhadd";
+ case ARMneon_VADDFP: return "vadd";
+ case ARMneon_VPADDFP: return "vpadd";
+ case ARMneon_VABDFP: return "vabd";
+ case ARMneon_VSUB: return "vsub";
+ case ARMneon_VSUBFP: return "vsub";
+ case ARMneon_VMINU: return "vmin";
+ case ARMneon_VMINS: return "vmin";
+ case ARMneon_VMINF: return "vmin";
+ case ARMneon_VMAXU: return "vmax";
+ case ARMneon_VMAXS: return "vmax";
+ case ARMneon_VMAXF: return "vmax";
+ case ARMneon_VQADDU: return "vqadd";
+ case ARMneon_VQADDS: return "vqadd";
+ case ARMneon_VQSUBU: return "vqsub";
+ case ARMneon_VQSUBS: return "vqsub";
+ case ARMneon_VCGTU: return "vcgt";
+ case ARMneon_VCGTS: return "vcgt";
+ case ARMneon_VCGTF: return "vcgt";
+ case ARMneon_VCGEF: return "vcgt";
+ case ARMneon_VCGEU: return "vcge";
+ case ARMneon_VCGES: return "vcge";
+ case ARMneon_VCEQ: return "vceq";
+ case ARMneon_VCEQF: return "vceq";
+ case ARMneon_VPADD: return "vpadd";
+ case ARMneon_VPMINU: return "vpmin";
+ case ARMneon_VPMINS: return "vpmin";
+ case ARMneon_VPMINF: return "vpmin";
+ case ARMneon_VPMAXU: return "vpmax";
+ case ARMneon_VPMAXS: return "vpmax";
+ case ARMneon_VPMAXF: return "vpmax";
+ case ARMneon_VEXT: return "vext";
+ case ARMneon_VMUL: return "vmuli";
+ case ARMneon_VMULLU: return "vmull";
+ case ARMneon_VMULLS: return "vmull";
+ case ARMneon_VMULP: return "vmul";
+ case ARMneon_VMULFP: return "vmul";
+ case ARMneon_VMULLP: return "vmul";
+ case ARMneon_VQDMULH: return "vqdmulh";
+ case ARMneon_VQRDMULH: return "vqrdmulh";
+ case ARMneon_VQDMULL: return "vqdmull";
+ case ARMneon_VTBL: return "vtbl";
+ case ARMneon_SETELEM: return "vmov";
+ case ARMneon_VABSFP: return "vabsfp";
+ case ARMneon_VRSQRTEFP: return "vrsqrtefp";
+ case ARMneon_VRSQRTE: return "vrsqrte";
+ /* ... */
+ default: vpanic("showARMNeonBinOp");
+ }
+}
+
+HChar* showARMNeonBinOpDataType ( ARMNeonBinOp op ) {
+ switch (op) {
+ case ARMneon_VAND:
+ case ARMneon_VORR:
+ case ARMneon_VXOR:
+ return "";
+ case ARMneon_VADD:
+ case ARMneon_VSUB:
+ case ARMneon_VEXT:
+ case ARMneon_VMUL:
+ case ARMneon_SETELEM:
+ case ARMneon_VPADD:
+ case ARMneon_VTBL:
+ case ARMneon_VCEQ:
+ return ".i";
+ case ARMneon_VRHADDU:
+ case ARMneon_VMINU:
+ case ARMneon_VMAXU:
+ case ARMneon_VQADDU:
+ case ARMneon_VQSUBU:
+ case ARMneon_VCGTU:
+ case ARMneon_VCGEU:
+ case ARMneon_VMULLU:
+ case ARMneon_VPMINU:
+ case ARMneon_VPMAXU:
+ case ARMneon_VRSQRTE:
+ return ".u";
+ case ARMneon_VRHADDS:
+ case ARMneon_VMINS:
+ case ARMneon_VMAXS:
+ case ARMneon_VQADDS:
+ case ARMneon_VQSUBS:
+ case ARMneon_VCGTS:
+ case ARMneon_VCGES:
+ case ARMneon_VQDMULL:
+ case ARMneon_VMULLS:
+ case ARMneon_VPMINS:
+ case ARMneon_VPMAXS:
+ case ARMneon_VQDMULH:
+ case ARMneon_VQRDMULH:
+ return ".s";
+ case ARMneon_VMULP:
+ case ARMneon_VMULLP:
+ return ".p";
+ case ARMneon_VADDFP:
+ case ARMneon_VABDFP:
+ case ARMneon_VPADDFP:
+ case ARMneon_VSUBFP:
+ case ARMneon_VMULFP:
+ case ARMneon_VMINF:
+ case ARMneon_VMAXF:
+ case ARMneon_VABSFP:
+ case ARMneon_VRSQRTEFP:
+ case ARMneon_VPMINF:
+ case ARMneon_VPMAXF:
+ case ARMneon_VCGTF:
+ case ARMneon_VCGEF:
+ case ARMneon_VCEQF:
+ return ".f";
+ /* ... */
+ default: vpanic("showARMNeonBinOpDataType");
+ }
+}
+
+HChar* showARMNeonUnOp ( ARMNeonUnOp op ) {
+ switch (op) {
+ case ARMneon_COPY: return "vmov";
+ case ARMneon_COPYLS: return "vmov";
+ case ARMneon_COPYLU: return "vmov";
+ case ARMneon_COPYN: return "vmov";
+ case ARMneon_COPYQNSS: return "vqmovn";
+ case ARMneon_COPYQNUS: return "vqmovun";
+ case ARMneon_COPYQNUU: return "vqmovn";
+ case ARMneon_NOT: return "vmvn";
+ case ARMneon_EQZ: return "vceq";
+ case ARMneon_CNT: return "vcnt";
+ case ARMneon_CLS: return "vcls";
+ case ARMneon_CLZ: return "vclz";
+ case ARMneon_DUP: return "vdup";
+ case ARMneon_PADDLS: return "vpaddl";
+ case ARMneon_PADDLU: return "vpaddl";
+ case ARMneon_VQSHLNSS: return "vqshl";
+ case ARMneon_VQSHLNUU: return "vqshl";
+ case ARMneon_VQSHLNUS: return "vqshlu";
+ case ARMneon_REV16: return "vrev16";
+ case ARMneon_REV32: return "vrev32";
+ case ARMneon_REV64: return "vrev64";
+ case ARMneon_VCVTFtoU: return "vcvt";
+ case ARMneon_VCVTFtoS: return "vcvt";
+ case ARMneon_VCVTUtoF: return "vcvt";
+ case ARMneon_VCVTStoF: return "vcvt";
+ case ARMneon_VCVTFtoFixedU: return "vcvt";
+ case ARMneon_VCVTFtoFixedS: return "vcvt";
+ case ARMneon_VCVTFixedUtoF: return "vcvt";
+ case ARMneon_VCVTFixedStoF: return "vcvt";
+ case ARMneon_VCVTF32toF16: return "vcvt";
+ case ARMneon_VCVTF16toF32: return "vcvt";
+ case ARMneon_VRECIP: return "vrecip";
+ case ARMneon_VRECIPF: return "vrecipf";
+ case ARMneon_VRECPS: return "vrecps";
+ case ARMneon_VNEGF: return "vneg";
+ case ARMneon_VRSQRTS: return "vrecps";
+ case ARMneon_ABS: return "vabs";
+ /* ... */
+ default: vpanic("showARMNeonUnOp");
+ }
+}
+
+HChar* showARMNeonUnOpDataType ( ARMNeonUnOp op ) {
+ switch (op) {
+ case ARMneon_COPY:
+ case ARMneon_NOT:
+ return "";
+ case ARMneon_COPYN:
+ case ARMneon_EQZ:
+ case ARMneon_CNT:
+ case ARMneon_DUP:
+ case ARMneon_REV16:
+ case ARMneon_REV32:
+ case ARMneon_REV64:
+ return ".i";
+ case ARMneon_COPYLU:
+ case ARMneon_PADDLU:
+ case ARMneon_COPYQNUU:
+ case ARMneon_VQSHLNUU:
+ case ARMneon_VRECIP:
+ return ".u";
+ case ARMneon_CLS:
+ case ARMneon_CLZ:
+ case ARMneon_COPYLS:
+ case ARMneon_PADDLS:
+ case ARMneon_COPYQNSS:
+ case ARMneon_COPYQNUS:
+ case ARMneon_VQSHLNSS:
+ case ARMneon_VQSHLNUS:
+ case ARMneon_ABS:
+ return ".s";
+ case ARMneon_VRECIPF:
+ case ARMneon_VRECPS:
+ case ARMneon_VNEGF:
+ case ARMneon_VRSQRTS:
+ return ".f";
+ case ARMneon_VCVTFtoU: return ".u32.f32";
+ case ARMneon_VCVTFtoS: return ".s32.f32";
+ case ARMneon_VCVTUtoF: return ".f32.u32";
+ case ARMneon_VCVTStoF: return ".f32.s32";
+ case ARMneon_VCVTF16toF32: return ".f32.f16";
+ case ARMneon_VCVTF32toF16: return ".f16.f32";
+ case ARMneon_VCVTFtoFixedU: return ".u32.f32";
+ case ARMneon_VCVTFtoFixedS: return ".s32.f32";
+ case ARMneon_VCVTFixedUtoF: return ".f32.u32";
+ case ARMneon_VCVTFixedStoF: return ".f32.s32";
+ /* ... */
+ default: vpanic("showARMNeonUnOpDataType");
+ }
+}
+
+HChar* showARMNeonUnOpS ( ARMNeonUnOpS op ) {
+ switch (op) {
+ case ARMneon_SETELEM: return "vmov";
+ case ARMneon_GETELEMU: return "vmov";
+ case ARMneon_GETELEMS: return "vmov";
+ case ARMneon_VDUP: return "vdup";
+ /* ... */
+ default: vpanic("showARMNeonUnarySOp");
+ }
+}
+
+HChar* showARMNeonUnOpSDataType ( ARMNeonUnOpS op ) {
+ switch (op) {
+ case ARMneon_SETELEM:
+ case ARMneon_VDUP:
+ return ".i";
+ case ARMneon_GETELEMS:
+ return ".s";
+ case ARMneon_GETELEMU:
+ return ".u";
+ /* ... */
+ default: vpanic("showARMNeonUnarySOp");
+ }
+}
+
+HChar* showARMNeonShiftOp ( ARMNeonShiftOp op ) {
+ switch (op) {
+ case ARMneon_VSHL: return "vshl";
+ case ARMneon_VSAL: return "vshl";
+ case ARMneon_VQSHL: return "vqshl";
+ case ARMneon_VQSAL: return "vqshl";
+ /* ... */
+ default: vpanic("showARMNeonShiftOp");
+ }
+}
+
+HChar* showARMNeonShiftOpDataType ( ARMNeonShiftOp op ) {
+ switch (op) {
+ case ARMneon_VSHL:
+ case ARMneon_VQSHL:
+ return ".u";
+ case ARMneon_VSAL:
+ case ARMneon_VQSAL:
+ return ".s";
+ /* ... */
+ default: vpanic("showARMNeonShiftOpDataType");
+ }
+}
+
+HChar* showARMNeonDualOp ( ARMNeonDualOp op ) {
+ switch (op) {
+ case ARMneon_TRN: return "vtrn";
+ case ARMneon_ZIP: return "vzip";
+ case ARMneon_UZP: return "vuzp";
+ /* ... */
+ default: vpanic("showARMNeonDualOp");
+ }
+}
+
+HChar* showARMNeonDualOpDataType ( ARMNeonDualOp op ) {
+ switch (op) {
+ case ARMneon_TRN:
+ case ARMneon_ZIP:
+ case ARMneon_UZP:
+ return "i";
+ /* ... */
+ default: vpanic("showARMNeonDualOp");
+ }
+}
+
+static HChar* showARMNeonDataSize_wrk ( UInt size )
+{
+ switch (size) {
+ case 0: return "8";
+ case 1: return "16";
+ case 2: return "32";
+ case 3: return "64";
+ default: vpanic("showARMNeonDataSize");
+ }
+}
+
+static HChar* showARMNeonDataSize ( ARMInstr* i )
+{
+ switch (i->tag) {
+ case ARMin_NBinary:
+ if (i->ARMin.NBinary.op == ARMneon_VEXT)
+ return "8";
+ if (i->ARMin.NBinary.op == ARMneon_VAND ||
+ i->ARMin.NBinary.op == ARMneon_VORR ||
+ i->ARMin.NBinary.op == ARMneon_VXOR)
+ return "";
+ return showARMNeonDataSize_wrk(i->ARMin.NBinary.size);
+ case ARMin_NUnary:
+ if (i->ARMin.NUnary.op == ARMneon_COPY ||
+ i->ARMin.NUnary.op == ARMneon_NOT ||
+ i->ARMin.NUnary.op == ARMneon_VCVTF32toF16||
+ i->ARMin.NUnary.op == ARMneon_VCVTF16toF32||
+ i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
+ i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
+ i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
+ i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF ||
+ i->ARMin.NUnary.op == ARMneon_VCVTFtoS ||
+ i->ARMin.NUnary.op == ARMneon_VCVTFtoU ||
+ i->ARMin.NUnary.op == ARMneon_VCVTStoF ||
+ i->ARMin.NUnary.op == ARMneon_VCVTUtoF)
+ return "";
+ if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
+ i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
+ i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
+ UInt size;
+ size = i->ARMin.NUnary.size;
+ if (size & 0x40)
+ return "64";
+ if (size & 0x20)
+ return "32";
+ if (size & 0x10)
+ return "16";
+ if (size & 0x08)
+ return "8";
+ vpanic("showARMNeonDataSize");
+ }
+ return showARMNeonDataSize_wrk(i->ARMin.NUnary.size);
+ case ARMin_NUnaryS:
+ if (i->ARMin.NUnaryS.op == ARMneon_VDUP) {
+ int size;
+ size = i->ARMin.NUnaryS.size;
+ if ((size & 1) == 1)
+ return "8";
+ if ((size & 3) == 2)
+ return "16";
+ if ((size & 7) == 4)
+ return "32";
+ vpanic("showARMNeonDataSize");
+ }
+ return showARMNeonDataSize_wrk(i->ARMin.NUnaryS.size);
+ case ARMin_NShift:
+ return showARMNeonDataSize_wrk(i->ARMin.NShift.size);
+ case ARMin_NDual:
+ return showARMNeonDataSize_wrk(i->ARMin.NDual.size);
+ default:
+ vpanic("showARMNeonDataSize");
+ }
+}
+
ARMInstr* ARMInstr_Alu ( ARMAluOp op,
HReg dst, HReg argL, ARMRI84* argR ) {
ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
return i;
}
+ARMInstr* ARMInstr_NLdStQ ( Bool isLoad, HReg dQ, ARMAModeN *amode ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_NLdStQ;
+ i->ARMin.NLdStQ.isLoad = isLoad;
+ i->ARMin.NLdStQ.dQ = dQ;
+ i->ARMin.NLdStQ.amode = amode;
+ return i;
+}
+
+ARMInstr* ARMInstr_NLdStD ( Bool isLoad, HReg dD, ARMAModeN *amode ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_NLdStD;
+ i->ARMin.NLdStD.isLoad = isLoad;
+ i->ARMin.NLdStD.dD = dD;
+ i->ARMin.NLdStD.amode = amode;
+ return i;
+}
+
+ARMInstr* ARMInstr_NUnary ( ARMNeonUnOp op, HReg dQ, HReg nQ,
+ UInt size, Bool Q ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_NUnary;
+ i->ARMin.NUnary.op = op;
+ i->ARMin.NUnary.src = nQ;
+ i->ARMin.NUnary.dst = dQ;
+ i->ARMin.NUnary.size = size;
+ i->ARMin.NUnary.Q = Q;
+ return i;
+}
+
+ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOp op, ARMNRS* dst, ARMNRS* src,
+ UInt size, Bool Q ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_NUnaryS;
+ i->ARMin.NUnaryS.op = op;
+ i->ARMin.NUnaryS.src = src;
+ i->ARMin.NUnaryS.dst = dst;
+ i->ARMin.NUnaryS.size = size;
+ i->ARMin.NUnaryS.Q = Q;
+ return i;
+}
+
+ARMInstr* ARMInstr_NDual ( ARMNeonDualOp op, HReg nQ, HReg mQ,
+ UInt size, Bool Q ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_NDual;
+ i->ARMin.NDual.op = op;
+ i->ARMin.NDual.arg1 = nQ;
+ i->ARMin.NDual.arg2 = mQ;
+ i->ARMin.NDual.size = size;
+ i->ARMin.NDual.Q = Q;
+ return i;
+}
+
+ARMInstr* ARMInstr_NBinary ( ARMNeonBinOp op,
+ HReg dst, HReg argL, HReg argR,
+ UInt size, Bool Q ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_NBinary;
+ i->ARMin.NBinary.op = op;
+ i->ARMin.NBinary.argL = argL;
+ i->ARMin.NBinary.argR = argR;
+ i->ARMin.NBinary.dst = dst;
+ i->ARMin.NBinary.size = size;
+ i->ARMin.NBinary.Q = Q;
+ return i;
+}
+
+ARMInstr* ARMInstr_NeonImm (HReg dst, ARMNImm* imm ) {
+ ARMInstr *i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_NeonImm;
+ i->ARMin.NeonImm.dst = dst;
+ i->ARMin.NeonImm.imm = imm;
+ return i;
+}
+
+ARMInstr* ARMInstr_NCMovQ ( ARMCondCode cond, HReg dst, HReg src ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_NCMovQ;
+ i->ARMin.NCMovQ.cond = cond;
+ i->ARMin.NCMovQ.dst = dst;
+ i->ARMin.NCMovQ.src = src;
+ vassert(cond != ARMcc_AL);
+ return i;
+}
+
+ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp op,
+ HReg dst, HReg argL, HReg argR,
+ UInt size, Bool Q ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_NShift;
+ i->ARMin.NShift.op = op;
+ i->ARMin.NShift.argL = argL;
+ i->ARMin.NShift.argR = argR;
+ i->ARMin.NShift.dst = dst;
+ i->ARMin.NShift.size = size;
+ i->ARMin.NShift.Q = Q;
+ return i;
+}
+
+/* Helper copy-pasted from isel.c */
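+/* An ARM ALU immediate is an 8-bit value rotated right by twice the
+   4-bit rotate field; try all 16 rotations to see if 'u' fits. */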
+static Bool fitsIn8x4 ( UInt* u8, UInt* u4, UInt u )
+{
+ UInt i;
+ for (i = 0; i < 16; i++) {
+ if (0 == (u & 0xFFFFFF00)) {
+ *u8 = u;
+ *u4 = i;
+ return True;
+ }
+ u = ROR32(u, 30);
+ }
+ vassert(i == 16);
+ return False;
+}
+
+ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ) {
+ UInt u8, u4;
+ ARMInstr *i = LibVEX_Alloc(sizeof(ARMInstr));
+ /* Try to generate single ADD if possible */
+ if (fitsIn8x4(&u8, &u4, imm32)) {
+ i->tag = ARMin_Alu;
+ i->ARMin.Alu.op = ARMalu_ADD;
+ i->ARMin.Alu.dst = rD;
+ i->ARMin.Alu.argL = rN;
+ i->ARMin.Alu.argR = ARMRI84_I84(u8, u4);
+ } else {
+ i->tag = ARMin_Add32;
+ i->ARMin.Add32.rD = rD;
+ i->ARMin.Add32.rN = rN;
+ i->ARMin.Add32.imm32 = imm32;
+ }
+ return i;
+}
+
+/* ... */
+
void ppARMInstr ( ARMInstr* i ) {
switch (i->tag) {
case ARMin_Alu:
vex_printf("mfence (mcr 15,0,r0,c7,c10,4; 15,0,r0,c7,c10,5; "
"15,0,r0,c7,c5,4)");
return;
-
+ case ARMin_NLdStQ:
+ if (i->ARMin.NLdStQ.isLoad)
+ vex_printf("vld1.32 {");
+ else
+ vex_printf("vst1.32 {");
+ ppHRegARM(i->ARMin.NLdStQ.dQ);
+ vex_printf("} ");
+ ppARMAModeN(i->ARMin.NLdStQ.amode);
+ return;
+ case ARMin_NLdStD:
+ if (i->ARMin.NLdStD.isLoad)
+ vex_printf("vld1.32 {");
+ else
+ vex_printf("vst1.32 {");
+ ppHRegARM(i->ARMin.NLdStD.dD);
+ vex_printf("} ");
+ ppARMAModeN(i->ARMin.NLdStD.amode);
+ return;
+ case ARMin_NUnary:
+ vex_printf("%s%s%s ",
+ showARMNeonUnOp(i->ARMin.NUnary.op),
+ showARMNeonUnOpDataType(i->ARMin.NUnary.op),
+ showARMNeonDataSize(i));
+ ppHRegARM(i->ARMin.NUnary.dst);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.NUnary.src);
+ if (i->ARMin.NUnary.op == ARMneon_EQZ)
+ vex_printf(", #0");
+ if (i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
+ i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
+ i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
+ i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF) {
+ vex_printf(", #%d", i->ARMin.NUnary.size);
+ }
+ if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
+ i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
+ i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
+ UInt size;
+ size = i->ARMin.NUnary.size;
+ if (size & 0x40) {
+ vex_printf(", #%d", size - 64);
+ } else if (size & 0x20) {
+ vex_printf(", #%d", size - 32);
+ } else if (size & 0x10) {
+ vex_printf(", #%d", size - 16);
+ } else if (size & 0x08) {
+ vex_printf(", #%d", size - 8);
+ }
+ }
+ return;
+ case ARMin_NUnaryS:
+ vex_printf("%s%s%s ",
+ showARMNeonUnOpS(i->ARMin.NUnary.op),
+ showARMNeonUnOpSDataType(i->ARMin.NUnary.op),
+ showARMNeonDataSize(i));
+ ppARMNRS(i->ARMin.NUnaryS.dst);
+ vex_printf(", ");
+ ppARMNRS(i->ARMin.NUnaryS.src);
+ return;
+ case ARMin_NShift:
+ vex_printf("%s%s%s ",
+ showARMNeonShiftOp(i->ARMin.NShift.op),
+ showARMNeonShiftOpDataType(i->ARMin.NShift.op),
+ showARMNeonDataSize(i));
+ ppHRegARM(i->ARMin.NShift.dst);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.NShift.argL);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.NShift.argR);
+ return;
+ case ARMin_NDual:
+ vex_printf("%s%s%s ",
+ showARMNeonDualOp(i->ARMin.NDual.op),
+ showARMNeonDualOpDataType(i->ARMin.NDual.op),
+ showARMNeonDataSize(i));
+ ppHRegARM(i->ARMin.NDual.arg1);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.NDual.arg2);
+ return;
+ case ARMin_NBinary:
+ vex_printf("%s%s%s",
+ showARMNeonBinOp(i->ARMin.NBinary.op),
+ showARMNeonBinOpDataType(i->ARMin.NBinary.op),
+ showARMNeonDataSize(i));
+ vex_printf(" ");
+ ppHRegARM(i->ARMin.NBinary.dst);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.NBinary.argL);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.NBinary.argR);
+ return;
+ case ARMin_NeonImm:
+ vex_printf("vmov ");
+ ppHRegARM(i->ARMin.NeonImm.dst);
+ vex_printf(", ");
+ ppARMNImm(i->ARMin.NeonImm.imm);
+ return;
+ case ARMin_NCMovQ:
+ vex_printf("vmov%s ", showARMCondCode(i->ARMin.NCMovQ.cond));
+ ppHRegARM(i->ARMin.NCMovQ.dst);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.NCMovQ.src);
+ return;
+ case ARMin_Add32:
+ vex_printf("add32 ");
+ ppHRegARM(i->ARMin.Add32.rD);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.Add32.rN);
+ vex_printf(", ");
+ vex_printf("%d", i->ARMin.Add32.imm32);
+ return;
+ default:
unhandled:
vex_printf("ppARMInstr: unhandled case (tag %d)", (Int)i->tag);
vpanic("ppARMInstr(1)");
return;
- default:
- vpanic("ppARMInstr(2)");
}
}
return;
case ARMin_MFence:
return;
+ case ARMin_NLdStQ:
+ if (i->ARMin.NLdStQ.isLoad)
+ addHRegUse(u, HRmWrite, i->ARMin.NLdStQ.dQ);
+ else
+ addHRegUse(u, HRmRead, i->ARMin.NLdStQ.dQ);
+ addRegUsage_ARMAModeN(u, i->ARMin.NLdStQ.amode);
+ return;
+ case ARMin_NLdStD:
+ if (i->ARMin.NLdStD.isLoad)
+ addHRegUse(u, HRmWrite, i->ARMin.NLdStD.dD);
+ else
+ addHRegUse(u, HRmRead, i->ARMin.NLdStD.dD);
+ addRegUsage_ARMAModeN(u, i->ARMin.NLdStD.amode);
+ return;
+ case ARMin_NUnary:
+ addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst);
+ addHRegUse(u, HRmRead, i->ARMin.NUnary.src);
+ return;
+ case ARMin_NUnaryS:
+ addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg);
+ addHRegUse(u, HRmRead, i->ARMin.NUnaryS.src->reg);
+ return;
+ case ARMin_NShift:
+ addHRegUse(u, HRmWrite, i->ARMin.NShift.dst);
+ addHRegUse(u, HRmRead, i->ARMin.NShift.argL);
+ addHRegUse(u, HRmRead, i->ARMin.NShift.argR);
+ return;
+ case ARMin_NDual:
+ addHRegUse(u, HRmWrite, i->ARMin.NDual.arg1);
+ addHRegUse(u, HRmWrite, i->ARMin.NDual.arg2);
+ addHRegUse(u, HRmRead, i->ARMin.NDual.arg1);
+ addHRegUse(u, HRmRead, i->ARMin.NDual.arg2);
+ return;
+ case ARMin_NBinary:
+ addHRegUse(u, HRmWrite, i->ARMin.NBinary.dst);
+ /* TODO: sometimes dst is also being read! */
+ // XXX fix this
+ addHRegUse(u, HRmRead, i->ARMin.NBinary.argL);
+ addHRegUse(u, HRmRead, i->ARMin.NBinary.argR);
+ return;
+ case ARMin_NeonImm:
+ addHRegUse(u, HRmWrite, i->ARMin.NeonImm.dst);
+ return;
+ case ARMin_NCMovQ:
+ addHRegUse(u, HRmWrite, i->ARMin.NCMovQ.dst);
+ addHRegUse(u, HRmRead, i->ARMin.NCMovQ.dst);
+ addHRegUse(u, HRmRead, i->ARMin.NCMovQ.src);
+ return;
+ case ARMin_Add32:
+ addHRegUse(u, HRmWrite, i->ARMin.Add32.rD);
+ addHRegUse(u, HRmRead, i->ARMin.Add32.rN);
+ return;
unhandled:
default:
ppARMInstr(i);
return;
case ARMin_MFence:
return;
+ case ARMin_NLdStQ:
+ i->ARMin.NLdStQ.dQ = lookupHRegRemap(m, i->ARMin.NLdStQ.dQ);
+ mapRegs_ARMAModeN(m, i->ARMin.NLdStQ.amode);
+ return;
+ case ARMin_NLdStD:
+ i->ARMin.NLdStD.dD = lookupHRegRemap(m, i->ARMin.NLdStD.dD);
+ mapRegs_ARMAModeN(m, i->ARMin.NLdStD.amode);
+ return;
+ case ARMin_NUnary:
+ i->ARMin.NUnary.src = lookupHRegRemap(m, i->ARMin.NUnary.src);
+ i->ARMin.NUnary.dst = lookupHRegRemap(m, i->ARMin.NUnary.dst);
+ return;
+ case ARMin_NUnaryS:
+ i->ARMin.NUnaryS.src->reg
+ = lookupHRegRemap(m, i->ARMin.NUnaryS.src->reg);
+ i->ARMin.NUnaryS.dst->reg
+ = lookupHRegRemap(m, i->ARMin.NUnaryS.dst->reg);
+ return;
+ case ARMin_NShift:
+ i->ARMin.NShift.dst = lookupHRegRemap(m, i->ARMin.NShift.dst);
+ i->ARMin.NShift.argL = lookupHRegRemap(m, i->ARMin.NShift.argL);
+ i->ARMin.NShift.argR = lookupHRegRemap(m, i->ARMin.NShift.argR);
+ return;
+ case ARMin_NDual:
+ i->ARMin.NDual.arg1 = lookupHRegRemap(m, i->ARMin.NDual.arg1);
+ i->ARMin.NDual.arg2 = lookupHRegRemap(m, i->ARMin.NDual.arg2);
+ return;
+ case ARMin_NBinary:
+ i->ARMin.NBinary.argL = lookupHRegRemap(m, i->ARMin.NBinary.argL);
+ i->ARMin.NBinary.argR = lookupHRegRemap(m, i->ARMin.NBinary.argR);
+ i->ARMin.NBinary.dst = lookupHRegRemap(m, i->ARMin.NBinary.dst);
+ return;
+ case ARMin_NeonImm:
+ i->ARMin.NeonImm.dst = lookupHRegRemap(m, i->ARMin.NeonImm.dst);
+ return;
+ case ARMin_NCMovQ:
+ i->ARMin.NCMovQ.dst = lookupHRegRemap(m, i->ARMin.NCMovQ.dst);
+ i->ARMin.NCMovQ.src = lookupHRegRemap(m, i->ARMin.NCMovQ.src);
+ return;
+ case ARMin_Add32:
+ i->ARMin.Add32.rD = lookupHRegRemap(m, i->ARMin.Add32.rD);
+ i->ARMin.Add32.rN = lookupHRegRemap(m, i->ARMin.Add32.rN);
unhandled:
default:
ppARMInstr(i);
}
return;
}
- default:
+ case HRcVec128: {
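+ /* NEON loads/stores have no reg+offset addressing, so materialise
+    the spill-slot address (guest state pointer r8 + offsetB) into
+    the scratch register r12 first. */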
+ HReg r8 = hregARM_R8();
+ HReg r12 = hregARM_R12();
+ *i1 = ARMInstr_Add32(r12, r8, offsetB);
+ *i2 = ARMInstr_NLdStQ(False, rreg, mkARMAModeN_R(r12));
+ return;
+ }
+ default:
ppHRegClass(rclass);
vpanic("genSpill_ARM: unimplemented regclass");
}
}
return;
}
- default:
+ case HRcVec128: {
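+ /* Same trick as in genSpill_ARM: form the slot address in r12. */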
+ HReg r8 = hregARM_R8();
+ HReg r12 = hregARM_R12();
+ *i1 = ARMInstr_Add32(r12, r8, offsetB);
+ *i2 = ARMInstr_NLdStQ(True, rreg, mkARMAModeN_R(r12));
+ return;
+ }
+ default:
ppHRegClass(rclass);
vpanic("genReload_ARM: unimplemented regclass");
}
static inline UChar dregNo ( HReg r )
{
UInt n;
+ if (hregClass(r) != HRcFlt64)
+ ppHRegClass(hregClass(r));
vassert(hregClass(r) == HRcFlt64);
vassert(!hregIsVirtual(r));
n = hregNumber(r);
- vassert(n <= 15);
+ vassert(n <= 31);
return toUChar(n);
}
return toUChar(n);
}
+static inline UChar qregNo ( HReg r )
+{
+ UInt n;
+ vassert(hregClass(r) == HRcVec128);
+ vassert(!hregIsVirtual(r));
+ n = hregNumber(r);
+ vassert(n <= 15);
+ return toUChar(n);
+}
+
#define BITS4(zzb3,zzb2,zzb1,zzb0) \
(((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
#define X0000 BITS4(0,0,0,0)
*p++ = imm32;
}
#else
- /* Generate movw rD, #low16. Then, if the high 16 are
- nonzero, generate movt rD, #high16. */
- UInt lo16 = imm32 & 0xFFFF;
- UInt hi16 = (imm32 >> 16) & 0xFFFF;
- instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
- (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
- lo16 & 0xF);
- *p++ = instr;
- if (hi16 != 0) {
- instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
- (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
- hi16 & 0xF);
+ if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
+ /* Generate movw rD, #low16. Then, if the high 16 are
+ nonzero, generate movt rD, #high16. */
+ UInt lo16 = imm32 & 0xFFFF;
+ UInt hi16 = (imm32 >> 16) & 0xFFFF;
+ instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
+ (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
+ lo16 & 0xF);
*p++ = instr;
+ if (hi16 != 0) {
+ instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
+ (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
+ hi16 & 0xF);
+ *p++ = instr;
+ }
+ } else {
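+ /* No MOVW/MOVT before ARMv7: build the constant with a MOV of
+    one byte followed by up to three ORRs, each using a rotated
+    8-bit immediate. */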
+ UInt imm, rot;
+ UInt op = X1010;
+ UInt rN = 0;
+ if ((imm32 & 0xFF) || (imm32 == 0)) {
+ imm = imm32 & 0xFF;
+ rot = 0;
+ instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
+ *p++ = instr;
+ op = X1000;
+ rN = rD;
+ }
+ if (imm32 & 0xFF000000) {
+ imm = (imm32 >> 24) & 0xFF;
+ rot = 4;
+ instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
+ *p++ = instr;
+ op = X1000;
+ rN = rD;
+ }
+ if (imm32 & 0xFF0000) {
+ imm = (imm32 >> 16) & 0xFF;
+ rot = 8;
+ instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
+ *p++ = instr;
+ op = X1000;
+ rN = rD;
+ }
+ if (imm32 & 0xFF00) {
+ imm = (imm32 >> 8) & 0xFF;
+ rot = 12;
+ instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
+ *p++ = instr;
+ op = X1000;
+ rN = rD;
+ }
}
#endif
return p;
instr = skeletal_RI84(argR);
instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
(subopc << 1) & 0xF, rN, rD);
- if (i->ARMin.Alu.op == ARMalu_ADDS || i->ARMin.Alu.op == ARMalu_SUBS) {
+ if (i->ARMin.Alu.op == ARMalu_ADDS
+ || i->ARMin.Alu.op == ARMalu_SUBS) {
instr |= 1<<20; /* set the S bit */
}
*p++ = instr;
*p++ = 0xEE070F95; /* mcr 15,0,r0,c7,c5,4 (ISB) */
goto done;
}
+ case ARMin_NLdStQ: {
+ UInt regD = qregNo(i->ARMin.NLdStQ.dQ) << 1;
+ UInt regN, regM;
+ UInt D = regD >> 4;
+ UInt bL = i->ARMin.NLdStQ.isLoad ? 1 : 0;
+ UInt insn;
+ vassert(hregClass(i->ARMin.NLdStQ.dQ) == HRcVec128);
+ regD &= 0xF;
+ if (i->ARMin.NLdStQ.amode->tag == ARMamN_RR) {
+ regN = iregNo(i->ARMin.NLdStQ.amode->ARMamN.RR.rN);
+ regM = iregNo(i->ARMin.NLdStQ.amode->ARMamN.RR.rM);
+ } else {
+ regN = iregNo(i->ARMin.NLdStQ.amode->ARMamN.R.rN);
+ regM = 15;
+ }
+ insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
+ regN, regD, X1010, X1000, regM);
+ *p++ = insn;
+ goto done;
+ }
+ case ARMin_NLdStD: {
+ UInt regD = dregNo(i->ARMin.NLdStD.dD);
+ UInt regN, regM;
+ UInt D = regD >> 4;
+ UInt bL = i->ARMin.NLdStD.isLoad ? 1 : 0;
+ UInt insn;
+ vassert(hregClass(i->ARMin.NLdStD.dD) == HRcFlt64);
+ regD &= 0xF;
+ if (i->ARMin.NLdStD.amode->tag == ARMamN_RR) {
+ regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rN);
+ regM = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rM);
+ } else {
+ regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.R.rN);
+ regM = 15;
+ }
+ insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
+ regN, regD, X0111, X1000, regM);
+ *p++ = insn;
+ goto done;
+ }
+ case ARMin_NUnaryS: {
+ UInt Q = i->ARMin.NUnaryS.Q ? 1 : 0;
+ UInt regD, D;
+ UInt regM, M;
+ UInt size = i->ARMin.NUnaryS.size;
+ UInt insn;
+ UInt opc, opc1, opc2;
+ switch (i->ARMin.NUnaryS.op) {
+ case ARMneon_VDUP:
+ if (i->ARMin.NUnaryS.size >= 16)
+ goto bad;
+ if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Reg)
+ goto bad;
+ if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
+ goto bad;
+ regD = (hregClass(i->ARMin.NUnaryS.dst->reg) == HRcVec128)
+ ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1)
+ : dregNo(i->ARMin.NUnaryS.dst->reg);
+ regM = (hregClass(i->ARMin.NUnaryS.src->reg) == HRcVec128)
+ ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1)
+ : dregNo(i->ARMin.NUnaryS.src->reg);
+ D = regD >> 4;
+ M = regM >> 4;
+ regD &= 0xf;
+ regM &= 0xf;
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1),
+ (i->ARMin.NUnaryS.size & 0xf), regD,
+ X1100, BITS4(0,Q,M,0), regM);
+ *p++ = insn;
+ goto done;
+ case ARMneon_SETELEM:
+ regD = Q ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1) :
+ dregNo(i->ARMin.NUnaryS.dst->reg);
+ regM = iregNo(i->ARMin.NUnaryS.src->reg);
+ M = regM >> 4;
+ D = regD >> 4;
+ regM &= 0xF;
+ regD &= 0xF;
+ if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Scalar)
+ goto bad;
+ switch (size) {
+ case 0:
+ if (i->ARMin.NUnaryS.dst->index > 7)
+ goto bad;
+ opc = X1000 | i->ARMin.NUnaryS.dst->index;
+ break;
+ case 1:
+ if (i->ARMin.NUnaryS.dst->index > 3)
+ goto bad;
+ opc = X0001 | (i->ARMin.NUnaryS.dst->index << 1);
+ break;
+ case 2:
+ if (i->ARMin.NUnaryS.dst->index > 1)
+ goto bad;
+ opc = X0000 | (i->ARMin.NUnaryS.dst->index << 2);
+ break;
+ default:
+ goto bad;
+ }
+ opc1 = (opc >> 2) & 3;
+ opc2 = opc & 3;
+ insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),0),
+ regD, regM, X1011,
+ BITS4(D,(opc2 >> 1),(opc2 & 1),1), X0000);
+ *p++ = insn;
+ goto done;
+ case ARMneon_GETELEMU:
+ regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
+ dregNo(i->ARMin.NUnaryS.src->reg);
+ regD = iregNo(i->ARMin.NUnaryS.dst->reg);
+ M = regM >> 4;
+ D = regD >> 4;
+ regM &= 0xF;
+ regD &= 0xF;
+ if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
+ goto bad;
+ switch (size) {
+ case 0:
+ if (Q && i->ARMin.NUnaryS.src->index > 7) {
+ regM++;
+ i->ARMin.NUnaryS.src->index -= 8;
+ }
+ if (i->ARMin.NUnaryS.src->index > 7)
+ goto bad;
+ opc = X1000 | i->ARMin.NUnaryS.src->index;
+ break;
+ case 1:
+ if (Q && i->ARMin.NUnaryS.src->index > 3) {
+ regM++;
+ i->ARMin.NUnaryS.src->index -= 4;
+ }
+ if (i->ARMin.NUnaryS.src->index > 3)
+ goto bad;
+ opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
+ break;
+ case 2:
+ goto bad;
+ default:
+ goto bad;
+ }
+ opc1 = (opc >> 2) & 3;
+ opc2 = opc & 3;
+ insn = XXXXXXXX(0xE, X1110, BITS4(1,(opc1 >> 1),(opc1 & 1),1),
+ regM, regD, X1011,
+ BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
+ *p++ = insn;
+ goto done;
+ case ARMneon_GETELEMS:
+ regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
+ dregNo(i->ARMin.NUnaryS.src->reg);
+ regD = iregNo(i->ARMin.NUnaryS.dst->reg);
+ M = regM >> 4;
+ D = regD >> 4;
+ regM &= 0xF;
+ regD &= 0xF;
+ if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
+ goto bad;
+ switch (size) {
+ case 0:
+ if (Q && i->ARMin.NUnaryS.src->index > 7) {
+ regM++;
+ i->ARMin.NUnaryS.src->index -= 8;
+ }
+ if (i->ARMin.NUnaryS.src->index > 7)
+ goto bad;
+ opc = X1000 | i->ARMin.NUnaryS.src->index;
+ break;
+ case 1:
+ if (Q && i->ARMin.NUnaryS.src->index > 3) {
+ regM++;
+ i->ARMin.NUnaryS.src->index -= 4;
+ }
+ if (i->ARMin.NUnaryS.src->index > 3)
+ goto bad;
+ opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
+ break;
+ case 2:
+ if (Q && i->ARMin.NUnaryS.src->index > 1) {
+ regM++;
+ i->ARMin.NUnaryS.src->index -= 2;
+ }
+ if (i->ARMin.NUnaryS.src->index > 1)
+ goto bad;
+ opc = X0000 | (i->ARMin.NUnaryS.src->index << 2);
+ break;
+ default:
+ goto bad;
+ }
+ opc1 = (opc >> 2) & 3;
+ opc2 = opc & 3;
+ insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),1),
+ regM, regD, X1011,
+ BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
+ *p++ = insn;
+ goto done;
+ default:
+ goto bad;
+ }
+ }
+ case ARMin_NUnary: {
+ UInt Q = i->ARMin.NUnary.Q ? 1 : 0;
+ UInt regD = (hregClass(i->ARMin.NUnary.dst) == HRcVec128)
+ ? (qregNo(i->ARMin.NUnary.dst) << 1)
+ : dregNo(i->ARMin.NUnary.dst);
+ UInt regM, M;
+ UInt D = regD >> 4;
+ UInt sz1 = i->ARMin.NUnary.size >> 1;
+ UInt sz2 = i->ARMin.NUnary.size & 1;
+ UInt sz = i->ARMin.NUnary.size;
+ UInt insn;
+ UInt F = 0; /* TODO: floating point EQZ ??? */
+ if (i->ARMin.NUnary.op != ARMneon_DUP) {
+ regM = (hregClass(i->ARMin.NUnary.src) == HRcVec128)
+ ? (qregNo(i->ARMin.NUnary.src) << 1)
+ : dregNo(i->ARMin.NUnary.src);
+ M = regM >> 4;
+ } else {
+ regM = iregNo(i->ARMin.NUnary.src);
+ M = regM >> 4;
+ }
+ regD &= 0xF;
+ regM &= 0xF;
+ switch (i->ARMin.NUnary.op) {
+ case ARMneon_COPY: /* VMOV reg, reg */
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regM, regD, X0001,
+ BITS4(M,Q,M,1), regM);
+ break;
+ case ARMneon_COPYN: /* VMOVN regD, regQ */
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
+ regD, X0010, BITS4(0,0,M,0), regM);
+ break;
+ case ARMneon_COPYQNSS: /* VQMOVN regD, regQ */
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
+ regD, X0010, BITS4(1,0,M,0), regM);
+ break;
+ case ARMneon_COPYQNUS: /* VQMOVUN regD, regQ */
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
+ regD, X0010, BITS4(0,1,M,0), regM);
+ break;
+ case ARMneon_COPYQNUU: /* VQMOVN regD, regQ */
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
+ regD, X0010, BITS4(1,1,M,0), regM);
+ break;
+ case ARMneon_COPYLS: /* VMOVL regQ, regD */
+ if (sz >= 3)
+ goto bad;
+ insn = XXXXXXXX(0xF, X0010,
+ BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
+ BITS4((sz == 0) ? 1 : 0,0,0,0),
+ regD, X1010, BITS4(0,0,M,1), regM);
+ break;
+ case ARMneon_COPYLU: /* VMOVL regQ, regD */
+ if (sz >= 3)
+ goto bad;
+ insn = XXXXXXXX(0xF, X0011,
+ BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
+ BITS4((sz == 0) ? 1 : 0,0,0,0),
+ regD, X1010, BITS4(0,0,M,1), regM);
+ break;
+ case ARMneon_NOT: /* VMVN reg, reg*/
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
+ BITS4(1,Q,M,0), regM);
+ break;
+ case ARMneon_EQZ:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
+ regD, BITS4(0,F,0,1), BITS4(0,Q,M,0), regM);
+ break;
+ case ARMneon_CNT:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
+ BITS4(0,Q,M,0), regM);
+ break;
+ case ARMneon_CLZ:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
+ regD, X0100, BITS4(1,Q,M,0), regM);
+ break;
+ case ARMneon_CLS:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
+ regD, X0100, BITS4(0,Q,M,0), regM);
+ break;
+ case ARMneon_ABS:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
+ regD, X0011, BITS4(0,Q,M,0), regM);
+ break;
+ case ARMneon_DUP:
+ sz1 = i->ARMin.NUnary.size == 0 ? 1 : 0;
+ sz2 = i->ARMin.NUnary.size == 1 ? 1 : 0;
+ vassert(sz1 + sz2 < 2);
+ insn = XXXXXXXX(0xE, X1110, BITS4(1, sz1, Q, 0), regD, regM,
+ X1011, BITS4(D,0,sz2,1), X0000);
+ break;
+ case ARMneon_REV16:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
+ regD, BITS4(0,0,0,1), BITS4(0,Q,M,0), regM);
+ break;
+ case ARMneon_REV32:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
+ regD, BITS4(0,0,0,0), BITS4(1,Q,M,0), regM);
+ break;
+ case ARMneon_REV64:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
+ regD, BITS4(0,0,0,0), BITS4(0,Q,M,0), regM);
+ break;
+ case ARMneon_PADDLU:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
+ regD, X0010, BITS4(1,Q,M,0), regM);
+ break;
+ case ARMneon_PADDLS:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
+ regD, X0010, BITS4(0,Q,M,0), regM);
+ break;
+ case ARMneon_VQSHLNUU:
+ insn = XXXXXXXX(0xF, X0011,
+ (1 << 3) | (D << 2) | ((sz >> 4) & 3),
+ sz & 0xf, regD, X0111,
+ BITS4(sz >> 6,Q,M,1), regM);
+ break;
+ case ARMneon_VQSHLNSS:
+ insn = XXXXXXXX(0xF, X0010,
+ (1 << 3) | (D << 2) | ((sz >> 4) & 3),
+ sz & 0xf, regD, X0111,
+ BITS4(sz >> 6,Q,M,1), regM);
+ break;
+ case ARMneon_VQSHLNUS:
+ insn = XXXXXXXX(0xF, X0011,
+ (1 << 3) | (D << 2) | ((sz >> 4) & 3),
+ sz & 0xf, regD, X0110,
+ BITS4(sz >> 6,Q,M,1), regM);
+ break;
+ case ARMneon_VCVTFtoS:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
+ BITS4(0,Q,M,0), regM);
+ break;
+ case ARMneon_VCVTFtoU:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
+ BITS4(1,Q,M,0), regM);
+ break;
+ case ARMneon_VCVTStoF:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
+ BITS4(0,Q,M,0), regM);
+ break;
+ case ARMneon_VCVTUtoF:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
+ BITS4(1,Q,M,0), regM);
+ break;
+ case ARMneon_VCVTFtoFixedU:
+ sz1 = (sz >> 5) & 1;
+ sz2 = (sz >> 4) & 1;
+ sz &= 0xf;
+ insn = XXXXXXXX(0xF, X0011,
+ BITS4(1,D,sz1,sz2), sz, regD, X1111,
+ BITS4(0,Q,M,1), regM);
+ break;
+ case ARMneon_VCVTFtoFixedS:
+ sz1 = (sz >> 5) & 1;
+ sz2 = (sz >> 4) & 1;
+ sz &= 0xf;
+ insn = XXXXXXXX(0xF, X0010,
+ BITS4(1,D,sz1,sz2), sz, regD, X1111,
+ BITS4(0,Q,M,1), regM);
+ break;
+ case ARMneon_VCVTFixedUtoF:
+ sz1 = (sz >> 5) & 1;
+ sz2 = (sz >> 4) & 1;
+ sz &= 0xf;
+ insn = XXXXXXXX(0xF, X0011,
+ BITS4(1,D,sz1,sz2), sz, regD, X1110,
+ BITS4(0,Q,M,1), regM);
+ break;
+ case ARMneon_VCVTFixedStoF:
+ sz1 = (sz >> 5) & 1;
+ sz2 = (sz >> 4) & 1;
+ sz &= 0xf;
+ insn = XXXXXXXX(0xF, X0010,
+ BITS4(1,D,sz1,sz2), sz, regD, X1110,
+ BITS4(0,Q,M,1), regM);
+ break;
+ case ARMneon_VCVTF32toF16:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0110,
+ BITS4(0,0,M,0), regM);
+ break;
+ case ARMneon_VCVTF16toF32:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0111,
+ BITS4(0,0,M,0), regM);
+ break;
+ case ARMneon_VRECIP:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
+ BITS4(0,Q,M,0), regM);
+ break;
+ case ARMneon_VRECIPF:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
+ BITS4(0,Q,M,0), regM);
+ break;
+ case ARMneon_VABSFP:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
+ BITS4(0,Q,M,0), regM);
+ break;
+ case ARMneon_VRSQRTEFP:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
+ BITS4(1,Q,M,0), regM);
+ break;
+ case ARMneon_VRSQRTE:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
+ BITS4(1,Q,M,0), regM);
+ break;
+ case ARMneon_VNEGF:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
+ BITS4(1,Q,M,0), regM);
+ break;
+ default:
+ goto bad;
+ }
+ *p++ = insn;
+ goto done;
+ }
+ case ARMin_NDual: {
+ UInt Q = i->ARMin.NDual.Q ? 1 : 0;
+ UInt regD = (hregClass(i->ARMin.NDual.arg1) == HRcVec128)
+ ? (qregNo(i->ARMin.NDual.arg1) << 1)
+ : dregNo(i->ARMin.NDual.arg1);
+ UInt regM = (hregClass(i->ARMin.NDual.arg2) == HRcVec128)
+ ? (qregNo(i->ARMin.NDual.arg2) << 1)
+ : dregNo(i->ARMin.NDual.arg2);
+ UInt D = regD >> 4;
+ UInt M = regM >> 4;
+ UInt sz1 = i->ARMin.NDual.size >> 1;
+ UInt sz2 = i->ARMin.NDual.size & 1;
+ UInt insn;
+ regD &= 0xF;
+ regM &= 0xF;
+ switch (i->ARMin.NDual.op) {
+ case ARMneon_TRN: /* VTRN reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
+ regD, X0000, BITS4(1,Q,M,0), regM);
+ break;
+ case ARMneon_ZIP: /* VZIP reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
+ regD, X0001, BITS4(1,Q,M,0), regM);
+ break;
+ case ARMneon_UZP: /* VUZP reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
+ regD, X0001, BITS4(0,Q,M,0), regM);
+ break;
+ default:
+ goto bad;
+ }
+ *p++ = insn;
+ goto done;
+ }
+ case ARMin_NBinary: {
+ UInt Q = i->ARMin.NBinary.Q ? 1 : 0;
+ UInt regD = (hregClass(i->ARMin.NBinary.dst) == HRcVec128)
+ ? (qregNo(i->ARMin.NBinary.dst) << 1)
+ : dregNo(i->ARMin.NBinary.dst);
+ UInt regN = (hregClass(i->ARMin.NBinary.argL) == HRcVec128)
+ ? (qregNo(i->ARMin.NBinary.argL) << 1)
+ : dregNo(i->ARMin.NBinary.argL);
+ UInt regM = (hregClass(i->ARMin.NBinary.argR) == HRcVec128)
+ ? (qregNo(i->ARMin.NBinary.argR) << 1)
+ : dregNo(i->ARMin.NBinary.argR);
+ UInt sz1 = i->ARMin.NBinary.size >> 1;
+ UInt sz2 = i->ARMin.NBinary.size & 1;
+ UInt D = regD >> 4;
+ UInt N = regN >> 4;
+ UInt M = regM >> 4;
+ UInt insn;
+ regD &= 0xF;
+ regM &= 0xF;
+ regN &= 0xF;
+ switch (i->ARMin.NBinary.op) {
+ case ARMneon_VAND: /* VAND reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X0001,
+ BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VORR: /* VORR reg, reg, reg*/
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X0001,
+ BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VXOR: /* VEOR reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X0001,
+ BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VADD: /* VADD reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X1000, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VSUB: /* VSUB reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X1000, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VMINU: /* VMIN.Uxx reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X0110, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VMINS: /* VMIN.Sxx reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X0110, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VMAXU: /* VMAX.Uxx reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X0110, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VMAXS: /* VMAX.Sxx reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X0110, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VRHADDS: /* VRHADD.Sxx reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X0001, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VRHADDU: /* VRHADD.Uxx reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X0001, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VQADDU: /* VQADD unsigned reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X0000, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VQADDS: /* VQADD signed reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X0000, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VQSUBU: /* VQSUB unsigned reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X0010, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VQSUBS: /* VQSUB signed reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X0010, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VCGTU: /* VCGT unsigned reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X0011, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VCGTS: /* VCGT signed reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X0011, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VCGEU: /* VCGE unsigned reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X0011, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VCGES: /* VCGE signed reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X0011, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VCEQ: /* VCEQ reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X1000, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VEXT: /* VEXT.8 reg, reg, #imm4*/
+ if (i->ARMin.NBinary.size >= 16)
+ goto bad;
+ insn = XXXXXXXX(0xF, X0010, BITS4(1,D,1,1), regN, regD,
+ i->ARMin.NBinary.size & 0xf, BITS4(N,Q,M,0),
+ regM);
+ break;
+ case ARMneon_VMUL:
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X1001, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VMULLU:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,sz1,sz2), regN, regD,
+ X1100, BITS4(N,0,M,0), regM);
+ break;
+ case ARMneon_VMULLS:
+ insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
+ X1100, BITS4(N,0,M,0), regM);
+ break;
+ case ARMneon_VMULP:
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X1001, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VMULFP:
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
+ X1101, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VMULLP:
+ insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
+ X1110, BITS4(N,0,M,0), regM);
+ break;
+ case ARMneon_VQDMULH:
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X1011, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VQRDMULH:
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X1011, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VQDMULL:
+ insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
+ X1101, BITS4(N,0,M,0), regM);
+ break;
+ case ARMneon_VTBL:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), regN, regD,
+ X1000, BITS4(N,0,M,0), regM);
+ break;
+ case ARMneon_VPADD:
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X1011, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VPADDFP:
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
+ X1101, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VPMINU:
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X1010, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VPMINS:
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X1010, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VPMAXU:
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X1010, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VPMAXS:
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X1010, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VADDFP: /* VADD reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
+ X1101, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VSUBFP: /* VADD reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
+ X1101, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VABDFP: /* VABD reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
+ X1101, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VMINF:
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
+ X1111, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VMAXF:
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
+ X1111, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VPMINF:
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
+ X1111, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VPMAXF:
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
+ X1111, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VRECPS:
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1111,
+ BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VCGTF:
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD, X1110,
+ BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VCGEF:
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X1110,
+ BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VCEQF:
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1110,
+ BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VRSQRTS:
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X1111,
+ BITS4(N,Q,M,1), regM);
+ break;
+ default:
+ goto bad;
+ }
+ *p++ = insn;
+ goto done;
+ }
+ case ARMin_NShift: {
+ UInt Q = i->ARMin.NShift.Q ? 1 : 0;
+ UInt regD = (hregClass(i->ARMin.NShift.dst) == HRcVec128)
+ ? (qregNo(i->ARMin.NShift.dst) << 1)
+ : dregNo(i->ARMin.NShift.dst);
+ UInt regM = (hregClass(i->ARMin.NShift.argL) == HRcVec128)
+ ? (qregNo(i->ARMin.NShift.argL) << 1)
+ : dregNo(i->ARMin.NShift.argL);
+ UInt regN = (hregClass(i->ARMin.NShift.argR) == HRcVec128)
+ ? (qregNo(i->ARMin.NShift.argR) << 1)
+ : dregNo(i->ARMin.NShift.argR);
+ UInt sz1 = i->ARMin.NShift.size >> 1;
+ UInt sz2 = i->ARMin.NShift.size & 1;
+ UInt D = regD >> 4;
+ UInt N = regN >> 4;
+ UInt M = regM >> 4;
+ UInt insn;
+ regD &= 0xF;
+ regM &= 0xF;
+ regN &= 0xF;
+ switch (i->ARMin.NShift.op) {
+ case ARMneon_VSHL:
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X0100, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VSAL:
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X0100, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VQSHL:
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X0100, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VQSAL:
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X0100, BITS4(N,Q,M,1), regM);
+ break;
+ default:
+ goto bad;
+ }
+ *p++ = insn;
+ goto done;
+ }
+ case ARMin_NeonImm: {
+ UInt Q = (hregClass(i->ARMin.NeonImm.dst) == HRcVec128) ? 1 : 0;
+ UInt regD = Q ? (qregNo(i->ARMin.NeonImm.dst) << 1) :
+ dregNo(i->ARMin.NeonImm.dst);
+ UInt D = regD >> 4;
+ UInt imm = i->ARMin.NeonImm.imm->imm8;
+ UInt tp = i->ARMin.NeonImm.imm->type;
+ UInt j = imm >> 7;
+ UInt imm3 = (imm >> 4) & 0x7;
+ UInt imm4 = imm & 0xF;
+ UInt cmode, op;
+ UInt insn;
+ regD &= 0xF;
+ if (tp == 9)
+ op = 1;
+ else
+ op = 0;
+ switch (tp) {
+ case 0:
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ cmode = tp << 1;
+ break;
+ case 9:
+ case 6:
+ cmode = 14;
+ break;
+ case 7:
+ cmode = 12;
+ break;
+ case 8:
+ cmode = 13;
+ break;
+ case 10:
+ cmode = 15;
+ break;
+ default:
+ vpanic("ARMin_NeonImm");
+
+ }
+ insn = XXXXXXXX(0xF, BITS4(0,0,1,j), BITS4(1,D,0,0), imm3, regD,
+ cmode, BITS4(0,Q,op,1), imm4);
+ *p++ = insn;
+ goto done;
+ }
+ case ARMin_NCMovQ: {
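+ /* NEON data-processing insns can't be made conditional, so
+    branch over the vmov when the condition does not hold. */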
+ UInt cc = (UInt)i->ARMin.NCMovQ.cond;
+ UInt qM = qregNo(i->ARMin.NCMovQ.src) << 1;
+ UInt qD = qregNo(i->ARMin.NCMovQ.dst) << 1;
+ UInt vM = qM & 0xF;
+ UInt vD = qD & 0xF;
+ UInt M = (qM >> 4) & 1;
+ UInt D = (qD >> 4) & 1;
+ vassert(cc < 16 && cc != ARMcc_AL && cc != ARMcc_NV);
+ /* b!cc here+8: !cc A00 0000 */
+ UInt insn = XXXXXXXX(cc ^ 1, 0xA, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0);
+ *p++ = insn;
+ /* vmov qD, qM */
+ insn = XXXXXXXX(0xF, 0x2, BITS4(0,D,1,0),
+ vM, vD, BITS4(0,0,0,1), BITS4(M,1,M,1), vM);
+ *p++ = insn;
+ goto done;
+ }
+ case ARMin_Add32: {
+ UInt regD = iregNo(i->ARMin.Add32.rD);
+ UInt regN = iregNo(i->ARMin.Add32.rN);
+ UInt imm32 = i->ARMin.Add32.imm32;
+ vassert(regD != regN);
+ /* MOV regD, imm32 */
+ p = imm32_to_iregNo((UInt *)p, regD, imm32);
+ /* ADD regD, regN, regD */
+ UInt insn = XXXXXXXX(0xE, 0, X1000, regN, regD, 0, 0, regD);
+ *p++ = insn;
+ goto done;
+ }
+ /* ... */
default:
goto bad;
}
ppARMInstr(i);
vpanic("emit_ARMInstr");
/*NOTREACHED*/
-
+
done:
vassert(((UChar*)p) - &buf[0] <= 32);
return ((UChar*)p) - &buf[0];
Copyright (C) 2004-2010 OpenWorks LLP
info@open-works.net
+ Copyright (C) 2010-2010 Dmitry Zhurikhin
+ zhur@ispras.ru
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
+#include "ir_match.h"
#include "main_util.h"
#include "main_globals.h"
ppARMInstr(instr);
vex_printf("\n");
}
+#if 0
+ if (instr->tag == ARMin_NUnary || instr->tag == ARMin_NBinary
+ || instr->tag == ARMin_NUnaryS || instr->tag == ARMin_NBinaryS
+ || instr->tag == ARMin_NDual || instr->tag == ARMin_NShift) {
+ ppARMInstr(instr);
+ vex_printf("\n");
+ }
+#endif
}
static HReg newVRegI ( ISelEnv* env )
return reg;
}
+static HReg newVRegV ( ISelEnv* env )
+{
+ HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
+ env->vreg_ctr++;
+ return reg;
+}
+
+/* These are duplicated in guest_arm_toIR.c */
+static IRExpr* unop ( IROp op, IRExpr* a )
+{
+ return IRExpr_Unop(op, a);
+}
+
+static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
+{
+ return IRExpr_Binop(op, a1, a2);
+}
+
+static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
+{
+ return IRExpr_Triop(op, a1, a2, a3);
+}
+
+static IRExpr* bind ( Int binder )
+{
+ return IRExpr_Binder(binder);
+}
+
+static IRExpr* mkU64 ( ULong i )
+{
+ return IRExpr_Const(IRConst_U64(i));
+}
+
+static IRExpr* mkU32 ( UInt i )
+{
+ return IRExpr_Const(IRConst_U32(i));
+}
+
+static IRExpr* mkU8 ( UInt i )
+{
+ vassert(i < 256);
+ return IRExpr_Const(IRConst_U8( (UChar)i ));
+}
+
+static IRExpr* mkU128 ( ULong i )
+{
+ return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
+}
/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations ---*/
static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e );
static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e );
+static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e );
+static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e );
+
static ARMRI84* iselIntExpr_RI84_wrk
( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
static ARMRI84* iselIntExpr_RI84
static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
+static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e );
+static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e );
+
+static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e );
+static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e );
/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers ---*/
}
+/* -------------------- AModeN -------------------- */
+
+static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
+{
+ return iselIntExpr_AModeN_wrk(env, e);
+}
+
+static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
+{
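+ /* For now just evaluate the address into a register and use the
+    plain [rN] form; the [rN], rM variant is not generated here. */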
+ HReg reg = iselIntExpr_R(env, e);
+ return mkARMAModeN_R(reg);
+}
+
/* --------------------- RI84 --------------------- */
static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e )
{
ARMCondCode cc = iselCondCode_wrk(env,e);
- vassert(cc != ARMcc_AL && cc != ARMcc_NV);
+ vassert(cc != ARMcc_NV);
return cc;
}
}
}
+ /* --- CasCmpEQ* --- */
+ /* Ist_Cas supplies a dummy expected value to compare against, so the
+ comparison always succeeds. */
+ if (e->tag == Iex_Binop
+ && (e->Iex.Binop.op == Iop_CasCmpEQ32
+ || e->Iex.Binop.op == Iop_CasCmpEQ16
+ || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
+ return ARMcc_AL;
+ }
+
ppIRExpr(e);
vpanic("iselCondCode");
}
{
IRType ty = typeOfIRExpr(env->type_env,e);
vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
+// vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
switch (e->tag) {
HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
HReg dst = newVRegI(env);
- addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL, ARMRI84_R(argR)));
+ addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL,
+ ARMRI84_R(argR)));
addInstr(env, mk_iMOVds_RR(dst, argL));
addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR)));
return dst;
return dst;
}
+ if (e->Iex.Binop.op == Iop_GetElem8x8
+ || e->Iex.Binop.op == Iop_GetElem16x4
+ || e->Iex.Binop.op == Iop_GetElem32x2) {
+ HReg res = newVRegI(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ UInt index, size;
+ if (e->Iex.Binop.arg2->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
+ vpanic("ARM target supports GetElem with constant "
+ "second argument only\n");
+ }
+ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
+ switch (e->Iex.Binop.op) {
+ case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
+ case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
+ case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
+ mkARMNRS(ARMNRS_Reg, res, 0),
+ mkARMNRS(ARMNRS_Scalar, arg, index),
+ size, False));
+ return res;
+ }
+
+ if (e->Iex.Binop.op == Iop_GetElem8x16
+ || e->Iex.Binop.op == Iop_GetElem16x8
+ || e->Iex.Binop.op == Iop_GetElem32x4) {
+ HReg res = newVRegI(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
+ UInt index, size;
+ if (e->Iex.Binop.arg2->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
+ vpanic("ARM target supports GetElem with constant "
+ "second argument only\n");
+ }
+ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
+ switch (e->Iex.Binop.op) {
+ case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
+ case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
+ case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
+ mkARMNRS(ARMNRS_Reg, res, 0),
+ mkARMNRS(ARMNRS_Scalar, arg, index),
+ size, True));
+ return res;
+ }
+
break;
}
iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
return rLo; /* similar stupid comment to the above ... */
}
+ case Iop_64to8: {
+ HReg rHi, rLo;
+ if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ HReg tHi = newVRegI(env);
+ HReg tLo = newVRegI(env);
+ HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
+ rHi = tHi;
+ rLo = tLo;
+ } else {
+ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
+ }
+ return rLo;
+ }
//zz case Iop_16HIto8:
//zz case Iop_32HIto16: {
//zz HReg dst = newVRegI(env);
/* These are no-ops. */
return iselIntExpr_R(env, e->Iex.Unop.arg);
- default:
+ default:
break;
}
break;
case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
- default: vpanic("iselIntExpr_R.Iex_Const(arm)");
+ default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm)");
}
addInstr(env, ARMInstr_Imm32(dst, u));
return dst;
/* read 64-bit IRTemp */
if (e->tag == Iex_RdTmp) {
- lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
+ if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ HReg tHi = newVRegI(env);
+ HReg tLo = newVRegI(env);
+ HReg tmp = iselNeon64Expr(env, e);
+ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
+ *rHi = tHi;
+ *rLo = tLo;
+ } else {
+ lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
+ }
return;
}
/* zero = 0 */
addInstr(env, ARMInstr_Imm32(zero, 0));
/* tLo = 0 - yLo, and set carry */
- addInstr(env, ARMInstr_Alu(ARMalu_SUBS, tLo, zero, ARMRI84_R(yLo)));
+ addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
+ tLo, zero, ARMRI84_R(yLo)));
/* tHi = 0 - yHi - carry */
- addInstr(env, ARMInstr_Alu(ARMalu_SBC, tHi, zero, ARMRI84_R(yHi)));
+ addInstr(env, ARMInstr_Alu(ARMalu_SBC,
+ tHi, zero, ARMRI84_R(yHi)));
/* So now we have tHi:tLo = -arg. To finish off, or 'arg'
back in, so as to give the final result
tHi:tLo = arg | -arg. */
return;
}
+ /* Sometimes it is convenient to call iselInt64Expr even when NEON
+ support is available (e.g. in do_helper_call, where 64-bit
+ arguments are needed as two 32-bit registers). */
+ if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ HReg tHi = newVRegI(env);
+ HReg tLo = newVRegI(env);
+ HReg tmp = iselNeon64Expr(env, e);
+ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
ppIRExpr(e);
vpanic("iselInt64Expr");
}
/*---------------------------------------------------------*/
-/*--- ISEL: Floating point expressions (64 bit) ---*/
+/*--- ISEL: Vector (NEON) expressions (64 or 128 bit) ---*/
/*---------------------------------------------------------*/
-/* Compute a 64-bit floating point value into a register, the identity
- of which is returned. As with iselIntExpr_R, the reg may be either
- real or virtual; in any case it must not be changed by subsequent
- code emitted by the caller. */
-
-static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
+static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e )
{
- HReg r = iselDblExpr_wrk( env, e );
-# if 0
- vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
-# endif
+ HReg r = iselNeon64Expr_wrk( env, e );
vassert(hregClass(r) == HRcFlt64);
vassert(hregIsVirtual(r));
return r;
}
/* DO NOT CALL THIS DIRECTLY */
-static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
+static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e )
{
- IRType ty = typeOfIRExpr(env->type_env,e);
+ IRType ty = typeOfIRExpr(env->type_env, e);
+ MatchInfo mi;
vassert(e);
- vassert(ty == Ity_F64);
+ vassert(ty == Ity_I64);
if (e->tag == Iex_RdTmp) {
return lookupIRTemp(env, e->Iex.RdTmp.tmp);
}
if (e->tag == Iex_Const) {
- /* Just handle the zero case. */
- IRConst* con = e->Iex.Const.con;
- if (con->tag == Ico_F64i && con->Ico.F64i == 0ULL) {
- HReg z32 = newVRegI(env);
- HReg dst = newVRegD(env);
- addInstr(env, ARMInstr_Imm32(z32, 0));
- addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, z32, z32));
- return dst;
- }
+ HReg rLo, rHi;
+ HReg res = newVRegD(env);
+ iselInt64Expr(&rHi, &rLo, env, e);
+ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
+ return res;
}
+ /* 64-bit load */
if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
- ARMAModeV* am;
HReg res = newVRegD(env);
- vassert(e->Iex.Load.ty == Ity_F64);
- am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
- addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
+ ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
+ vassert(ty == Ity_I64);
+ addInstr(env, ARMInstr_NLdStD(True, res, am));
return res;
}
+ /* 64-bit GET */
if (e->tag == Iex_Get) {
- // XXX This won't work if offset > 1020 or is not 0 % 4.
- // In which case we'll have to generate more longwinded code.
- ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
- HReg res = newVRegD(env);
- addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
+ HReg addr = newVRegI(env);
+ HReg res = newVRegD(env);
+ vassert(ty == Ity_I64);
+ addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
+ addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
return res;
}
- if (e->tag == Iex_Unop) {
- switch (e->Iex.Unop.op) {
- case Iop_ReinterpI64asF64: {
- HReg srcHi, srcLo;
- HReg dst = newVRegD(env);
- iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
- addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
- return dst;
- }
- case Iop_NegF64: {
- HReg src = iselDblExpr(env, e->Iex.Unop.arg);
- HReg dst = newVRegD(env);
- addInstr(env, ARMInstr_VUnaryD(ARMvfpu_NEG, dst, src));
- return dst;
+ /* --------- BINARY ops --------- */
+ if (e->tag == Iex_Binop) {
+ switch (e->Iex.Binop.op) {
+
+ /* 32 x 32 -> 64 multiply */
+ case Iop_MullS32:
+ case Iop_MullU32: {
+ HReg rLo, rHi;
+ HReg res = newVRegD(env);
+ iselInt64Expr(&rHi, &rLo, env, e);
+ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
+ return res;
}
- case Iop_AbsF64: {
- HReg src = iselDblExpr(env, e->Iex.Unop.arg);
- HReg dst = newVRegD(env);
- addInstr(env, ARMInstr_VUnaryD(ARMvfpu_ABS, dst, src));
- return dst;
+
+ case Iop_And64: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
+ res, argL, argR, 4, False));
+ return res;
}
- case Iop_F32toF64: {
- HReg src = iselFltExpr(env, e->Iex.Unop.arg);
- HReg dst = newVRegD(env);
- addInstr(env, ARMInstr_VCvtSD(True/*sToD*/, dst, src));
- return dst;
+ case Iop_Or64: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
+ res, argL, argR, 4, False));
+ return res;
}
- case Iop_I32UtoF64:
- case Iop_I32StoF64: {
- HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
- HReg f32 = newVRegF(env);
- HReg dst = newVRegD(env);
- Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
- /* VMOV f32, src */
- addInstr(env, ARMInstr_VXferS(True/*toS*/, f32, src));
- /* FSITOD dst, f32 */
- addInstr(env, ARMInstr_VCvtID(True/*iToD*/, syned,
- dst, f32));
- return dst;
+ case Iop_Xor64: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
+ res, argL, argR, 4, False));
+ return res;
}
- default:
- break;
- }
- }
- if (e->tag == Iex_Binop) {
- switch (e->Iex.Binop.op) {
- case Iop_SqrtF64: {
- /* first arg is rounding mode; we ignore it. */
- HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
- HReg dst = newVRegD(env);
- addInstr(env, ARMInstr_VUnaryD(ARMvfpu_SQRT, dst, src));
- return dst;
+ /* 32HLto64(e1,e2) */
+ case Iop_32HLto64: {
+ HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ HReg res = newVRegD(env);
+ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
+ return res;
}
- default:
- break;
- }
- }
- if (e->tag == Iex_Triop) {
- switch (e->Iex.Triop.op) {
- case Iop_DivF64:
- case Iop_MulF64:
- case Iop_AddF64:
- case Iop_SubF64: {
- ARMVfpOp op = 0; /*INVALID*/
- HReg argL = iselDblExpr(env, e->Iex.Triop.arg2);
- HReg argR = iselDblExpr(env, e->Iex.Triop.arg3);
- HReg dst = newVRegD(env);
- switch (e->Iex.Triop.op) {
- case Iop_DivF64: op = ARMvfp_DIV; break;
- case Iop_MulF64: op = ARMvfp_MUL; break;
- case Iop_AddF64: op = ARMvfp_ADD; break;
- case Iop_SubF64: op = ARMvfp_SUB; break;
+ case Iop_Add8x8:
+ case Iop_Add16x4:
+ case Iop_Add32x2:
+ case Iop_Add64: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Add8x8: size = 0; break;
+ case Iop_Add16x4: size = 1; break;
+ case Iop_Add32x2: size = 2; break;
+ case Iop_Add64: size = 3; break;
default: vassert(0);
}
- addInstr(env, ARMInstr_VAluD(op, dst, argL, argR));
- return dst;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
+ res, argL, argR, size, False));
+ return res;
}
- default:
- break;
- }
- }
-
- if (e->tag == Iex_Mux0X) {
- if (ty == Ity_F64
- && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
- HReg r8;
- HReg rX = iselDblExpr(env, e->Iex.Mux0X.exprX);
- HReg r0 = iselDblExpr(env, e->Iex.Mux0X.expr0);
- HReg dst = newVRegD(env);
- addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, rX));
- r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
- addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
- ARMRI84_I84(0xFF,0)));
- addInstr(env, ARMInstr_VCMovD(ARMcc_EQ, dst, r0));
- return dst;
- }
- }
-
- ppIRExpr(e);
- vpanic("iselDblExpr_wrk");
-}
-
-
-/*---------------------------------------------------------*/
-/*--- ISEL: Floating point expressions (32 bit) ---*/
-/*---------------------------------------------------------*/
-
-/* Compute a 64-bit floating point value into a register, the identity
- of which is returned. As with iselIntExpr_R, the reg may be either
- real or virtual; in any case it must not be changed by subsequent
- code emitted by the caller. */
-
-static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
-{
- HReg r = iselFltExpr_wrk( env, e );
-# if 0
- vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
-# endif
- vassert(hregClass(r) == HRcFlt32);
- vassert(hregIsVirtual(r));
- return r;
-}
-
-/* DO NOT CALL THIS DIRECTLY */
-static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
-{
- IRType ty = typeOfIRExpr(env->type_env,e);
- vassert(e);
- vassert(ty == Ity_F32);
-
- if (e->tag == Iex_RdTmp) {
- return lookupIRTemp(env, e->Iex.RdTmp.tmp);
- }
-
- if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
- ARMAModeV* am;
- HReg res = newVRegF(env);
- vassert(e->Iex.Load.ty == Ity_F32);
- am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
- addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
- return res;
- }
-
- if (e->tag == Iex_Get) {
- // XXX This won't work if offset > 1020 or is not 0 % 4.
- // In which case we'll have to generate more longwinded code.
- ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
- HReg res = newVRegF(env);
- addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
- return res;
- }
-
- if (e->tag == Iex_Unop) {
+ case Iop_Add32Fx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Recps32Fx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Rsqrts32Fx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_InterleaveOddLanes8x8:
+ case Iop_InterleaveOddLanes16x4:
+ case Iop_InterleaveLO32x2:
+ case Iop_InterleaveEvenLanes8x8:
+ case Iop_InterleaveEvenLanes16x4:
+ case Iop_InterleaveHI32x2: {
+ HReg tmp = newVRegD(env);
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ UInt is_lo;
+ switch (e->Iex.Binop.op) {
+ case Iop_InterleaveOddLanes8x8: is_lo = 1; size = 0; break;
+ case Iop_InterleaveEvenLanes8x8: is_lo = 0; size = 0; break;
+ case Iop_InterleaveOddLanes16x4: is_lo = 1; size = 1; break;
+ case Iop_InterleaveEvenLanes16x4: is_lo = 0; size = 1; break;
+ case Iop_InterleaveLO32x2: is_lo = 1; size = 2; break;
+ case Iop_InterleaveHI32x2: is_lo = 0; size = 2; break;
+ default: vassert(0);
+ }
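+ /* There is no single D-register instruction for this, so copy both
+ operands and let VTRN gather the wanted odd/even lanes into the
+ destination copy. */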
+ if (is_lo) {
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ tmp, argL, 4, False));
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ res, argR, 4, False));
+ addInstr(env, ARMInstr_NDual(ARMneon_TRN,
+ res, tmp, size, False));
+ } else {
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ tmp, argR, 4, False));
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ res, argL, 4, False));
+ addInstr(env, ARMInstr_NDual(ARMneon_TRN,
+ tmp, res, size, False));
+ }
+ return res;
+ }
+ case Iop_InterleaveHI8x8:
+ case Iop_InterleaveHI16x4:
+ case Iop_InterleaveLO8x8:
+ case Iop_InterleaveLO16x4: {
+ HReg tmp = newVRegD(env);
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ UInt is_lo;
+ switch (e->Iex.Binop.op) {
+ case Iop_InterleaveHI8x8: is_lo = 1; size = 0; break;
+ case Iop_InterleaveLO8x8: is_lo = 0; size = 0; break;
+ case Iop_InterleaveHI16x4: is_lo = 1; size = 1; break;
+ case Iop_InterleaveLO16x4: is_lo = 0; size = 1; break;
+ default: vassert(0);
+ }
+ if (is_lo) {
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ tmp, argL, 4, False));
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ res, argR, 4, False));
+ addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
+ res, tmp, size, False));
+ } else {
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ tmp, argR, 4, False));
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ res, argL, 4, False));
+ addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
+ tmp, res, size, False));
+ }
+ return res;
+ }
+ case Iop_CatOddLanes8x8:
+ case Iop_CatOddLanes16x4:
+ case Iop_CatEvenLanes8x8:
+ case Iop_CatEvenLanes16x4: {
+ HReg tmp = newVRegD(env);
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ UInt is_lo;
+ switch (e->Iex.Binop.op) {
+ case Iop_CatOddLanes8x8: is_lo = 1; size = 0; break;
+ case Iop_CatEvenLanes8x8: is_lo = 0; size = 0; break;
+ case Iop_CatOddLanes16x4: is_lo = 1; size = 1; break;
+ case Iop_CatEvenLanes16x4: is_lo = 0; size = 1; break;
+ default: vassert(0);
+ }
+ if (is_lo) {
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ tmp, argL, 4, False));
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ res, argR, 4, False));
+ addInstr(env, ARMInstr_NDual(ARMneon_UZP,
+ res, tmp, size, False));
+ } else {
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ tmp, argR, 4, False));
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ res, argL, 4, False));
+ addInstr(env, ARMInstr_NDual(ARMneon_UZP,
+ tmp, res, size, False));
+ }
+ return res;
+ }
+ case Iop_QAdd8Ux8:
+ case Iop_QAdd16Ux4:
+ case Iop_QAdd32Ux2:
+ case Iop_QAdd64Ux1: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_QAdd8Ux8: size = 0; break;
+ case Iop_QAdd16Ux4: size = 1; break;
+ case Iop_QAdd32Ux2: size = 2; break;
+ case Iop_QAdd64Ux1: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_QAdd8Sx8:
+ case Iop_QAdd16Sx4:
+ case Iop_QAdd32Sx2:
+ case Iop_QAdd64Sx1: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_QAdd8Sx8: size = 0; break;
+ case Iop_QAdd16Sx4: size = 1; break;
+ case Iop_QAdd32Sx2: size = 2; break;
+ case Iop_QAdd64Sx1: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Sub8x8:
+ case Iop_Sub16x4:
+ case Iop_Sub32x2:
+ case Iop_Sub64: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Sub8x8: size = 0; break;
+ case Iop_Sub16x4: size = 1; break;
+ case Iop_Sub32x2: size = 2; break;
+ case Iop_Sub64: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Sub32Fx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_QSub8Ux8:
+ case Iop_QSub16Ux4:
+ case Iop_QSub32Ux2:
+ case Iop_QSub64Ux1: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_QSub8Ux8: size = 0; break;
+ case Iop_QSub16Ux4: size = 1; break;
+ case Iop_QSub32Ux2: size = 2; break;
+ case Iop_QSub64Ux1: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_QSub8Sx8:
+ case Iop_QSub16Sx4:
+ case Iop_QSub32Sx2:
+ case Iop_QSub64Sx1: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_QSub8Sx8: size = 0; break;
+ case Iop_QSub16Sx4: size = 1; break;
+ case Iop_QSub32Sx2: size = 2; break;
+ case Iop_QSub64Sx1: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Max8Ux8:
+ case Iop_Max16Ux4:
+ case Iop_Max32Ux2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Max8Ux8: size = 0; break;
+ case Iop_Max16Ux4: size = 1; break;
+ case Iop_Max32Ux2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Max8Sx8:
+ case Iop_Max16Sx4:
+ case Iop_Max32Sx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Max8Sx8: size = 0; break;
+ case Iop_Max16Sx4: size = 1; break;
+ case Iop_Max32Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Min8Ux8:
+ case Iop_Min16Ux4:
+ case Iop_Min32Ux2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Min8Ux8: size = 0; break;
+ case Iop_Min16Ux4: size = 1; break;
+ case Iop_Min32Ux2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Min8Sx8:
+ case Iop_Min16Sx4:
+ case Iop_Min32Sx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Min8Sx8: size = 0; break;
+ case Iop_Min16Sx4: size = 1; break;
+ case Iop_Min32Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Sar8x8:
+ case Iop_Sar16x4:
+ case Iop_Sar32x2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ HReg argR2 = newVRegD(env);
+ HReg zero = newVRegD(env);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Sar8x8: size = 0; break;
+ case Iop_Sar16x4: size = 1; break;
+ case Iop_Sar32x2: size = 2; break;
+ case Iop_Sar64: size = 3; break;
+ default: vassert(0);
+ }
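+ /* NEON shifts only to the left by a register; negate the per-lane
+ shift counts and do a signed left shift to get an arithmetic
+ right shift. */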
+ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
+ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
+ argR2, zero, argR, size, False));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
+ res, argL, argR2, size, False));
+ return res;
+ }
+ case Iop_Sal8x8:
+ case Iop_Sal16x4:
+ case Iop_Sal32x2:
+ case Iop_Sal64x1: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Sal8x8: size = 0; break;
+ case Iop_Sal16x4: size = 1; break;
+ case Iop_Sal32x2: size = 2; break;
+ case Iop_Sal64x1: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Shr8x8:
+ case Iop_Shr16x4:
+ case Iop_Shr32x2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ HReg argR2 = newVRegD(env);
+ HReg zero = newVRegD(env);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Shr8x8: size = 0; break;
+ case Iop_Shr16x4: size = 1; break;
+ case Iop_Shr32x2: size = 2; break;
+ default: vassert(0);
+ }
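+ /* Likewise, a logical right shift is a left shift by the negated
+ per-lane shift counts. */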
+ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
+ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
+ argR2, zero, argR, size, False));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
+ res, argL, argR2, size, False));
+ return res;
+ }
+ case Iop_Shl8x8:
+ case Iop_Shl16x4:
+ case Iop_Shl32x2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Shl8x8: size = 0; break;
+ case Iop_Shl16x4: size = 1; break;
+ case Iop_Shl32x2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_QShl8x8:
+ case Iop_QShl16x4:
+ case Iop_QShl32x2:
+ case Iop_QShl64x1: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_QShl8x8: size = 0; break;
+ case Iop_QShl16x4: size = 1; break;
+ case Iop_QShl32x2: size = 2; break;
+ case Iop_QShl64x1: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_QSal8x8:
+ case Iop_QSal16x4:
+ case Iop_QSal32x2:
+ case Iop_QSal64x1: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_QSal8x8: size = 0; break;
+ case Iop_QSal16x4: size = 1; break;
+ case Iop_QSal32x2: size = 2; break;
+ case Iop_QSal64x1: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_QShlN8x8:
+ case Iop_QShlN16x4:
+ case Iop_QShlN32x2:
+ case Iop_QShlN64x1: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ UInt size, imm;
+ if (e->Iex.Binop.arg2->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
+ vpanic("ARM taget supports Iop_QShlNAxB with constant "
+ "second argument only\n");
+ }
+ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
+ switch (e->Iex.Binop.op) {
+ case Iop_QShlN8x8: size = 8 | imm; break;
+ case Iop_QShlN16x4: size = 16 | imm; break;
+ case Iop_QShlN32x2: size = 32 | imm; break;
+ case Iop_QShlN64x1: size = 64 | imm; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
+ res, argL, size, False));
+ return res;
+ }
+ case Iop_QShlN8Sx8:
+ case Iop_QShlN16Sx4:
+ case Iop_QShlN32Sx2:
+ case Iop_QShlN64Sx1: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ UInt size, imm;
+ if (e->Iex.Binop.arg2->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
+ vpanic("ARM taget supports Iop_QShlNAxB with constant "
+ "second argument only\n");
+ }
+ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
+ switch (e->Iex.Binop.op) {
+ case Iop_QShlN8Sx8: size = 8 | imm; break;
+ case Iop_QShlN16Sx4: size = 16 | imm; break;
+ case Iop_QShlN32Sx2: size = 32 | imm; break;
+ case Iop_QShlN64Sx1: size = 64 | imm; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
+ res, argL, size, False));
+ return res;
+ }
+ case Iop_QSalN8x8:
+ case Iop_QSalN16x4:
+ case Iop_QSalN32x2:
+ case Iop_QSalN64x1: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ UInt size, imm;
+ if (e->Iex.Binop.arg2->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
+ vpanic("ARM taget supports Iop_QShlNAxB with constant "
+ "second argument only\n");
+ }
+ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
+ switch (e->Iex.Binop.op) {
+ case Iop_QSalN8x8: size = 8 | imm; break;
+ case Iop_QSalN16x4: size = 16 | imm; break;
+ case Iop_QSalN32x2: size = 32 | imm; break;
+ case Iop_QSalN64x1: size = 64 | imm; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
+ res, argL, size, False));
+ return res;
+ }
+ case Iop_ShrN8x8:
+ case Iop_ShrN16x4:
+ case Iop_ShrN32x2:
+ case Iop_Shr64: {
+ HReg res = newVRegD(env);
+ HReg tmp = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ HReg argR2 = newVRegI(env);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_ShrN8x8: size = 0; break;
+ case Iop_ShrN16x4: size = 1; break;
+ case Iop_ShrN32x2: size = 2; break;
+ case Iop_Shr64: size = 3; break;
+ default: vassert(0);
+ }
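+ /* The shift amount is a scalar here: negate it, broadcast it to all
+ lanes with DUP, then shift left by the (now negative) amount. */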
+ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
+ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
+ res, argL, tmp, size, False));
+ return res;
+ }
+ case Iop_ShlN8x8:
+ case Iop_ShlN16x4:
+ case Iop_ShlN32x2:
+ case Iop_Shl64: {
+ HReg res = newVRegD(env);
+ HReg tmp = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_ShlN8x8: size = 0; break;
+ case Iop_ShlN16x4: size = 1; break;
+ case Iop_ShlN32x2: size = 2; break;
+ case Iop_Shl64: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, False));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
+ res, argL, tmp, size, False));
+ return res;
+ }
+ case Iop_SarN8x8:
+ case Iop_SarN16x4:
+ case Iop_SarN32x2:
+ case Iop_Sar64: {
+ HReg res = newVRegD(env);
+ HReg tmp = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ HReg argR2 = newVRegI(env);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_SarN8x8: size = 0; break;
+ case Iop_SarN16x4: size = 1; break;
+ case Iop_SarN32x2: size = 2; break;
+ case Iop_Sar64: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
+ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
+ res, argL, tmp, size, False));
+ return res;
+ }
+ case Iop_CmpGT8Ux8:
+ case Iop_CmpGT16Ux4:
+ case Iop_CmpGT32Ux2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_CmpGT8Ux8: size = 0; break;
+ case Iop_CmpGT16Ux4: size = 1; break;
+ case Iop_CmpGT32Ux2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_CmpGT8Sx8:
+ case Iop_CmpGT16Sx4:
+ case Iop_CmpGT32Sx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_CmpGT8Sx8: size = 0; break;
+ case Iop_CmpGT16Sx4: size = 1; break;
+ case Iop_CmpGT32Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_CmpEQ8x8:
+ case Iop_CmpEQ16x4:
+ case Iop_CmpEQ32x2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_CmpEQ8x8: size = 0; break;
+ case Iop_CmpEQ16x4: size = 1; break;
+ case Iop_CmpEQ32x2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Mul8x8:
+ case Iop_Mul16x4:
+ case Iop_Mul32x2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_Mul8x8: size = 0; break;
+ case Iop_Mul16x4: size = 1; break;
+ case Iop_Mul32x2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Mul32Fx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_QDMulHi16Sx4:
+ case Iop_QDMulHi32Sx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_QDMulHi16Sx4: size = 1; break;
+ case Iop_QDMulHi32Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
+ res, argL, argR, size, False));
+ return res;
+ }
+
+ case Iop_QRDMulHi16Sx4:
+ case Iop_QRDMulHi32Sx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_QRDMulHi16Sx4: size = 1; break;
+ case Iop_QRDMulHi32Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
+ res, argL, argR, size, False));
+ return res;
+ }
+
+ case Iop_PwAdd8x8:
+ case Iop_PwAdd16x4:
+ case Iop_PwAdd32x2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_PwAdd8x8: size = 0; break;
+ case Iop_PwAdd16x4: size = 1; break;
+ case Iop_PwAdd32x2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_PwAdd32Fx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_PwMin8Ux8:
+ case Iop_PwMin16Ux4:
+ case Iop_PwMin32Ux2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_PwMin8Ux8: size = 0; break;
+ case Iop_PwMin16Ux4: size = 1; break;
+ case Iop_PwMin32Ux2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_PwMin8Sx8:
+ case Iop_PwMin16Sx4:
+ case Iop_PwMin32Sx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_PwMin8Sx8: size = 0; break;
+ case Iop_PwMin16Sx4: size = 1; break;
+ case Iop_PwMin32Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_PwMax8Ux8:
+ case Iop_PwMax16Ux4:
+ case Iop_PwMax32Ux2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_PwMax8Ux8: size = 0; break;
+ case Iop_PwMax16Ux4: size = 1; break;
+ case Iop_PwMax32Ux2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_PwMax8Sx8:
+ case Iop_PwMax16Sx4:
+ case Iop_PwMax32Sx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_PwMax8Sx8: size = 0; break;
+ case Iop_PwMax16Sx4: size = 1; break;
+ case Iop_PwMax32Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Perm8x8: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
+ res, argL, argR, 0, False));
+ return res;
+ }
+ case Iop_PolynomialMul8x8: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Max32Fx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
+ res, argL, argR, 2, False));
+ return res;
+ }
+ case Iop_Min32Fx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
+ res, argL, argR, 2, False));
+ return res;
+ }
+ case Iop_PwMax32Fx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
+ res, argL, argR, 2, False));
+ return res;
+ }
+ case Iop_PwMin32Fx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
+ res, argL, argR, 2, False));
+ return res;
+ }
+ case Iop_CmpGT32Fx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
+ res, argL, argR, 2, False));
+ return res;
+ }
+ case Iop_CmpGE32Fx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
+ res, argL, argR, 2, False));
+ return res;
+ }
+ case Iop_CmpEQ32Fx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
+ res, argL, argR, 2, False));
+ return res;
+ }
+ case Iop_F32ToFixed32Ux2_RZ:
+ case Iop_F32ToFixed32Sx2_RZ:
+ case Iop_Fixed32UToF32x2_RN:
+ case Iop_Fixed32SToF32x2_RN: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ ARMNeonUnOp op;
+ UInt imm6;
+ if (e->Iex.Binop.arg2->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
+ vpanic("ARM supports FP <-> Fixed conversion with constant "
+ "second argument less than 33 only\n");
+ }
+ imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
+ vassert(imm6 <= 32 && imm6 > 0);
+ imm6 = 64 - imm6;
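+ /* The instruction encodes the number of fraction bits as 64 - imm6. */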
+ switch(e->Iex.Binop.op) {
+ case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
+ case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
+ case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
+ case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
+ return res;
+ }
+ /*
+ FIXME: is this here or not?
+ case Iop_VDup8x8:
+ case Iop_VDup16x4:
+ case Iop_VDup32x2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ UInt index;
+ UInt imm4;
+ UInt size = 0;
+ if (e->Iex.Binop.arg2->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
+ vpanic("ARM supports Iop_VDup with constant "
+ "second argument less than 16 only\n");
+ }
+ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
+ switch(e->Iex.Binop.op) {
+ case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
+ case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
+ case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
+ default: vassert(0);
+ }
+ if (imm4 >= 16) {
+ vpanic("ARM supports Iop_VDup with constant "
+ "second argument less than 16 only\n");
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
+ res, argL, imm4, False));
+ return res;
+ }
+ */
+ default:
+ break;
+ }
+ }
+
+ /* --------- UNARY ops --------- */
+ if (e->tag == Iex_Unop) {
+ switch (e->Iex.Unop.op) {
+
+ /* ReinterpF64asI64 */
+ case Iop_ReinterpF64asI64:
+ /* Left64(e) */
+ case Iop_Left64:
+ /* CmpwNEZ64(e) */
+ //case Iop_CmpwNEZ64:
+ case Iop_1Sto64: {
+ HReg rLo, rHi;
+ HReg res = newVRegD(env);
+ iselInt64Expr(&rHi, &rLo, env, e);
+ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
+ return res;
+ }
+ case Iop_Not64: {
+ DECLARE_PATTERN(p_veqz_8x8);
+ DECLARE_PATTERN(p_veqz_16x4);
+ DECLARE_PATTERN(p_veqz_32x2);
+ DECLARE_PATTERN(p_vcge_8sx8);
+ DECLARE_PATTERN(p_vcge_16sx4);
+ DECLARE_PATTERN(p_vcge_32sx2);
+ DECLARE_PATTERN(p_vcge_8ux8);
+ DECLARE_PATTERN(p_vcge_16ux4);
+ DECLARE_PATTERN(p_vcge_32ux2);
+ DEFINE_PATTERN(p_veqz_8x8,
+ unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
+ DEFINE_PATTERN(p_veqz_16x4,
+ unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
+ DEFINE_PATTERN(p_veqz_32x2,
+ unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
+ DEFINE_PATTERN(p_vcge_8sx8,
+ unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
+ DEFINE_PATTERN(p_vcge_16sx4,
+ unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
+ DEFINE_PATTERN(p_vcge_32sx2,
+ unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
+ DEFINE_PATTERN(p_vcge_8ux8,
+ unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
+ DEFINE_PATTERN(p_vcge_16ux4,
+ unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
+ DEFINE_PATTERN(p_vcge_32ux2,
+ unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
+ if (matchIRExpr(&mi, p_veqz_8x8, e)) {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, mi.bindee[0]);
+ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
+ return res;
+ } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, mi.bindee[0]);
+ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
+ return res;
+ } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, mi.bindee[0]);
+ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
+ return res;
+ } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
+ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
+ res, argL, argR, 0, False));
+ return res;
+ } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
+ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
+ res, argL, argR, 1, False));
+ return res;
+ } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
+ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
+ res, argL, argR, 2, False));
+ return res;
+ } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
+ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
+ res, argL, argR, 0, False));
+ return res;
+ } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
+ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
+ res, argL, argR, 1, False));
+ return res;
+ } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
+ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
+ res, argL, argR, 2, False));
+ return res;
+ } else {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
+ return res;
+ }
+ }
+ case Iop_Dup8x8:
+ case Iop_Dup16x4:
+ case Iop_Dup32x2: {
+ HReg res, arg;
+ UInt size;
+ DECLARE_PATTERN(p_vdup_8x8);
+ DECLARE_PATTERN(p_vdup_16x4);
+ DECLARE_PATTERN(p_vdup_32x2);
+ DEFINE_PATTERN(p_vdup_8x8,
+ unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
+ DEFINE_PATTERN(p_vdup_16x4,
+ unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
+ DEFINE_PATTERN(p_vdup_32x2,
+ unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
+ if (matchIRExpr(&mi, p_vdup_8x8, e)) {
+ UInt index;
+ UInt imm4;
+ if (mi.bindee[1]->tag == Iex_Const &&
+ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
+ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
+ imm4 = (index << 1) + 1;
+ if (index < 8) {
+ res = newVRegD(env);
+ arg = iselNeon64Expr(env, mi.bindee[0]);
+ addInstr(env, ARMInstr_NUnaryS(
+ ARMneon_VDUP,
+ mkARMNRS(ARMNRS_Reg, res, 0),
+ mkARMNRS(ARMNRS_Scalar, arg, index),
+ imm4, False
+ ));
+ return res;
+ }
+ }
+ } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
+ UInt index;
+ UInt imm4;
+ if (mi.bindee[1]->tag == Iex_Const &&
+ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
+ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
+ imm4 = (index << 2) + 2;
+ if (index < 4) {
+ res = newVRegD(env);
+ arg = iselNeon64Expr(env, mi.bindee[0]);
+ addInstr(env, ARMInstr_NUnaryS(
+ ARMneon_VDUP,
+ mkARMNRS(ARMNRS_Reg, res, 0),
+ mkARMNRS(ARMNRS_Scalar, arg, index),
+ imm4, False
+ ));
+ return res;
+ }
+ }
+ } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
+ UInt index;
+ UInt imm4;
+ if (mi.bindee[1]->tag == Iex_Const &&
+ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
+ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
+ imm4 = (index << 3) + 4;
+ if (index < 2) {
+ res = newVRegD(env);
+ arg = iselNeon64Expr(env, mi.bindee[0]);
+ addInstr(env, ARMInstr_NUnaryS(
+ ARMneon_VDUP,
+ mkARMNRS(ARMNRS_Reg, res, 0),
+ mkARMNRS(ARMNRS_Scalar, arg, index),
+ imm4, False
+ ));
+ return res;
+ }
+ }
+ }
+ arg = iselIntExpr_R(env, e->Iex.Unop.arg);
+ res = newVRegD(env);
+ switch (e->Iex.Unop.op) {
+ case Iop_Dup8x8: size = 0; break;
+ case Iop_Dup16x4: size = 1; break;
+ case Iop_Dup32x2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
+ return res;
+ }
+ case Iop_Abs8x8:
+ case Iop_Abs16x4:
+ case Iop_Abs32x2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch(e->Iex.Unop.op) {
+ case Iop_Abs8x8: size = 0; break;
+ case Iop_Abs16x4: size = 1; break;
+ case Iop_Abs32x2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
+ return res;
+ }
+ case Iop_Reverse64_8x8:
+ case Iop_Reverse64_16x4:
+ case Iop_Reverse64_32x2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch(e->Iex.Unop.op) {
+ case Iop_Reverse64_8x8: size = 0; break;
+ case Iop_Reverse64_16x4: size = 1; break;
+ case Iop_Reverse64_32x2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
+ res, arg, size, False));
+ return res;
+ }
+ case Iop_Reverse32_8x8:
+ case Iop_Reverse32_16x4: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch(e->Iex.Unop.op) {
+ case Iop_Reverse32_8x8: size = 0; break;
+ case Iop_Reverse32_16x4: size = 1; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
+ res, arg, size, False));
+ return res;
+ }
+ case Iop_Reverse16_8x8: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
+ res, arg, size, False));
+ return res;
+ }
+ case Iop_CmpwNEZ64: {
+ HReg x_lsh = newVRegD(env);
+ HReg x_rsh = newVRegD(env);
+ HReg lsh_amt = newVRegD(env);
+ HReg rsh_amt = newVRegD(env);
+ HReg zero = newVRegD(env);
+ HReg tmp = newVRegD(env);
+ HReg tmp2 = newVRegD(env);
+ HReg res = newVRegD(env);
+ HReg x = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
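+ /* Compute a per-32-bit-lane "is nonzero" mask, then OR it with
+ copies of itself shifted up and down by 32, so that either
+ nonzero half sets all 64 bits of the result. */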
+ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
+ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
+ addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
+ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
+ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
+ rsh_amt, zero, lsh_amt, 2, False));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
+ x_lsh, x, lsh_amt, 3, False));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
+ x_rsh, x, rsh_amt, 3, False));
+ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
+ tmp, x_lsh, x_rsh, 0, False));
+ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
+ res, tmp, x, 0, False));
+ return res;
+ }
+ case Iop_CmpNEZ8x8:
+ case Iop_CmpNEZ16x4:
+ case Iop_CmpNEZ32x2: {
+ HReg res = newVRegD(env);
+ HReg tmp = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ UInt size;
+ switch (e->Iex.Unop.op) {
+ case Iop_CmpNEZ8x8: size = 0; break;
+ case Iop_CmpNEZ16x4: size = 1; break;
+ case Iop_CmpNEZ32x2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
+ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
+ return res;
+ }
+ case Iop_Shorten16x8:
+ case Iop_Shorten32x4:
+ case Iop_Shorten64x2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch(e->Iex.Unop.op) {
+ case Iop_Shorten16x8: size = 0; break;
+ case Iop_Shorten32x4: size = 1; break;
+ case Iop_Shorten64x2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
+ res, arg, size, False));
+ return res;
+ }
+ case Iop_QShortenS16Sx8:
+ case Iop_QShortenS32Sx4:
+ case Iop_QShortenS64Sx2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch(e->Iex.Unop.op) {
+ case Iop_QShortenS16Sx8: size = 0; break;
+ case Iop_QShortenS32Sx4: size = 1; break;
+ case Iop_QShortenS64Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
+ res, arg, size, False));
+ return res;
+ }
+ case Iop_QShortenU16Sx8:
+ case Iop_QShortenU32Sx4:
+ case Iop_QShortenU64Sx2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch(e->Iex.Unop.op) {
+ case Iop_QShortenU16Sx8: size = 0; break;
+ case Iop_QShortenU32Sx4: size = 1; break;
+ case Iop_QShortenU64Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
+ res, arg, size, False));
+ return res;
+ }
+ case Iop_QShortenU16Ux8:
+ case Iop_QShortenU32Ux4:
+ case Iop_QShortenU64Ux2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch(e->Iex.Unop.op) {
+ case Iop_QShortenU16Ux8: size = 0; break;
+ case Iop_QShortenU32Ux4: size = 1; break;
+ case Iop_QShortenU64Ux2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
+ res, arg, size, False));
+ return res;
+ }
+ case Iop_PwAddL8Sx8:
+ case Iop_PwAddL16Sx4:
+ case Iop_PwAddL32Sx2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch(e->Iex.Unop.op) {
+ case Iop_PwAddL8Sx8: size = 0; break;
+ case Iop_PwAddL16Sx4: size = 1; break;
+ case Iop_PwAddL32Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
+ res, arg, size, False));
+ return res;
+ }
+ case Iop_PwAddL8Ux8:
+ case Iop_PwAddL16Ux4:
+ case Iop_PwAddL32Ux2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch(e->Iex.Unop.op) {
+ case Iop_PwAddL8Ux8: size = 0; break;
+ case Iop_PwAddL16Ux4: size = 1; break;
+ case Iop_PwAddL32Ux2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
+ res, arg, size, False));
+ return res;
+ }
+ case Iop_Cnt8x8: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
+ res, arg, size, False));
+ return res;
+ }
+ case Iop_Clz8Sx8:
+ case Iop_Clz16Sx4:
+ case Iop_Clz32Sx2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch(e->Iex.Unop.op) {
+ case Iop_Clz8Sx8: size = 0; break;
+ case Iop_Clz16Sx4: size = 1; break;
+ case Iop_Clz32Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
+ res, arg, size, False));
+ return res;
+ }
+ case Iop_Cls8Sx8:
+ case Iop_Cls16Sx4:
+ case Iop_Cls32Sx2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch(e->Iex.Unop.op) {
+ case Iop_Cls8Sx8: size = 0; break;
+ case Iop_Cls16Sx4: size = 1; break;
+ case Iop_Cls32Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
+ res, arg, size, False));
+ return res;
+ }
+ case Iop_FtoI32Sx2_RZ: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
+ res, arg, 2, False));
+ return res;
+ }
+ case Iop_FtoI32Ux2_RZ: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
+ res, arg, 2, False));
+ return res;
+ }
+ case Iop_I32StoFx2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
+ res, arg, 2, False));
+ return res;
+ }
+ case Iop_I32UtoFx2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
+ res, arg, 2, False));
+ return res;
+ }
+ case Iop_F32toF16x4: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
+ res, arg, 2, False));
+ return res;
+ }
+ case Iop_Recip32Fx2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
+ res, arg, 0, False));
+ return res;
+ }
+ case Iop_Recip32x2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
+ res, arg, 0, False));
+ return res;
+ }
+ case Iop_Abs32Fx2: {
+ DECLARE_PATTERN(p_vabd_32fx2);
+ DEFINE_PATTERN(p_vabd_32fx2,
+ unop(Iop_Abs32Fx2,
+ binop(Iop_Sub32Fx2,
+ bind(0),
+ bind(1))));
+ if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
+ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
+ res, argL, argR, 0, False));
+ return res;
+ } else {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
+ res, arg, 0, False));
+ return res;
+ }
+ }
+ case Iop_Rsqrte32Fx2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
+ res, arg, 0, False));
+ return res;
+ }
+ case Iop_Rsqrte32x2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
+ res, arg, 0, False));
+ return res;
+ }
+ case Iop_Neg32Fx2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
+ res, arg, 0, False));
+ return res;
+ }
+ default:
+ break;
+ }
+ } /* if (e->tag == Iex_Unop) */
+
+ if (e->tag == Iex_Triop) {
+ switch (e->Iex.Triop.op) {
+ case Iop_Extract64: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Triop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Triop.arg2);
+ UInt imm4;
+ if (e->Iex.Triop.arg3->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Triop.arg3) != Ity_I8) {
+ vpanic("ARM target supports Iop_Extract64 with constant "
+ "third argument less than 16 only\n");
+ }
+ imm4 = e->Iex.Triop.arg3->Iex.Const.con->Ico.U8;
+ if (imm4 >= 8) {
+ vpanic("ARM target supports Iop_Extract64 with constant "
+ "third argument less than 16 only\n");
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
+ res, argL, argR, imm4, False));
+ return res;
+ }
+ case Iop_SetElem8x8:
+ case Iop_SetElem16x4:
+ case Iop_SetElem32x2: {
+ HReg res = newVRegD(env);
+ HReg dreg = iselNeon64Expr(env, e->Iex.Triop.arg1);
+ HReg arg = iselIntExpr_R(env, e->Iex.Triop.arg3);
+ UInt index, size;
+ if (e->Iex.Triop.arg2->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Triop.arg2) != Ity_I8) {
+ vpanic("ARM target supports SetElem with constant "
+ "second argument only\n");
+ }
+ index = e->Iex.Triop.arg2->Iex.Const.con->Ico.U8;
+ switch (e->Iex.Triop.op) {
+ case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
+ case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
+ case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
+ addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
+ mkARMNRS(ARMNRS_Scalar, res, index),
+ mkARMNRS(ARMNRS_Reg, arg, 0),
+ size, False));
+ return res;
+ }
+ default:
+ break;
+ }
+ }
+
+ /* --------- MULTIPLEX --------- */
+ if (e->tag == Iex_Mux0X) {
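+ /* Compute the 64-bit mux on the integer side, then transfer the
+ resulting register pair into a D register with VMOV. */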
+ HReg rLo, rHi;
+ HReg res = newVRegD(env);
+ iselInt64Expr(&rHi, &rLo, env, e);
+ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
+ return res;
+ }
+
+ ppIRExpr(e);
+ vpanic("iselNeon64Expr");
+}
+
+static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e )
+{
+ HReg r = iselNeonExpr_wrk( env, e );
+ vassert(hregClass(r) == HRcVec128);
+ vassert(hregIsVirtual(r));
+ return r;
+}
+
+/* DO NOT CALL THIS DIRECTLY */
+static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env, e);
+ MatchInfo mi;
+ vassert(e);
+ vassert(ty == Ity_V128);
+
+ if (e->tag == Iex_RdTmp) {
+ return lookupIRTemp(env, e->Iex.RdTmp.tmp);
+ }
+
+ if (e->tag == Iex_Const) {
+ /* At the moment there should be no 128-bit constants in IR for ARM
+ generated during disassembly. They are represented as an
+ Iop_64HLtoV128 binary operation and are handled among the binary ops. */
+ /* But an all-zeroes constant can be created by Valgrind's internal
+ optimiser. */
+ if (e->Iex.Const.con->Ico.V128 == 0) {
+ HReg res = newVRegV(env);
+ addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(0, 0)));
+ return res;
+ }
+ ppIRExpr(e);
+ vpanic("128-bit constant is not implemented");
+ }
+
+ if (e->tag == Iex_Load) {
+ HReg res = newVRegV(env);
+ ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
+ vassert(ty == Ity_V128);
+ addInstr(env, ARMInstr_NLdStQ(True, res, am));
+ return res;
+ }
+
+ if (e->tag == Iex_Get) {
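+ /* NEON loads/stores take only register addressing modes, so form
+ guest-state-pointer + offset in a scratch register first. */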
+ HReg addr = newVRegI(env);
+ HReg res = newVRegV(env);
+ vassert(ty == Ity_V128);
+ addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
+ addInstr(env, ARMInstr_NLdStQ(True, res, mkARMAModeN_R(addr)));
+ return res;
+ }
+
+ if (e->tag == Iex_Unop) {
+ switch (e->Iex.Unop.op) {
+ case Iop_NotV128: {
+ DECLARE_PATTERN(p_veqz_8x16);
+ DECLARE_PATTERN(p_veqz_16x8);
+ DECLARE_PATTERN(p_veqz_32x4);
+ DECLARE_PATTERN(p_vcge_8sx16);
+ DECLARE_PATTERN(p_vcge_16sx8);
+ DECLARE_PATTERN(p_vcge_32sx4);
+ DECLARE_PATTERN(p_vcge_8ux16);
+ DECLARE_PATTERN(p_vcge_16ux8);
+ DECLARE_PATTERN(p_vcge_32ux4);
+ DEFINE_PATTERN(p_veqz_8x16,
+ unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
+ DEFINE_PATTERN(p_veqz_16x8,
+ unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
+ DEFINE_PATTERN(p_veqz_32x4,
+ unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
+ DEFINE_PATTERN(p_vcge_8sx16,
+ unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
+ DEFINE_PATTERN(p_vcge_16sx8,
+ unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
+ DEFINE_PATTERN(p_vcge_32sx4,
+ unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
+ DEFINE_PATTERN(p_vcge_8ux16,
+ unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
+ DEFINE_PATTERN(p_vcge_16ux8,
+ unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
+ DEFINE_PATTERN(p_vcge_32ux4,
+ unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
+ if (matchIRExpr(&mi, p_veqz_8x16, e)) {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, mi.bindee[0]);
+ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
+ return res;
+ } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, mi.bindee[0]);
+ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
+ return res;
+ } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, mi.bindee[0]);
+ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
+ return res;
+ } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, mi.bindee[0]);
+ HReg argR = iselNeonExpr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
+ res, argL, argR, 0, True));
+ return res;
+ } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, mi.bindee[0]);
+ HReg argR = iselNeonExpr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
+ res, argL, argR, 1, True));
+ return res;
+ } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, mi.bindee[0]);
+ HReg argR = iselNeonExpr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
+ res, argL, argR, 2, True));
+ return res;
+ } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, mi.bindee[0]);
+ HReg argR = iselNeonExpr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
+ res, argL, argR, 0, True));
+ return res;
+ } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, mi.bindee[0]);
+ HReg argR = iselNeonExpr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
+ res, argL, argR, 1, True));
+ return res;
+ } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, mi.bindee[0]);
+ HReg argR = iselNeonExpr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
+ res, argL, argR, 2, True));
+ return res;
+ } else {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
+ return res;
+ }
+ }
+ case Iop_Dup8x16:
+ case Iop_Dup16x8:
+ case Iop_Dup32x4: {
+ HReg res, arg;
+ UInt size;
+ DECLARE_PATTERN(p_vdup_8x16);
+ DECLARE_PATTERN(p_vdup_16x8);
+ DECLARE_PATTERN(p_vdup_32x4);
+ DEFINE_PATTERN(p_vdup_8x16,
+ unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
+ DEFINE_PATTERN(p_vdup_16x8,
+ unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
+ DEFINE_PATTERN(p_vdup_32x4,
+ unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
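+ /* In the VDUP (scalar) form the imm4 field packs both the element
+ size and the lane index: the lowest set bit marks the size (1, 2
+ or 4 for 8-, 16- or 32-bit lanes) and the bits above it hold the
+ index, hence the encodings computed below. */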
+ if (matchIRExpr(&mi, p_vdup_8x16, e)) {
+ UInt index;
+ UInt imm4;
+ if (mi.bindee[1]->tag == Iex_Const &&
+ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
+ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
+ imm4 = (index << 1) + 1;
+ if (index < 8) {
+ res = newVRegV(env);
+ arg = iselNeon64Expr(env, mi.bindee[0]);
+ addInstr(env, ARMInstr_NUnaryS(
+ ARMneon_VDUP,
+ mkARMNRS(ARMNRS_Reg, res, 0),
+ mkARMNRS(ARMNRS_Scalar, arg, index),
+ imm4, True
+ ));
+ return res;
+ }
+ }
+ } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
+ UInt index;
+ UInt imm4;
+ if (mi.bindee[1]->tag == Iex_Const &&
+ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
+ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
+ imm4 = (index << 2) + 2;
+ if (index < 4) {
+ res = newVRegV(env);
+ arg = iselNeon64Expr(env, mi.bindee[0]);
+ addInstr(env, ARMInstr_NUnaryS(
+ ARMneon_VDUP,
+ mkARMNRS(ARMNRS_Reg, res, 0),
+ mkARMNRS(ARMNRS_Scalar, arg, index),
+ imm4, True
+ ));
+ return res;
+ }
+ }
+ } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
+ UInt index;
+ UInt imm4;
+ if (mi.bindee[1]->tag == Iex_Const &&
+ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
+ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
+ imm4 = (index << 3) + 4;
+ if (index < 2) {
+ res = newVRegV(env);
+ arg = iselNeon64Expr(env, mi.bindee[0]);
+ addInstr(env, ARMInstr_NUnaryS(
+ ARMneon_VDUP,
+ mkARMNRS(ARMNRS_Reg, res, 0),
+ mkARMNRS(ARMNRS_Scalar, arg, index),
+ imm4, True
+ ));
+ return res;
+ }
+ }
+ }
+ arg = iselIntExpr_R(env, e->Iex.Unop.arg);
+ res = newVRegV(env);
+ switch (e->Iex.Unop.op) {
+ case Iop_Dup8x16: size = 0; break;
+ case Iop_Dup16x8: size = 1; break;
+ case Iop_Dup32x4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
+ return res;
+ }
+ case Iop_Abs8x16:
+ case Iop_Abs16x8:
+ case Iop_Abs32x4: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch (e->Iex.Unop.op) {
+ case Iop_Abs8x16: size = 0; break;
+ case Iop_Abs16x8: size = 1; break;
+ case Iop_Abs32x4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
+ return res;
+ }
+ case Iop_Reverse64_8x16:
+ case Iop_Reverse64_16x8:
+ case Iop_Reverse64_32x4: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch (e->Iex.Unop.op) {
+ case Iop_Reverse64_8x16: size = 0; break;
+ case Iop_Reverse64_16x8: size = 1; break;
+ case Iop_Reverse64_32x4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
+ res, arg, size, True));
+ return res;
+ }
+ case Iop_Reverse32_8x16:
+ case Iop_Reverse32_16x8: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch (e->Iex.Unop.op) {
+ case Iop_Reverse32_8x16: size = 0; break;
+ case Iop_Reverse32_16x8: size = 1; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
+ res, arg, size, True));
+ return res;
+ }
+ case Iop_Reverse16_8x16: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
+ res, arg, size, True));
+ return res;
+ }
+ case Iop_CmpNEZ64x2: {
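+ /* There is no 64-bit EQZ, so test the 32-bit halves and then smear
+ the result across each 64-bit lane:
+ x = (arg != 0) per 32-bit lane; res = x | (x << 32) | (x >> 32). */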
+ HReg x_lsh = newVRegV(env);
+ HReg x_rsh = newVRegV(env);
+ HReg lsh_amt = newVRegV(env);
+ HReg rsh_amt = newVRegV(env);
+ HReg zero = newVRegV(env);
+ HReg tmp = newVRegV(env);
+ HReg tmp2 = newVRegV(env);
+ HReg res = newVRegV(env);
+ HReg x = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
+ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
+ addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
+ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
+ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
+ rsh_amt, zero, lsh_amt, 2, True));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
+ x_lsh, x, lsh_amt, 3, True));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
+ x_rsh, x, rsh_amt, 3, True));
+ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
+ tmp, x_lsh, x_rsh, 0, True));
+ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
+ res, tmp, x, 0, True));
+ return res;
+ }
+ case Iop_CmpNEZ8x16:
+ case Iop_CmpNEZ16x8:
+ case Iop_CmpNEZ32x4: {
+ HReg res = newVRegV(env);
+ HReg tmp = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size;
+ switch (e->Iex.Unop.op) {
+ case Iop_CmpNEZ8x16: size = 0; break;
+ case Iop_CmpNEZ16x8: size = 1; break;
+ case Iop_CmpNEZ32x4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True));
+ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True));
+ return res;
+ }
+ case Iop_Longen8Ux8:
+ case Iop_Longen16Ux4:
+ case Iop_Longen32Ux2: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ UInt size;
+ switch (e->Iex.Unop.op) {
+ case Iop_Longen8Ux8: size = 0; break;
+ case Iop_Longen16Ux4: size = 1; break;
+ case Iop_Longen32Ux2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
+ res, arg, size, True));
+ return res;
+ }
+ case Iop_Longen8Sx8:
+ case Iop_Longen16Sx4:
+ case Iop_Longen32Sx2: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ UInt size;
+ switch (e->Iex.Unop.op) {
+ case Iop_Longen8Sx8: size = 0; break;
+ case Iop_Longen16Sx4: size = 1; break;
+ case Iop_Longen32Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
+ res, arg, size, True));
+ return res;
+ }
+ case Iop_PwAddL8Sx16:
+ case Iop_PwAddL16Sx8:
+ case Iop_PwAddL32Sx4: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch (e->Iex.Unop.op) {
+ case Iop_PwAddL8Sx16: size = 0; break;
+ case Iop_PwAddL16Sx8: size = 1; break;
+ case Iop_PwAddL32Sx4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
+ res, arg, size, True));
+ return res;
+ }
+ case Iop_PwAddL8Ux16:
+ case Iop_PwAddL16Ux8:
+ case Iop_PwAddL32Ux4: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch (e->Iex.Unop.op) {
+ case Iop_PwAddL8Ux16: size = 0; break;
+ case Iop_PwAddL16Ux8: size = 1; break;
+ case Iop_PwAddL32Ux4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
+ res, arg, size, True));
+ return res;
+ }
+ case Iop_Cnt8x16: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
+ return res;
+ }
+ case Iop_Clz8Sx16:
+ case Iop_Clz16Sx8:
+ case Iop_Clz32Sx4: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch (e->Iex.Unop.op) {
+ case Iop_Clz8Sx16: size = 0; break;
+ case Iop_Clz16Sx8: size = 1; break;
+ case Iop_Clz32Sx4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
+ return res;
+ }
+ case Iop_Cls8Sx16:
+ case Iop_Cls16Sx8:
+ case Iop_Cls32Sx4: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch (e->Iex.Unop.op) {
+ case Iop_Cls8Sx16: size = 0; break;
+ case Iop_Cls16Sx8: size = 1; break;
+ case Iop_Cls32Sx4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
+ return res;
+ }
+ case Iop_FtoI32Sx4_RZ: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
+ res, arg, 2, True));
+ return res;
+ }
+ case Iop_FtoI32Ux4_RZ: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
+ res, arg, 2, True));
+ return res;
+ }
+ case Iop_I32StoFx4: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
+ res, arg, 2, True));
+ return res;
+ }
+ case Iop_I32UtoFx4: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
+ res, arg, 2, True));
+ return res;
+ }
+ case Iop_F16toF32x4: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
+ res, arg, 2, True));
+ return res;
+ }
+ case Iop_Recip32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
+ res, argL, 0, True));
+ return res;
+ }
+ case Iop_Recip32x4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
+ res, argL, 0, True));
+ return res;
+ }
+ case Iop_Abs32Fx4: {
+ DECLARE_PATTERN(p_vabd_32fx4);
+ DEFINE_PATTERN(p_vabd_32fx4,
+ unop(Iop_Abs32Fx4,
+ binop(Iop_Sub32Fx4,
+ bind(0),
+ bind(1))));
+ if (matchIRExpr(&mi, p_vabd_32fx4, e)) {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, mi.bindee[0]);
+ HReg argR = iselNeonExpr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
+ res, argL, argR, 0, True));
+ return res;
+ } else {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
+ res, argL, 0, True));
+ return res;
+ }
+ }
+ case Iop_Rsqrte32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
+ res, argL, 0, True));
+ return res;
+ }
+ case Iop_Rsqrte32x4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
+ res, argL, 0, True));
+ return res;
+ }
+ case Iop_Neg32Fx4: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
+ res, arg, 0, True));
+ return res;
+ }
+ /* ... */
+ default:
+ break;
+ }
+ }
+
+ if (e->tag == Iex_Binop) {
+ switch (e->Iex.Binop.op) {
+ case Iop_64HLtoV128:
+ /* Try to match into single "VMOV reg, imm" instruction */
+ if (e->Iex.Binop.arg1->tag == Iex_Const &&
+ e->Iex.Binop.arg2->tag == Iex_Const &&
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg1) == Ity_I64 &&
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) == Ity_I64 &&
+ e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 ==
+ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64) {
+ ULong imm64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
+ ARMNImm *imm = Imm64_to_ARMNImm(imm64);
+ if (imm) {
+ HReg res = newVRegV(env);
+ addInstr(env, ARMInstr_NeonImm(res, imm));
+ return res;
+ }
+ if ((imm64 >> 32) == 0LL &&
+ (imm = Imm64_to_ARMNImm(imm64 | (imm64 << 32))) != NULL) {
+ HReg tmp1 = newVRegV(env);
+ HReg tmp2 = newVRegV(env);
+ HReg res = newVRegV(env);
+ if (imm->type < 10) {
+ addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0x0f)));
+ addInstr(env, ARMInstr_NeonImm(tmp2, imm));
+ addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
+ res, tmp1, tmp2, 4, True));
+ return res;
+ }
+ }
+ if ((imm64 & 0xFFFFFFFFLL) == 0LL &&
+ (imm = Imm64_to_ARMNImm(imm64 | (imm64 >> 32))) != NULL) {
+ HReg tmp1 = newVRegV(env);
+ HReg tmp2 = newVRegV(env);
+ HReg res = newVRegV(env);
+ if (imm->type < 10) {
+ addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0xf0)));
+ addInstr(env, ARMInstr_NeonImm(tmp2, imm));
+ addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
+ res, tmp1, tmp2, 4, True));
+ return res;
+ }
+ }
+ }
+ /* Does not match "VMOV Reg, Imm" form */
+ goto neon_expr_bad;
+ case Iop_AndV128: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
+ res, argL, argR, 4, True));
+ return res;
+ }
+ case Iop_OrV128: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
+ res, argL, argR, 4, True));
+ return res;
+ }
+ case Iop_XorV128: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
+ res, argL, argR, 4, True));
+ return res;
+ }
+ case Iop_Add8x16:
+ case Iop_Add16x8:
+ case Iop_Add32x4:
+ case Iop_Add64x2: {
+ /*
+ FIXME: remove this if not used
+ DECLARE_PATTERN(p_vrhadd_32sx4);
+ ULong one = (1LL << 32) | 1LL;
+ DEFINE_PATTERN(p_vrhadd_32sx4,
+ binop(Iop_Add32x4,
+ binop(Iop_Add32x4,
+ binop(Iop_SarN32x4,
+ bind(0),
+ mkU8(1)),
+ binop(Iop_SarN32x4,
+ bind(1),
+ mkU8(1))),
+ binop(Iop_SarN32x4,
+ binop(Iop_Add32x4,
+ binop(Iop_Add32x4,
+ binop(Iop_AndV128,
+ bind(0),
+ mkU128(one)),
+ binop(Iop_AndV128,
+ bind(1),
+ mkU128(one))),
+ mkU128(one)),
+ mkU8(1))));
+ */
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Add8x16: size = 0; break;
+ case Iop_Add16x8: size = 1; break;
+ case Iop_Add32x4: size = 2; break;
+ case Iop_Add64x2: size = 3; break;
+ default:
+ ppIROp(e->Iex.Binop.op);
+ vpanic("Illegal element size in VADD");
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Add32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Recps32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Rsqrts32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_InterleaveEvenLanes8x16:
+ case Iop_InterleaveEvenLanes16x8:
+ case Iop_InterleaveEvenLanes32x4:
+ case Iop_InterleaveOddLanes8x16:
+ case Iop_InterleaveOddLanes16x8:
+ case Iop_InterleaveOddLanes32x4: {
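+ /* VTRN overwrites both of its operands, so copy the arguments into
+ fresh registers first and return the half of the transposed pair
+ that holds the requested lanes. */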
+ HReg tmp = newVRegV(env);
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ UInt is_lo;
+ switch (e->Iex.Binop.op) {
+ case Iop_InterleaveEvenLanes8x16: is_lo = 0; size = 0; break;
+ case Iop_InterleaveOddLanes8x16: is_lo = 1; size = 0; break;
+ case Iop_InterleaveEvenLanes16x8: is_lo = 0; size = 1; break;
+ case Iop_InterleaveOddLanes16x8: is_lo = 1; size = 1; break;
+ case Iop_InterleaveEvenLanes32x4: is_lo = 0; size = 2; break;
+ case Iop_InterleaveOddLanes32x4: is_lo = 1; size = 2; break;
+ default:
+ ppIROp(e->Iex.Binop.op);
+ vpanic("Illegal element size in VTRN");
+ }
+ if (is_lo) {
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ tmp, argL, 4, True));
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ res, argR, 4, True));
+ addInstr(env, ARMInstr_NDual(ARMneon_TRN,
+ res, tmp, size, True));
+ } else {
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ tmp, argR, 4, True));
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ res, argL, 4, True));
+ addInstr(env, ARMInstr_NDual(ARMneon_TRN,
+ tmp, res, size, True));
+ }
+ return res;
+ }
+ case Iop_InterleaveHI8x16:
+ case Iop_InterleaveHI16x8:
+ case Iop_InterleaveHI32x4:
+ case Iop_InterleaveLO8x16:
+ case Iop_InterleaveLO16x8:
+ case Iop_InterleaveLO32x4: {
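+ /* Same in-place trick as for VTRN above, using VZIP to produce the
+ low/high interleave. */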
+ HReg tmp = newVRegV(env);
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ UInt is_lo;
+ switch (e->Iex.Binop.op) {
+ case Iop_InterleaveHI8x16: is_lo = 1; size = 0; break;
+ case Iop_InterleaveLO8x16: is_lo = 0; size = 0; break;
+ case Iop_InterleaveHI16x8: is_lo = 1; size = 1; break;
+ case Iop_InterleaveLO16x8: is_lo = 0; size = 1; break;
+ case Iop_InterleaveHI32x4: is_lo = 1; size = 2; break;
+ case Iop_InterleaveLO32x4: is_lo = 0; size = 2; break;
+ default:
+ ppIROp(e->Iex.Binop.op);
+ vpanic("Illegal element size in VZIP");
+ }
+ if (is_lo) {
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ tmp, argL, 4, True));
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ res, argR, 4, True));
+ addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
+ res, tmp, size, True));
+ } else {
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ tmp, argR, 4, True));
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ res, argL, 4, True));
+ addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
+ tmp, res, size, True));
+ }
+ return res;
+ }
+ case Iop_CatOddLanes8x16:
+ case Iop_CatOddLanes16x8:
+ case Iop_CatOddLanes32x4:
+ case Iop_CatEvenLanes8x16:
+ case Iop_CatEvenLanes16x8:
+ case Iop_CatEvenLanes32x4: {
+ HReg tmp = newVRegV(env);
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ UInt is_lo;
+ switch (e->Iex.Binop.op) {
+ case Iop_CatOddLanes8x16: is_lo = 1; size = 0; break;
+ case Iop_CatEvenLanes8x16: is_lo = 0; size = 0; break;
+ case Iop_CatOddLanes16x8: is_lo = 1; size = 1; break;
+ case Iop_CatEvenLanes16x8: is_lo = 0; size = 1; break;
+ case Iop_CatOddLanes32x4: is_lo = 1; size = 2; break;
+ case Iop_CatEvenLanes32x4: is_lo = 0; size = 2; break;
+ default:
+ ppIROp(e->Iex.Binop.op);
+ vpanic("Illegal element size in VUZP");
+ }
+ if (is_lo) {
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ tmp, argL, 4, True));
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ res, argR, 4, True));
+ addInstr(env, ARMInstr_NDual(ARMneon_UZP,
+ res, tmp, size, True));
+ } else {
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ tmp, argR, 4, True));
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ res, argL, 4, True));
+ addInstr(env, ARMInstr_NDual(ARMneon_UZP,
+ tmp, res, size, True));
+ }
+ return res;
+ }
+ case Iop_QAdd8Ux16:
+ case Iop_QAdd16Ux8:
+ case Iop_QAdd32Ux4:
+ case Iop_QAdd64Ux2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_QAdd8Ux16: size = 0; break;
+ case Iop_QAdd16Ux8: size = 1; break;
+ case Iop_QAdd32Ux4: size = 2; break;
+ case Iop_QAdd64Ux2: size = 3; break;
+ default:
+ ppIROp(e->Iex.Binop.op);
+ vpanic("Illegal element size in VQADDU");
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_QAdd8Sx16:
+ case Iop_QAdd16Sx8:
+ case Iop_QAdd32Sx4:
+ case Iop_QAdd64Sx2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_QAdd8Sx16: size = 0; break;
+ case Iop_QAdd16Sx8: size = 1; break;
+ case Iop_QAdd32Sx4: size = 2; break;
+ case Iop_QAdd64Sx2: size = 3; break;
+ default:
+ ppIROp(e->Iex.Binop.op);
+ vpanic("Illegal element size in VQADDS");
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Sub8x16:
+ case Iop_Sub16x8:
+ case Iop_Sub32x4:
+ case Iop_Sub64x2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Sub8x16: size = 0; break;
+ case Iop_Sub16x8: size = 1; break;
+ case Iop_Sub32x4: size = 2; break;
+ case Iop_Sub64x2: size = 3; break;
+ default:
+ ppIROp(e->Iex.Binop.op);
+ vpanic("Illegal element size in VSUB");
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Sub32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_QSub8Ux16:
+ case Iop_QSub16Ux8:
+ case Iop_QSub32Ux4:
+ case Iop_QSub64Ux2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_QSub8Ux16: size = 0; break;
+ case Iop_QSub16Ux8: size = 1; break;
+ case Iop_QSub32Ux4: size = 2; break;
+ case Iop_QSub64Ux2: size = 3; break;
+ default:
+ ppIROp(e->Iex.Binop.op);
+ vpanic("Illegal element size in VQSUBU");
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_QSub8Sx16:
+ case Iop_QSub16Sx8:
+ case Iop_QSub32Sx4:
+ case Iop_QSub64Sx2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_QSub8Sx16: size = 0; break;
+ case Iop_QSub16Sx8: size = 1; break;
+ case Iop_QSub32Sx4: size = 2; break;
+ case Iop_QSub64Sx2: size = 3; break;
+ default:
+ ppIROp(e->Iex.Binop.op);
+ vpanic("Illegal element size in VQSUBS");
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Max8Ux16:
+ case Iop_Max16Ux8:
+ case Iop_Max32Ux4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Max8Ux16: size = 0; break;
+ case Iop_Max16Ux8: size = 1; break;
+ case Iop_Max32Ux4: size = 2; break;
+ default: vpanic("Illegal element size in VMAXU");
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Max8Sx16:
+ case Iop_Max16Sx8:
+ case Iop_Max32Sx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Max8Sx16: size = 0; break;
+ case Iop_Max16Sx8: size = 1; break;
+ case Iop_Max32Sx4: size = 2; break;
+ default: vpanic("Illegal element size in VMAXU");
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Min8Ux16:
+ case Iop_Min16Ux8:
+ case Iop_Min32Ux4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Min8Ux16: size = 0; break;
+ case Iop_Min16Ux8: size = 1; break;
+ case Iop_Min32Ux4: size = 2; break;
+ default: vpanic("Illegal element size in VMAXU");
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Min8Sx16:
+ case Iop_Min16Sx8:
+ case Iop_Min32Sx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Min8Sx16: size = 0; break;
+ case Iop_Min16Sx8: size = 1; break;
+ case Iop_Min32Sx4: size = 2; break;
+ default: vpanic("Illegal element size in VMAXU");
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Sar8x16:
+ case Iop_Sar16x8:
+ case Iop_Sar32x4:
+ case Iop_Sar64x2: {
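+ /* NEON has no right shift by a register; negate the shift counts
+ and use the signed shift-by-register form, which treats negative
+ counts as right shifts. */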
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ HReg argR2 = newVRegV(env);
+ HReg zero = newVRegV(env);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Sar8x16: size = 0; break;
+ case Iop_Sar16x8: size = 1; break;
+ case Iop_Sar32x4: size = 2; break;
+ case Iop_Sar64x2: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
+ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
+ argR2, zero, argR, size, True));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
+ res, argL, argR2, size, True));
+ return res;
+ }
+ case Iop_Sal8x16:
+ case Iop_Sal16x8:
+ case Iop_Sal32x4:
+ case Iop_Sal64x2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Sal8x16: size = 0; break;
+ case Iop_Sal16x8: size = 1; break;
+ case Iop_Sal32x4: size = 2; break;
+ case Iop_Sal64x2: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Shr8x16:
+ case Iop_Shr16x8:
+ case Iop_Shr32x4:
+ case Iop_Shr64x2: {
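+ /* As for Iop_Sar* above: negate the counts and use the unsigned
+ (logical) shift-by-register form. */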
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ HReg argR2 = newVRegV(env);
+ HReg zero = newVRegV(env);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Shr8x16: size = 0; break;
+ case Iop_Shr16x8: size = 1; break;
+ case Iop_Shr32x4: size = 2; break;
+ case Iop_Shr64x2: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
+ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
+ argR2, zero, argR, size, True));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
+ res, argL, argR2, size, True));
+ return res;
+ }
+ case Iop_Shl8x16:
+ case Iop_Shl16x8:
+ case Iop_Shl32x4:
+ case Iop_Shl64x2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Shl8x16: size = 0; break;
+ case Iop_Shl16x8: size = 1; break;
+ case Iop_Shl32x4: size = 2; break;
+ case Iop_Shl64x2: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_QShl8x16:
+ case Iop_QShl16x8:
+ case Iop_QShl32x4:
+ case Iop_QShl64x2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_QShl8x16: size = 0; break;
+ case Iop_QShl16x8: size = 1; break;
+ case Iop_QShl32x4: size = 2; break;
+ case Iop_QShl64x2: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_QSal8x16:
+ case Iop_QSal16x8:
+ case Iop_QSal32x4:
+ case Iop_QSal64x2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_QSal8x16: size = 0; break;
+ case Iop_QSal16x8: size = 1; break;
+ case Iop_QSal32x4: size = 2; break;
+ case Iop_QSal64x2: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_QShlN8x16:
+ case Iop_QShlN16x8:
+ case Iop_QShlN32x4:
+ case Iop_QShlN64x2: {
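+ /* The lane width and the shift amount are packed together into the
+ 'size' operand (width | imm); the emitter recovers both when
+ forming the immediate-shift encoding. */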
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ UInt size, imm;
+ if (e->Iex.Binop.arg2->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
+ vpanic("ARM taget supports Iop_QShlNAxB with constant "
+ "second argument only\n");
+ }
+ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
+ switch (e->Iex.Binop.op) {
+ case Iop_QShlN8x16: size = 8 | imm; break;
+ case Iop_QShlN16x8: size = 16 | imm; break;
+ case Iop_QShlN32x4: size = 32 | imm; break;
+ case Iop_QShlN64x2: size = 64 | imm; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
+ res, argL, size, True));
+ return res;
+ }
+ case Iop_QShlN8Sx16:
+ case Iop_QShlN16Sx8:
+ case Iop_QShlN32Sx4:
+ case Iop_QShlN64Sx2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ UInt size, imm;
+ if (e->Iex.Binop.arg2->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
+ vpanic("ARM taget supports Iop_QShlNASxB with constant "
+ "second argument only\n");
+ }
+ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
+ switch (e->Iex.Binop.op) {
+ case Iop_QShlN8Sx16: size = 8 | imm; break;
+ case Iop_QShlN16Sx8: size = 16 | imm; break;
+ case Iop_QShlN32Sx4: size = 32 | imm; break;
+ case Iop_QShlN64Sx2: size = 64 | imm; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
+ res, argL, size, True));
+ return res;
+ }
+ case Iop_QSalN8x16:
+ case Iop_QSalN16x8:
+ case Iop_QSalN32x4:
+ case Iop_QSalN64x2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ UInt size, imm;
+ if (e->Iex.Binop.arg2->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
+ vpanic("ARM taget supports Iop_QShlNAxB with constant "
+ "second argument only\n");
+ }
+ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
+ switch (e->Iex.Binop.op) {
+ case Iop_QSalN8x16: size = 8 | imm; break;
+ case Iop_QSalN16x8: size = 16 | imm; break;
+ case Iop_QSalN32x4: size = 32 | imm; break;
+ case Iop_QSalN64x2: size = 64 | imm; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
+ res, argL, size, True));
+ return res;
+ }
+ case Iop_ShrN8x16:
+ case Iop_ShrN16x8:
+ case Iop_ShrN32x4:
+ case Iop_ShrN64x2: {
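+ /* Shift by a scalar amount: negate it in an integer register,
+ duplicate it across a vector register, and reuse the
+ shift-by-vector form. */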
+ HReg res = newVRegV(env);
+ HReg tmp = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ HReg argR2 = newVRegI(env);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_ShrN8x16: size = 0; break;
+ case Iop_ShrN16x8: size = 1; break;
+ case Iop_ShrN32x4: size = 2; break;
+ case Iop_ShrN64x2: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
+ addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
+ tmp, argR2, 0, True));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
+ res, argL, tmp, size, True));
+ return res;
+ }
+ case Iop_ShlN8x16:
+ case Iop_ShlN16x8:
+ case Iop_ShlN32x4:
+ case Iop_ShlN64x2: {
+ HReg res = newVRegV(env);
+ HReg tmp = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_ShlN8x16: size = 0; break;
+ case Iop_ShlN16x8: size = 1; break;
+ case Iop_ShlN32x4: size = 2; break;
+ case Iop_ShlN64x2: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
+ res, argL, tmp, size, True));
+ return res;
+ }
+ case Iop_SarN8x16:
+ case Iop_SarN16x8:
+ case Iop_SarN32x4:
+ case Iop_SarN64x2: {
+ HReg res = newVRegV(env);
+ HReg tmp = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ HReg argR2 = newVRegI(env);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_SarN8x16: size = 0; break;
+ case Iop_SarN16x8: size = 1; break;
+ case Iop_SarN32x4: size = 2; break;
+ case Iop_SarN64x2: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
+ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
+ res, argL, tmp, size, True));
+ return res;
+ }
+ case Iop_CmpGT8Ux16:
+ case Iop_CmpGT16Ux8:
+ case Iop_CmpGT32Ux4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_CmpGT8Ux16: size = 0; break;
+ case Iop_CmpGT16Ux8: size = 1; break;
+ case Iop_CmpGT32Ux4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_CmpGT8Sx16:
+ case Iop_CmpGT16Sx8:
+ case Iop_CmpGT32Sx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_CmpGT8Sx16: size = 0; break;
+ case Iop_CmpGT16Sx8: size = 1; break;
+ case Iop_CmpGT32Sx4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_CmpEQ8x16:
+ case Iop_CmpEQ16x8:
+ case Iop_CmpEQ32x4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_CmpEQ8x16: size = 0; break;
+ case Iop_CmpEQ16x8: size = 1; break;
+ case Iop_CmpEQ32x4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Mul8x16:
+ case Iop_Mul16x8:
+ case Iop_Mul32x4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_Mul8x16: size = 0; break;
+ case Iop_Mul16x8: size = 1; break;
+ case Iop_Mul32x4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Mul32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Mull8Ux8:
+ case Iop_Mull16Ux4:
+ case Iop_Mull32Ux2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_Mull8Ux8: size = 0; break;
+ case Iop_Mull16Ux4: size = 1; break;
+ case Iop_Mull32Ux2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
+ res, argL, argR, size, True));
+ return res;
+ }
+
+ case Iop_Mull8Sx8:
+ case Iop_Mull16Sx4:
+ case Iop_Mull32Sx2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_Mull8Sx8: size = 0; break;
+ case Iop_Mull16Sx4: size = 1; break;
+ case Iop_Mull32Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
+ res, argL, argR, size, True));
+ return res;
+ }
+
+ case Iop_QDMulHi16Sx8:
+ case Iop_QDMulHi32Sx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_QDMulHi16Sx8: size = 1; break;
+ case Iop_QDMulHi32Sx4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
+ res, argL, argR, size, True));
+ return res;
+ }
+
+ case Iop_QRDMulHi16Sx8:
+ case Iop_QRDMulHi32Sx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_QRDMulHi16Sx8: size = 1; break;
+ case Iop_QRDMulHi32Sx4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
+ res, argL, argR, size, True));
+ return res;
+ }
+
+ case Iop_QDMulLong16Sx4:
+ case Iop_QDMulLong32Sx2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_QDMulLong16Sx4: size = 1; break;
+ case Iop_QDMulLong32Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_PolynomialMul8x16: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Max32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
+ res, argL, argR, 2, True));
+ return res;
+ }
+ case Iop_Min32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
+ res, argL, argR, 2, True));
+ return res;
+ }
+ case Iop_PwMax32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
+ res, argL, argR, 2, True));
+ return res;
+ }
+ case Iop_PwMin32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
+ res, argL, argR, 2, True));
+ return res;
+ }
+ case Iop_CmpGT32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
+ res, argL, argR, 2, True));
+ return res;
+ }
+ case Iop_CmpGE32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
+ res, argL, argR, 2, True));
+ return res;
+ }
+ case Iop_CmpEQ32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
+ res, argL, argR, 2, True));
+ return res;
+ }
+
+ case Iop_PolynomialMull8x8: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_F32ToFixed32Ux4_RZ:
+ case Iop_F32ToFixed32Sx4_RZ:
+ case Iop_Fixed32UToF32x4_RN:
+ case Iop_Fixed32SToF32x4_RN: {
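+ /* In the VCVT fixed-point forms the number of fraction bits is
+ encoded as 64 - imm6, hence the adjustment below. */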
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
+ ARMNeonUnOp op;
+ UInt imm6;
+ if (e->Iex.Binop.arg2->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
+ vpanic("ARM supports FP <-> Fixed conversion with constant "
+ "second argument less than 33 only\n");
+ }
+ imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
+ vassert(imm6 <= 32 && imm6 > 0);
+ imm6 = 64 - imm6;
+ switch(e->Iex.Binop.op) {
+ case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
+ case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
+ case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
+ case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
+ return res;
+ }
+ /*
+ FIXME remove if not used
+ case Iop_VDup8x16:
+ case Iop_VDup16x8:
+ case Iop_VDup32x4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ UInt imm4;
+ UInt index;
+ if (e->Iex.Binop.arg2->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
+ vpanic("ARM supports Iop_VDup with constant "
+ "second argument less than 16 only\n");
+ }
+ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
+ switch(e->Iex.Binop.op) {
+ case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
+ case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
+ case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
+ default: vassert(0);
+ }
+ if (imm4 >= 16) {
+ vpanic("ARM supports Iop_VDup with constant "
+ "second argument less than 16 only\n");
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
+ res, argL, imm4, True));
+ return res;
+ }
+ */
+ case Iop_PwAdd8x16:
+ case Iop_PwAdd16x8:
+ case Iop_PwAdd32x4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_PwAdd8x16: size = 0; break;
+ case Iop_PwAdd16x8: size = 1; break;
+ case Iop_PwAdd32x4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
+ res, argL, argR, size, True));
+ return res;
+ }
+ /* ... */
+ default:
+ break;
+ }
+ }
+
+ if (e->tag == Iex_Triop) {
+ switch (e->Iex.Triop.op) {
+ case Iop_ExtractV128: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Triop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Triop.arg2);
+ UInt imm4;
+ if (e->Iex.Triop.arg3->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Triop.arg3) != Ity_I8) {
+ vpanic("ARM target supports Iop_ExtractV128 with constant "
+ "third argument less than 16 only\n");
+ }
+ imm4 = e->Iex.Triop.arg3->Iex.Const.con->Ico.U8;
+ if (imm4 >= 16) {
+ vpanic("ARM target supports Iop_ExtractV128 with constant "
+ "third argument less than 16 only\n");
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
+ res, argL, argR, imm4, True));
+ return res;
+ }
+ default:
+ break;
+ }
+ }
+
+ if (e->tag == Iex_Mux0X) {
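+ /* Mux0X: start with exprX in the destination, then conditionally
+ overwrite it with expr0 when (cond & 0xFF) == 0. */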
+ HReg r8;
+ HReg rX = iselNeonExpr(env, e->Iex.Mux0X.exprX);
+ HReg r0 = iselNeonExpr(env, e->Iex.Mux0X.expr0);
+ HReg dst = newVRegV(env);
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, rX, 4, True));
+ r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
+ addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
+ ARMRI84_I84(0xFF,0)));
+ addInstr(env, ARMInstr_NCMovQ(ARMcc_EQ, dst, r0));
+ return dst;
+ }
+
+ neon_expr_bad:
+ ppIRExpr(e);
+ vpanic("iselNeonExpr_wrk");
+}
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Floating point expressions (64 bit) ---*/
+/*---------------------------------------------------------*/
+
+/* Compute a 64-bit floating point value into a register, the identity
+ of which is returned. As with iselIntExpr_R, the reg may be either
+ real or virtual; in any case it must not be changed by subsequent
+ code emitted by the caller. */
+
+static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
+{
+ HReg r = iselDblExpr_wrk( env, e );
+# if 0
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+# endif
+ vassert(hregClass(r) == HRcFlt64);
+ vassert(hregIsVirtual(r));
+ return r;
+}
+
+/* DO NOT CALL THIS DIRECTLY */
+static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(e);
+ vassert(ty == Ity_F64);
+
+ if (e->tag == Iex_RdTmp) {
+ return lookupIRTemp(env, e->Iex.RdTmp.tmp);
+ }
+
+ if (e->tag == Iex_Const) {
+ /* Just handle the zero case. */
+ IRConst* con = e->Iex.Const.con;
+ if (con->tag == Ico_F64i && con->Ico.F64i == 0ULL) {
+ HReg z32 = newVRegI(env);
+ HReg dst = newVRegD(env);
+ addInstr(env, ARMInstr_Imm32(z32, 0));
+ addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, z32, z32));
+ return dst;
+ }
+ }
+
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
+ ARMAModeV* am;
+ HReg res = newVRegD(env);
+ vassert(e->Iex.Load.ty == Ity_F64);
+ am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
+ addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
+ return res;
+ }
+
+ if (e->tag == Iex_Get) {
+ // XXX This won't work if offset > 1020 or is not 0 % 4.
+ // In which case we'll have to generate more longwinded code.
+ ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
+ HReg res = newVRegD(env);
+ addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
+ return res;
+ }
+
+ if (e->tag == Iex_Unop) {
+ switch (e->Iex.Unop.op) {
+ case Iop_ReinterpI64asF64: {
+ if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ return iselNeon64Expr(env, e->Iex.Unop.arg);
+ } else {
+ HReg srcHi, srcLo;
+ HReg dst = newVRegD(env);
+ iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
+ return dst;
+ }
+ }
+ case Iop_NegF64: {
+ HReg src = iselDblExpr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegD(env);
+ addInstr(env, ARMInstr_VUnaryD(ARMvfpu_NEG, dst, src));
+ return dst;
+ }
+ case Iop_AbsF64: {
+ HReg src = iselDblExpr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegD(env);
+ addInstr(env, ARMInstr_VUnaryD(ARMvfpu_ABS, dst, src));
+ return dst;
+ }
+ case Iop_F32toF64: {
+ HReg src = iselFltExpr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegD(env);
+ addInstr(env, ARMInstr_VCvtSD(True/*sToD*/, dst, src));
+ return dst;
+ }
+ case Iop_I32UtoF64:
+ case Iop_I32StoF64: {
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ HReg f32 = newVRegF(env);
+ HReg dst = newVRegD(env);
+ Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
+ /* VMOV f32, src */
+ addInstr(env, ARMInstr_VXferS(True/*toS*/, f32, src));
+ /* FSITOD dst, f32 */
+ addInstr(env, ARMInstr_VCvtID(True/*iToD*/, syned,
+ dst, f32));
+ return dst;
+ }
+ default:
+ break;
+ }
+ }
+
+ if (e->tag == Iex_Binop) {
+ switch (e->Iex.Binop.op) {
+ case Iop_SqrtF64: {
+ /* first arg is rounding mode; we ignore it. */
+ HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegD(env);
+ addInstr(env, ARMInstr_VUnaryD(ARMvfpu_SQRT, dst, src));
+ return dst;
+ }
+ default:
+ break;
+ }
+ }
+
+ if (e->tag == Iex_Triop) {
+ switch (e->Iex.Triop.op) {
+ case Iop_DivF64:
+ case Iop_MulF64:
+ case Iop_AddF64:
+ case Iop_SubF64: {
+ ARMVfpOp op = 0; /*INVALID*/
+ HReg argL = iselDblExpr(env, e->Iex.Triop.arg2);
+ HReg argR = iselDblExpr(env, e->Iex.Triop.arg3);
+ HReg dst = newVRegD(env);
+ switch (e->Iex.Triop.op) {
+ case Iop_DivF64: op = ARMvfp_DIV; break;
+ case Iop_MulF64: op = ARMvfp_MUL; break;
+ case Iop_AddF64: op = ARMvfp_ADD; break;
+ case Iop_SubF64: op = ARMvfp_SUB; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_VAluD(op, dst, argL, argR));
+ return dst;
+ }
+ default:
+ break;
+ }
+ }
+
+ if (e->tag == Iex_Mux0X) {
+ if (ty == Ity_F64
+ && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
+ HReg r8;
+ HReg rX = iselDblExpr(env, e->Iex.Mux0X.exprX);
+ HReg r0 = iselDblExpr(env, e->Iex.Mux0X.expr0);
+ HReg dst = newVRegD(env);
+ addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, rX));
+ r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
+ addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
+ ARMRI84_I84(0xFF,0)));
+ addInstr(env, ARMInstr_VCMovD(ARMcc_EQ, dst, r0));
+ return dst;
+ }
+ }
+
+ ppIRExpr(e);
+ vpanic("iselDblExpr_wrk");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Floating point expressions (32 bit) ---*/
+/*---------------------------------------------------------*/
+
+/* Compute a 32-bit floating point value into a register, the identity
+ of which is returned. As with iselIntExpr_R, the reg may be either
+ real or virtual; in any case it must not be changed by subsequent
+ code emitted by the caller. */
+
+static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
+{
+ HReg r = iselFltExpr_wrk( env, e );
+# if 0
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+# endif
+ vassert(hregClass(r) == HRcFlt32);
+ vassert(hregIsVirtual(r));
+ return r;
+}
+
+/* DO NOT CALL THIS DIRECTLY */
+static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(e);
+ vassert(ty == Ity_F32);
+
+ if (e->tag == Iex_RdTmp) {
+ return lookupIRTemp(env, e->Iex.RdTmp.tmp);
+ }
+
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
+ ARMAModeV* am;
+ HReg res = newVRegF(env);
+ vassert(e->Iex.Load.ty == Ity_F32);
+ am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
+ addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
+ return res;
+ }
+
+ if (e->tag == Iex_Get) {
+ // XXX This won't work if offset > 1020 or is not 0 % 4.
+ // In which case we'll have to generate more longwinded code.
+ ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
+ HReg res = newVRegF(env);
+ addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
+ return res;
+ }
+
+ if (e->tag == Iex_Unop) {
switch (e->Iex.Unop.op) {
case Iop_ReinterpI32asF32: {
HReg dst = newVRegF(env);
return;
}
if (tyd == Ity_I64) {
- HReg rDhi, rDlo, rA;
- iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
- rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
- addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDhi,
- ARMAMode1_RI(rA,4)));
- addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDlo,
- ARMAMode1_RI(rA,0)));
+ if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
+ ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
+ addInstr(env, ARMInstr_NLdStD(False, dD, am));
+ } else {
+ HReg rDhi, rDlo, rA;
+ iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
+ rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
+ addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDhi,
+ ARMAMode1_RI(rA,4)));
+ addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDlo,
+ ARMAMode1_RI(rA,0)));
+ }
return;
}
if (tyd == Ity_F64) {
addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
return;
}
+ if (tyd == Ity_V128) {
+ HReg qD = iselNeonExpr(env, stmt->Ist.Store.data);
+ ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
+ addInstr(env, ARMInstr_NLdStQ(False, qD, am));
+ return;
+ }
break;
}
return;
}
if (tyd == Ity_I64) {
- HReg rDhi, rDlo;
- ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), stmt->Ist.Put.offset + 0);
- ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), stmt->Ist.Put.offset + 4);
- iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
- addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDhi, am4));
- addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDlo, am0));
+ if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ HReg addr = newVRegI(env);
+ HReg dD = iselNeon64Expr(env, stmt->Ist.Put.data);
+ addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
+ stmt->Ist.Put.offset));
+ addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
+ } else {
+ HReg rDhi, rDlo;
+ ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
+ stmt->Ist.Put.offset + 0);
+ ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
+ stmt->Ist.Put.offset + 4);
+ iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
+ addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDhi, am4));
+ addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDlo, am0));
+ }
return;
}
if (tyd == Ity_F64) {
addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
return;
}
+ if (tyd == Ity_V128) {
+ HReg addr = newVRegI(env);
+ HReg qD = iselNeonExpr(env, stmt->Ist.Put.data);
+ addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
+ stmt->Ist.Put.offset));
+ addInstr(env, ARMInstr_NLdStQ(False, qD, mkARMAModeN_R(addr)));
+ return;
+ }
break;
}
return;
}
if (ty == Ity_I64) {
- HReg rHi, rLo, dstHi, dstLo;
- iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
- lookupIRTemp64( &dstHi, &dstLo, env, tmp);
- addInstr(env, mk_iMOVds_RR(dstHi, rHi) );
- addInstr(env, mk_iMOVds_RR(dstLo, rLo) );
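+ /* Under NEON, 64-bit temporaries live in D registers, so the
+ assignment is a single NEON register copy rather than a pair
+ of 32-bit register moves. */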
+ if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data);
+ HReg dst = lookupIRTemp(env, tmp);
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False));
+ } else {
+ HReg rHi, rLo, dstHi, dstLo;
+ iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
+ lookupIRTemp64( &dstHi, &dstLo, env, tmp);
+ addInstr(env, mk_iMOVds_RR(dstHi, rHi) );
+ addInstr(env, mk_iMOVds_RR(dstLo, rLo) );
+ }
return;
}
if (ty == Ity_F64) {
addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, src));
return;
}
+ if (ty == Ity_V128) {
+ HReg src = iselNeonExpr(env, stmt->Ist.WrTmp.data);
+ HReg dst = lookupIRTemp(env, tmp);
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, True));
+ return;
+ }
break;
}
retty = typeOfIRTemp(env->type_env, d->tmp);
if (retty == Ity_I64) {
- HReg dstHi, dstLo;
- /* The returned value is in r1:r0. Park it in the
- register-pair associated with tmp. */
- lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
- addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R1()) );
- addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R0()) );
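+ /* Under NEON the destination temp is a D register, so move
+ the r1:r0 return pair into it with a core-to-FP transfer. */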
+ if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ HReg tmp = lookupIRTemp(env, d->tmp);
+ addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(),
+ hregARM_R0()));
+ } else {
+ HReg dstHi, dstLo;
+ /* The returned value is in r1:r0. Park it in the
+ register-pair associated with tmp. */
+ lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
+ addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R1()) );
+ addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R0()) );
+ }
return;
}
if (retty == Ity_I32 || retty == Ity_I16 || retty == Ity_I8) {
HReg hreg, hregHI;
ISelEnv* env;
UInt hwcaps_host = archinfo_host->hwcaps;
+ Bool neon = False;
+ static UInt counter = 0;
/* sanity ... */
vassert(arch_host == VexArchARM);
- vassert(0 == hwcaps_host);
+
+ /* hwcaps should not change from one ISEL call to another. */
+ arm_hwcaps = hwcaps_host;
/* Make up an initial environment to use. */
env = LibVEX_Alloc(sizeof(ISelEnv));
case Ity_I8:
case Ity_I16:
case Ity_I32: hreg = mkHReg(j++, HRcInt32, True); break;
- case Ity_I64: hregHI = mkHReg(j++, HRcInt32, True);
- hreg = mkHReg(j++, HRcInt32, True); break;
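+ /* With NEON, a 64-bit temp maps to a single D (HRcFlt64)
+ vreg; without it, to a pair of 32-bit integer vregs. */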
+ case Ity_I64:
+ if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ hreg = mkHReg(j++, HRcFlt64, True);
+ neon = True;
+ } else {
+ hregHI = mkHReg(j++, HRcInt32, True);
+ hreg = mkHReg(j++, HRcInt32, True);
+ }
+ break;
case Ity_F32: hreg = mkHReg(j++, HRcFlt32, True); break;
case Ity_F64: hreg = mkHReg(j++, HRcFlt64, True); break;
- //case Ity_V128: hreg = mkHReg(j++, HRcVec128, True); break;
+ case Ity_V128: hreg = mkHReg(j++, HRcVec128, True);
+ neon = True; break;
default: ppIRType(bb->tyenv->types[i]);
vpanic("iselBB: IRTemp type");
}
/* record the number of vregs we used. */
env->code->n_vregs = env->vreg_ctr;
+ counter++;
return env->code;
}