From 288e5f18ca9a848fec958ca551d184cf3e152d73 Mon Sep 17 00:00:00 2001
From: Julian Seward
Date: Thu, 5 May 2005 12:05:54 +0000
Subject: [PATCH] Play a few more rounds of the SSE game on amd64.

git-svn-id: svn://svn.valgrind.org/vex/trunk@1162
---
 VEX/priv/guest-amd64/toIR.c | 180 ++++++++++++++++++------------------
 VEX/priv/host-amd64/hdefs.c |  91 +++++++++---------
 VEX/priv/host-amd64/hdefs.h |  14 +--
 VEX/priv/host-amd64/isel.c  |  27 +++---
 4 files changed, 159 insertions(+), 153 deletions(-)

diff --git a/VEX/priv/guest-amd64/toIR.c b/VEX/priv/guest-amd64/toIR.c
index b85121ab80..24207c9f7b 100644
--- a/VEX/priv/guest-amd64/toIR.c
+++ b/VEX/priv/guest-amd64/toIR.c
@@ -9164,12 +9164,13 @@ DisResult disInstr ( /*IN*/ Bool resteerOK,
 //..    goto after_sse_decoders;
 //.. 
 //..    insn = (UChar*)&guest_code[delta];
-//.. 
-//..    /* 66 0F 58 = ADDPD -- add 32Fx4 from R/M to R */
-//..    if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x58) {
-//..       delta = dis_SSE_E_to_G_all( sorb, delta+2, "addpd", Iop_Add64Fx2 );
-//..       goto decode_success;
-//..    }
+
+   /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
+   if (have66noF2noF3(pfx) && sz == 2
+       && insn[0] == 0x0F && insn[1] == 0x58) {
+      delta = dis_SSE_E_to_G_all( pfx, delta+2, "addpd", Iop_Add64Fx2 );
+      goto decode_success;
+   }
 
    /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
    if (haveF2no66noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x58) {
@@ -9893,21 +9894,21 @@ DisResult disInstr ( /*IN*/ Bool resteerOK,
       goto decode_success;
    }
 
-//..    /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
-//..    if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x29) {
-//..       modrm = getUChar(delta+2);
-//..       if (epartIsReg(modrm)) {
-//..          /* fall through; awaiting test case */
-//..       } else {
-//..          addr = disAMode ( &alen, sorb, delta+2, dis_buf );
-//..          storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
-//..          DIP("movapd %s,%s\n", nameXMMReg(gregOfRM(modrm)),
-//..                                dis_buf );
-//..          delta += 2+alen;
-//..          goto decode_success;
-//..       }
-//..    }
-//.. 
+   /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
+   if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x29) {
+      modrm = getUChar(delta+2);
+      if (epartIsReg(modrm)) {
+         /* fall through; awaiting test case */
+      } else {
+         addr = disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+         storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
+         DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
+                               dis_buf );
+         delta += 2+alen;
+         goto decode_success;
+      }
+   }
+
 //..    /* 66 0F 6E = MOVD from r/m32 to xmm, zeroing high 3/4 of xmm. */
 //..    if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6E) {
 //..       modrm = getUChar(delta+2);
@@ -10027,43 +10028,43 @@ DisResult disInstr ( /*IN*/ Bool resteerOK,
 //..    } else {
 //..       /* fall through, apparently no mem case for this insn */
 //..    }
-//.. 
-//..    /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
-//..    /* These seems identical to MOVHPS. This instruction encoding is
-//..       completely crazy. */
-//..    if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x16) {
-//..       modrm = getUChar(delta+2);
-//..       if (epartIsReg(modrm)) {
-//..          /* fall through; apparently reg-reg is not possible */
-//..       } else {
-//..          addr = disAMode ( &alen, sorb, delta+2, dis_buf );
-//..          delta += 2+alen;
-//..          putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
-//..                           loadLE(Ity_I64, mkexpr(addr)) );
-//..          DIP("movhpd %s,%s\n", dis_buf,
-//..                                nameXMMReg( gregOfRM(modrm) ));
-//..          goto decode_success;
-//..       }
-//..    }
-//.. 
-//..    /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
-//..    /* Again, this seems identical to MOVHPS. */
-//..    if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x17) {
-//..       if (!epartIsReg(insn[2])) {
-//..          delta += 2;
-//..          addr = disAMode ( &alen, sorb, delta, dis_buf );
-//..          delta += alen;
-//..          storeLE( mkexpr(addr),
-//..                   getXMMRegLane64( gregOfRM(insn[2]),
-//..                                    1/*upper lane*/ ) );
-//..          DIP("movhpd %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
-//..                                dis_buf);
-//..          goto decode_success;
-//..       }
-//..       /* else fall through */
 //.. }
+   /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
+   /* This seems identical to MOVHPS. This instruction encoding is
+      completely crazy. */
+   if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x16) {
+      modrm = getUChar(delta+2);
+      if (epartIsReg(modrm)) {
+         /* fall through; apparently reg-reg is not possible */
+      } else {
+         addr = disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+         delta += 2+alen;
+         putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
+                          loadLE(Ity_I64, mkexpr(addr)) );
+         DIP("movhpd %s,%s\n", dis_buf,
+                               nameXMMReg( gregOfRexRM(pfx,modrm) ));
+         goto decode_success;
+      }
+   }
+
+   /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
+   /* Again, this seems identical to MOVHPS. */
+   if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x17) {
+      if (!epartIsReg(insn[2])) {
+         delta += 2;
+         addr = disAMode ( &alen, pfx, delta, dis_buf, 0 );
+         delta += alen;
+         storeLE( mkexpr(addr),
+                  getXMMRegLane64( gregOfRexRM(pfx,insn[2]),
+                                   1/*upper lane*/ ) );
+         DIP("movhpd %s,%s\n", nameXMMReg( gregOfRexRM(pfx,insn[2]) ),
+                               dis_buf);
+         goto decode_success;
+      }
+      /* else fall through */
+   }
+
    /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
    /* Identical to MOVLPS ? */
    if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x12) {
@@ -10082,23 +10083,23 @@ DisResult disInstr ( /*IN*/ Bool resteerOK,
       }
    }
 
-//..    /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
-//..    /* Identical to MOVLPS ? */
-//..    if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x13) {
-//..       if (!epartIsReg(insn[2])) {
-//..          delta += 2;
-//..          addr = disAMode ( &alen, sorb, delta, dis_buf );
-//..          delta += alen;
-//..          storeLE( mkexpr(addr),
-//..                   getXMMRegLane64( gregOfRM(insn[2]),
-//..                                    0/*lower lane*/ ) );
-//..          DIP("movlpd %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ),
-//..                                 dis_buf);
-//..          goto decode_success;
-//..       }
-//..       /* else fall through */
-//..    }
-//.. 
+   /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
+   /* Identical to MOVLPS ? */
+   if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x13) {
+      modrm = getUChar(delta+2);
+      if (!epartIsReg(modrm)) {
+         addr = disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+         delta += 2+alen;
+         storeLE( mkexpr(addr),
+                  getXMMRegLane64( gregOfRexRM(pfx,modrm),
+                                   0/*lower lane*/ ) );
+         DIP("movlpd %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
+                                dis_buf);
+         goto decode_success;
+      }
+      /* else fall through */
+   }
+
 //..    /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
 //..       2 lowest bits of ireg(G) */
 //..    if (insn[0] == 0x0F && insn[1] == 0x50) {
@@ -10237,11 +10238,12 @@ DisResult disInstr ( /*IN*/ Bool resteerOK,
       }
    }
 
-//..    /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
-//..    if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x59) {
-//..       delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulpd", Iop_Mul64Fx2 );
-//..       goto decode_success;
-//..    }
+   /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
+   if (have66noF2noF3(pfx) && sz == 2
+       && insn[0] == 0x0F && insn[1] == 0x59) {
+      delta = dis_SSE_E_to_G_all( pfx, delta+2, "mulpd", Iop_Mul64Fx2 );
+      goto decode_success;
+   }
 
    /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
    if (haveF2no66noF3(pfx) && sz == 4
       && insn[0] == 0x0F && insn[1] == 0x59) {
@@ -10321,11 +10323,12 @@ DisResult disInstr ( /*IN*/ Bool resteerOK,
      goto decode_success;
    }
 
-//..    /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
-//..    if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5C) {
-//..       delta = dis_SSE_E_to_G_all( sorb, delta+2, "subpd", Iop_Sub64Fx2 );
-//..       goto decode_success;
-//..    }
+   /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
+   if (have66noF2noF3(pfx) && sz == 2
+       && insn[0] == 0x0F && insn[1] == 0x5C) {
+      delta = dis_SSE_E_to_G_all( pfx, delta+2, "subpd", Iop_Sub64Fx2 );
+      goto decode_success;
+   }
 
    /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
    if (haveF2no66noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x5C) {
@@ -11836,11 +11839,12 @@ DisResult disInstr ( /*IN*/ Bool resteerOK,
       goto decode_failure;
    }
 
-//..    /* ------------------------ opl imm, A ----------------- */
-//.. 
-//..    case 0x04: /* ADD Ib, AL */
-//..       delta = dis_op_imm_A( 1, Iop_Add8, True, delta, "add" );
-//..       break;
+   /* ------------------------ opl imm, A ----------------- */
+
+   case 0x04: /* ADD Ib, AL */
+      if (haveF2orF3(pfx)) goto decode_failure;
+      delta = dis_op_imm_A( 1, Iop_Add8, True, delta, "add" );
+      break;
    case 0x05: /* ADD Iv, eAX */
       if (haveF2orF3(pfx)) goto decode_failure;
       delta = dis_op_imm_A(sz, Iop_Add8, True, delta, "add" );
diff --git a/VEX/priv/host-amd64/hdefs.c b/VEX/priv/host-amd64/hdefs.c
index 67a3bdc87a..42ae32c8e2 100644
--- a/VEX/priv/host-amd64/hdefs.c
+++ b/VEX/priv/host-amd64/hdefs.c
@@ -966,15 +966,15 @@ AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp op, HReg src, HReg dst ) {
    vassert(op != Asse_MOV);
    return i;
 }
-//.. AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp op, HReg src, HReg dst ) {
-//..    AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
-//..    i->tag = Xin_Sse64Fx2;
-//..    i->Xin.Sse64Fx2.op = op;
-//..    i->Xin.Sse64Fx2.src = src;
-//..    i->Xin.Sse64Fx2.dst = dst;
-//..    vassert(op != Xsse_MOV);
-//..    return i;
-//.. }
+AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp op, HReg src, HReg dst ) {
+   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+   i->tag = Ain_Sse64Fx2;
+   i->Ain.Sse64Fx2.op = op;
+   i->Ain.Sse64Fx2.src = src;
+   i->Ain.Sse64Fx2.dst = dst;
+   vassert(op != Asse_MOV);
+   return i;
+}
 AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp op, HReg src, HReg dst ) {
    AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
    i->tag = Ain_Sse64FLo;
@@ -1279,12 +1279,12 @@ void ppAMD64Instr ( AMD64Instr* i )
          vex_printf(",");
         ppHRegAMD64(i->Ain.Sse32FLo.dst);
          return;
-//..       case Xin_Sse64Fx2:
-//..          vex_printf("%spd ", showAMD64SseOp(i->Xin.Sse64Fx2.op));
-//..          ppHRegAMD64(i->Xin.Sse64Fx2.src);
-//..          vex_printf(",");
-//..          ppHRegAMD64(i->Xin.Sse64Fx2.dst);
-//..          return;
+      case Ain_Sse64Fx2:
+         vex_printf("%spd ", showAMD64SseOp(i->Ain.Sse64Fx2.op));
+         ppHRegAMD64(i->Ain.Sse64Fx2.src);
+         vex_printf(",");
+         ppHRegAMD64(i->Ain.Sse64Fx2.dst);
+         return;
       case Ain_Sse64FLo:
          vex_printf("%ssd ", showAMD64SseOp(i->Ain.Sse64FLo.op));
          ppHRegAMD64(i->Ain.Sse64FLo.src);
@@ -1537,15 +1537,15 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, AMD64Instr* i )
          addHRegUse(u, unary ? HRmWrite : HRmModify,
                        i->Ain.Sse32FLo.dst);
          return;
-//..       case Xin_Sse64Fx2:
-//..          vassert(i->Xin.Sse64Fx2.op != Xsse_MOV);
-//..          unary = i->Xin.Sse64Fx2.op == Xsse_RCPF
-//..                  || i->Xin.Sse64Fx2.op == Xsse_RSQRTF
-//..                  || i->Xin.Sse64Fx2.op == Xsse_SQRTF;
-//..          addHRegUse(u, HRmRead, i->Xin.Sse64Fx2.src);
-//..          addHRegUse(u, unary ? HRmWrite : HRmModify,
-//..                        i->Xin.Sse64Fx2.dst);
-//..          return;
+      case Ain_Sse64Fx2:
+         vassert(i->Ain.Sse64Fx2.op != Asse_MOV);
+         unary = i->Ain.Sse64Fx2.op == Asse_RCPF
+                 || i->Ain.Sse64Fx2.op == Asse_RSQRTF
+                 || i->Ain.Sse64Fx2.op == Asse_SQRTF;
+         addHRegUse(u, HRmRead, i->Ain.Sse64Fx2.src);
+         addHRegUse(u, unary ? HRmWrite : HRmModify,
+                       i->Ain.Sse64Fx2.dst);
+         return;
       case Ain_Sse64FLo:
          vassert(i->Ain.Sse64FLo.op != Asse_MOV);
          unary = toBool( i->Ain.Sse64FLo.op == Asse_RCPF
@@ -1721,10 +1721,10 @@ void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i )
          mapReg(m, &i->Ain.Sse32FLo.src);
          mapReg(m, &i->Ain.Sse32FLo.dst);
          return;
-//..       case Xin_Sse64Fx2:
-//..          mapReg(m, &i->Xin.Sse64Fx2.src);
-//..          mapReg(m, &i->Xin.Sse64Fx2.dst);
-//..          return;
+      case Ain_Sse64Fx2:
+         mapReg(m, &i->Ain.Sse64Fx2.src);
+         mapReg(m, &i->Ain.Sse64Fx2.dst);
+         return;
       case Ain_Sse64FLo:
          mapReg(m, &i->Ain.Sse64FLo.src);
         mapReg(m, &i->Ain.Sse64FLo.dst);
@@ -3100,30 +3100,33 @@ Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i )
          *p++ = toUChar(xtra & 0xFF);
       goto done;
 
-//..    case Xin_Sse64Fx2:
-//..       xtra = 0;
-//..       *p++ = 0x66;
-//..       *p++ = 0x0F;
-//..       switch (i->Xin.Sse64Fx2.op) {
-//..          case Xsse_ADDF: *p++ = 0x58; break;
+   case Ain_Sse64Fx2:
+      xtra = 0;
+      *p++ = 0x66;
+      *p++ = clearWBit(
+             rexAMode_R( vreg2ireg(i->Ain.Sse64Fx2.dst),
+                         vreg2ireg(i->Ain.Sse64Fx2.src) ));
+      *p++ = 0x0F;
+      switch (i->Ain.Sse64Fx2.op) {
+         case Asse_ADDF:   *p++ = 0x58; break;
 //..       case Xsse_DIVF:   *p++ = 0x5E; break;
 //..       case Xsse_MAXF:   *p++ = 0x5F; break;
 //..       case Xsse_MINF:   *p++ = 0x5D; break;
-//..          case Xsse_MULF: *p++ = 0x59; break;
+         case Asse_MULF:   *p++ = 0x59; break;
 //..       case Xsse_RCPF:   *p++ = 0x53; break;
 //..       case Xsse_RSQRTF: *p++ = 0x52; break;
 //..       case Xsse_SQRTF:  *p++ = 0x51; break;
-//..          case Xsse_SUBF: *p++ = 0x5C; break;
+         case Asse_SUBF:   *p++ = 0x5C; break;
 //..       case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
 //..       case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
 //..       case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
-//..          default: goto bad;
-//..       }
-//..       p = doAMode_R(p, fake(vregNo(i->Xin.Sse64Fx2.dst)),
-//..                        fake(vregNo(i->Xin.Sse64Fx2.src)) );
-//..       if (xtra & 0x100)
-//..          *p++ = (UChar)(xtra & 0xFF);
-//..       goto done;
+         default: goto bad;
+      }
+      p = doAMode_R(p, vreg2ireg(i->Ain.Sse64Fx2.dst),
+                       vreg2ireg(i->Ain.Sse64Fx2.src) );
+      if (xtra & 0x100)
+         *p++ = (UChar)(xtra & 0xFF);
+      goto done;
 
    case Ain_Sse32FLo:
       xtra = 0;
@@ -3144,7 +3147,7 @@ Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i )
          case Asse_SUBF:   *p++ = 0x5C; break;
 //..       case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
          case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
-//..       case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
+         case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
          default: goto bad;
       }
       p = doAMode_R(p, vreg2ireg(i->Ain.Sse32FLo.dst),
diff --git a/VEX/priv/host-amd64/hdefs.h b/VEX/priv/host-amd64/hdefs.h
index 9f1854c7c6..d7d5186aff 100644
--- a/VEX/priv/host-amd64/hdefs.h
+++ b/VEX/priv/host-amd64/hdefs.h
@@ -393,7 +393,7 @@ typedef
       Ain_SseLdzLO,    /* SSE load low 32/64 bits, zero remainder of reg */
       Ain_Sse32Fx4,    /* SSE binary, 32Fx4 */
       Ain_Sse32FLo,    /* SSE binary, 32F in lowest lane only */
-//..       Xin_Sse64Fx2,    /* SSE binary, 64Fx2 */
+      Ain_Sse64Fx2,    /* SSE binary, 64Fx2 */
       Ain_Sse64FLo,    /* SSE binary, 64F in lowest lane only */
       Ain_SseReRg,     /* SSE binary general reg-reg, Re, Rg */
       Ain_SseCMov,     /* SSE conditional move */
@@ -620,11 +620,11 @@ typedef
             HReg src;
             HReg dst;
          } Sse32FLo;
-//..       struct {
-//..          X86SseOp op;
-//..          HReg src;
-//..          HReg dst;
-//..       } Sse64Fx2;
+         struct {
+            AMD64SseOp op;
+            HReg src;
+            HReg dst;
+         } Sse64Fx2;
          struct {
             AMD64SseOp op;
             HReg src;
@@ -691,7 +691,7 @@ extern AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz, HReg, AMD64AMode*
 extern AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg, AMD64AMode* );
 extern AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp, HReg, HReg );
 extern AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp, HReg, HReg );
-//.. extern AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp, HReg, HReg );
+extern AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp, HReg, HReg );
 extern AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp, HReg, HReg );
 extern AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp, HReg, HReg );
 extern AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode, HReg src, HReg dst );
diff --git a/VEX/priv/host-amd64/isel.c b/VEX/priv/host-amd64/isel.c
index 8b1de2c692..2ddd8c919c 100644
--- a/VEX/priv/host-amd64/isel.c
+++ b/VEX/priv/host-amd64/isel.c
@@ -3272,26 +3272,25 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
 //..       case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2;
 //..       case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2;
 //..       case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2;
-//..       case Iop_Add64Fx2:   op = Xsse_ADDF;   goto do_64Fx2;
+      case Iop_Add64Fx2:   op = Asse_ADDF;   goto do_64Fx2;
 //..       case Iop_Div64Fx2:   op = Xsse_DIVF;   goto do_64Fx2;
 //..       case Iop_Max64Fx2:   op = Xsse_MAXF;   goto do_64Fx2;
 //..       case Iop_Min64Fx2:   op = Xsse_MINF;   goto do_64Fx2;
-//..       case Iop_Mul64Fx2:   op = Xsse_MULF;   goto do_64Fx2;
-//..       case Iop_Sub64Fx2:   op = Xsse_SUBF;   goto do_64Fx2;
-//..       do_64Fx2:
-//..       {
-//..          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
-//..          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
-//..          HReg dst = newVRegV(env);
-//..          REQUIRE_SSE2;
-//..          addInstr(env, mk_vMOVsd_RR(argL, dst));
-//..          addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
-//..          return dst;
-//..       }
+      case Iop_Mul64Fx2:   op = Asse_MULF;   goto do_64Fx2;
+      case Iop_Sub64Fx2:   op = Asse_SUBF;   goto do_64Fx2;
+      do_64Fx2:
+      {
+         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
+         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
+         HReg dst = newVRegV(env);
+         addInstr(env, mk_vMOVsd_RR(argL, dst));
+         addInstr(env, AMD64Instr_Sse64Fx2(op, argR, dst));
+         return dst;
+      }
 
 //..       case Iop_CmpEQ32F0x4: op = Xsse_CMPEQF; goto do_32F0x4;
       case Iop_CmpLT32F0x4: op = Asse_CMPLTF; goto do_32F0x4;
-//..       case Iop_CmpLE32F0x4: op = Xsse_CMPLEF; goto do_32F0x4;
+      case Iop_CmpLE32F0x4: op = Asse_CMPLEF; goto do_32F0x4;
       case Iop_Add32F0x4:   op = Asse_ADDF;   goto do_32F0x4;
       case Iop_Div32F0x4:   op = Asse_DIVF;   goto do_32F0x4;
       case Iop_Max32F0x4:   op = Asse_MAXF;   goto do_32F0x4;
-- 
2.47.3
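
A quick way to exercise the packed-double paths enabled above is to run a
small SSE2 program under Valgrind. The program below is not part of the
patch; it is a minimal sketch built on the standard <emmintrin.h>
intrinsics, which ordinarily compile to the ADDPD/SUBPD/MULPD arithmetic
forms and the MOVAPD/MOVHPD/MOVLPD moves now decoded in toIR.c. Compilers
are free to pick other encodings, so it is worth confirming the generated
instructions with objdump -d before treating a clean run as coverage of
these decoders.

#include <emmintrin.h>
#include <stdio.h>

int main ( void )
{
   /* 16-byte alignment so _mm_load_pd/_mm_store_pd (MOVAPD) are legal. */
   double mem[2] __attribute__((aligned(16))) = { 1.5, -2.25 };
   double hi, lo;

   __m128d a = _mm_set_pd( 4.0, 3.0 );   /* lanes: lo=3.0, hi=4.0  */
   __m128d b = _mm_load_pd( mem );       /* MOVAPD load            */

   __m128d s = _mm_add_pd( a, b );       /* ADDPD                  */
   __m128d d = _mm_sub_pd( a, b );       /* SUBPD                  */
   __m128d m = _mm_mul_pd( a, b );       /* MULPD                  */

   s = _mm_loadh_pd( s, &mem[0] );       /* MOVHPD, mem to hi lane */
   _mm_storeh_pd( &hi, m );              /* MOVHPD, hi lane to mem */
   _mm_storel_pd( &lo, d );              /* MOVLPD, lo lane to mem */
   _mm_store_pd( mem, s );               /* MOVAPD store           */

   printf("mem = { %g, %g }, hi = %g, lo = %g\n", mem[0], mem[1], hi, lo);
   return 0;
}

On amd64, where SSE2 is baseline, "gcc -O test.c" suffices; under a VEX
with this patch applied, these instructions should now disassemble rather
than falling through to decode_failure.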
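
On the back-end side, the byte layout the new Ain_Sse64Fx2 case of
emit_AMD64Instr produces for a reg-reg operation is: 66, REX (with the W
bit cleared, per clearWBit), 0F, opcode, ModRM. The helper below is a
hypothetical standalone re-encoding of that layout, handy for comparing
against an assembler's output; emit_pd_rr and its plain 0..15 register
numbering are inventions for illustration, not VEX API. One deliberate
difference: the patch writes the REX byte unconditionally, while this
sketch omits a redundant 0x40.

#include <stdio.h>

typedef unsigned char UChar;

/* Encode "66 [REX] 0F <opc> <modrm>" for a reg-reg packed-double op:
   opc = 0x58 ADDPD, 0x59 MULPD, 0x5C SUBPD.  dst/src are XMM register
   numbers 0..15; bit 3 of each lands in REX.R/REX.B, and REX.W stays 0. */
static UChar* emit_pd_rr ( UChar* p, UChar opc, int dst, int src )
{
   UChar rex = 0x40 | (UChar)(((dst >> 3) & 1) << 2)   /* REX.R = dst[3] */
                    | (UChar)((src >> 3) & 1);         /* REX.B = src[3] */
   *p++ = 0x66;                  /* operand-size prefix selects PD forms */
   if (rex != 0x40) *p++ = rex;  /* REX only when a high register needs it */
   *p++ = 0x0F;
   *p++ = opc;
   *p++ = (UChar)(0xC0 | ((dst & 7) << 3) | (src & 7)); /* mod=11 ModRM */
   return p;
}

int main ( void )
{
   UChar buf[16], *p;
   UChar* end = emit_pd_rr(buf, 0x58, 1, 10);   /* addpd %xmm10,%xmm1 */
   for (p = buf; p < end; p++) printf("%02x ", *p);
   printf("\n");                                /* expect: 66 41 0f 58 ca */
   return 0;
}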