From: Julian Seward Date: Mon, 14 Feb 2011 13:44:28 +0000 (+0000) Subject: Merge from trunk, r2082 (Add support for SSE4.2 CRC32{B,W,L,Q}.) X-Git-Tag: svn/VALGRIND_3_6_1^2~6 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e5fb04d99b6b93eb22370734e48604a17dcfb2c1;p=thirdparty%2Fvalgrind.git Merge from trunk, r2082 (Add support for SSE4.2 CRC32{B,W,L,Q}.) git-svn-id: svn://svn.valgrind.org/vex/branches/VEX_3_6_BRANCH@2097 --- diff --git a/VEX/priv/guest_amd64_defs.h b/VEX/priv/guest_amd64_defs.h index 42451fafa9..74ce1e6153 100644 --- a/VEX/priv/guest_amd64_defs.h +++ b/VEX/priv/guest_amd64_defs.h @@ -137,6 +137,10 @@ extern ULong amd64g_calculate_mmx_psadbw ( ULong, ULong ); extern ULong amd64g_calculate_mmx_pmovmskb ( ULong ); extern ULong amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ); +extern ULong amd64g_calc_crc32b ( ULong crcIn, ULong b ); +extern ULong amd64g_calc_crc32w ( ULong crcIn, ULong w ); +extern ULong amd64g_calc_crc32l ( ULong crcIn, ULong l ); +extern ULong amd64g_calc_crc32q ( ULong crcIn, ULong q ); /* --- DIRTY HELPERS --- */ diff --git a/VEX/priv/guest_amd64_helpers.c b/VEX/priv/guest_amd64_helpers.c index a920ecd0d7..c4917fda7a 100644 --- a/VEX/priv/guest_amd64_helpers.c +++ b/VEX/priv/guest_amd64_helpers.c @@ -2563,6 +2563,43 @@ ULong amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ) return ((rHi8 & 0xFF) << 8) | (rLo8 & 0xFF); } +/* CALLED FROM GENERATED CODE: CLEAN HELPER. Folds the low 8 bits of b into crcIn: XORs them in, then performs 8 shift-right-by-one steps, XORing in 0x82f63b78 each time the bit shifted out is 1 (presumably the bit-reflected CRC-32C polynomial of the SSE4.2 CRC32 instruction; confirm against the Intel SDM). crcIn is not masked here; the toIR.c caller in this patch passes it ANDed with 0xFFFFFFFF. */ +ULong amd64g_calc_crc32b ( ULong crcIn, ULong b ) +{ + UInt i; + ULong crc = (b & 0xFFULL) ^ crcIn; + for (i = 0; i < 8; i++) + crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0); + return crc; +} + +/* CALLED FROM GENERATED CODE: CLEAN HELPER. Same scheme as amd64g_calc_crc32b, but XORs in the low 16 bits of w and runs 16 shift/XOR steps. crcIn is used unmasked. */ +ULong amd64g_calc_crc32w ( ULong crcIn, ULong w ) +{ + UInt i; + ULong crc = (w & 0xFFFFULL) ^ crcIn; + for (i = 0; i < 16; i++) + crc = (crc >> 1) ^ ((crc & 1) ? 
0x82f63b78ULL : 0); + return crc; +} + +/* CALLED FROM GENERATED CODE: CLEAN HELPER. XORs the low 32 bits of l into crcIn, then runs 32 shift-right-by-one steps, XORing in 0x82f63b78 each time the bit shifted out is 1. crcIn is used unmasked, so any bits above bit 31 in it would be shifted down into the result. */ +ULong amd64g_calc_crc32l ( ULong crcIn, ULong l ) +{ + UInt i; + ULong crc = (l & 0xFFFFFFFFULL) ^ crcIn; + for (i = 0; i < 32; i++) + crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0); + return crc; +} + +/* CALLED FROM GENERATED CODE: CLEAN HELPER. Handles a 64-bit operand as two chained amd64g_calc_crc32l calls: first with q (that helper uses only the low 32 bits of its second argument), then with q >> 32 applied to the intermediate result. */ +ULong amd64g_calc_crc32q ( ULong crcIn, ULong q ) +{ + ULong crc = amd64g_calc_crc32l(crcIn, q); + return amd64g_calc_crc32l(crc, q >> 32); +} + /*---------------------------------------------------------------*/ /*--- Helpers for SSE4.2 PCMP{E,I}STR{I,M} ---*/ diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c index 53df72a6b3..442acd3ec5 100644 --- a/VEX/priv/guest_amd64_toIR.c +++ b/VEX/priv/guest_amd64_toIR.c @@ -750,6 +750,13 @@ static Bool haveF3noF2 ( Prefix pfx ) toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F3); } +/* Return True iff pfx has F2 set and F3 clear; only the PFX_F2 and PFX_F3 bits of pfx are examined. */ +static Bool haveF2noF3 ( Prefix pfx ) +{ + return + toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F2); +} + /* Return True iff pfx has 66, F2 and F3 clear */ static Bool haveNo66noF2noF3 ( Prefix pfx ) { @@ -15850,6 +15857,68 @@ DisResult disInstr_AMD64_WRK ( goto decode_success; } + /* F2 0F 38 F0 /r = CRC32 r/m8, r32 (REX.W ok, 66 not ok) + F2 0F 38 F1 /r = CRC32 r/m{16,32,64}, r32 + The decoding on this is a bit unusual. + NOTE(review): both DIP traces below print the mnemonic "crc32b" regardless of sz, and the switch on sz has no default case (the vassert(nm && fn) after it catches an unmatched sz). + */ + if (haveF2noF3(pfx) + && insn[0] == 0x0F && insn[1] == 0x38 + && (insn[2] == 0xF1 + || (insn[2] == 0xF0 && !have66(pfx)))) { + modrm = insn[3]; + + if (insn[2] == 0xF0) + sz = 1; + else + vassert(sz == 2 || sz == 4 || sz == 8); + + IRType tyE = szToITy(sz); + IRTemp valE = newTemp(tyE); + + if (epartIsReg(modrm)) { + assign(valE, getIRegE(sz, pfx, modrm)); + delta += 3+1; + DIP("crc32b %s,%s\n", nameIRegE(sz, pfx, modrm), + nameIRegG(1==getRexW(pfx) ? 
8 : 4 ,pfx, modrm)); + } else { + addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); + assign(valE, loadLE(tyE, mkexpr(addr))); + delta += 3+alen; + DIP("crc32b %s,%s\n", dis_buf, + nameIRegG(1==getRexW(pfx) ? 8 : 4 ,pfx, modrm)); + } + + /* Somewhat funny getting/putting of the crc32 value, in order + to ensure that it turns into 64-bit gets and puts. However, + mask off the upper 32 bits so as to not get memcheck false + +ves around the helper call. The helper result is narrowed with + Iop_64to32 before being written back via putIRegG(4, ...). */ + IRTemp valG0 = newTemp(Ity_I64); + assign(valG0, binop(Iop_And64, getIRegG(8, pfx, modrm), + mkU64(0xFFFFFFFF))); + + HChar* nm = NULL; + void* fn = NULL; + switch (sz) { + case 1: nm = "amd64g_calc_crc32b"; + fn = &amd64g_calc_crc32b; break; + case 2: nm = "amd64g_calc_crc32w"; + fn = &amd64g_calc_crc32w; break; + case 4: nm = "amd64g_calc_crc32l"; + fn = &amd64g_calc_crc32l; break; + case 8: nm = "amd64g_calc_crc32q"; + fn = &amd64g_calc_crc32q; break; + } + vassert(nm && fn); + IRTemp valG1 = newTemp(Ity_I64); + assign(valG1, + mkIRExprCCall(Ity_I64, 0/*regparm*/, nm, fn, + mkIRExprVec_2(mkexpr(valG0), + widenUto64(mkexpr(valE))))); + + putIRegG(4, pfx, modrm, unop(Iop_64to32, mkexpr(valG1))); + goto decode_success; + } + /* ---------------------------------------------------- */ /* --- end of the SSE4 decoder --- */ /* ---------------------------------------------------- */