From: Julian Seward <jseward@acm.org>
Date: Sun, 27 May 2012 16:18:13 +0000 (+0000)
Subject: Remove, or (where it might later come in handy) comment out artefacts
X-Git-Tag: svn/VALGRIND_3_8_1^2~123
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d6a5e7a23cf48ddcf6b913af8b005f661febcb56;p=thirdparty%2Fvalgrind.git

Remove, or (where it might later come in handy) comment out artefacts
for 256 bit (AVX) code generation on amd64.  Although that was the
plan at first, it turns out to be infeasible to generate 256 bit
instructions for the IR created by Memcheck's instrumentation of 256
bit Ity_V256 IR.  This is because it would require 256 bit integer
SIMD operations, and AVX as currently available only provides 256 bit
operations for floating point.  So, fall back to mapping 256 bit IR
onto 128-bit XMM register pairs, and using the existing SSE facilities
in the back end.  This change only affects the amd64 back end -- it
does not affect IR, which remains unchanged, and capable of
representing 256 bit vector operations wherever needed.


git-svn-id: svn://svn.valgrind.org/vex/trunk@2355
---

diff --git a/VEX/priv/host_amd64_defs.c b/VEX/priv/host_amd64_defs.c
index e2bba42e6a..d0eda6276f 100644
--- a/VEX/priv/host_amd64_defs.c
+++ b/VEX/priv/host_amd64_defs.c
@@ -72,11 +72,6 @@ void ppHRegAMD64 ( HReg reg )
          vassert(r >= 0 && r < 16);
          vex_printf("%%xmm%d", r);
          return;
-      case HRcVec256:
-         r = hregNumber(reg);
-         vassert(r >= 0 && r < 16);
-         vex_printf("%%ymm%d", r);
-         return;
       default:
          vpanic("ppHRegAMD64");
    }
@@ -136,11 +131,6 @@ HReg hregAMD64_XMM10 ( void ) { return mkHReg(10, HRcVec128, False); }
 HReg hregAMD64_XMM11 ( void ) { return mkHReg(11, HRcVec128, False); }
 HReg hregAMD64_XMM12 ( void ) { return mkHReg(12, HRcVec128, False); }
 
-HReg hregAMD64_YMM2  ( void ) { return mkHReg( 2, HRcVec256, False); }
-HReg hregAMD64_YMM13 ( void ) { return mkHReg(13, HRcVec256, False); }
-HReg hregAMD64_YMM14 ( void ) { return mkHReg(14, HRcVec256, False); }
-HReg hregAMD64_YMM15 ( void ) { return mkHReg(15, HRcVec256, False); }
-
 
 void getAllocableRegs_AMD64 ( Int* nregs, HReg** arr )
 {
@@ -986,23 +976,23 @@ AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst ) {
    vassert(order >= 0 && order <= 0xFF);
    return i;
 }
-AMD64Instr* AMD64Instr_AvxLdSt ( Bool isLoad,
-                                 HReg reg, AMD64AMode* addr ) {
-   AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
-   i->tag                = Ain_AvxLdSt;
-   i->Ain.AvxLdSt.isLoad = isLoad;
-   i->Ain.AvxLdSt.reg    = reg;
-   i->Ain.AvxLdSt.addr   = addr;
-   return i;
-}
-AMD64Instr* AMD64Instr_AvxReRg ( AMD64SseOp op, HReg re, HReg rg ) {
-   AMD64Instr* i      = LibVEX_Alloc(sizeof(AMD64Instr));
-   i->tag             = Ain_AvxReRg;
-   i->Ain.AvxReRg.op  = op;
-   i->Ain.AvxReRg.src = re;
-   i->Ain.AvxReRg.dst = rg;
-   return i;
-}
+//uu AMD64Instr* AMD64Instr_AvxLdSt ( Bool isLoad,
+//uu                                  HReg reg, AMD64AMode* addr ) {
+//uu    AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
+//uu    i->tag                = Ain_AvxLdSt;
+//uu    i->Ain.AvxLdSt.isLoad = isLoad;
+//uu    i->Ain.AvxLdSt.reg    = reg;
+//uu    i->Ain.AvxLdSt.addr   = addr;
+//uu    return i;
+//uu }
+//uu AMD64Instr* AMD64Instr_AvxReRg ( AMD64SseOp op, HReg re, HReg rg ) {
+//uu    AMD64Instr* i      = LibVEX_Alloc(sizeof(AMD64Instr));
+//uu    i->tag             = Ain_AvxReRg;
+//uu    i->Ain.AvxReRg.op  = op;
+//uu    i->Ain.AvxReRg.src = re;
+//uu    i->Ain.AvxReRg.dst = rg;
+//uu    return i;
+//uu }
 AMD64Instr* AMD64Instr_EvCheck ( AMD64AMode* amCounter,
                                  AMD64AMode* amFailAddr ) {
    AMD64Instr* i             = LibVEX_Alloc(sizeof(AMD64Instr));
@@ -1298,25 +1288,24 @@ void ppAMD64Instr ( AMD64Instr* i, Bool mode64 )
          vex_printf(",");
          ppHRegAMD64(i->Ain.SseShuf.dst);
          return;
-
-      case Ain_AvxLdSt:
-         vex_printf("vmovups ");
-         if (i->Ain.AvxLdSt.isLoad) {
-            ppAMD64AMode(i->Ain.AvxLdSt.addr);
-            vex_printf(",");
-            ppHRegAMD64(i->Ain.AvxLdSt.reg);
-         } else {
-            ppHRegAMD64(i->Ain.AvxLdSt.reg);
-            vex_printf(",");
-            ppAMD64AMode(i->Ain.AvxLdSt.addr);
-         }
-         return;
-      case Ain_AvxReRg:
-         vex_printf("v%s ", showAMD64SseOp(i->Ain.SseReRg.op));
-         ppHRegAMD64(i->Ain.AvxReRg.src);
-         vex_printf(",");
-         ppHRegAMD64(i->Ain.AvxReRg.dst);
-         return;
+      //uu case Ain_AvxLdSt:
+      //uu    vex_printf("vmovups ");
+      //uu    if (i->Ain.AvxLdSt.isLoad) {
+      //uu       ppAMD64AMode(i->Ain.AvxLdSt.addr);
+      //uu       vex_printf(",");
+      //uu       ppHRegAMD64(i->Ain.AvxLdSt.reg);
+      //uu    } else {
+      //uu       ppHRegAMD64(i->Ain.AvxLdSt.reg);
+      //uu       vex_printf(",");
+      //uu       ppAMD64AMode(i->Ain.AvxLdSt.addr);
+      //uu    }
+      //uu    return;
+      //uu case Ain_AvxReRg:
+      //uu    vex_printf("v%s ", showAMD64SseOp(i->Ain.SseReRg.op));
+      //uu    ppHRegAMD64(i->Ain.AvxReRg.src);
+      //uu    vex_printf(",");
+      //uu    ppHRegAMD64(i->Ain.AvxReRg.dst);
+      //uu    return;
       case Ain_EvCheck:
          vex_printf("(evCheck) decl ");
          ppAMD64AMode(i->Ain.EvCheck.amCounter);
@@ -1402,7 +1391,7 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, AMD64Instr* i, Bool mode64 )
          /* First off, claim it trashes all the caller-saved regs
             which fall within the register allocator's jurisdiction.
             These I believe to be: rax rcx rdx rsi rdi r8 r9 r10 r11 
-            and all the xmm/ymm registers.
+            and all the xmm registers.
          */
          addHRegUse(u, HRmWrite, hregAMD64_RAX());
          addHRegUse(u, HRmWrite, hregAMD64_RCX());
@@ -1425,10 +1414,6 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, AMD64Instr* i, Bool mode64 )
          addHRegUse(u, HRmWrite, hregAMD64_XMM10());
          addHRegUse(u, HRmWrite, hregAMD64_XMM11());
          addHRegUse(u, HRmWrite, hregAMD64_XMM12());
-         addHRegUse(u, HRmWrite, hregAMD64_YMM2());
-         addHRegUse(u, HRmWrite, hregAMD64_YMM13());
-         addHRegUse(u, HRmWrite, hregAMD64_YMM14());
-         addHRegUse(u, HRmWrite, hregAMD64_YMM15());
 
          /* Now we have to state any parameter-carrying registers
             which might be read.  This depends on the regparmness. */
@@ -1609,24 +1594,24 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, AMD64Instr* i, Bool mode64 )
          addHRegUse(u, HRmRead,  i->Ain.SseShuf.src);
          addHRegUse(u, HRmWrite, i->Ain.SseShuf.dst);
          return;
-      case Ain_AvxLdSt:
-         addRegUsage_AMD64AMode(u, i->Ain.AvxLdSt.addr);
-         addHRegUse(u, i->Ain.AvxLdSt.isLoad ? HRmWrite : HRmRead,
-                       i->Ain.AvxLdSt.reg);
-         return;
-      case Ain_AvxReRg:
-         if ( (i->Ain.AvxReRg.op == Asse_XOR
-               || i->Ain.AvxReRg.op == Asse_CMPEQ32)
-              && i->Ain.AvxReRg.src == i->Ain.AvxReRg.dst) {
-            /* See comments on the case for Ain_SseReRg. */
-            addHRegUse(u, HRmWrite, i->Ain.AvxReRg.dst);
-         } else {
-            addHRegUse(u, HRmRead, i->Ain.AvxReRg.src);
-            addHRegUse(u, i->Ain.AvxReRg.op == Asse_MOV 
-                             ? HRmWrite : HRmModify, 
-                          i->Ain.AvxReRg.dst);
-         }
-         return;
+      //uu case Ain_AvxLdSt:
+      //uu addRegUsage_AMD64AMode(u, i->Ain.AvxLdSt.addr);
+      //uu addHRegUse(u, i->Ain.AvxLdSt.isLoad ? HRmWrite : HRmRead,
+      //uu               i->Ain.AvxLdSt.reg);
+      //uu return;
+      //uu case Ain_AvxReRg:
+      //uu    if ( (i->Ain.AvxReRg.op == Asse_XOR
+      //uu          || i->Ain.AvxReRg.op == Asse_CMPEQ32)
+      //uu         && i->Ain.AvxReRg.src == i->Ain.AvxReRg.dst) {
+      //uu       /* See comments on the case for Ain_SseReRg. */
+      //uu       addHRegUse(u, HRmWrite, i->Ain.AvxReRg.dst);
+      //uu    } else {
+      //uu       addHRegUse(u, HRmRead, i->Ain.AvxReRg.src);
+      //uu       addHRegUse(u, i->Ain.AvxReRg.op == Asse_MOV 
+      //uu                        ? HRmWrite : HRmModify, 
+      //uu                     i->Ain.AvxReRg.dst);
+      //uu    }
+      //uu    return;
       case Ain_EvCheck:
          /* We expect both amodes only to mention %rbp, so this is in
             fact pointless, since %rbp isn't allocatable, but anyway.. */
@@ -1802,14 +1787,14 @@ void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 )
          mapReg(m, &i->Ain.SseShuf.src);
          mapReg(m, &i->Ain.SseShuf.dst);
          return;
-      case Ain_AvxLdSt:
-         mapReg(m, &i->Ain.AvxLdSt.reg);
-         mapRegs_AMD64AMode(m, i->Ain.AvxLdSt.addr);
-         break;
-      case Ain_AvxReRg:
-         mapReg(m, &i->Ain.AvxReRg.src);
-         mapReg(m, &i->Ain.AvxReRg.dst);
-         return;
+      //uu case Ain_AvxLdSt:
+      //uu    mapReg(m, &i->Ain.AvxLdSt.reg);
+      //uu    mapRegs_AMD64AMode(m, i->Ain.AvxLdSt.addr);
+      //uu    break;
+      //uu case Ain_AvxReRg:
+      //uu    mapReg(m, &i->Ain.AvxReRg.src);
+      //uu    mapReg(m, &i->Ain.AvxReRg.dst);
+      //uu    return;
       case Ain_EvCheck:
          /* We expect both amodes only to mention %rbp, so this is in
             fact pointless, since %rbp isn't allocatable, but anyway.. */
@@ -1848,13 +1833,13 @@ Bool isMove_AMD64Instr ( AMD64Instr* i, HReg* src, HReg* dst )
          *src = i->Ain.SseReRg.src;
          *dst = i->Ain.SseReRg.dst;
          return True;
-      case Ain_AvxReRg:
-         /* Moves between AVX regs */
-         if (i->Ain.AvxReRg.op != Asse_MOV)
-            return False;
-         *src = i->Ain.AvxReRg.src;
-         *dst = i->Ain.AvxReRg.dst;
-         return True;
+      //uu case Ain_AvxReRg:
+      //uu    /* Moves between AVX regs */
+      //uu    if (i->Ain.AvxReRg.op != Asse_MOV)
+      //uu       return False;
+      //uu    *src = i->Ain.AvxReRg.src;
+      //uu    *dst = i->Ain.AvxReRg.dst;
+      //uu    return True;
       default:
          return False;
    }
@@ -1882,9 +1867,6 @@ void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
       case HRcVec128:
          *i1 = AMD64Instr_SseLdSt ( False/*store*/, 16, rreg, am );
          return;
-      case HRcVec256:
-         *i1 = AMD64Instr_AvxLdSt ( False/*store*/, rreg, am );
-         return;
       default: 
          ppHRegClass(hregClass(rreg));
          vpanic("genSpill_AMD64: unimplemented regclass");
@@ -1907,9 +1889,6 @@ void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
       case HRcVec128:
          *i1 = AMD64Instr_SseLdSt ( True/*load*/, 16, rreg, am );
          return;
-      case HRcVec256:
-         *i1 = AMD64Instr_AvxLdSt ( True/*load*/, rreg, am );
-         return;
       default: 
          ppHRegClass(hregClass(rreg));
          vpanic("genReload_AMD64: unimplemented regclass");
@@ -1967,16 +1946,16 @@ static UInt vreg2ireg ( HReg r )
    return mkHReg(n, HRcInt64, False);
 }
 
-/* Ditto for ymm regs. */
-static UInt dvreg2ireg ( HReg r )
-{
-   UInt n;
-   vassert(hregClass(r) == HRcVec256);
-   vassert(!hregIsVirtual(r));
-   n = hregNumber(r);
-   vassert(n <= 15);
-   return mkHReg(n, HRcInt64, False);
-}
+//uu /* Ditto for ymm regs. */
+//uu static UInt dvreg2ireg ( HReg r )
+//uu {
+//uu    UInt n;
+//uu    vassert(hregClass(r) == HRcVec256);
+//uu    vassert(!hregIsVirtual(r));
+//uu    n = hregNumber(r);
+//uu    vassert(n <= 15);
+//uu    return mkHReg(n, HRcInt64, False);
+//uu }
 
 static UChar mkModRegRM ( UChar mod, UChar reg, UChar regmem )
 {
@@ -2180,81 +2159,85 @@ static UChar rexAMode_R ( HReg greg, HReg ereg )
 }
 
 
-/* Assemble a 2 or 3 byte VEX prefix from parts.  rexR, rexX, rexB and
-   notVvvvv need to be not-ed before packing.  mmmmm, rexW, L and pp go
-   in verbatim.  There's no range checking on the bits. */
-static UInt packVexPrefix ( UInt rexR, UInt rexX, UInt rexB,
-                            UInt mmmmm, UInt rexW, UInt notVvvv,
-                            UInt L, UInt pp )
-{
-   UChar byte0 = 0;
-   UChar byte1 = 0;
-   UChar byte2 = 0;
-   if (rexX == 0 && rexB == 0 && mmmmm == 1 && rexW == 0) {
-      /* 2 byte encoding is possible. */
-      byte0 = 0xC5;
-      byte1 = ((rexR ^ 1) << 7) | ((notVvvv ^ 0xF) << 3) 
-              | (L << 2) | pp;
-   } else {
-      /* 3 byte encoding is needed. */
-      byte0 = 0xC4;
-      byte1 = ((rexR ^ 1) << 7) | ((rexX ^ 1) << 6)
-              | ((rexB ^ 1) << 5) | mmmmm;
-      byte2 = (rexW << 7) | ((notVvvv ^ 0xF) << 3) | (L << 2) | pp;
-   }
-   return (((UInt)byte2) << 16) | (((UInt)byte1) << 8) | ((UInt)byte0);
-}
-
-/* Make up a VEX prefix for a (greg,amode) pair.  First byte in bits
-   7:0 of result, second in 15:8, third (for a 3 byte prefix) in
-   23:16.  Has m-mmmm set to indicate a prefix of 0F, pp set to
-   indicate no SIMD prefix, W=0 (ignore), L=1 (size=256), and
-   vvvv=1111 (unused 3rd reg). */
-static UInt vexAMode_M ( HReg greg, AMD64AMode* am )
-{
-   UChar L       = 1; /* size = 256 */
-   UChar pp      = 0; /* no SIMD prefix */
-   UChar mmmmm   = 1; /* 0F */
-   UChar notVvvv = 0; /* unused */
-   UChar rexW    = 0;
-   UChar rexR    = 0;
-   UChar rexX    = 0;
-   UChar rexB    = 0;
-   /* Same logic as in rexAMode_M. */
-   if (am->tag == Aam_IR) {
-      rexR = iregBit3(greg);
-      rexX = 0; /* not relevant */
-      rexB = iregBit3(am->Aam.IR.reg);
-   }
-   else if (am->tag == Aam_IRRS) {
-      rexR = iregBit3(greg);
-      rexX = iregBit3(am->Aam.IRRS.index);
-      rexB = iregBit3(am->Aam.IRRS.base);
-   } else {
-      vassert(0);
-   }
-   return packVexPrefix( rexR, rexX, rexB, mmmmm, rexW, notVvvv, L, pp );
-}
-
-static UChar* emitVexPrefix ( UChar* p, UInt vex )
-{
-   switch (vex & 0xFF) {
-      case 0xC5:
-         *p++ = 0xC5;
-         *p++ = (vex >> 8) & 0xFF;
-         vassert(0 == (vex >> 16));
-         break;
-      case 0xC4:
-         *p++ = 0xC4;
-         *p++ = (vex >> 8) & 0xFF;
-         *p++ = (vex >> 16) & 0xFF;
-         vassert(0 == (vex >> 24));
-         break;
-      default:
-         vassert(0);
-   }
-   return p;
-}
+//uu /* May 2012: this VEX prefix stuff is currently unused, but has been
+//uu    verified correct (I reckon).  Certainly it has been known to
+//uu    produce correct VEX prefixes during testing. */
+//uu 
+//uu /* Assemble a 2 or 3 byte VEX prefix from parts.  rexR, rexX, rexB and
+//uu    notVvvv need to be not-ed before packing.  mmmmm, rexW, L and pp go
+//uu    in verbatim.  There's no range checking on the bits. */
+//uu static UInt packVexPrefix ( UInt rexR, UInt rexX, UInt rexB,
+//uu                             UInt mmmmm, UInt rexW, UInt notVvvv,
+//uu                             UInt L, UInt pp )
+//uu {
+//uu    UChar byte0 = 0;
+//uu    UChar byte1 = 0;
+//uu    UChar byte2 = 0;
+//uu    if (rexX == 0 && rexB == 0 && mmmmm == 1 && rexW == 0) {
+//uu       /* 2 byte encoding is possible. */
+//uu       byte0 = 0xC5;
+//uu       byte1 = ((rexR ^ 1) << 7) | ((notVvvv ^ 0xF) << 3) 
+//uu               | (L << 2) | pp;
+//uu    } else {
+//uu       /* 3 byte encoding is needed. */
+//uu       byte0 = 0xC4;
+//uu       byte1 = ((rexR ^ 1) << 7) | ((rexX ^ 1) << 6)
+//uu               | ((rexB ^ 1) << 5) | mmmmm;
+//uu       byte2 = (rexW << 7) | ((notVvvv ^ 0xF) << 3) | (L << 2) | pp;
+//uu    }
+//uu    return (((UInt)byte2) << 16) | (((UInt)byte1) << 8) | ((UInt)byte0);
+//uu }
+//uu 
+//uu /* Make up a VEX prefix for a (greg,amode) pair.  First byte in bits
+//uu    7:0 of result, second in 15:8, third (for a 3 byte prefix) in
+//uu    23:16.  Has m-mmmm set to indicate a prefix of 0F, pp set to
+//uu    indicate no SIMD prefix, W=0 (ignore), L=1 (size=256), and
+//uu    vvvv=1111 (unused 3rd reg). */
+//uu static UInt vexAMode_M ( HReg greg, AMD64AMode* am )
+//uu {
+//uu    UChar L       = 1; /* size = 256 */
+//uu    UChar pp      = 0; /* no SIMD prefix */
+//uu    UChar mmmmm   = 1; /* 0F */
+//uu    UChar notVvvv = 0; /* unused */
+//uu    UChar rexW    = 0;
+//uu    UChar rexR    = 0;
+//uu    UChar rexX    = 0;
+//uu    UChar rexB    = 0;
+//uu    /* Same logic as in rexAMode_M. */
+//uu    if (am->tag == Aam_IR) {
+//uu       rexR = iregBit3(greg);
+//uu       rexX = 0; /* not relevant */
+//uu       rexB = iregBit3(am->Aam.IR.reg);
+//uu    }
+//uu    else if (am->tag == Aam_IRRS) {
+//uu       rexR = iregBit3(greg);
+//uu       rexX = iregBit3(am->Aam.IRRS.index);
+//uu       rexB = iregBit3(am->Aam.IRRS.base);
+//uu    } else {
+//uu       vassert(0);
+//uu    }
+//uu    return packVexPrefix( rexR, rexX, rexB, mmmmm, rexW, notVvvv, L, pp );
+//uu }
+//uu 
+//uu static UChar* emitVexPrefix ( UChar* p, UInt vex )
+//uu {
+//uu    switch (vex & 0xFF) {
+//uu       case 0xC5:
+//uu          *p++ = 0xC5;
+//uu          *p++ = (vex >> 8) & 0xFF;
+//uu          vassert(0 == (vex >> 16));
+//uu          break;
+//uu       case 0xC4:
+//uu          *p++ = 0xC4;
+//uu          *p++ = (vex >> 8) & 0xFF;
+//uu          *p++ = (vex >> 16) & 0xFF;
+//uu          vassert(0 == (vex >> 24));
+//uu          break;
+//uu       default:
+//uu          vassert(0);
+//uu    }
+//uu    return p;
+//uu }
 
 
 /* Emit ffree %st(N) */
@@ -3432,14 +3415,14 @@ Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
       *p++ = (UChar)(i->Ain.SseShuf.order);
       goto done;
 
-   case Ain_AvxLdSt: {
-      UInt vex = vexAMode_M( dvreg2ireg(i->Ain.AvxLdSt.reg),
-                             i->Ain.AvxLdSt.addr );
-      p = emitVexPrefix(p, vex);
-      *p++ = toUChar(i->Ain.AvxLdSt.isLoad ? 0x10 : 0x11);
-      p = doAMode_M(p, dvreg2ireg(i->Ain.AvxLdSt.reg), i->Ain.AvxLdSt.addr);
-      goto done;
-   }
+   //uu case Ain_AvxLdSt: {
+   //uu    UInt vex = vexAMode_M( dvreg2ireg(i->Ain.AvxLdSt.reg),
+   //uu                           i->Ain.AvxLdSt.addr );
+   //uu    p = emitVexPrefix(p, vex);
+   //uu    *p++ = toUChar(i->Ain.AvxLdSt.isLoad ? 0x10 : 0x11);
+   //uu    p = doAMode_M(p, dvreg2ireg(i->Ain.AvxLdSt.reg), i->Ain.AvxLdSt.addr);
+   //uu      goto done;
+   //uu }
 
    case Ain_EvCheck: {
       /* We generate:
diff --git a/VEX/priv/host_amd64_defs.h b/VEX/priv/host_amd64_defs.h
index 5e32dadbca..8861c4cf34 100644
--- a/VEX/priv/host_amd64_defs.h
+++ b/VEX/priv/host_amd64_defs.h
@@ -82,11 +82,6 @@ extern HReg hregAMD64_XMM10 ( void );
 extern HReg hregAMD64_XMM11 ( void );
 extern HReg hregAMD64_XMM12 ( void );
 
-extern HReg hregAMD64_YMM2  ( void );
-extern HReg hregAMD64_YMM13 ( void );
-extern HReg hregAMD64_YMM14 ( void );
-extern HReg hregAMD64_YMM15 ( void );
-
 
 /* --------- Condition codes, AMD encoding. --------- */
 
@@ -400,9 +395,9 @@ typedef
       Ain_SseReRg,     /* SSE binary general reg-reg, Re, Rg */
       Ain_SseCMov,     /* SSE conditional move */
       Ain_SseShuf,     /* SSE2 shuffle (pshufd) */
-      Ain_AvxLdSt,     /* AVX load/store 256 bits,
-                          no alignment constraints */
-      Ain_AvxReRg,     /* AVX binary general reg-reg, Re, Rg */
+      //uu Ain_AvxLdSt,     /* AVX load/store 256 bits,
+      //uu                     no alignment constraints */
+      //uu Ain_AvxReRg,     /* AVX binary general reg-reg, Re, Rg */
       Ain_EvCheck,     /* Event check */
       Ain_ProfInc      /* 64-bit profile counter increment */
    }
@@ -668,16 +663,16 @@ typedef
             HReg   src;
             HReg   dst;
          } SseShuf;
-         struct {
-            Bool        isLoad;
-            HReg        reg;
-            AMD64AMode* addr;
-         } AvxLdSt;
-         struct {
-            AMD64SseOp op;
-            HReg       src;
-            HReg       dst;
-         } AvxReRg;
+         //uu struct {
+         //uu    Bool        isLoad;
+         //uu    HReg        reg;
+         //uu    AMD64AMode* addr;
+         //uu } AvxLdSt;
+         //uu struct {
+         //uu    AMD64SseOp op;
+         //uu    HReg       src;
+         //uu    HReg       dst;
+         //uu } AvxReRg;
          struct {
             AMD64AMode* amCounter;
             AMD64AMode* amFailAddr;
@@ -740,8 +735,8 @@ extern AMD64Instr* AMD64Instr_Sse64FLo   ( AMD64SseOp, HReg, HReg );
 extern AMD64Instr* AMD64Instr_SseReRg    ( AMD64SseOp, HReg, HReg );
 extern AMD64Instr* AMD64Instr_SseCMov    ( AMD64CondCode, HReg src, HReg dst );
 extern AMD64Instr* AMD64Instr_SseShuf    ( Int order, HReg src, HReg dst );
-extern AMD64Instr* AMD64Instr_AvxLdSt    ( Bool isLoad, HReg, AMD64AMode* );
-extern AMD64Instr* AMD64Instr_AvxReRg    ( AMD64SseOp, HReg, HReg );
+//uu extern AMD64Instr* AMD64Instr_AvxLdSt    ( Bool isLoad, HReg, AMD64AMode* );
+//uu extern AMD64Instr* AMD64Instr_AvxReRg    ( AMD64SseOp, HReg, HReg );
 extern AMD64Instr* AMD64Instr_EvCheck    ( AMD64AMode* amCounter,
                                            AMD64AMode* amFailAddr );
 extern AMD64Instr* AMD64Instr_ProfInc    ( void );
diff --git a/VEX/priv/host_amd64_isel.c b/VEX/priv/host_amd64_isel.c
index 24c0376bf5..4f1f5f364f 100644
--- a/VEX/priv/host_amd64_isel.c
+++ b/VEX/priv/host_amd64_isel.c
@@ -196,13 +196,6 @@ static HReg newVRegV ( ISelEnv* env )
    return reg;
 }
 
-static HReg newVRegDV ( ISelEnv* env )
-{
-   HReg reg = mkHReg(env->vreg_ctr, HRcVec256, True/*virtual reg*/);
-   env->vreg_ctr++;
-   return reg;
-}
-
 
 /*---------------------------------------------------------*/
 /*--- ISEL: Forward declarations                        ---*/
@@ -246,9 +239,6 @@ static HReg          iselFltExpr         ( ISelEnv* env, IRExpr* e );
 static HReg          iselVecExpr_wrk     ( ISelEnv* env, IRExpr* e );
 static HReg          iselVecExpr         ( ISelEnv* env, IRExpr* e );
 
-static HReg          iselV256Expr_wrk    ( ISelEnv* env, IRExpr* e );
-static HReg          iselV256Expr        ( ISelEnv* env, IRExpr* e );
-
 static void          iselDVecExpr_wrk ( /*OUT*/HReg* rHi, HReg* rLo, 
                                         ISelEnv* env, IRExpr* e );
 static void          iselDVecExpr     ( /*OUT*/HReg* rHi, HReg* rLo, 
@@ -325,15 +315,6 @@ static AMD64Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
    return AMD64Instr_SseReRg(Asse_MOV, src, dst);
 }
 
-/* Make a double-vector (256 bit) reg-reg move. */
-
-static AMD64Instr* mk_dvMOVsd_RR ( HReg src, HReg dst )
-{
-   vassert(hregClass(src) == HRcVec256);
-   vassert(hregClass(dst) == HRcVec256);
-   return AMD64Instr_AvxReRg(Asse_MOV, src, dst);
-}
-
 /* Advance/retreat %rsp by n. */
 
 static void add_to_rsp ( ISelEnv* env, Int n )
@@ -3380,62 +3361,6 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
 }
 
 
-/*---------------------------------------------------------*/
-/*--- ISEL: SIMD (V256) expressions, 256 bit.           ---*/
-/*---------------------------------------------------------*/
-
-static HReg iselV256Expr ( ISelEnv* env, IRExpr* e )
-{
-   HReg r = iselV256Expr_wrk( env, e );
-#  if 0
-   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
-#  endif
-   vassert(hregClass(r) == HRcVec256);
-   vassert(hregIsVirtual(r));
-   return r;
-}
-
-
-/* DO NOT CALL THIS DIRECTLY */
-static HReg iselV256Expr_wrk ( ISelEnv* env, IRExpr* e )
-{
-   //HWord      fn = 0; /* address of helper fn, if required */
-   //Bool       arg1isEReg = False;
-   //AMD64SseOp op = Asse_INVALID;
-   IRType     ty = typeOfIRExpr(env->type_env,e);
-   vassert(e);
-   vassert(ty == Ity_V256);
-#if 0
-   if (e->tag == Iex_RdTmp) {
-      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
-   }
-
-   if (e->tag == Iex_Get) {
-      HReg dst = newVRegDV(env);
-      addInstr(env, AMD64Instr_AvxLdSt(
-                       True/*load*/, 
-                       dst,
-                       AMD64AMode_IR(e->Iex.Get.offset, hregAMD64_RBP())
-                    )
-              );
-      return dst;
-   }
-
-   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
-      HReg        dst = newVRegDV(env);
-      AMD64AMode* am  = iselIntExpr_AMode(env, e->Iex.Load.addr);
-      addInstr(env, AMD64Instr_AvxLdSt( True/*load*/, dst, am ));
-      return dst;
-   }
-#endif
-   //avx_fail:
-   vex_printf("iselV256Expr (amd64, subarch = %s): can't reduce\n",
-              LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps));
-   ppIRExpr(e);
-   vpanic("iselV256Expr_wrk");
-}
-
-
 /*---------------------------------------------------------*/
 /*--- ISEL: SIMD (V256) expressions, into 2 XMM regs.    --*/
 /*---------------------------------------------------------*/
diff --git a/VEX/priv/host_generic_reg_alloc2.c b/VEX/priv/host_generic_reg_alloc2.c
index 2fc97c3c6f..e413929c35 100644
--- a/VEX/priv/host_generic_reg_alloc2.c
+++ b/VEX/priv/host_generic_reg_alloc2.c
@@ -208,8 +208,6 @@ Int findMostDistantlyMentionedVReg (
 static inline void sanity_check_spill_offset ( VRegLR* vreg )
 {
    switch (vreg->reg_class) {
-      case HRcVec256:
-         vassert(0 == ((UShort)vreg->spill_offset % 32)); break;
       case HRcVec128: case HRcFlt64:
          vassert(0 == ((UShort)vreg->spill_offset % 16)); break;
       default:
@@ -838,29 +836,6 @@ HInstrArray* doRegisterAllocation (
          HRegClass. */
       switch (vreg_lrs[j].reg_class) {
 
-         case HRcVec256:
-            /* Find four adjacent free slots in which between them
-               provide 256 bits in which to spill the vreg.  Since we
-               are trying to find an 32-byte-aligned slot, move along
-               in steps of 4 (slots). */
-            for (k = 0; k < N_SPILL64S-3; k += 4)
-               if (ss_busy_until_before[k+0] <= vreg_lrs[j].live_after
-                   && ss_busy_until_before[k+1] <= vreg_lrs[j].live_after
-                   && ss_busy_until_before[k+2] <= vreg_lrs[j].live_after
-                   && ss_busy_until_before[k+3] <= vreg_lrs[j].live_after)
-                  break;
-            if (k >= N_SPILL64S-3) {
-               vpanic("LibVEX_N_SPILL_BYTES is too low.  " 
-                      "Increase and recompile.");
-            }
-            if (0) vex_printf("32-byte spill offset in spill slot %d\n",
-                              (Int)k);
-            ss_busy_until_before[k+0] = vreg_lrs[j].dead_before;
-            ss_busy_until_before[k+1] = vreg_lrs[j].dead_before;
-            ss_busy_until_before[k+2] = vreg_lrs[j].dead_before;
-            ss_busy_until_before[k+3] = vreg_lrs[j].dead_before;
-            break;
-
          case HRcVec128: case HRcFlt64:
             /* Find two adjacent free slots in which between them
                provide up to 128 bits in which to spill the vreg.
diff --git a/VEX/priv/host_generic_regs.c b/VEX/priv/host_generic_regs.c
index 999c7f2abf..713add9cb7 100644
--- a/VEX/priv/host_generic_regs.c
+++ b/VEX/priv/host_generic_regs.c
@@ -49,7 +49,6 @@ void ppHRegClass ( HRegClass hrc )
       case HRcFlt64:   vex_printf("HRcFlt64"); break;
       case HRcVec64:   vex_printf("HRcVec64"); break;
       case HRcVec128:  vex_printf("HRcVec128"); break;
-      case HRcVec256:  vex_printf("HRcVec256"); break;
       default: vpanic("ppHRegClass");
    }
 }
@@ -66,7 +65,6 @@ void ppHReg ( HReg r )
       case HRcFlt64:   vex_printf("%%%sD%d", maybe_v, regNo); return;
       case HRcVec64:   vex_printf("%%%sv%d", maybe_v, regNo); return;
       case HRcVec128:  vex_printf("%%%sV%d", maybe_v, regNo); return;
-      case HRcVec256:  vex_printf("%%%sDV%d", maybe_v, regNo); return;
       default: vpanic("ppHReg");
    }
 }
diff --git a/VEX/priv/host_generic_regs.h b/VEX/priv/host_generic_regs.h
index f23d7f6375..e5c25b5263 100644
--- a/VEX/priv/host_generic_regs.h
+++ b/VEX/priv/host_generic_regs.h
@@ -87,7 +87,6 @@ typedef UInt HReg;
                              so won't fit in a 64-bit slot)
       HRcVec64     64 bits
       HRcVec128    128 bits
-      HRcVec256    256 bits
 
    If you add another regclass, you must remember to update
    host_generic_reg_alloc2.c accordingly.
@@ -100,8 +99,7 @@ typedef
       HRcFlt32=5,     /* 32-bit float */
       HRcFlt64=6,     /* 64-bit float */
       HRcVec64=7,     /* 64-bit SIMD */
-      HRcVec128=8,    /* 128-bit SIMD */
-      HRcVec256=9
+      HRcVec128=8     /* 128-bit SIMD */
    }
    HRegClass;
 
@@ -124,7 +122,7 @@ static inline HReg mkHReg ( UInt regno, HRegClass rc, Bool virtual ) {
 static inline HRegClass hregClass ( HReg r ) {
    UInt rc = r;
    rc = (rc >> 28) & 0x0F;
-   vassert(rc >= HRcInt32 && rc <= HRcVec256);
+   vassert(rc >= HRcInt32 && rc <= HRcVec128);
    return (HRegClass)rc;
 }