]> git.ipfire.org Git - thirdparty/LuaJIT.git/commitdiff
Add SSE2 variants of basic arithmetic ops in interpreter.
author Mike Pall <mike>
Mon, 21 Dec 2009 19:11:02 +0000 (20:11 +0100)
committer Mike Pall <mike>
Mon, 21 Dec 2009 19:11:02 +0000 (20:11 +0100)
src/buildvm_x86.dasc
src/buildvm_x86.h

index 960afa1df2af843fd24de8b07648ad09c5491371..b220c58faec482e0fd3b78653ccc64f2d922bde6 100644 (file)
 |.macro fdup; fld st0; .endmacro
 |.macro fpop1; fstp st1; .endmacro
 |
+|// Synthesize SSE FP constants.
+|.macro sseconst_sign, reg, tmp                // Synthesize sign mask.
+|  // Leaves 0x8000000000000000 in the low qword of reg. Overwrites tmp.
+|.if X64
+|  mov64 tmp, U64x(80000000,00000000)
+|  movd reg, tmp
+|.else
+|  mov tmp, 0x80000000
+|  movd reg, tmp                       // Load into reg itself, not a fixed xmm1.
+|  pshufd reg, reg, 0x51               // Move dword 0 up into dword 1.
+|.endif
+|.endmacro
+|
+|.macro sseconst_abs, reg, tmp         // Synthesize abs mask.
+|  // Low qword of reg ends up as 0x7fffffffffffffff (sign bit clear).
+|.if X64
+|  mov64 tmp, U64x(7fffffff,ffffffff)
+|  movd reg, tmp
+|.else
+|  pxor reg, reg
+|  pcmpeqd reg, reg                    // All bits set.
+|  psrlq reg, 1                        // Shift each qword right by one bit.
+|.endif
+|.endmacro
+|
+|.macro sseconst_1, reg, tmp           // Synthesize 1.0.
+|.if X64
+|  mov64 tmp, U64x(3ff00000,00000000)
+|  movd reg, tmp
+|.else
+|  mov tmp, 0x3ff00000; movd reg, tmp; pshufd reg, reg, 0x51
+|.endif
+|.endmacro
+|
+|.macro sseconst_2p52, reg, tmp                // Synthesize 2^52.
+|  // Low qword of reg receives 0x4330000000000000. Overwrites tmp.
+|.if X64
+|  mov64 tmp, U64x(43300000,00000000)
+|  movd reg, tmp
+|.else
+|  mov tmp, 0x43300000
+|  movd reg, tmp
+|  pshufd reg, reg, 0x51               // Move dword 0 up into dword 1.
+|.endif
+|.endmacro
+|
 |// Move table write barrier back. Overwrites reg.
 |.macro barrierback, tab, reg
 |  and byte tab->marked, cast_byte(~LJ_GC_BLACK)       // black2gray(tab)
 
 /* Generate subroutines used by opcodes and other parts of the VM. */
 /* The .code_sub section should be last to help static branch prediction. */
-static void build_subroutines(BuildCtx *ctx, int cmov)
+static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
 {
   |.code_sub
   |
@@ -2454,21 +2488,51 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
   |  vm_round 0x0c00, 0xffff
   |
   |// FP modulo x%y. Called by BC_MOD* and vm_arith.
-  |// Args/ret on x87 stack (y on top). No xmm registers modified.
-  |// Caveat: needs 3 slots on x87 stack! RC (eax) modified!
   |->vm_mod:
-  |  fld st1
-  |  fdiv st1
-  |  fnstcw word [esp+4]
-  |  mov ax, 0x0400
-  |  or ax, [esp+4]
-  |  and ax, 0xf7ff
-  |  mov [esp+6], ax
-  |  fldcw word [esp+6]
-  |  frndint
-  |  fldcw word [esp+4]
-  |  fmulp st1
-  |  fsubp st1
+  if (sse) {
+    |// Args in xmm0/xmm1, return value in xmm0.
+    |// Caveat: xmm0-xmm5 and RC (eax) modified!
+    |  movaps xmm5, xmm0
+    |  divsd xmm0, xmm1
+    |  sseconst_abs xmm2, RDa
+    |  sseconst_2p52 xmm3, RDa
+    |  movaps xmm4, xmm0
+    |  andpd xmm4, xmm2                        // |x/y|
+    |  ucomisd xmm3, xmm4              // No truncation if 2^52 <= |x/y|.
+    |  jbe >1
+    |  andnpd xmm2, xmm0               // Isolate sign bit.
+    |  addsd xmm4, xmm3                        // (|x/y| + 2^52) - 2^52
+    |  subsd xmm4, xmm3
+    |  orpd xmm4, xmm2                 // Merge sign bit back in.
+    |  sseconst_1 xmm2, RDa
+    |  cmpsd xmm0, xmm4, 1             // x/y < result?
+    |  andpd xmm0, xmm2
+    |  subsd xmm4, xmm0                        // If yes, subtract 1.0.
+    |  movaps xmm0, xmm5
+    |  mulsd xmm1, xmm4
+    |  subsd xmm0, xmm1
+    |  ret
+    |1:
+    |  mulsd xmm1, xmm0
+    |  movaps xmm0, xmm5
+    |  subsd xmm0, xmm1
+    |  ret
+  } else {
+    |// Args/ret on x87 stack (y on top). No xmm registers modified.
+    |// Caveat: needs 3 slots on x87 stack! RC (eax) modified!
+    |  fld st1
+    |  fdiv st1
+    |  fnstcw word [esp+4]
+    |  mov ax, 0x0400
+    |  or ax, [esp+4]
+    |  and ax, 0xf7ff
+    |  mov [esp+6], ax
+    |  fldcw word [esp+6]
+    |  frndint
+    |  fldcw word [esp+4]
+    |  fmulp st1
+    |  fsubp st1
+  }
   |  ret
   |
   |// FP exponentiation e^x and 2^x. Called by math.exp fast function and
@@ -2619,31 +2683,100 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
   |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -)
   |// and basic math functions. ORDER ARITH
   |->vm_foldarith:
-  |  mov eax, [esp+20]
-  |  fld qword [esp+4]
-  |  fld qword [esp+12]
-  |  cmp eax, 1; je >1; ja >2
-  |  faddp st1; ret
-  |1: ; fsubp st1; ret
-  |2: ; cmp eax, 3; je >1; ja >2
-  |  fmulp st1; ret
-  |1: ; fdivp st1; ret
-  |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow
-  |  cmp eax, 7; je >1; ja >2
-  |  fpop; fchs; ret
-  |1: ; fpop; fabs; ret
-  |2: ; cmp eax, 9; je >1; ja >2
-  |  fpatan; ret
-  |1: ; fxch; fscale; fpop1; ret
-  |2: ; cmp eax, 11; je >1; ja >9
-  ||if (cmov) {
-  |  fucomi st1; fcmovnbe st1; fpop1; ret
-  |1: ; fucomi st1; fcmovbe st1; fpop1; ret
-  ||} else {
-  |  fucom st1; fnstsw ax; test ah, 1; jz >2; fxch; 2: ; fpop; ret
-  |1: ; fucom st1; fnstsw ax; test ah, 1; jnz >2; fxch; 2: ; fpop; ret
-  ||}
-  |9: ; int3                                   // Bad op.
+  if (sse) {
+    |.macro retxmm0; .if X64; ret; .else; jmp >7; .endif; .endmacro
+    |.macro retst0; .if X64; jmp >7; .else; ret; .endif; .endmacro
+    |
+    |.if X64WIN
+    |  .define foldop, CARG3d
+    |.elif X64
+    |  .define foldop, CARG1d
+    |.else
+    |  .define foldop, eax
+    |  mov foldop, [esp+20]
+    |  movsd xmm0, qword [esp+4]
+    |  movsd xmm1, qword [esp+12]
+    |.endif
+    |  cmp foldop, 1; je >1; ja >2
+    |  addsd xmm0, xmm1; retxmm0
+    |1: ; subsd xmm0, xmm1; retxmm0
+    |2: ; cmp foldop, 3; je >1; ja >2
+    |  mulsd xmm0, xmm1; retxmm0
+    |1: ; divsd xmm0, xmm1; retxmm0
+    |2: ; cmp foldop, 5
+    |.if X64
+    |  jb ->vm_mod; je ->vm_pow                // NYI: broken without SSE vm_pow.
+    |.else
+    |  je >1; ja >2
+    |  call ->vm_mod; retxmm0
+    |1: ; fld qword [esp+4]; fld qword [esp+12]; jmp ->vm_pow  // NYI
+    |2:
+    |.endif
+    |  cmp foldop, 7; je >1; ja >2
+    |  sseconst_sign xmm1, RDa; xorps xmm0, xmm1; retxmm0
+    |1:
+    |  sseconst_abs xmm1, RDa; andps xmm0, xmm1; retxmm0
+    |2: ; cmp foldop, 9; ja >2
+    |.if X64WIN
+    |  movsd qword [esp+8], xmm0       // Use scratch area.
+    |  movsd qword [esp+16], xmm1
+    |  fld qword [esp+8]
+    |  fld qword [esp+16]
+    |.elif X64
+    |  movsd qword [esp-8], xmm0       // Use red zone.
+    |  movsd qword [esp-16], xmm1
+    |  fld qword [esp-8]
+    |  fld qword [esp-16]
+    |.else
+    |  fld qword [esp+4]               // Reload from stack
+    |  fld qword [esp+12]
+    |.endif
+    |  je >1
+    |  fpatan; retst0
+    |1: ; fxch; fscale; fpop1; retst0
+    |2: ; cmp foldop, 11; je >1; ja >9
+    |  minsd xmm0, xmm1; retxmm0
+    |1: ; maxsd xmm0, xmm1; retxmm0
+    |9: ; int3                         // Bad op.
+    |7:  // Move return value depending on calling convention.
+    |.if X64WIN
+    |  fstp qword [esp+8]              // Use scratch area.
+    |  movsd xmm0, qword [esp+8]
+    |.elif X64
+    |  fstp qword [esp-8]              // Use red zone.
+    |  movsd xmm0, qword [esp-8]
+    |.else
+    |  movsd qword [esp+4], xmm0       // Overwrite callee-owned args.
+    |  fld qword [esp+4]
+    |.endif
+    |  ret
+  } else {
+    |  mov eax, [esp+20]
+    |  fld qword [esp+4]
+    |  fld qword [esp+12]
+    |  cmp eax, 1; je >1; ja >2
+    |  faddp st1; ret
+    |1: ; fsubp st1; ret
+    |2: ; cmp eax, 3; je >1; ja >2
+    |  fmulp st1; ret
+    |1: ; fdivp st1; ret
+    |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow
+    |  cmp eax, 7; je >1; ja >2
+    |  fpop; fchs; ret
+    |1: ; fpop; fabs; ret
+    |2: ; cmp eax, 9; je >1; ja >2
+    |  fpatan; ret
+    |1: ; fxch; fscale; fpop1; ret
+    |2: ; cmp eax, 11; je >1; ja >9
+    ||if (cmov) {
+    |  fucomi st1; fcmovnbe st1; fpop1; ret
+    |1: ; fucomi st1; fcmovbe st1; fpop1; ret
+    ||} else {
+    |  fucom st1; fnstsw ax; test ah, 1; jz >2; fxch; 2: ; fpop; ret
+    |1: ; fucom st1; fnstsw ax; test ah, 1; jnz >2; fxch; 2: ; fpop; ret
+    ||}
+    |9: ; int3                         // Bad op.
+  }
   |
   |//-----------------------------------------------------------------------
   |//-- Miscellaneous functions --------------------------------------------
@@ -2694,7 +2827,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
 }
 
 /* Generate the code for a single instruction. */
-static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
+static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
 {
   int vk = 0;
   |// Note: aligning all instructions does not pay off.
@@ -2711,10 +2844,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
     |  ins_AD
     |  checknum RA, ->vmeta_comp
     |  checknum RD, ->vmeta_comp
-    |  fld qword [BASE+RA*8]           // Reverse order, i.e like cmp D, A.
-    |  fld qword [BASE+RD*8]
-    |  add PC, 4
-    |  fcomparepp                      // eax (RD) modified!
+    if (sse) {
+      |  movsd xmm0, qword [BASE+RD*8]
+      |  add PC, 4
+      |  ucomisd xmm0, qword [BASE+RA*8]
+    } else {
+      |  fld qword [BASE+RA*8]         // Reverse order, i.e like cmp D, A.
+      |  fld qword [BASE+RD*8]
+      |  add PC, 4
+      |  fcomparepp                    // eax (RD) modified!
+    }
     |  // Unordered: all of ZF CF PF set, ordered: PF clear.
     |  // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
     switch (op) {
@@ -2746,9 +2885,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
     |  add PC, 4
     |  cmp RB, LJ_TISNUM; ja >5
     |  checknum RA, >5
-    |  fld qword [BASE+RA*8]
-    |  fld qword [BASE+RD*8]
-    |  fcomparepp                      // eax (RD) modified!
+    if (sse) {
+      |  movsd xmm0, qword [BASE+RD*8]
+      |  ucomisd xmm0, qword [BASE+RA*8]
+    } else {
+      |  fld qword [BASE+RA*8]
+      |  fld qword [BASE+RD*8]
+      |  fcomparepp                    // eax (RD) modified!
+    }
   iseqne_fp:
     if (vk) {
       |  jp >2                         // Unordered means not equal.
@@ -2820,9 +2964,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
     |  ins_AD  // RA = src, RD = num const, JMP with RD = target
     |  add PC, 4
     |  checknum RA, >2
-    |  fld qword [BASE+RA*8]
-    |  fld qword [KBASE+RD*8]
-    |  fcomparepp                      // eax (RD) modified!
+    if (sse) {
+      |  movsd xmm0, qword [KBASE+RD*8]
+      |  ucomisd xmm0, qword [BASE+RA*8]
+    } else {
+      |  fld qword [BASE+RA*8]
+      |  fld qword [KBASE+RD*8]
+      |  fcomparepp                    // eax (RD) modified!
+    }
     goto iseqne_fp;
   case BC_ISEQP: case BC_ISNEP:
     vk = op == BC_ISEQP;
@@ -2875,18 +3024,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
   case BC_UNM:
     |  ins_AD  // RA = dst, RD = src
     |  checknum RD, ->vmeta_unm
-    |  fld qword [BASE+RD*8]
-    |  fchs
-    |  fstp qword [BASE+RA*8]
+    if (sse) {
+      |  movsd xmm0, qword [BASE+RD*8]
+      |  sseconst_sign xmm1, RDa
+      |  xorps xmm0, xmm1
+      |  movsd qword [BASE+RA*8], xmm0
+    } else {
+      |  fld qword [BASE+RD*8]
+      |  fchs
+      |  fstp qword [BASE+RA*8]
+    }
     |  ins_next
     break;
   case BC_LEN:
     |  ins_AD  // RA = dst, RD = src
     |  checkstr RD, >2
     |  mov STR:RD, [BASE+RD*8]
-    |  fild dword STR:RD->len
-    |1:
-    |  fstp qword [BASE+RA*8]
+    if (sse) {
+      |  xorps xmm0, xmm0
+      |  cvtsi2sd xmm0, dword STR:RD->len
+      |1:
+      |  movsd qword [BASE+RA*8], xmm0
+    } else {
+      |  fild dword STR:RD->len
+      |1:
+      |  fstp qword [BASE+RA*8]
+    }
     |  ins_next
     |2:
     |  checktab RD, ->vmeta_len
@@ -2894,72 +3057,108 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
     |  mov RB, BASE                    // Save BASE.
     |  call extern lj_tab_len@4                // (GCtab *t)
     |  // Length of table returned in eax (RC).
-    |  mov ARG1, RC
-    |  mov BASE, RB                    // Restore BASE.
-    |  fild ARG1
+    if (sse) {
+      |  cvtsi2sd xmm0, RC
+      |  mov BASE, RB                  // Restore BASE.
+    } else {
+      |  mov ARG1, RC
+      |  mov BASE, RB                  // Restore BASE.
+      |  fild ARG1
+    }
     |  movzx RA, PC_RA
     |  jmp <1
     break;
 
   /* -- Binary ops -------------------------------------------------------- */
 
-    |.macro ins_arithpre, ins
+    |.macro ins_arithpre, ins, sseins, ssereg
     |  ins_ABC
     ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
     ||switch (vk) {
     ||case 0:
     |   checknum RB, ->vmeta_arith_vn
+    ||if (sse) {
+    |   movsd xmm0, qword [BASE+RB*8]
+    |   sseins ssereg, qword [KBASE+RC*8]
+    ||} else {
     |   fld qword [BASE+RB*8]
     |   ins qword [KBASE+RC*8]
+    ||}
     ||  break;
     ||case 1:
     |   checknum RB, ->vmeta_arith_nv
+    ||if (sse) {
+    |   movsd xmm0, qword [KBASE+RC*8]
+    |   sseins ssereg, qword [BASE+RB*8]
+    ||} else {
     |   fld qword [KBASE+RC*8]
     |   ins qword [BASE+RB*8]
+    ||}
     ||  break;
     ||default:
     |   checknum RB, ->vmeta_arith_vv
     |   checknum RC, ->vmeta_arith_vv
+    ||if (sse) {
+    |   movsd xmm0, qword [BASE+RB*8]
+    |   sseins ssereg, qword [BASE+RC*8]
+    ||} else {
     |   fld qword [BASE+RB*8]
     |   ins qword [BASE+RC*8]
+    ||}
     ||  break;
     ||}
     |.endmacro
     |
-    |.macro ins_arith, ins
-    |  ins_arithpre ins
+    |.macro ins_arithpost
+    ||if (sse) {
+    |  movsd qword [BASE+RA*8], xmm0
+    ||} else {
     |  fstp qword [BASE+RA*8]
+    ||}
+    |.endmacro
+    |
+    |.macro ins_arith, ins, sseins
+    |  ins_arithpre ins, sseins, xmm0
+    |  ins_arithpost
     |  ins_next
     |.endmacro
 
     |  // RA = dst, RB = src1 or num const, RC = src2 or num const
   case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
-    |  ins_arith fadd
+    |  ins_arith fadd, addsd
     break;
   case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
-    |  ins_arith fsub
+    |  ins_arith fsub, subsd
     break;
   case BC_MULVN: case BC_MULNV: case BC_MULVV:
-    |  ins_arith fmul
+    |  ins_arith fmul, mulsd
     break;
   case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
-    |  ins_arith fdiv
+    |  ins_arith fdiv, divsd
     break;
   case BC_MODVN:
-    |  ins_arithpre fld
+    |  ins_arithpre fld, movsd, xmm1
     |->BC_MODVN_Z:
     |  call ->vm_mod
-    |  fstp qword [BASE+RA*8]
+    |  ins_arithpost
     |  ins_next
     break;
   case BC_MODNV: case BC_MODVV:
-    |  ins_arithpre fld
+    |  ins_arithpre fld, movsd, xmm1
     |  jmp ->BC_MODVN_Z                        // Avoid 3 copies. It's slow anyway.
     break;
   case BC_POW:
-    |  ins_arithpre fld
-    |  call ->vm_pow
-    |  fstp qword [BASE+RA*8]
+    if (sse) {
+      sse = 0;  /* NYI: temporary workaround. */
+      |  ins_arithpre fld, movsd, xmm1
+      |  call ->vm_pow
+      |  ins_arithpost
+      sse = 1;
+    } else {
+      |  ins_arithpre fld, movsd, xmm1
+      |  call ->vm_pow
+      |  ins_arithpost
+    }
     |  ins_next
     break;
 
@@ -3945,17 +4144,21 @@ static int build_backend(BuildCtx *ctx)
 {
   int op;
   int cmov = 1;
+  int sse = 0;
 #ifdef LUAJIT_CPU_NOCMOV
   cmov = 0;
 #endif
+#ifdef LUAJIT_CPU_SSE2
+  sse = 1;
+#endif
 
   dasm_growpc(Dst, BC__MAX);
 
-  build_subroutines(ctx, cmov);
+  build_subroutines(ctx, cmov, sse);
 
   |.code_op
   for (op = 0; op < BC__MAX; op++)
-    build_ins(ctx, (BCOp)op, op, cmov);
+    build_ins(ctx, (BCOp)op, op, cmov, sse);
 
   return BC__MAX;
 }
index a799cbae060fe73ce3b0656f3e634c3f0b0e49d7..1f990f0971abc0ad9944897ba89356686bb9b627 100644 (file)
@@ -12,7 +12,7 @@
 #define DASM_SECTION_CODE_OP   0
 #define DASM_SECTION_CODE_SUB  1
 #define DASM_MAXSECTION                2
-static const unsigned char build_actionlist[12310] = {
+static const unsigned char build_actionlist[12791] = {
   254,1,248,10,137,202,139,173,233,137,114,252,252,15,182,141,233,139,181,233,
   139,189,233,139,108,36,48,141,12,202,141,68,194,252,252,59,141,233,15,135,
   244,11,248,9,189,237,248,1,137,40,137,104,8,131,192,16,57,200,15,130,244,
@@ -374,61 +374,82 @@ static const unsigned char build_actionlist[12310] = {
   252,255,252,251,102,137,68,36,6,217,108,36,6,217,252,252,217,108,36,4,139,
   68,36,8,195,248,102,217,124,36,4,137,68,36,8,102,184,0,12,102,11,68,36,4,
   102,137,68,36,6,217,108,36,6,217,252,252,217,108,36,4,139,68,36,8,195,248,
-  140,217,193,216,252,241,217,124,36,4,102,184,0,4,102,11,68,36,4,102,37,252,
-  255,252,247,102,137,68,36,6,217,108,36,6,217,252,252,217,108,36,4,222,201,
-  222,252,233,195,248,86,217,252,234,222,201,248,141,217,84,36,4,129,124,36,
-  4,0,0,128,127,15,132,244,247,129,124,36,4,0,0,128,252,255,15,132,244,248,
-  248,142,217,192,217,252,252,220,252,233,217,201,217,252,240,217,232,222,193,
-  217,252,253,221,217,248,1,195,248,2,221,216,217,252,238,195,248,105,219,84,
-  36,4,219,68,36,4,255,223,252,233,255,80,221,252,233,223,224,158,88,255,15,
-  133,244,254,15,138,244,255,221,216,248,143,80,139,68,36,8,131,252,248,1,15,
-  142,244,252,248,1,169,1,0,0,0,15,133,244,248,216,200,209,232,252,233,244,
-  1,248,2,209,232,15,132,244,251,217,192,248,3,216,200,209,232,15,132,244,250,
-  15,131,244,3,255,220,201,252,233,244,3,248,4,222,201,248,5,88,195,248,6,15,
-  132,244,5,15,130,244,253,217,232,222,252,241,252,247,216,131,252,248,1,15,
-  132,244,5,252,233,244,1,248,7,221,216,217,232,88,195,248,8,80,217,84,36,8,
-  217,201,217,84,36,12,139,68,36,8,209,224,61,0,0,0,252,255,15,132,244,248,
-  139,68,36,12,209,224,15,132,244,250,61,0,0,0,252,255,15,132,244,250,255,88,
-  217,252,241,252,233,244,142,248,9,217,232,255,223,252,234,255,80,221,252,
-  234,223,224,158,88,255,15,132,244,247,217,201,248,1,221,216,195,248,2,217,
-  225,217,232,255,221,252,233,223,224,158,255,15,132,244,249,221,216,217,225,
-  217,252,238,184,0,0,0,0,15,146,208,209,200,51,68,36,8,15,137,244,249,217,
-  201,248,3,221,217,217,225,88,195,248,4,131,124,36,8,0,15,141,244,3,221,216,
-  221,216,133,192,88,15,132,244,251,217,252,238,195,248,5,199,68,36,8,0,0,128,
-  127,217,68,36,8,195,248,144,139,68,36,12,221,68,36,4,131,252,248,1,15,130,
-  244,79,15,132,244,81,131,252,248,3,15,130,244,102,15,135,244,247,255,217,
-  252,250,195,248,1,131,252,248,5,15,130,244,86,15,132,244,141,131,252,248,
-  7,15,132,244,247,15,135,244,248,217,252,237,217,201,217,252,241,195,248,1,
-  217,232,217,201,217,252,241,195,248,2,131,252,248,9,15,132,244,247,15,135,
-  244,248,217,252,236,217,201,217,252,241,195,248,1,217,252,254,195,248,2,131,
-  252,248,11,15,132,244,247,15,135,244,255,255,217,252,255,195,248,1,217,252,
-  242,221,216,195,248,9,204,248,145,139,68,36,20,221,68,36,4,221,68,36,12,131,
-  252,248,1,15,132,244,247,15,135,244,248,222,193,195,248,1,222,252,233,195,
-  248,2,131,252,248,3,15,132,244,247,15,135,244,248,222,201,195,248,1,222,252,
-  249,195,248,2,131,252,248,5,15,130,244,140,15,132,244,105,131,252,248,7,15,
-  132,244,247,255,15,135,244,248,221,216,217,224,195,248,1,221,216,217,225,
-  195,248,2,131,252,248,9,15,132,244,247,15,135,244,248,217,252,243,195,248,
-  1,217,201,217,252,253,221,217,195,248,2,131,252,248,11,15,132,244,247,15,
-  135,244,255,255,219,252,233,219,209,221,217,195,248,1,219,252,233,218,209,
-  221,217,195,255,221,225,223,224,252,246,196,1,15,132,244,248,217,201,248,
-  2,221,216,195,248,1,221,225,223,224,252,246,196,1,15,133,244,248,217,201,
-  248,2,221,216,195,255,248,9,204,248,146,156,90,137,209,129,252,242,0,0,32,
-  0,82,157,156,90,49,192,57,209,15,132,244,247,139,68,36,4,87,83,15,162,139,
-  124,36,16,137,7,137,95,4,137,79,8,137,87,12,91,95,248,1,195,255,129,124,253,
-  202,4,239,15,135,244,41,129,124,253,194,4,239,15,135,244,41,221,4,202,221,
-  4,194,131,198,4,255,223,252,233,221,216,255,218,252,233,223,224,158,255,15,
-  134,244,248,255,15,135,244,248,255,15,131,244,248,255,248,1,15,183,70,252,
-  254,141,180,253,134,233,248,2,139,6,15,182,204,15,182,232,131,198,4,193,232,
-  16,252,255,36,171,255,139,108,194,4,131,198,4,129,252,253,239,15,135,244,
-  251,129,124,253,202,4,239,15,135,244,251,221,4,202,221,4,194,255,15,138,244,
-  248,15,133,244,248,255,15,138,244,248,15,132,244,247,255,248,1,15,183,70,
-  252,254,141,180,253,134,233,248,2,255,248,2,15,183,70,252,254,141,180,253,
-  134,233,248,1,255,248,5,57,108,202,4,15,133,244,2,129,252,253,239,15,131,
-  244,1,139,12,202,139,4,194,57,193,15,132,244,1,129,252,253,239,15,135,244,
-  2,139,169,233,133,252,237,15,132,244,2,252,246,133,233,235,15,133,244,2,255,
-  49,252,237,255,189,1,0,0,0,255,252,233,244,45,255,252,247,208,131,198,4,129,
-  124,253,202,4,239,15,133,244,248,139,12,202,59,12,135,255,131,198,4,129,124,
-  253,202,4,239,15,135,244,248,221,4,202,221,4,199,255,252,247,208,131,198,
+  140,255,15,40,232,252,242,15,94,193,102,15,252,239,210,102,15,118,210,102,
+  15,115,210,1,184,0,0,48,67,102,15,110,216,102,15,112,219,81,15,40,224,102,
+  15,84,226,102,15,46,220,15,134,244,247,102,15,85,208,252,242,15,88,227,252,
+  242,15,92,227,102,15,86,226,184,0,0,252,240,63,102,15,110,208,102,15,112,
+  210,81,252,242,15,194,196,1,102,15,84,194,252,242,15,92,224,15,40,197,252,
+  242,15,89,204,252,242,15,92,193,195,248,1,252,242,15,89,200,15,40,197,252,
+  242,15,92,193,195,255,217,193,216,252,241,217,124,36,4,102,184,0,4,102,11,
+  68,36,4,102,37,252,255,252,247,102,137,68,36,6,217,108,36,6,217,252,252,217,
+  108,36,4,222,201,222,252,233,255,195,248,86,217,252,234,222,201,248,141,217,
+  84,36,4,129,124,36,4,0,0,128,127,15,132,244,247,129,124,36,4,0,0,128,252,
+  255,15,132,244,248,248,142,217,192,217,252,252,220,252,233,217,201,217,252,
+  240,217,232,222,193,217,252,253,221,217,248,1,195,248,2,221,216,217,252,238,
+  195,248,105,219,84,36,4,219,68,36,4,255,223,252,233,255,80,221,252,233,223,
+  224,158,88,255,15,133,244,254,15,138,244,255,221,216,248,143,80,139,68,36,
+  8,131,252,248,1,15,142,244,252,248,1,169,1,0,0,0,15,133,244,248,216,200,209,
+  232,252,233,244,1,248,2,209,232,15,132,244,251,217,192,248,3,216,200,209,
+  232,15,132,244,250,15,131,244,3,255,220,201,252,233,244,3,248,4,222,201,248,
+  5,88,195,248,6,15,132,244,5,15,130,244,253,217,232,222,252,241,252,247,216,
+  131,252,248,1,15,132,244,5,252,233,244,1,248,7,221,216,217,232,88,195,248,
+  8,80,217,84,36,8,217,201,217,84,36,12,139,68,36,8,209,224,61,0,0,0,252,255,
+  15,132,244,248,139,68,36,12,209,224,15,132,244,250,61,0,0,0,252,255,15,132,
+  244,250,255,88,217,252,241,252,233,244,142,248,9,217,232,255,223,252,234,
+  255,80,221,252,234,223,224,158,88,255,15,132,244,247,217,201,248,1,221,216,
+  195,248,2,217,225,217,232,255,221,252,233,223,224,158,255,15,132,244,249,
+  221,216,217,225,217,252,238,184,0,0,0,0,15,146,208,209,200,51,68,36,8,15,
+  137,244,249,217,201,248,3,221,217,217,225,88,195,248,4,131,124,36,8,0,15,
+  141,244,3,221,216,221,216,133,192,88,15,132,244,251,217,252,238,195,248,5,
+  199,68,36,8,0,0,128,127,217,68,36,8,195,248,144,139,68,36,12,221,68,36,4,
+  131,252,248,1,15,130,244,79,15,132,244,81,131,252,248,3,15,130,244,102,15,
+  135,244,247,255,217,252,250,195,248,1,131,252,248,5,15,130,244,86,15,132,
+  244,141,131,252,248,7,15,132,244,247,15,135,244,248,217,252,237,217,201,217,
+  252,241,195,248,1,217,232,217,201,217,252,241,195,248,2,131,252,248,9,15,
+  132,244,247,15,135,244,248,217,252,236,217,201,217,252,241,195,248,1,217,
+  252,254,195,248,2,131,252,248,11,15,132,244,247,15,135,244,255,255,217,252,
+  255,195,248,1,217,252,242,221,216,195,248,9,204,248,145,255,139,68,36,20,
+  252,242,15,16,68,36,4,252,242,15,16,76,36,12,131,252,248,1,15,132,244,247,
+  15,135,244,248,252,242,15,88,193,252,233,244,253,248,1,252,242,15,92,193,
+  252,233,244,253,248,2,131,252,248,3,15,132,244,247,15,135,244,248,252,242,
+  15,89,193,252,233,244,253,248,1,252,242,15,94,193,252,233,244,253,248,2,255,
+  131,252,248,5,15,132,244,247,15,135,244,248,232,244,140,252,233,244,253,248,
+  1,221,68,36,4,221,68,36,12,252,233,244,105,248,2,131,252,248,7,15,132,244,
+  247,15,135,244,248,184,0,0,0,128,102,15,110,200,102,15,112,201,81,15,87,193,
+  252,233,244,253,248,1,102,15,252,239,201,102,15,118,201,102,15,115,209,1,
+  15,84,193,252,233,244,253,248,2,255,131,252,248,9,15,135,244,248,221,68,36,
+  4,221,68,36,12,15,132,244,247,217,252,243,195,248,1,217,201,217,252,253,221,
+  217,195,248,2,131,252,248,11,15,132,244,247,15,135,244,255,252,242,15,93,
+  193,252,233,244,253,248,1,252,242,15,95,193,252,233,244,253,248,9,204,248,
+  7,252,242,15,17,68,36,4,221,68,36,4,195,255,139,68,36,20,221,68,36,4,221,
+  68,36,12,131,252,248,1,15,132,244,247,15,135,244,248,222,193,195,248,1,222,
+  252,233,195,248,2,131,252,248,3,15,132,244,247,15,135,244,248,222,201,195,
+  248,1,222,252,249,195,248,2,131,252,248,5,15,130,244,140,15,132,244,105,131,
+  252,248,7,15,132,244,247,15,135,244,248,255,221,216,217,224,195,248,1,221,
+  216,217,225,195,248,2,131,252,248,9,15,132,244,247,15,135,244,248,217,252,
+  243,195,248,1,217,201,217,252,253,221,217,195,248,2,131,252,248,11,15,132,
+  244,247,15,135,244,255,255,219,252,233,219,209,221,217,195,248,1,219,252,
+  233,218,209,221,217,195,255,221,225,223,224,252,246,196,1,15,132,244,248,
+  217,201,248,2,221,216,195,248,1,221,225,223,224,252,246,196,1,15,133,244,
+  248,217,201,248,2,221,216,195,255,248,9,204,255,248,146,156,90,137,209,129,
+  252,242,0,0,32,0,82,157,156,90,49,192,57,209,15,132,244,247,139,68,36,4,87,
+  83,15,162,139,124,36,16,137,7,137,95,4,137,79,8,137,87,12,91,95,248,1,195,
+  255,129,124,253,202,4,239,15,135,244,41,129,124,253,194,4,239,15,135,244,
+  41,255,252,242,15,16,4,194,131,198,4,102,15,46,4,202,255,221,4,202,221,4,
+  194,131,198,4,255,223,252,233,221,216,255,218,252,233,223,224,158,255,15,
+  134,244,248,255,15,131,244,248,255,248,1,15,183,70,252,254,141,180,253,134,
+  233,248,2,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,
+  255,139,108,194,4,131,198,4,129,252,253,239,15,135,244,251,129,124,253,202,
+  4,239,15,135,244,251,255,252,242,15,16,4,194,102,15,46,4,202,255,221,4,202,
+  221,4,194,255,15,138,244,248,15,133,244,248,255,15,138,244,248,15,132,244,
+  247,255,248,1,15,183,70,252,254,141,180,253,134,233,248,2,255,248,2,15,183,
+  70,252,254,141,180,253,134,233,248,1,255,248,5,57,108,202,4,15,133,244,2,
+  129,252,253,239,15,131,244,1,139,12,202,139,4,194,57,193,15,132,244,1,129,
+  252,253,239,15,135,244,2,139,169,233,133,252,237,15,132,244,2,252,246,133,
+  233,235,15,133,244,2,255,49,252,237,255,189,1,0,0,0,255,252,233,244,45,255,
+  252,247,208,131,198,4,129,124,253,202,4,239,15,133,244,248,139,12,202,59,
+  12,135,255,131,198,4,129,124,253,202,4,239,15,135,244,248,255,252,242,15,
+  16,4,199,102,15,46,4,202,255,221,4,202,221,4,199,255,252,247,208,131,198,
   4,57,68,202,4,255,139,108,194,4,131,198,4,129,252,253,239,255,15,131,244,
   247,255,15,130,244,247,255,137,108,202,4,139,44,194,137,44,202,255,15,183,
   70,252,254,141,180,253,134,233,248,1,139,6,15,182,204,15,182,232,131,198,
@@ -436,176 +457,177 @@ static const unsigned char build_actionlist[12310] = {
   4,202,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,255,
   49,252,237,129,124,253,194,4,239,129,213,239,137,108,202,4,139,6,15,182,204,
   15,182,232,131,198,4,193,232,16,252,255,36,171,255,129,124,253,194,4,239,
-  15,135,244,48,221,4,194,217,224,221,28,202,139,6,15,182,204,15,182,232,131,
-  198,4,193,232,16,252,255,36,171,255,129,124,253,194,4,239,15,133,244,248,
-  139,4,194,219,128,233,248,1,221,28,202,139,6,15,182,204,15,182,232,131,198,
-  4,193,232,16,252,255,36,171,248,2,129,124,253,194,4,239,15,133,244,50,139,
-  12,194,137,213,232,251,1,18,137,4,36,137,252,234,219,4,36,15,182,78,252,253,
-  252,233,244,1,255,15,182,252,236,15,182,192,255,129,124,253,252,234,4,239,
-  15,135,244,46,221,4,252,234,220,4,199,255,129,124,253,252,234,4,239,15,135,
-  244,47,221,4,199,220,4,252,234,255,129,124,253,252,234,4,239,15,135,244,49,
-  129,124,253,194,4,239,15,135,244,49,221,4,252,234,220,4,194,255,129,124,253,
-  252,234,4,239,15,135,244,46,221,4,252,234,220,36,199,255,129,124,253,252,
-  234,4,239,15,135,244,47,221,4,199,220,36,252,234,255,129,124,253,252,234,
-  4,239,15,135,244,49,129,124,253,194,4,239,15,135,244,49,221,4,252,234,220,
-  36,194,255,129,124,253,252,234,4,239,15,135,244,46,221,4,252,234,220,12,199,
-  255,129,124,253,252,234,4,239,15,135,244,47,221,4,199,220,12,252,234,255,
-  129,124,253,252,234,4,239,15,135,244,49,129,124,253,194,4,239,15,135,244,
-  49,221,4,252,234,220,12,194,255,129,124,253,252,234,4,239,15,135,244,46,221,
-  4,252,234,220,52,199,255,129,124,253,252,234,4,239,15,135,244,47,221,4,199,
-  220,52,252,234,255,129,124,253,252,234,4,239,15,135,244,49,129,124,253,194,
-  4,239,15,135,244,49,221,4,252,234,220,52,194,255,129,124,253,252,234,4,239,
-  15,135,244,46,221,4,252,234,221,4,199,255,129,124,253,252,234,4,239,15,135,
-  244,47,221,4,199,221,4,252,234,255,129,124,253,252,234,4,239,15,135,244,49,
-  129,124,253,194,4,239,15,135,244,49,221,4,252,234,221,4,194,255,248,147,232,
-  244,140,221,28,202,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,
-  36,171,255,252,233,244,147,255,232,244,105,221,28,202,139,6,15,182,204,15,
-  182,232,131,198,4,193,232,16,252,255,36,171,255,15,182,252,236,15,182,192,
-  141,12,194,41,232,137,76,36,4,137,68,36,8,248,33,139,108,36,48,137,44,36,
-  137,116,36,24,137,149,233,232,251,1,23,139,149,233,133,192,15,133,244,42,
-  15,182,110,252,255,15,182,78,252,253,139,68,252,234,4,139,44,252,234,137,
-  68,202,4,137,44,202,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,
-  255,36,171,255,252,247,208,139,4,135,199,68,202,4,237,137,4,202,139,6,15,
-  182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,255,223,70,252,254,
-  221,28,202,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,
-  255,221,4,199,221,28,202,139,6,15,182,204,15,182,232,131,198,4,193,232,16,
-  252,255,36,171,255,252,247,208,137,68,202,4,139,6,15,182,204,15,182,232,131,
-  198,4,193,232,16,252,255,36,171,255,141,76,202,12,141,68,194,4,189,237,137,
-  105,252,248,248,1,137,41,131,193,8,57,193,15,134,244,1,139,6,15,182,204,15,
-  182,232,131,198,4,193,232,16,252,255,36,171,255,139,106,252,248,139,172,253,
-  133,233,139,173,233,139,69,4,139,109,0,137,68,202,4,137,44,202,139,6,15,182,
-  204,15,182,232,131,198,4,193,232,16,252,255,36,171,255,139,106,252,248,139,
-  172,253,141,233,128,189,233,0,139,173,233,139,12,194,139,68,194,4,137,77,
-  0,137,69,4,15,132,244,247,252,246,133,233,235,15,133,244,248,248,1,139,6,
-  15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,248,2,129,232,239,
-  129,252,248,239,15,134,244,1,252,246,129,233,235,15,132,244,1,135,213,141,
-  139,233,255,232,251,1,24,137,252,234,252,233,244,1,255,252,247,208,139,106,
-  252,248,139,172,253,141,233,139,12,135,139,133,233,137,8,199,64,4,237,252,
-  246,133,233,235,15,133,244,248,248,1,139,6,15,182,204,15,182,232,131,198,
-  4,193,232,16,252,255,36,171,248,2,252,246,129,233,235,15,132,244,1,128,189,
-  233,0,15,132,244,1,137,213,137,194,141,139,233,232,251,1,24,137,252,234,252,
-  233,244,1,255,139,106,252,248,221,4,199,139,172,253,141,233,139,141,233,221,
-  25,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,255,252,
-  247,208,139,106,252,248,139,172,253,141,233,139,141,233,137,65,4,139,6,15,
-  182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,255,141,180,253,134,
-  233,139,108,36,48,131,189,233,0,15,132,244,247,141,12,202,137,76,36,4,137,
-  44,36,137,149,233,232,251,1,25,139,149,233,248,1,139,6,15,182,204,15,182,
-  232,131,198,4,193,232,16,252,255,36,171,255,252,247,208,139,74,252,248,139,
-  4,135,139,108,36,48,137,76,36,8,137,68,36,4,137,116,36,24,137,44,36,137,149,
-  233,232,251,1,26,139,149,233,15,182,78,252,253,137,4,202,199,68,202,4,237,
-  139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,255,137,197,
-  37,252,255,7,0,0,193,252,237,11,61,252,255,7,0,0,15,148,209,137,108,36,8,
-  1,200,139,108,36,48,1,200,137,68,36,4,137,116,36,24,139,139,233,137,44,36,
-  59,139,233,137,149,233,15,131,244,248,248,1,232,251,1,27,139,149,233,15,182,
-  78,252,253,137,4,202,199,68,202,4,237,139,6,15,182,204,15,182,232,131,198,
-  4,193,232,16,252,255,36,171,248,2,137,252,233,232,251,1,28,252,233,244,1,
-  255,252,247,208,139,108,36,48,139,139,233,137,116,36,24,59,139,233,137,149,
-  233,15,131,244,249,248,2,139,20,135,137,252,233,232,251,1,29,139,149,233,
-  15,182,78,252,253,137,4,202,199,68,202,4,237,139,6,15,182,204,15,182,232,
-  131,198,4,193,232,16,252,255,36,171,248,3,137,252,233,232,251,1,28,15,183,
-  70,252,254,252,247,208,252,233,244,2,255,252,247,208,139,106,252,248,139,
-  173,233,139,4,135,252,233,244,148,255,252,247,208,139,106,252,248,139,173,
-  233,139,4,135,252,233,244,149,255,15,182,252,236,15,182,192,129,124,253,252,
-  234,4,239,15,133,244,36,139,44,252,234,129,124,253,194,4,239,15,135,244,251,
-  221,4,194,219,20,36,219,4,36,255,139,4,36,15,133,244,36,59,133,233,15,131,
-  244,36,193,224,3,3,133,233,129,120,253,4,239,15,132,244,248,248,1,139,40,
-  139,64,4,137,44,202,137,68,202,4,139,6,15,182,204,15,182,232,131,198,4,193,
-  232,16,252,255,36,171,248,2,131,189,233,0,15,132,244,1,139,141,233,252,246,
-  129,233,235,15,132,244,36,15,182,78,252,253,252,233,244,1,248,5,255,129,124,
-  253,194,4,239,15,133,244,36,139,4,194,252,233,244,148,255,15,182,252,236,
-  15,182,192,252,247,208,139,4,135,129,124,253,252,234,4,239,15,133,244,34,
-  139,44,252,234,248,148,139,141,233,35,136,233,105,201,239,3,141,233,248,1,
-  129,185,233,239,15,133,244,250,57,129,233,15,133,244,250,129,121,253,4,239,
-  15,132,244,251,15,182,70,252,253,139,41,139,73,4,137,44,194,248,2,255,137,
-  76,194,4,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,
-  248,3,15,182,70,252,253,185,237,252,233,244,2,248,4,139,137,233,133,201,15,
-  133,244,1,248,5,139,141,233,133,201,15,132,244,3,252,246,129,233,235,15,133,
-  244,3,252,233,244,34,255,15,182,252,236,15,182,192,129,124,253,252,234,4,
-  239,15,133,244,35,139,44,252,234,59,133,233,15,131,244,35,193,224,3,3,133,
-  233,129,120,253,4,239,15,132,244,248,248,1,139,40,139,64,4,137,44,202,137,
-  68,202,4,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,
-  248,2,131,189,233,0,15,132,244,1,139,141,233,252,246,129,233,235,15,132,244,
-  35,255,15,182,252,236,15,182,192,129,124,253,252,234,4,239,15,133,244,39,
-  139,44,252,234,129,124,253,194,4,239,15,135,244,251,221,4,194,219,20,36,219,
-  4,36,255,139,4,36,15,133,244,39,59,133,233,15,131,244,39,193,224,3,3,133,
-  233,129,120,253,4,239,15,132,244,249,248,1,252,246,133,233,235,15,133,244,
-  253,248,2,139,108,202,4,139,12,202,137,104,4,137,8,139,6,15,182,204,15,182,
-  232,131,198,4,193,232,16,252,255,36,171,248,3,131,189,233,0,15,132,244,1,
-  139,141,233,255,252,246,129,233,235,15,132,244,39,15,182,78,252,253,252,233,
-  244,1,248,5,129,124,253,194,4,239,15,133,244,39,139,4,194,252,233,244,149,
-  248,7,128,165,233,235,139,139,233,137,171,233,137,141,233,15,182,78,252,253,
-  252,233,244,2,255,15,182,252,236,15,182,192,252,247,208,139,4,135,129,124,
-  253,252,234,4,239,15,133,244,37,139,44,252,234,248,149,139,141,233,35,136,
-  233,105,201,239,198,133,233,0,3,141,233,248,1,129,185,233,239,15,133,244,
-  251,57,129,233,15,133,244,251,129,121,253,4,239,15,132,244,250,248,2,255,
-  252,246,133,233,235,15,133,244,253,248,3,15,182,70,252,253,139,108,194,4,
-  139,4,194,137,105,4,137,1,139,6,15,182,204,15,182,232,131,198,4,193,232,16,
-  252,255,36,171,248,4,131,189,233,0,15,132,244,2,137,12,36,139,141,233,252,
-  246,129,233,235,15,132,244,37,139,12,36,252,233,244,2,248,5,139,137,233,133,
-  201,15,133,244,1,255,139,141,233,133,201,15,132,244,252,252,246,129,233,235,
-  15,132,244,37,248,6,137,68,36,16,199,68,36,20,237,141,68,36,16,137,108,36,
-  12,137,108,36,4,139,108,36,48,137,68,36,8,137,44,36,137,116,36,24,137,149,
-  233,232,251,1,30,139,149,233,139,108,36,12,137,193,252,233,244,2,248,7,128,
-  165,233,235,139,131,233,137,171,233,137,133,233,252,233,244,3,255,15,182,
-  252,236,15,182,192,129,124,253,252,234,4,239,15,133,244,38,139,44,252,234,
-  59,133,233,15,131,244,38,193,224,3,3,133,233,129,120,253,4,239,15,132,244,
-  249,248,1,252,246,133,233,235,15,133,244,253,248,2,139,108,202,4,139,12,202,
-  137,104,4,137,8,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,
-  36,171,248,3,131,189,233,0,15,132,244,1,255,139,141,233,252,246,129,233,235,
-  15,132,244,38,15,182,78,252,253,252,233,244,1,248,7,128,165,233,235,139,139,
-  233,137,171,233,137,141,233,15,182,78,252,253,252,233,244,2,255,137,124,36,
-  16,221,4,199,219,92,36,12,248,1,141,12,202,139,105,252,248,252,246,133,233,
-  235,15,133,244,253,248,2,139,68,36,20,139,124,36,12,131,232,1,15,132,244,
-  250,1,252,248,59,133,233,15,131,244,251,41,252,248,193,231,3,3,189,233,248,
-  3,139,41,137,47,139,105,4,131,193,8,137,111,4,131,199,8,131,232,1,15,133,
-  244,3,248,4,139,124,36,16,139,6,15,182,204,15,182,232,131,198,4,193,232,16,
-  252,255,36,171,248,5,137,108,36,4,139,108,36,48,137,68,36,8,137,44,36,137,
-  116,36,24,137,149,233,232,251,1,31,139,149,233,15,182,78,252,253,252,233,
-  244,1,248,7,255,128,165,233,235,139,131,233,137,171,233,137,133,233,252,233,
-  244,2,255,3,68,36,20,255,141,76,202,8,139,105,252,248,129,121,253,252,252,
-  239,15,133,244,29,252,255,165,233,255,141,76,202,8,137,215,139,105,252,248,
-  129,121,253,252,252,239,15,133,244,29,248,51,139,114,252,252,252,247,198,
-  237,15,133,244,253,248,1,137,106,252,248,137,68,36,20,131,232,1,15,132,244,
-  249,248,2,139,41,137,47,139,105,4,137,111,4,131,199,8,131,193,8,131,232,1,
-  15,133,244,2,139,106,252,248,248,3,137,209,128,189,233,1,15,135,244,251,248,
-  4,139,68,36,20,252,255,165,233,248,5,255,252,247,198,237,15,133,244,4,15,
-  182,70,252,253,252,247,208,141,20,194,139,122,252,248,139,191,233,139,191,
-  233,252,233,244,4,248,7,15,139,244,1,131,230,252,248,41,252,242,137,215,139,
-  114,252,252,252,233,244,1,255,141,76,202,8,139,105,232,139,65,252,236,137,
-  41,137,65,4,139,105,252,240,139,65,252,244,137,105,8,137,65,12,139,105,224,
-  139,65,228,137,105,252,248,137,65,252,252,129,252,248,239,184,3,0,0,0,15,
-  133,244,29,252,255,165,233,255,15,182,252,236,139,66,252,248,141,12,202,139,
-  128,233,15,182,128,233,137,124,36,8,141,188,253,194,233,43,122,252,252,133,
-  252,237,15,132,244,251,141,108,252,233,252,248,57,215,15,131,244,248,248,
-  1,139,71,252,248,137,1,139,71,252,252,131,199,8,137,65,4,131,193,8,57,252,
-  233,15,131,244,249,57,215,15,130,244,1,248,2,199,65,4,237,131,193,8,57,252,
-  233,15,130,244,2,248,3,139,124,36,8,139,6,15,182,204,15,182,232,131,198,4,
-  193,232,16,252,255,36,171,248,5,199,68,36,20,1,0,0,0,137,208,41,252,248,15,
-  134,244,3,255,137,197,193,252,237,3,137,108,36,4,131,197,1,137,108,36,20,
-  139,108,36,48,1,200,59,133,233,15,135,244,253,248,6,139,71,252,248,137,1,
-  139,71,252,252,131,199,8,137,65,4,131,193,8,57,215,15,130,244,6,252,233,244,
-  3,248,7,137,149,233,137,141,233,137,116,36,24,41,215,137,44,36,232,251,1,
-  0,139,149,233,139,141,233,1,215,252,233,244,6,255,193,225,3,255,248,1,139,
-  114,252,252,137,68,36,20,252,247,198,237,15,133,244,253,255,248,17,137,215,
-  131,232,1,15,132,244,249,248,2,139,44,15,137,111,252,248,139,108,15,4,137,
-  111,252,252,131,199,8,131,232,1,15,133,244,2,248,3,139,68,36,20,15,182,110,
-  252,255,248,5,57,197,15,135,244,252,255,139,108,10,4,137,106,252,252,139,
-  44,10,137,106,252,248,255,15,182,78,252,253,252,247,209,141,20,202,139,122,
-  252,248,139,191,233,139,191,233,139,6,15,182,204,15,182,232,131,198,4,193,
-  232,16,252,255,36,171,248,6,255,199,71,252,252,237,131,199,8,255,199,68,194,
-  252,244,237,255,131,192,1,252,233,244,5,248,7,15,139,244,18,131,230,252,248,
-  41,252,242,255,1,252,241,255,137,252,245,209,252,237,129,229,239,102,131,
-  172,253,43,233,1,15,132,244,136,255,141,12,202,255,129,121,253,4,239,15,135,
-  244,52,129,121,253,12,239,15,135,244,52,255,139,105,20,255,129,252,253,239,
-  15,135,244,52,255,221,65,8,221,1,255,220,65,16,221,17,255,221,81,24,133,252,
-  237,15,136,244,247,217,201,248,1,255,15,183,70,252,254,255,15,131,244,248,
-  141,180,253,134,233,255,141,180,253,134,233,15,183,70,252,254,15,131,245,
-  255,15,130,244,248,141,180,253,134,233,255,141,12,202,139,105,4,129,252,253,
-  239,15,132,244,247,255,137,105,252,252,139,41,137,105,252,248,252,233,245,
-  255,141,180,253,134,233,139,1,137,105,252,252,137,65,252,248,255,139,139,
-  233,139,4,129,139,128,233,139,108,36,48,137,147,233,137,171,233,252,255,224,
-  255,141,180,253,134,233,139,6,15,182,204,15,182,232,131,198,4,193,232,16,
-  252,255,36,171,255,254,0
+  15,135,244,48,255,252,242,15,16,4,194,184,0,0,0,128,102,15,110,200,102,15,
+  112,201,81,15,87,193,252,242,15,17,4,202,255,221,4,194,217,224,221,28,202,
+  255,129,124,253,194,4,239,15,133,244,248,139,4,194,255,15,87,192,252,242,
+  15,42,128,233,248,1,252,242,15,17,4,202,255,219,128,233,248,1,221,28,202,
+  255,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,248,2,
+  129,124,253,194,4,239,15,133,244,50,139,12,194,137,213,232,251,1,18,255,252,
+  242,15,42,192,137,252,234,255,137,4,36,137,252,234,219,4,36,255,15,182,78,
+  252,253,252,233,244,1,255,15,182,252,236,15,182,192,255,129,124,253,252,234,
+  4,239,15,135,244,46,255,252,242,15,16,4,252,234,252,242,15,88,4,199,255,221,
+  4,252,234,220,4,199,255,129,124,253,252,234,4,239,15,135,244,47,255,252,242,
+  15,16,4,199,252,242,15,88,4,252,234,255,221,4,199,220,4,252,234,255,129,124,
+  253,252,234,4,239,15,135,244,49,129,124,253,194,4,239,15,135,244,49,255,252,
+  242,15,16,4,252,234,252,242,15,88,4,194,255,221,4,252,234,220,4,194,255,252,
+  242,15,16,4,252,234,252,242,15,92,4,199,255,221,4,252,234,220,36,199,255,
+  252,242,15,16,4,199,252,242,15,92,4,252,234,255,221,4,199,220,36,252,234,
+  255,252,242,15,16,4,252,234,252,242,15,92,4,194,255,221,4,252,234,220,36,
+  194,255,252,242,15,16,4,252,234,252,242,15,89,4,199,255,221,4,252,234,220,
+  12,199,255,252,242,15,16,4,199,252,242,15,89,4,252,234,255,221,4,199,220,
+  12,252,234,255,252,242,15,16,4,252,234,252,242,15,89,4,194,255,221,4,252,
+  234,220,12,194,255,252,242,15,16,4,252,234,252,242,15,94,4,199,255,221,4,
+  252,234,220,52,199,255,252,242,15,16,4,199,252,242,15,94,4,252,234,255,221,
+  4,199,220,52,252,234,255,252,242,15,16,4,252,234,252,242,15,94,4,194,255,
+  221,4,252,234,220,52,194,255,252,242,15,16,4,252,234,252,242,15,16,12,199,
+  255,221,4,252,234,221,4,199,255,252,242,15,16,4,199,252,242,15,16,12,252,
+  234,255,221,4,199,221,4,252,234,255,252,242,15,16,4,252,234,252,242,15,16,
+  12,194,255,221,4,252,234,221,4,194,255,248,147,232,244,140,255,252,233,244,
+  147,255,232,244,105,255,15,182,252,236,15,182,192,141,12,194,41,232,137,76,
+  36,4,137,68,36,8,248,33,139,108,36,48,137,44,36,137,116,36,24,137,149,233,
+  232,251,1,23,139,149,233,133,192,15,133,244,42,15,182,110,252,255,15,182,
+  78,252,253,139,68,252,234,4,139,44,252,234,137,68,202,4,137,44,202,139,6,
+  15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,255,252,247,208,
+  139,4,135,199,68,202,4,237,137,4,202,139,6,15,182,204,15,182,232,131,198,
+  4,193,232,16,252,255,36,171,255,223,70,252,254,221,28,202,139,6,15,182,204,
+  15,182,232,131,198,4,193,232,16,252,255,36,171,255,221,4,199,221,28,202,139,
+  6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,255,252,247,208,
+  137,68,202,4,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,
+  171,255,141,76,202,12,141,68,194,4,189,237,137,105,252,248,248,1,137,41,131,
+  193,8,57,193,15,134,244,1,139,6,15,182,204,15,182,232,131,198,4,193,232,16,
+  252,255,36,171,255,139,106,252,248,139,172,253,133,233,139,173,233,139,69,
+  4,139,109,0,137,68,202,4,137,44,202,139,6,15,182,204,15,182,232,131,198,4,
+  193,232,16,252,255,36,171,255,139,106,252,248,139,172,253,141,233,128,189,
+  233,0,139,173,233,139,12,194,139,68,194,4,137,77,0,137,69,4,15,132,244,247,
+  252,246,133,233,235,15,133,244,248,248,1,139,6,15,182,204,15,182,232,131,
+  198,4,193,232,16,252,255,36,171,248,2,129,232,239,129,252,248,239,15,134,
+  244,1,252,246,129,233,235,15,132,244,1,135,213,141,139,233,255,232,251,1,
+  24,137,252,234,252,233,244,1,255,252,247,208,139,106,252,248,139,172,253,
+  141,233,139,12,135,139,133,233,137,8,199,64,4,237,252,246,133,233,235,15,
+  133,244,248,248,1,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,
+  36,171,248,2,252,246,129,233,235,15,132,244,1,128,189,233,0,15,132,244,1,
+  137,213,137,194,141,139,233,232,251,1,24,137,252,234,252,233,244,1,255,139,
+  106,252,248,221,4,199,139,172,253,141,233,139,141,233,221,25,139,6,15,182,
+  204,15,182,232,131,198,4,193,232,16,252,255,36,171,255,252,247,208,139,106,
+  252,248,139,172,253,141,233,139,141,233,137,65,4,139,6,15,182,204,15,182,
+  232,131,198,4,193,232,16,252,255,36,171,255,141,180,253,134,233,139,108,36,
+  48,131,189,233,0,15,132,244,247,141,12,202,137,76,36,4,137,44,36,137,149,
+  233,232,251,1,25,139,149,233,248,1,139,6,15,182,204,15,182,232,131,198,4,
+  193,232,16,252,255,36,171,255,252,247,208,139,74,252,248,139,4,135,139,108,
+  36,48,137,76,36,8,137,68,36,4,137,116,36,24,137,44,36,137,149,233,232,251,
+  1,26,139,149,233,15,182,78,252,253,137,4,202,199,68,202,4,237,139,6,15,182,
+  204,15,182,232,131,198,4,193,232,16,252,255,36,171,255,137,197,37,252,255,
+  7,0,0,193,252,237,11,61,252,255,7,0,0,15,148,209,137,108,36,8,1,200,139,108,
+  36,48,1,200,137,68,36,4,137,116,36,24,139,139,233,137,44,36,59,139,233,137,
+  149,233,15,131,244,248,248,1,232,251,1,27,139,149,233,15,182,78,252,253,137,
+  4,202,199,68,202,4,237,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,
+  255,36,171,248,2,137,252,233,232,251,1,28,252,233,244,1,255,252,247,208,139,
+  108,36,48,139,139,233,137,116,36,24,59,139,233,137,149,233,15,131,244,249,
+  248,2,139,20,135,137,252,233,232,251,1,29,139,149,233,15,182,78,252,253,137,
+  4,202,199,68,202,4,237,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,
+  255,36,171,248,3,137,252,233,232,251,1,28,15,183,70,252,254,252,247,208,252,
+  233,244,2,255,252,247,208,139,106,252,248,139,173,233,139,4,135,252,233,244,
+  148,255,252,247,208,139,106,252,248,139,173,233,139,4,135,252,233,244,149,
+  255,15,182,252,236,15,182,192,129,124,253,252,234,4,239,15,133,244,36,139,
+  44,252,234,129,124,253,194,4,239,15,135,244,251,221,4,194,219,20,36,219,4,
+  36,255,139,4,36,15,133,244,36,59,133,233,15,131,244,36,193,224,3,3,133,233,
+  129,120,253,4,239,15,132,244,248,248,1,139,40,139,64,4,137,44,202,137,68,
+  202,4,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,248,
+  2,131,189,233,0,15,132,244,1,139,141,233,252,246,129,233,235,15,132,244,36,
+  15,182,78,252,253,252,233,244,1,248,5,255,129,124,253,194,4,239,15,133,244,
+  36,139,4,194,252,233,244,148,255,15,182,252,236,15,182,192,252,247,208,139,
+  4,135,129,124,253,252,234,4,239,15,133,244,34,139,44,252,234,248,148,139,
+  141,233,35,136,233,105,201,239,3,141,233,248,1,129,185,233,239,15,133,244,
+  250,57,129,233,15,133,244,250,129,121,253,4,239,15,132,244,251,15,182,70,
+  252,253,139,41,139,73,4,137,44,194,248,2,255,137,76,194,4,139,6,15,182,204,
+  15,182,232,131,198,4,193,232,16,252,255,36,171,248,3,15,182,70,252,253,185,
+  237,252,233,244,2,248,4,139,137,233,133,201,15,133,244,1,248,5,139,141,233,
+  133,201,15,132,244,3,252,246,129,233,235,15,133,244,3,252,233,244,34,255,
+  15,182,252,236,15,182,192,129,124,253,252,234,4,239,15,133,244,35,139,44,
+  252,234,59,133,233,15,131,244,35,193,224,3,3,133,233,129,120,253,4,239,15,
+  132,244,248,248,1,139,40,139,64,4,137,44,202,137,68,202,4,139,6,15,182,204,
+  15,182,232,131,198,4,193,232,16,252,255,36,171,248,2,131,189,233,0,15,132,
+  244,1,139,141,233,252,246,129,233,235,15,132,244,35,255,15,182,252,236,15,
+  182,192,129,124,253,252,234,4,239,15,133,244,39,139,44,252,234,129,124,253,
+  194,4,239,15,135,244,251,221,4,194,219,20,36,219,4,36,255,139,4,36,15,133,
+  244,39,59,133,233,15,131,244,39,193,224,3,3,133,233,129,120,253,4,239,15,
+  132,244,249,248,1,252,246,133,233,235,15,133,244,253,248,2,139,108,202,4,
+  139,12,202,137,104,4,137,8,139,6,15,182,204,15,182,232,131,198,4,193,232,
+  16,252,255,36,171,248,3,131,189,233,0,15,132,244,1,139,141,233,255,252,246,
+  129,233,235,15,132,244,39,15,182,78,252,253,252,233,244,1,248,5,129,124,253,
+  194,4,239,15,133,244,39,139,4,194,252,233,244,149,248,7,128,165,233,235,139,
+  139,233,137,171,233,137,141,233,15,182,78,252,253,252,233,244,2,255,15,182,
+  252,236,15,182,192,252,247,208,139,4,135,129,124,253,252,234,4,239,15,133,
+  244,37,139,44,252,234,248,149,139,141,233,35,136,233,105,201,239,198,133,
+  233,0,3,141,233,248,1,129,185,233,239,15,133,244,251,57,129,233,15,133,244,
+  251,129,121,253,4,239,15,132,244,250,248,2,255,252,246,133,233,235,15,133,
+  244,253,248,3,15,182,70,252,253,139,108,194,4,139,4,194,137,105,4,137,1,139,
+  6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,248,4,131,189,
+  233,0,15,132,244,2,137,12,36,139,141,233,252,246,129,233,235,15,132,244,37,
+  139,12,36,252,233,244,2,248,5,139,137,233,133,201,15,133,244,1,255,139,141,
+  233,133,201,15,132,244,252,252,246,129,233,235,15,132,244,37,248,6,137,68,
+  36,16,199,68,36,20,237,141,68,36,16,137,108,36,12,137,108,36,4,139,108,36,
+  48,137,68,36,8,137,44,36,137,116,36,24,137,149,233,232,251,1,30,139,149,233,
+  139,108,36,12,137,193,252,233,244,2,248,7,128,165,233,235,139,131,233,137,
+  171,233,137,133,233,252,233,244,3,255,15,182,252,236,15,182,192,129,124,253,
+  252,234,4,239,15,133,244,38,139,44,252,234,59,133,233,15,131,244,38,193,224,
+  3,3,133,233,129,120,253,4,239,15,132,244,249,248,1,252,246,133,233,235,15,
+  133,244,253,248,2,139,108,202,4,139,12,202,137,104,4,137,8,139,6,15,182,204,
+  15,182,232,131,198,4,193,232,16,252,255,36,171,248,3,131,189,233,0,15,132,
+  244,1,255,139,141,233,252,246,129,233,235,15,132,244,38,15,182,78,252,253,
+  252,233,244,1,248,7,128,165,233,235,139,139,233,137,171,233,137,141,233,15,
+  182,78,252,253,252,233,244,2,255,137,124,36,16,221,4,199,219,92,36,12,248,
+  1,141,12,202,139,105,252,248,252,246,133,233,235,15,133,244,253,248,2,139,
+  68,36,20,139,124,36,12,131,232,1,15,132,244,250,1,252,248,59,133,233,15,131,
+  244,251,41,252,248,193,231,3,3,189,233,248,3,139,41,137,47,139,105,4,131,
+  193,8,137,111,4,131,199,8,131,232,1,15,133,244,3,248,4,139,124,36,16,139,
+  6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,248,5,137,108,
+  36,4,139,108,36,48,137,68,36,8,137,44,36,137,116,36,24,137,149,233,232,251,
+  1,31,139,149,233,15,182,78,252,253,252,233,244,1,248,7,255,128,165,233,235,
+  139,131,233,137,171,233,137,133,233,252,233,244,2,255,3,68,36,20,255,141,
+  76,202,8,139,105,252,248,129,121,253,252,252,239,15,133,244,29,252,255,165,
+  233,255,141,76,202,8,137,215,139,105,252,248,129,121,253,252,252,239,15,133,
+  244,29,248,51,139,114,252,252,252,247,198,237,15,133,244,253,248,1,137,106,
+  252,248,137,68,36,20,131,232,1,15,132,244,249,248,2,139,41,137,47,139,105,
+  4,137,111,4,131,199,8,131,193,8,131,232,1,15,133,244,2,139,106,252,248,248,
+  3,137,209,128,189,233,1,15,135,244,251,248,4,139,68,36,20,252,255,165,233,
+  248,5,255,252,247,198,237,15,133,244,4,15,182,70,252,253,252,247,208,141,
+  20,194,139,122,252,248,139,191,233,139,191,233,252,233,244,4,248,7,15,139,
+  244,1,131,230,252,248,41,252,242,137,215,139,114,252,252,252,233,244,1,255,
+  141,76,202,8,139,105,232,139,65,252,236,137,41,137,65,4,139,105,252,240,139,
+  65,252,244,137,105,8,137,65,12,139,105,224,139,65,228,137,105,252,248,137,
+  65,252,252,129,252,248,239,184,3,0,0,0,15,133,244,29,252,255,165,233,255,
+  15,182,252,236,139,66,252,248,141,12,202,139,128,233,15,182,128,233,137,124,
+  36,8,141,188,253,194,233,43,122,252,252,133,252,237,15,132,244,251,141,108,
+  252,233,252,248,57,215,15,131,244,248,248,1,139,71,252,248,137,1,139,71,252,
+  252,131,199,8,137,65,4,131,193,8,57,252,233,15,131,244,249,57,215,15,130,
+  244,1,248,2,199,65,4,237,131,193,8,57,252,233,15,130,244,2,248,3,139,124,
+  36,8,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,248,
+  5,199,68,36,20,1,0,0,0,137,208,41,252,248,15,134,244,3,255,137,197,193,252,
+  237,3,137,108,36,4,131,197,1,137,108,36,20,139,108,36,48,1,200,59,133,233,
+  15,135,244,253,248,6,139,71,252,248,137,1,139,71,252,252,131,199,8,137,65,
+  4,131,193,8,57,215,15,130,244,6,252,233,244,3,248,7,137,149,233,137,141,233,
+  137,116,36,24,41,215,137,44,36,232,251,1,0,139,149,233,139,141,233,1,215,
+  252,233,244,6,255,193,225,3,255,248,1,139,114,252,252,137,68,36,20,252,247,
+  198,237,15,133,244,253,255,248,17,137,215,131,232,1,15,132,244,249,248,2,
+  139,44,15,137,111,252,248,139,108,15,4,137,111,252,252,131,199,8,131,232,
+  1,15,133,244,2,248,3,139,68,36,20,15,182,110,252,255,248,5,57,197,15,135,
+  244,252,255,139,108,10,4,137,106,252,252,139,44,10,137,106,252,248,255,15,
+  182,78,252,253,252,247,209,141,20,202,139,122,252,248,139,191,233,139,191,
+  233,139,6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,248,6,
+  255,199,71,252,252,237,131,199,8,255,199,68,194,252,244,237,255,131,192,1,
+  252,233,244,5,248,7,15,139,244,18,131,230,252,248,41,252,242,255,1,252,241,
+  255,137,252,245,209,252,237,129,229,239,102,131,172,253,43,233,1,15,132,244,
+  136,255,141,12,202,255,129,121,253,4,239,15,135,244,52,129,121,253,12,239,
+  15,135,244,52,255,139,105,20,255,129,252,253,239,15,135,244,52,255,221,65,
+  8,221,1,255,220,65,16,221,17,255,221,81,24,133,252,237,15,136,244,247,217,
+  201,248,1,255,15,183,70,252,254,255,15,131,244,248,141,180,253,134,233,255,
+  141,180,253,134,233,15,183,70,252,254,15,131,245,255,15,130,244,248,141,180,
+  253,134,233,255,141,12,202,139,105,4,129,252,253,239,15,132,244,247,255,137,
+  105,252,252,139,41,137,105,252,248,252,233,245,255,141,180,253,134,233,139,
+  1,137,105,252,252,137,65,252,248,255,139,139,233,139,4,129,139,128,233,139,
+  108,36,48,137,147,233,137,171,233,252,255,224,255,141,180,253,134,233,139,
+  6,15,182,204,15,182,232,131,198,4,193,232,16,252,255,36,171,255,254,0
 };
 
 enum {
@@ -948,7 +970,7 @@ static const char *const extnames[] = {
 
 /* Generate subroutines used by opcodes and other parts of the VM. */
 /* The .code_sub section should be last to help static branch prediction. */
-static void build_subroutines(BuildCtx *ctx, int cmov)
+static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
 {
   dasm_put(Dst, 0);
   dasm_put(Dst, 2, Dt7(->pt), Dt9(->framesize), Dt9(->bc), Dt9(->k), Dt1(->maxstack), LJ_TNIL);
@@ -1062,39 +1084,53 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
   dasm_put(Dst, 7349, Dt7(->pt), Dt9(->k), DISPATCH_GL(jit_L), DISPATCH_GL(vmstate), ~LJ_VMST_INTERP);
 #endif
   dasm_put(Dst, 7389);
-  if (cmov) {
-  dasm_put(Dst, 7653);
+  if (sse) {
+    dasm_put(Dst, 7524);
   } else {
-  dasm_put(Dst, 7657);
+    dasm_put(Dst, 7654);
   }
-  dasm_put(Dst, 7666);
-  dasm_put(Dst, 7735);
-  dasm_put(Dst, 7837);
+  dasm_put(Dst, 7700);
   if (cmov) {
-  dasm_put(Dst, 7850);
+  dasm_put(Dst, 7785);
   } else {
-  dasm_put(Dst, 7854);
+  dasm_put(Dst, 7789);
   }
-  dasm_put(Dst, 7863);
+  dasm_put(Dst, 7798);
+  dasm_put(Dst, 7867);
+  dasm_put(Dst, 7969);
   if (cmov) {
-  dasm_put(Dst, 7653);
+  dasm_put(Dst, 7982);
   } else {
-  dasm_put(Dst, 7881);
+  dasm_put(Dst, 7986);
   }
-  dasm_put(Dst, 7888);
-  dasm_put(Dst, 8003);
-  dasm_put(Dst, 8096);
-  dasm_put(Dst, 8192);
+  dasm_put(Dst, 7995);
   if (cmov) {
-  dasm_put(Dst, 8251);
+  dasm_put(Dst, 7785);
   } else {
-  dasm_put(Dst, 8270);
+  dasm_put(Dst, 8013);
   }
-  dasm_put(Dst, 8311);
+  dasm_put(Dst, 8020);
+  dasm_put(Dst, 8135);
+  dasm_put(Dst, 8228);
+  if (sse) {
+    dasm_put(Dst, 8246);
+    dasm_put(Dst, 8333);
+    dasm_put(Dst, 8427);
+  } else {
+    dasm_put(Dst, 8513);
+    dasm_put(Dst, 8596);
+    if (cmov) {
+    dasm_put(Dst, 8651);
+    } else {
+    dasm_put(Dst, 8670);
+    }
+    dasm_put(Dst, 8711);
+  }
+  dasm_put(Dst, 8715);
 }
 
 /* Generate the code for a single instruction. */
-static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
+static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
 {
   int vk = 0;
   dasm_put(Dst, 1454, defop);
@@ -1106,403 +1142,612 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
   /* Remember: all ops branch for a true comparison, fall through otherwise. */
 
   case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
-    dasm_put(Dst, 8368, LJ_TISNUM, LJ_TISNUM);
-    if (cmov) {
-    dasm_put(Dst, 8398);
+    dasm_put(Dst, 8769, LJ_TISNUM, LJ_TISNUM);
+    if (sse) {
+      dasm_put(Dst, 8790);
     } else {
-    dasm_put(Dst, 8404);
+      dasm_put(Dst, 8805);
+      if (cmov) {
+      dasm_put(Dst, 8815);
+      } else {
+      dasm_put(Dst, 8821);
+      }
     }
     switch (op) {
     case BC_ISLT:
-      dasm_put(Dst, 8411);
+      dasm_put(Dst, 8828);
       break;
     case BC_ISGE:
-      dasm_put(Dst, 8416);
+      dasm_put(Dst, 8591);
       break;
     case BC_ISLE:
       dasm_put(Dst, 5884);
       break;
     case BC_ISGT:
-      dasm_put(Dst, 8421);
+      dasm_put(Dst, 8833);
       break;
     default: break;  /* Shut up GCC. */
     }
-    dasm_put(Dst, 8426, -BCBIAS_J*4);
+    dasm_put(Dst, 8838, -BCBIAS_J*4);
     break;
 
   case BC_ISEQV: case BC_ISNEV:
     vk = op == BC_ISEQV;
-    dasm_put(Dst, 8459, LJ_TISNUM, LJ_TISNUM);
-    if (cmov) {
-    dasm_put(Dst, 8398);
+    dasm_put(Dst, 8871, LJ_TISNUM, LJ_TISNUM);
+    if (sse) {
+      dasm_put(Dst, 8897);
     } else {
-    dasm_put(Dst, 8404);
+      dasm_put(Dst, 8909);
+      if (cmov) {
+      dasm_put(Dst, 8815);
+      } else {
+      dasm_put(Dst, 8821);
+      }
     }
   iseqne_fp:
     if (vk) {
-      dasm_put(Dst, 8491);
+      dasm_put(Dst, 8916);
     } else {
-      dasm_put(Dst, 8500);
+      dasm_put(Dst, 8925);
     }
   iseqne_end:
     if (vk) {
-      dasm_put(Dst, 8509, -BCBIAS_J*4);
+      dasm_put(Dst, 8934, -BCBIAS_J*4);
     } else {
-      dasm_put(Dst, 8524, -BCBIAS_J*4);
+      dasm_put(Dst, 8949, -BCBIAS_J*4);
     }
     dasm_put(Dst, 7174);
     if (op == BC_ISEQV || op == BC_ISNEV) {
-      dasm_put(Dst, 8539, LJ_TISPRI, LJ_TISTABUD, Dt6(->metatable), Dt6(->nomm), 1<<MM_eq);
+      dasm_put(Dst, 8964, LJ_TISPRI, LJ_TISTABUD, Dt6(->metatable), Dt6(->nomm), 1<<MM_eq);
       if (vk) {
-       dasm_put(Dst, 8597);
+       dasm_put(Dst, 9022);
       } else {
-       dasm_put(Dst, 8601);
+       dasm_put(Dst, 9026);
       }
-      dasm_put(Dst, 8607);
+      dasm_put(Dst, 9032);
     }
     break;
   case BC_ISEQS: case BC_ISNES:
     vk = op == BC_ISEQS;
-    dasm_put(Dst, 8612, LJ_TSTR);
+    dasm_put(Dst, 9037, LJ_TSTR);
   iseqne_test:
     if (vk) {
-      dasm_put(Dst, 8495);
+      dasm_put(Dst, 8920);
     } else {
-      dasm_put(Dst, 8187);
+      dasm_put(Dst, 8929);
     }
     goto iseqne_end;
   case BC_ISEQN: case BC_ISNEN:
     vk = op == BC_ISEQN;
-    dasm_put(Dst, 8635, LJ_TISNUM);
-    if (cmov) {
-    dasm_put(Dst, 8398);
+    dasm_put(Dst, 9060, LJ_TISNUM);
+    if (sse) {
+      dasm_put(Dst, 9074);
     } else {
-    dasm_put(Dst, 8404);
+      dasm_put(Dst, 9086);
+      if (cmov) {
+      dasm_put(Dst, 8815);
+      } else {
+      dasm_put(Dst, 8821);
+      }
     }
     goto iseqne_fp;
   case BC_ISEQP: case BC_ISNEP:
     vk = op == BC_ISEQP;
-    dasm_put(Dst, 8655);
+    dasm_put(Dst, 9093);
     goto iseqne_test;
 
   /* -- Unary test and copy ops ------------------------------------------- */
 
   case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
-    dasm_put(Dst, 8666, LJ_TISTRUECOND);
+    dasm_put(Dst, 9104, LJ_TISTRUECOND);
     if (op == BC_IST || op == BC_ISTC) {
-      dasm_put(Dst, 8678);
+      dasm_put(Dst, 9116);
     } else {
-      dasm_put(Dst, 8683);
+      dasm_put(Dst, 9121);
     }
     if (op == BC_ISTC || op == BC_ISFC) {
-      dasm_put(Dst, 8688);
+      dasm_put(Dst, 9126);
     }
-    dasm_put(Dst, 8699, -BCBIAS_J*4);
+    dasm_put(Dst, 9137, -BCBIAS_J*4);
     break;
 
   /* -- Unary ops --------------------------------------------------------- */
 
   case BC_MOV:
-    dasm_put(Dst, 8730);
+    dasm_put(Dst, 9168);
     break;
   case BC_NOT:
-    dasm_put(Dst, 8763, LJ_TISTRUECOND, LJ_TTRUE);
+    dasm_put(Dst, 9201, LJ_TISTRUECOND, LJ_TTRUE);
     break;
   case BC_UNM:
-    dasm_put(Dst, 8798, LJ_TISNUM);
+    dasm_put(Dst, 9236, LJ_TISNUM);
+    if (sse) {
+      dasm_put(Dst, 9247);
+    } else {
+      dasm_put(Dst, 9277);
+    }
+    dasm_put(Dst, 7174);
     break;
   case BC_LEN:
-    dasm_put(Dst, 8835, LJ_TSTR, Dt5(->len), LJ_TTAB);
+    dasm_put(Dst, 9286, LJ_TSTR);
+    if (sse) {
+      dasm_put(Dst, 9300, Dt5(->len));
+    } else {
+      dasm_put(Dst, 9318, Dt5(->len));
+    }
+    dasm_put(Dst, 9327, LJ_TTAB);
+    if (sse) {
+      dasm_put(Dst, 9367);
+    } else {
+      dasm_put(Dst, 9376);
+    }
+    dasm_put(Dst, 9386);
     break;
 
   /* -- Binary ops -------------------------------------------------------- */
 
 
   case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
-    dasm_put(Dst, 8914);
+    dasm_put(Dst, 9396);
     vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
     switch (vk) {
     case 0:
-    dasm_put(Dst, 8922, LJ_TISNUM);
+    dasm_put(Dst, 9404, LJ_TISNUM);
+    if (sse) {
+    dasm_put(Dst, 9416);
+    } else {
+    dasm_put(Dst, 9430);
+    }
       break;
     case 1:
-    dasm_put(Dst, 8941, LJ_TISNUM);
+    dasm_put(Dst, 9438, LJ_TISNUM);
+    if (sse) {
+    dasm_put(Dst, 9450);
+    } else {
+    dasm_put(Dst, 9464);
+    }
       break;
     default:
-    dasm_put(Dst, 8960, LJ_TISNUM, LJ_TISNUM);
+    dasm_put(Dst, 9472, LJ_TISNUM, LJ_TISNUM);
+    if (sse) {
+    dasm_put(Dst, 9494);
+    } else {
+    dasm_put(Dst, 9508);
+    }
       break;
     }
-    dasm_put(Dst, 8813);
+    if (sse) {
+    dasm_put(Dst, 9270);
+    } else {
+    dasm_put(Dst, 9282);
+    }
+    dasm_put(Dst, 7174);
     break;
   case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
-    dasm_put(Dst, 8914);
+    dasm_put(Dst, 9396);
     vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
     switch (vk) {
     case 0:
-    dasm_put(Dst, 8989, LJ_TISNUM);
+    dasm_put(Dst, 9404, LJ_TISNUM);
+    if (sse) {
+    dasm_put(Dst, 9516);
+    } else {
+    dasm_put(Dst, 9530);
+    }
       break;
     case 1:
-    dasm_put(Dst, 9008, LJ_TISNUM);
+    dasm_put(Dst, 9438, LJ_TISNUM);
+    if (sse) {
+    dasm_put(Dst, 9538);
+    } else {
+    dasm_put(Dst, 9552);
+    }
       break;
     default:
-    dasm_put(Dst, 9027, LJ_TISNUM, LJ_TISNUM);
+    dasm_put(Dst, 9472, LJ_TISNUM, LJ_TISNUM);
+    if (sse) {
+    dasm_put(Dst, 9560);
+    } else {
+    dasm_put(Dst, 9574);
+    }
       break;
     }
-    dasm_put(Dst, 8813);
+    if (sse) {
+    dasm_put(Dst, 9270);
+    } else {
+    dasm_put(Dst, 9282);
+    }
+    dasm_put(Dst, 7174);
     break;
   case BC_MULVN: case BC_MULNV: case BC_MULVV:
-    dasm_put(Dst, 8914);
+    dasm_put(Dst, 9396);
     vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
     switch (vk) {
     case 0:
-    dasm_put(Dst, 9056, LJ_TISNUM);
+    dasm_put(Dst, 9404, LJ_TISNUM);
+    if (sse) {
+    dasm_put(Dst, 9582);
+    } else {
+    dasm_put(Dst, 9596);
+    }
       break;
     case 1:
-    dasm_put(Dst, 9075, LJ_TISNUM);
+    dasm_put(Dst, 9438, LJ_TISNUM);
+    if (sse) {
+    dasm_put(Dst, 9604);
+    } else {
+    dasm_put(Dst, 9618);
+    }
       break;
     default:
-    dasm_put(Dst, 9094, LJ_TISNUM, LJ_TISNUM);
+    dasm_put(Dst, 9472, LJ_TISNUM, LJ_TISNUM);
+    if (sse) {
+    dasm_put(Dst, 9626);
+    } else {
+    dasm_put(Dst, 9640);
+    }
       break;
     }
-    dasm_put(Dst, 8813);
+    if (sse) {
+    dasm_put(Dst, 9270);
+    } else {
+    dasm_put(Dst, 9282);
+    }
+    dasm_put(Dst, 7174);
     break;
   case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
-    dasm_put(Dst, 8914);
+    dasm_put(Dst, 9396);
     vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
     switch (vk) {
     case 0:
-    dasm_put(Dst, 9123, LJ_TISNUM);
+    dasm_put(Dst, 9404, LJ_TISNUM);
+    if (sse) {
+    dasm_put(Dst, 9648);
+    } else {
+    dasm_put(Dst, 9662);
+    }
       break;
     case 1:
-    dasm_put(Dst, 9142, LJ_TISNUM);
+    dasm_put(Dst, 9438, LJ_TISNUM);
+    if (sse) {
+    dasm_put(Dst, 9670);
+    } else {
+    dasm_put(Dst, 9684);
+    }
       break;
     default:
-    dasm_put(Dst, 9161, LJ_TISNUM, LJ_TISNUM);
+    dasm_put(Dst, 9472, LJ_TISNUM, LJ_TISNUM);
+    if (sse) {
+    dasm_put(Dst, 9692);
+    } else {
+    dasm_put(Dst, 9706);
+    }
       break;
     }
-    dasm_put(Dst, 8813);
+    if (sse) {
+    dasm_put(Dst, 9270);
+    } else {
+    dasm_put(Dst, 9282);
+    }
+    dasm_put(Dst, 7174);
     break;
   case BC_MODVN:
-    dasm_put(Dst, 8914);
+    dasm_put(Dst, 9396);
     vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
     switch (vk) {
     case 0:
-    dasm_put(Dst, 9190, LJ_TISNUM);
+    dasm_put(Dst, 9404, LJ_TISNUM);
+    if (sse) {
+    dasm_put(Dst, 9714);
+    } else {
+    dasm_put(Dst, 9728);
+    }
       break;
     case 1:
-    dasm_put(Dst, 9209, LJ_TISNUM);
+    dasm_put(Dst, 9438, LJ_TISNUM);
+    if (sse) {
+    dasm_put(Dst, 9736);
+    } else {
+    dasm_put(Dst, 9750);
+    }
       break;
     default:
-    dasm_put(Dst, 9228, LJ_TISNUM, LJ_TISNUM);
+    dasm_put(Dst, 9472, LJ_TISNUM, LJ_TISNUM);
+    if (sse) {
+    dasm_put(Dst, 9758);
+    } else {
+    dasm_put(Dst, 9772);
+    }
       break;
     }
-    dasm_put(Dst, 9257);
+    dasm_put(Dst, 9780);
+    if (sse) {
+    dasm_put(Dst, 9270);
+    } else {
+    dasm_put(Dst, 9282);
+    }
+    dasm_put(Dst, 7174);
     break;
   case BC_MODNV: case BC_MODVV:
-    dasm_put(Dst, 8914);
+    dasm_put(Dst, 9396);
     vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
     switch (vk) {
     case 0:
-    dasm_put(Dst, 9190, LJ_TISNUM);
+    dasm_put(Dst, 9404, LJ_TISNUM);
+    if (sse) {
+    dasm_put(Dst, 9714);
+    } else {
+    dasm_put(Dst, 9728);
+    }
       break;
     case 1:
-    dasm_put(Dst, 9209, LJ_TISNUM);
+    dasm_put(Dst, 9438, LJ_TISNUM);
+    if (sse) {
+    dasm_put(Dst, 9736);
+    } else {
+    dasm_put(Dst, 9750);
+    }
       break;
     default:
-    dasm_put(Dst, 9228, LJ_TISNUM, LJ_TISNUM);
+    dasm_put(Dst, 9472, LJ_TISNUM, LJ_TISNUM);
+    if (sse) {
+    dasm_put(Dst, 9758);
+    } else {
+    dasm_put(Dst, 9772);
+    }
       break;
     }
-    dasm_put(Dst, 9284);
+    dasm_put(Dst, 9786);
     break;
   case BC_POW:
-    dasm_put(Dst, 8914);
-    vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
-    switch (vk) {
-    case 0:
-    dasm_put(Dst, 9190, LJ_TISNUM);
-      break;
-    case 1:
-    dasm_put(Dst, 9209, LJ_TISNUM);
-      break;
-    default:
-    dasm_put(Dst, 9228, LJ_TISNUM, LJ_TISNUM);
-      break;
+    if (sse) {
+      sse = 0;  /* NYI: temporary workaround. */
+      dasm_put(Dst, 9396);
+      vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+      switch (vk) {
+      case 0:
+      dasm_put(Dst, 9404, LJ_TISNUM);
+      if (sse) {
+      dasm_put(Dst, 9714);
+      } else {
+      dasm_put(Dst, 9728);
+      }
+        break;
+      case 1:
+      dasm_put(Dst, 9438, LJ_TISNUM);
+      if (sse) {
+      dasm_put(Dst, 9736);
+      } else {
+      dasm_put(Dst, 9750);
+      }
+        break;
+      default:
+      dasm_put(Dst, 9472, LJ_TISNUM, LJ_TISNUM);
+      if (sse) {
+      dasm_put(Dst, 9758);
+      } else {
+      dasm_put(Dst, 9772);
+      }
+        break;
+      }
+      dasm_put(Dst, 9791);
+      if (sse) {
+      dasm_put(Dst, 9270);
+      } else {
+      dasm_put(Dst, 9282);
+      }
+      sse = 1;
+    } else {
+      dasm_put(Dst, 9396);
+      vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+      switch (vk) {
+      case 0:
+      dasm_put(Dst, 9404, LJ_TISNUM);
+      if (sse) {
+      dasm_put(Dst, 9714);
+      } else {
+      dasm_put(Dst, 9728);
+      }
+        break;
+      case 1:
+      dasm_put(Dst, 9438, LJ_TISNUM);
+      if (sse) {
+      dasm_put(Dst, 9736);
+      } else {
+      dasm_put(Dst, 9750);
+      }
+        break;
+      default:
+      dasm_put(Dst, 9472, LJ_TISNUM, LJ_TISNUM);
+      if (sse) {
+      dasm_put(Dst, 9758);
+      } else {
+      dasm_put(Dst, 9772);
+      }
+        break;
+      }
+      dasm_put(Dst, 9791);
+      if (sse) {
+      dasm_put(Dst, 9270);
+      } else {
+      dasm_put(Dst, 9282);
+      }
     }
-    dasm_put(Dst, 9289);
+    dasm_put(Dst, 7174);
     break;
 
   case BC_CAT:
-    dasm_put(Dst, 9314, Dt1(->base), Dt1(->base));
+    dasm_put(Dst, 9795, Dt1(->base), Dt1(->base));
     break;
 
   /* -- Constant ops ------------------------------------------------------ */
 
   case BC_KSTR:
-    dasm_put(Dst, 9408, LJ_TSTR);
+    dasm_put(Dst, 9889, LJ_TSTR);
     break;
   case BC_KSHORT:
-    dasm_put(Dst, 9441);
+    dasm_put(Dst, 9922);
     break;
   case BC_KNUM:
-    dasm_put(Dst, 9467);
+    dasm_put(Dst, 9948);
     break;
   case BC_KPRI:
-    dasm_put(Dst, 9492);
+    dasm_put(Dst, 9973);
     break;
   case BC_KNIL:
-    dasm_put(Dst, 9518, LJ_TNIL);
+    dasm_put(Dst, 9999, LJ_TNIL);
     break;
 
   /* -- Upvalue and function ops ------------------------------------------ */
 
   case BC_UGET:
-    dasm_put(Dst, 9564, offsetof(GCfuncL, uvptr), DtA(->v));
+    dasm_put(Dst, 10045, offsetof(GCfuncL, uvptr), DtA(->v));
     break;
   case BC_USETV:
 #define TV2MARKOFS \
  ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
-    dasm_put(Dst, 9608, offsetof(GCfuncL, uvptr), DtA(->closed), DtA(->v), TV2MARKOFS, LJ_GC_BLACK, LJ_TISGCV, LJ_TISNUM - LJ_TISGCV, Dt4(->gch.marked), LJ_GC_WHITES, GG_DISP2G);
-    dasm_put(Dst, 9698);
+    dasm_put(Dst, 10089, offsetof(GCfuncL, uvptr), DtA(->closed), DtA(->v), TV2MARKOFS, LJ_GC_BLACK, LJ_TISGCV, LJ_TISNUM - LJ_TISGCV, Dt4(->gch.marked), LJ_GC_WHITES, GG_DISP2G);
+    dasm_put(Dst, 10179);
     break;
 #undef TV2MARKOFS
   case BC_USETS:
-    dasm_put(Dst, 9710, offsetof(GCfuncL, uvptr), DtA(->v), LJ_TSTR, DtA(->marked), LJ_GC_BLACK, Dt4(->gch.marked), LJ_GC_WHITES, DtA(->closed), GG_DISP2G);
+    dasm_put(Dst, 10191, offsetof(GCfuncL, uvptr), DtA(->v), LJ_TSTR, DtA(->marked), LJ_GC_BLACK, Dt4(->gch.marked), LJ_GC_WHITES, DtA(->closed), GG_DISP2G);
     break;
   case BC_USETN:
-    dasm_put(Dst, 9801, offsetof(GCfuncL, uvptr), DtA(->v));
+    dasm_put(Dst, 10282, offsetof(GCfuncL, uvptr), DtA(->v));
     break;
   case BC_USETP:
-    dasm_put(Dst, 9837, offsetof(GCfuncL, uvptr), DtA(->v));
+    dasm_put(Dst, 10318, offsetof(GCfuncL, uvptr), DtA(->v));
     break;
   case BC_UCLO:
-    dasm_put(Dst, 9874, -BCBIAS_J*4, Dt1(->openupval), Dt1(->base), Dt1(->base));
+    dasm_put(Dst, 10355, -BCBIAS_J*4, Dt1(->openupval), Dt1(->base), Dt1(->base));
     break;
 
   case BC_FNEW:
-    dasm_put(Dst, 9932, Dt1(->base), Dt1(->base), LJ_TFUNC);
+    dasm_put(Dst, 10413, Dt1(->base), Dt1(->base), LJ_TFUNC);
     break;
 
   /* -- Table ops --------------------------------------------------------- */
 
   case BC_TNEW:
-    dasm_put(Dst, 10003, DISPATCH_GL(gc.total), DISPATCH_GL(gc.threshold), Dt1(->base), Dt1(->base), LJ_TTAB);
+    dasm_put(Dst, 10484, DISPATCH_GL(gc.total), DISPATCH_GL(gc.threshold), Dt1(->base), Dt1(->base), LJ_TTAB);
     break;
   case BC_TDUP:
-    dasm_put(Dst, 10114, DISPATCH_GL(gc.total), DISPATCH_GL(gc.threshold), Dt1(->base), Dt1(->base), LJ_TTAB);
+    dasm_put(Dst, 10595, DISPATCH_GL(gc.total), DISPATCH_GL(gc.threshold), Dt1(->base), Dt1(->base), LJ_TTAB);
     break;
 
   case BC_GGET:
-    dasm_put(Dst, 10206, Dt7(->env));
+    dasm_put(Dst, 10687, Dt7(->env));
     break;
   case BC_GSET:
-    dasm_put(Dst, 10224, Dt7(->env));
+    dasm_put(Dst, 10705, Dt7(->env));
     break;
 
   case BC_TGETV:
-    dasm_put(Dst, 10242, LJ_TTAB, LJ_TISNUM);
+    dasm_put(Dst, 10723, LJ_TTAB, LJ_TISNUM);
     if (cmov) {
-    dasm_put(Dst, 8398);
+    dasm_put(Dst, 8815);
     } else {
-    dasm_put(Dst, 8404);
+    dasm_put(Dst, 8821);
     }
-    dasm_put(Dst, 10284, Dt6(->asize), Dt6(->array), LJ_TNIL, Dt6(->metatable), Dt6(->metatable), Dt6(->nomm), 1<<MM_index);
-    dasm_put(Dst, 10379, LJ_TSTR);
+    dasm_put(Dst, 10765, Dt6(->asize), Dt6(->array), LJ_TNIL, Dt6(->metatable), Dt6(->metatable), Dt6(->nomm), 1<<MM_index);
+    dasm_put(Dst, 10860, LJ_TSTR);
     break;
   case BC_TGETS:
-    dasm_put(Dst, 10397, LJ_TTAB, Dt6(->hmask), Dt5(->hash), sizeof(Node), Dt6(->node), DtB(->key.it), LJ_TSTR, DtB(->key.gcr), LJ_TNIL);
-    dasm_put(Dst, 10481, LJ_TNIL, DtB(->next), Dt6(->metatable), Dt6(->nomm), 1<<MM_index);
+    dasm_put(Dst, 10878, LJ_TTAB, Dt6(->hmask), Dt5(->hash), sizeof(Node), Dt6(->node), DtB(->key.it), LJ_TSTR, DtB(->key.gcr), LJ_TNIL);
+    dasm_put(Dst, 10962, LJ_TNIL, DtB(->next), Dt6(->metatable), Dt6(->nomm), 1<<MM_index);
     break;
   case BC_TGETB:
-    dasm_put(Dst, 10552, LJ_TTAB, Dt6(->asize), Dt6(->array), LJ_TNIL, Dt6(->metatable), Dt6(->metatable), Dt6(->nomm), 1<<MM_index);
-    dasm_put(Dst, 8904);
+    dasm_put(Dst, 11033, LJ_TTAB, Dt6(->asize), Dt6(->array), LJ_TNIL, Dt6(->metatable), Dt6(->metatable), Dt6(->nomm), 1<<MM_index);
+    dasm_put(Dst, 9386);
     break;
 
   case BC_TSETV:
-    dasm_put(Dst, 10651, LJ_TTAB, LJ_TISNUM);
+    dasm_put(Dst, 11132, LJ_TTAB, LJ_TISNUM);
     if (cmov) {
-    dasm_put(Dst, 8398);
+    dasm_put(Dst, 8815);
     } else {
-    dasm_put(Dst, 8404);
+    dasm_put(Dst, 8821);
     }
-    dasm_put(Dst, 10693, Dt6(->asize), Dt6(->array), LJ_TNIL, Dt6(->marked), LJ_GC_BLACK, Dt6(->metatable), Dt6(->metatable));
-    dasm_put(Dst, 10779, Dt6(->nomm), 1<<MM_newindex, LJ_TSTR, Dt6(->marked), cast_byte(~LJ_GC_BLACK), DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain), Dt6(->gclist));
+    dasm_put(Dst, 11174, Dt6(->asize), Dt6(->array), LJ_TNIL, Dt6(->marked), LJ_GC_BLACK, Dt6(->metatable), Dt6(->metatable));
+    dasm_put(Dst, 11260, Dt6(->nomm), 1<<MM_newindex, LJ_TSTR, Dt6(->marked), cast_byte(~LJ_GC_BLACK), DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain), Dt6(->gclist));
     break;
   case BC_TSETS:
-    dasm_put(Dst, 10841, LJ_TTAB, Dt6(->hmask), Dt5(->hash), sizeof(Node), Dt6(->nomm), Dt6(->node), DtB(->key.it), LJ_TSTR, DtB(->key.gcr), LJ_TNIL);
-    dasm_put(Dst, 10916, Dt6(->marked), LJ_GC_BLACK, Dt6(->metatable), Dt6(->metatable), Dt6(->nomm), 1<<MM_newindex, DtB(->next));
-    dasm_put(Dst, 11006, Dt6(->metatable), Dt6(->nomm), 1<<MM_newindex, LJ_TSTR, Dt1(->base), Dt1(->base), Dt6(->marked), cast_byte(~LJ_GC_BLACK), DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain), Dt6(->gclist));
+    dasm_put(Dst, 11322, LJ_TTAB, Dt6(->hmask), Dt5(->hash), sizeof(Node), Dt6(->nomm), Dt6(->node), DtB(->key.it), LJ_TSTR, DtB(->key.gcr), LJ_TNIL);
+    dasm_put(Dst, 11397, Dt6(->marked), LJ_GC_BLACK, Dt6(->metatable), Dt6(->metatable), Dt6(->nomm), 1<<MM_newindex, DtB(->next));
+    dasm_put(Dst, 11487, Dt6(->metatable), Dt6(->nomm), 1<<MM_newindex, LJ_TSTR, Dt1(->base), Dt1(->base), Dt6(->marked), cast_byte(~LJ_GC_BLACK), DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain), Dt6(->gclist));
     break;
   case BC_TSETB:
-    dasm_put(Dst, 11102, LJ_TTAB, Dt6(->asize), Dt6(->array), LJ_TNIL, Dt6(->marked), LJ_GC_BLACK, Dt6(->metatable));
-    dasm_put(Dst, 11200, Dt6(->metatable), Dt6(->nomm), 1<<MM_newindex, Dt6(->marked), cast_byte(~LJ_GC_BLACK), DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain), Dt6(->gclist));
+    dasm_put(Dst, 11583, LJ_TTAB, Dt6(->asize), Dt6(->array), LJ_TNIL, Dt6(->marked), LJ_GC_BLACK, Dt6(->metatable));
+    dasm_put(Dst, 11681, Dt6(->metatable), Dt6(->nomm), 1<<MM_newindex, Dt6(->marked), cast_byte(~LJ_GC_BLACK), DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain), Dt6(->gclist));
     break;
 
   case BC_TSETM:
-    dasm_put(Dst, 11246, Dt6(->marked), LJ_GC_BLACK, Dt6(->asize), Dt6(->array), Dt1(->base), Dt1(->base));
-    dasm_put(Dst, 11403, Dt6(->marked), cast_byte(~LJ_GC_BLACK), DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain), Dt6(->gclist));
+    dasm_put(Dst, 11727, Dt6(->marked), LJ_GC_BLACK, Dt6(->asize), Dt6(->array), Dt1(->base), Dt1(->base));
+    dasm_put(Dst, 11884, Dt6(->marked), cast_byte(~LJ_GC_BLACK), DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain), Dt6(->gclist));
     break;
 
   /* -- Calls and vararg handling ----------------------------------------- */
 
   case BC_CALL: case BC_CALLM:
-    dasm_put(Dst, 8918);
+    dasm_put(Dst, 9400);
     if (op == BC_CALLM) {
-      dasm_put(Dst, 11421);
+      dasm_put(Dst, 11902);
     }
-    dasm_put(Dst, 11426, LJ_TFUNC, Dt7(->gate));
+    dasm_put(Dst, 11907, LJ_TFUNC, Dt7(->gate));
     break;
 
   case BC_CALLMT:
-    dasm_put(Dst, 11421);
+    dasm_put(Dst, 11902);
     break;
   case BC_CALLT:
-    dasm_put(Dst, 11449, LJ_TFUNC, FRAME_TYPE, Dt7(->ffid), Dt7(->gate));
-    dasm_put(Dst, 11554, FRAME_TYPE, Dt7(->pt), Dt9(->k));
+    dasm_put(Dst, 11930, LJ_TFUNC, FRAME_TYPE, Dt7(->ffid), Dt7(->gate));
+    dasm_put(Dst, 12035, FRAME_TYPE, Dt7(->pt), Dt9(->k));
     break;
 
   case BC_ITERC:
-    dasm_put(Dst, 11611, LJ_TFUNC, Dt7(->gate));
+    dasm_put(Dst, 12092, LJ_TFUNC, Dt7(->gate));
     break;
 
   case BC_VARG:
-    dasm_put(Dst, 11673, Dt7(->pt), Dt9(->numparams), (8+FRAME_VARG), LJ_TNIL);
-    dasm_put(Dst, 11817, Dt1(->maxstack), Dt1(->base), Dt1(->top), Dt1(->base), Dt1(->top));
+    dasm_put(Dst, 12154, Dt7(->pt), Dt9(->numparams), (8+FRAME_VARG), LJ_TNIL);
+    dasm_put(Dst, 12298, Dt1(->maxstack), Dt1(->base), Dt1(->top), Dt1(->base), Dt1(->top));
     break;
 
   /* -- Returns ----------------------------------------------------------- */
 
   case BC_RETM:
-    dasm_put(Dst, 11421);
+    dasm_put(Dst, 11902);
     break;
 
   case BC_RET: case BC_RET0: case BC_RET1:
     if (op != BC_RET0) {
-      dasm_put(Dst, 11912);
+      dasm_put(Dst, 12393);
     }
-    dasm_put(Dst, 11916, FRAME_TYPE);
+    dasm_put(Dst, 12397, FRAME_TYPE);
     switch (op) {
     case BC_RET:
-      dasm_put(Dst, 11935);
+      dasm_put(Dst, 12416);
       break;
     case BC_RET1:
-      dasm_put(Dst, 11993);
+      dasm_put(Dst, 12474);
       /* fallthrough */
     case BC_RET0:
       dasm_put(Dst, 3824);
     default:
       break;
     }
-    dasm_put(Dst, 12009, Dt7(->pt), Dt9(->k));
+    dasm_put(Dst, 12490, Dt7(->pt), Dt9(->k));
     if (op == BC_RET) {
-      dasm_put(Dst, 12051, LJ_TNIL);
+      dasm_put(Dst, 12532, LJ_TNIL);
     } else {
-      dasm_put(Dst, 12060, LJ_TNIL);
+      dasm_put(Dst, 12541, LJ_TNIL);
     }
-    dasm_put(Dst, 12067);
+    dasm_put(Dst, 12548);
     if (op != BC_RET0) {
-      dasm_put(Dst, 12088);
+      dasm_put(Dst, 12569);
     }
-    dasm_put(Dst, 8909);
+    dasm_put(Dst, 9391);
     break;
 
   /* -- Loops and branches ------------------------------------------------ */
@@ -1510,7 +1755,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
 
   case BC_FORL:
 #if LJ_HASJIT
-    dasm_put(Dst, 12092, HOTCOUNT_PCMASK, GG_DISP2HOT);
+    dasm_put(Dst, 12573, HOTCOUNT_PCMASK, GG_DISP2HOT);
 #endif
     break;
 
@@ -1522,42 +1767,42 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
   case BC_FORI:
   case BC_IFORL:
     vk = (op == BC_IFORL || op == BC_JFORL);
-    dasm_put(Dst, 12113);
+    dasm_put(Dst, 12594);
     if (!vk) {
-      dasm_put(Dst, 12117, LJ_TISNUM, LJ_TISNUM);
+      dasm_put(Dst, 12598, LJ_TISNUM, LJ_TISNUM);
     }
-    dasm_put(Dst, 12136);
+    dasm_put(Dst, 12617);
     if (!vk) {
-      dasm_put(Dst, 12140, LJ_TISNUM);
+      dasm_put(Dst, 12621, LJ_TISNUM);
     }
-    dasm_put(Dst, 12149);
+    dasm_put(Dst, 12630);
     if (vk) {
-      dasm_put(Dst, 12155);
+      dasm_put(Dst, 12636);
     }
-    dasm_put(Dst, 12161);
+    dasm_put(Dst, 12642);
     if (cmov) {
-    dasm_put(Dst, 8398);
+    dasm_put(Dst, 8815);
     } else {
-    dasm_put(Dst, 8404);
+    dasm_put(Dst, 8821);
     }
     if (!cmov) {
-      dasm_put(Dst, 12176);
+      dasm_put(Dst, 12657);
     }
     if (op == BC_FORI) {
-      dasm_put(Dst, 12182, -BCBIAS_J*4);
+      dasm_put(Dst, 12663, -BCBIAS_J*4);
     } else if (op == BC_JFORI) {
-      dasm_put(Dst, 12192, -BCBIAS_J*4, BC_JLOOP);
+      dasm_put(Dst, 12673, -BCBIAS_J*4, BC_JLOOP);
     } else if (op == BC_IFORL) {
-      dasm_put(Dst, 12206, -BCBIAS_J*4);
+      dasm_put(Dst, 12687, -BCBIAS_J*4);
     } else {
-      dasm_put(Dst, 12202, BC_JLOOP);
+      dasm_put(Dst, 12683, BC_JLOOP);
     }
-    dasm_put(Dst, 8438);
+    dasm_put(Dst, 8850);
     break;
 
   case BC_ITERL:
 #if LJ_HASJIT
-    dasm_put(Dst, 12092, HOTCOUNT_PCMASK, GG_DISP2HOT);
+    dasm_put(Dst, 12573, HOTCOUNT_PCMASK, GG_DISP2HOT);
 #endif
     break;
 
@@ -1566,18 +1811,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
     break;
 #endif
   case BC_IITERL:
-    dasm_put(Dst, 12216, LJ_TNIL);
+    dasm_put(Dst, 12697, LJ_TNIL);
     if (op == BC_JITERL) {
-      dasm_put(Dst, 12231, BC_JLOOP);
+      dasm_put(Dst, 12712, BC_JLOOP);
     } else {
-      dasm_put(Dst, 12245, -BCBIAS_J*4);
+      dasm_put(Dst, 12726, -BCBIAS_J*4);
     }
-    dasm_put(Dst, 8709);
+    dasm_put(Dst, 9147);
     break;
 
   case BC_LOOP:
 #if LJ_HASJIT
-    dasm_put(Dst, 12092, HOTCOUNT_PCMASK, GG_DISP2HOT);
+    dasm_put(Dst, 12573, HOTCOUNT_PCMASK, GG_DISP2HOT);
 #endif
     break;
 
@@ -1587,12 +1832,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
 
   case BC_JLOOP:
 #if LJ_HASJIT
-    dasm_put(Dst, 12261, DISPATCH_J(trace), DtD(->mcode), DISPATCH_GL(jit_base), DISPATCH_GL(jit_L));
+    dasm_put(Dst, 12742, DISPATCH_J(trace), DtD(->mcode), DISPATCH_GL(jit_base), DISPATCH_GL(jit_L));
 #endif
     break;
 
   case BC_JMP:
-    dasm_put(Dst, 12284, -BCBIAS_J*4);
+    dasm_put(Dst, 12765, -BCBIAS_J*4);
     break;
 
   /* ---------------------------------------------------------------------- */
@@ -1608,17 +1853,21 @@ static int build_backend(BuildCtx *ctx)
 {
   int op;
   int cmov = 1;
+  int sse = 0;
 #ifdef LUAJIT_CPU_NOCMOV
   cmov = 0;
 #endif
+#ifdef LUAJIT_CPU_SSE2
+  sse = 1;
+#endif
 
   dasm_growpc(Dst, BC__MAX);
 
-  build_subroutines(ctx, cmov);
+  build_subroutines(ctx, cmov, sse);
 
-  dasm_put(Dst, 12308);
+  dasm_put(Dst, 12789);
   for (op = 0; op < BC__MAX; op++)
-    build_ins(ctx, (BCOp)op, op, cmov);
+    build_ins(ctx, (BCOp)op, op, cmov, sse);
 
   return BC__MAX;
 }