From: Petar Jovanovic Date: Wed, 21 Aug 2019 16:08:42 +0000 (+0000) Subject: mips: Add nanoMIPS support to Valgrind 1/4 X-Git-Tag: VALGRIND_3_16_0~222 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=eddb917a6c67e1361b95ac2e49badf7bf7b3f8be;p=thirdparty%2Fvalgrind.git mips: Add nanoMIPS support to Valgrind 1/4 Necessary changes to support nanoMIPS on Linux. Part 1/4 - VEX changes Patch by Aleksandar Rikalo, Dimitrije Nikolic, Tamara Vlahovic and Aleksandra Karadzic. nanoMIPS architecture in brief Designed for embedded devices, nanoMIPS is a variable lengths instruction set architecture (ISA) offering high performance in substantially reduced code size. The nanoMIPS ISA combines recoded and new 16-, 32-, and 48-bit instructions to achieve an ideal balance of performance and code density. It incorporates all MIPS32 instructions and architecture modules including MIPS DSP and MIPS MT, as well as new instructions for advanced code size reduction. nanoMIPS is supported in release 6 of the MIPS architecture. It is first implemented in the new MIPS I7200 multi-threaded multi-core processor series. Compiler support is included in the MIPS GNU-based development tools. Related KDE issue: #400872. --- diff --git a/Makefile.vex.am b/Makefile.vex.am index 10e1890f63..98d8483594 100644 --- a/Makefile.vex.am +++ b/Makefile.vex.am @@ -48,6 +48,7 @@ noinst_HEADERS = \ priv/guest_s390_defs.h \ priv/guest_mips_defs.h \ priv/mips_defs.h \ + priv/guest_nanomips_defs.h \ priv/host_generic_regs.h \ priv/host_generic_simd64.h \ priv/host_generic_simd128.h \ @@ -61,7 +62,9 @@ noinst_HEADERS = \ priv/host_s390_defs.h \ priv/s390_disasm.h \ priv/s390_defs.h \ - priv/host_mips_defs.h + priv/host_mips_defs.h \ + priv/host_nanomips_defs.h \ + priv/common_nanomips_defs.h BUILT_SOURCES = pub/libvex_guest_offsets.h CLEANFILES = pub/libvex_guest_offsets.h @@ -146,6 +149,8 @@ LIBVEX_SOURCES_COMMON = \ priv/guest_mips_helpers.c \ priv/guest_mipsdsp_toIR.c \ priv/guest_mips_toIR.c \ + priv/guest_nanomips_helpers.c \ + priv/guest_nanomips_toIR.c \ priv/host_generic_regs.c \ priv/host_generic_simd64.c \ priv/host_generic_simd128.c \ @@ -167,7 +172,9 @@ LIBVEX_SOURCES_COMMON = \ priv/host_s390_isel.c \ priv/s390_disasm.c \ priv/host_mips_defs.c \ - priv/host_mips_isel.c + priv/host_nanomips_defs.c \ + priv/host_mips_isel.c \ + priv/host_nanomips_isel.c LIBVEXMULTIARCH_SOURCES = priv/multiarch_main_main.c diff --git a/VEX/priv/common_nanomips_defs.h b/VEX/priv/common_nanomips_defs.h new file mode 100644 index 0000000000..ee79943058 --- /dev/null +++ b/VEX/priv/common_nanomips_defs.h @@ -0,0 +1,374 @@ + +/*---------------------------------------------------------------*/ +/*--- begin common_nanomips_defs.h ---*/ +/*---------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2017-2018 RT-RK + mips-valgrind@rt-rk.com + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ +#ifndef __VEX_COMMON_NANOMIPS_DEFS_H +#define __VEX_COMMON_NANOMIPS_DEFS_H + +typedef enum { + P_ADDIURI = 0x00, + ADDIUPC32 = 0x01, + MOVE_BALC = 0x02, + P16MV = 0x04, + LW16 = 0x05, + BC16 = 0x06, + P16SR = 0x07, + P32A = 0x08, + PBAL = 0x0A, + P16SHIFT = 0x0C, + LWSP = 0x0D, + BALC16 = 0x0E, + P164X4 = 0x0F, + PGPW = 0x10, + PGPBH = 0x11, + PJ = 0x12, + P16C = 0x14, + LWGP16 = 0x15, + P16LB = 0x17, + P48I = 0x18, + P16A1 = 0x1C, + LW4X4 = 0x1D, + P16LH = 0x1F, + PU12 = 0x20, + PLSU12 = 0x21, + PBR1 = 0x22, + P16A2 = 0x24, + SW16 = 0x25, + BEQZC16 = 0x26, + PLSS9 = 0x29, + PBR2 = 0x2A, + P16ADDU = 0x2C, + SWSP = 0x2D, + BNEZC16 = 0x2E, + MOVEP = 0x2F, + PBRI = 0x32, + LI16 = 0x34, + SWGP16 = 0x35, + P16BR = 0x36, + P_LUI = 0x38, + ANDI16 = 0x3C, + SW4X4 = 0x3D, + MOVEPREV = 0x3F, +} nanoMIPSopcodes; + +typedef enum { + P48I_LI = 0x00, + P48I_ADDIU = 0x01, + P48I_ADDIU_GP = 0x02, + P48I_ADDIUPC = 0x03, + P48I_LWPC = 0x0B, + P48I_SWPC = 0x0F, +} nanoP48I; + +typedef enum { + JALRC32 = 0x00, + JALRCHB = 0x01, + PBALRSC = 0x08 +} nano_PJ; + +typedef enum { + PLSS0 = 0x00, + PLSS1 = 0x01, + PLSE0 = 0x02, + PLSWM = 0x04, + PLSUAWM = 0x05, + PLSDM = 0x06, + PLSUADM = 0x07, +} nanoPLSS9; + +typedef enum { + PU12_ORI = 0x00, + PU12_XORI = 0x01, + PU12_ANDI = 0x02, + PU12_PSR = 0x03, + PU12_SLTI = 0x04, + PU12_SLTIU = 0x05, + PU12_SEQI = 0x06, + PU12_ADDIU_NEG = 0x08, + PU12_PSHIFT = 0x0C, + PU12_PROTX = 0x0D, + PU12_PINS = 0x0E, + PU12_PEXT = 0x0F +} nanoPU12; + +typedef enum { + RI_PSYSCALL = 0x1, + RI_BREAK = 0x2, + RI_SDBBP = 0x3 +} nanoP16RI; + +typedef enum { + PRI_SIGRIE = 0x0, + PRI_PSYSCALL = 0x1, + PRI_BREAK = 0x2, + PRI_SDBBP = 0x3 +} nanoPRI; + +typedef enum { + P32A_POOL32A0 = 0x00, + P32A_POOL32A7 = 0x07 +} nano_P32A; + +typedef enum { + _POOL32A0_PTRAP = 0x00, + _POOL32A0_SEB = 0x01, + _POOL32A0_SLLV = 0x02, + _POOL32A0_MUL32 = 0x03, + _POOL32A0_MFC0 = 0x06, + _POOL32A0_MFHC0 = 0x07, + _POOL32A0_SEH = 0x09, + _POOL32A0_SRLV = 0x0A, + _POOL32A0_MUH = 0x0B, + _POOL32A0_MTC0 = 0x0E, + _POOL32A0_MTHC0 = 0x0F, + _POOL32A0_SRAV = 0x12, + _POOL32A0_MULU = 0x13, + _POOL32A0_MFGC0 = 0x16, + _POOL32A0_MFHGC0 = 0x17, + _POOL32A0_ROTRV = 0x1A, + _POOL32A0_MUHU = 0x1B, + _POOL32A0_MTGC0 = 0x1E, + _POOL32A0_MTHGC0 = 0x1F, + _POOL32A0_ADD = 0x22, + _POOL32A0_DIV = 0x23, + _POOL32A0_DMFC0 = 0x26, + _POOL32A0_ADDU32 = 0x2A, + _POOL32A0_MOD = 0x2B, + _POOL32A0_DMTC0 = 0x2E, + _POOL32A0_SUB = 0x32, + _POOL32A0_DIVU = 0x33, + _POOL32A0_DMFGC0 = 0x36, + _POOL32A0_RDHWR = 0x38, + _POOL32A0_SUBU32 = 0x3A, + _POOL32A0_MODU = 0x3B, + _POOL32A0_DMTGC0 = 0x3E, + _POOL32A0_PCMOVE = 0x42, + _POOL32A0_FORK = 0x45, + _POOL32A0_MFTR = 0x46, + _POOL32A0_MFHTR = 0x47, + _POOL32A0_AND32 = 0x4A, + _POOL32A0_YIELD = 0x4D, + _POOL32A0_MTTR = 0x4E, + _POOL32A0_MTHTR = 0x4F, + _POOL32A0_OR32 = 0x52, + _POOL32A0_PMTVPE = 0x56, + _POOL32A0_NOR = 0x5A, + _POOL32A0_XOR32 = 0x62, + _POOL32A0_SLT = 0x6A, + _POOL32A0_PSLTU = 0x72, + _POOL32A0_SOV = 0x7A, +} nano_POOL32A0; + +typedef enum { + _POOL32A7_PLSX = 0x00, + _POOL32A7_LSA = 0x01, + _POOL32A7_EXTW = 0x03, + _POOL32A7_P32Axf = 0x07, +} nano_POOL32A7; + +typedef enum { + nano_POOL32Axf4_CLO = 0x25, + nano_POOL32Axf4_CLZ = 0x2D, +} nano_POOL32Axf4; + +typedef enum { + PLSX_PPLSX = 0x00, + 
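+   /* PPLSX selects the plain register-indexed load/store pool; PPLSXS
+      (below) selects the scaled-index forms, where the index register is
+      shifted left by 1 for halfword and by 2 for word accesses before the
+      add (see nano_pplsxs). */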
PLSX_PPLSXS = 0x01, +} nano_PLSX; + +typedef enum { + LBX = 0x00, + SBX = 0x01, + LBUX = 0x02, + LHX = 0x04, + SHX = 0x05, + LHUX = 0x06, + LWUX = 0x07, + LWX = 0x08, + SWX = 0x09, + LWC1X = 0x0A, + SWC1X = 0x0B, + LDX = 0x0C, + SDX = 0x0D, + LDC1X = 0x0E, + SDC1X = 0x0F +} nano_PPLSX; + +typedef enum { + LHXS = 0x04, + SHXS = 0x05, + LHUXS = 0x06, + LWUXS = 0x07, + LWXS32 = 0x08, + SWXS = 0x09, + LWC1XS = 0x0A, + SWC1XS = 0x0B, + LDXS = 0x0C, + SDXS = 0x0D, + LDC1XS = 0x0E, + SDC1XS = 0x0F +} nano_PPLSXS; + +typedef enum { + PLSU12_LB = 0x00, + PLSU12_SB = 0x01, + PLSU12_LBU = 0x02, + PLSU12_PREF = 0x03, + PLSU12_LH = 0x04, + PLSU12_SH = 0x05, + PLSU12_LHU = 0x06, + PLSU12_LWU = 0x07, + PLSU12_LW = 0x08, + PLSU12_SW = 0x09, + PLSU12_LWC1 = 0x0A, + PLSU12_SWC1 = 0x0B, + PLSU12_LD = 0x0C, + PLSU12_SD = 0x0D, + PLSU12_LDC1 = 0x0E, + PLSU12_SDC1 = 0x0F, + +} nano_PLSU12; + +typedef enum { + PSLL = 0x00, + SRL32 = 0x02, + SRA = 0x04, + ROTR = 0x06, + DSLL = 0x08, + DSLL32 = 0x09, + DSRL = 0x0A, + DSRL32 = 0x0B, + DSRA = 0x0C, + DSRA32 = 0x0D, + DROTR = 0x0E, + DROTR32 = 0x0F, +} nano_PSHIFT; + +typedef enum { + LBS9 = 0x00, + SBS9 = 0x01, + LBUS9 = 0x02, + PPREFS9 = 0x03, + LHS9 = 0x04, + SHS9 = 0x05, + LHUS9 = 0x06, + LWUS9 = 0x07, + LWS9 = 0x08, + SWS9 = 0x09, + LWC1S9 = 0x0A, + SWC1S9 = 0x0B, + LDS9 = 0x0C, + SDS9 = 0x0D, + LDC1S9 = 0x0E, + SDC1S9 = 0x0F, +} nano_PLSS0; + +typedef enum { + LBGP = 0x00, + SBGP = 0x01, + LBUGP = 0x02, + ADDIUGPB = 0x03, + PGPLH = 0x04, + PGPSH = 0x05, + PGPCP1 = 0x06, + PGPM64 = 0x07 +} nano_PGPBH; + +typedef enum { + ASET_ACLER = 0x02, + UALH = 0x04, + UASH = 0x05, + CACHE = 0x07, + LWC2 = 0x08, + SWC2 = 0x09, + PLL = 0x0A, + PSC = 0x0B, + LDC2 = 0x0C, + SDC2 = 0x0D, + PLLD = 0x0E, + PSCD = 0x0F +} nano_PLSS1; + +typedef enum { + LL = 0x00, + LLWP = 0x01 +} nano_LL; + +typedef enum { + SC = 0x00, + SCWP = 0x01 +} nano_SC; + +typedef enum { + PBR1_BEQC32 = 0x00, + PBR1_PBR3A = 0x01, + PBR1_BGEC = 0x02, + PBR1_BGEUC = 0x03, +} nano_PBR1; + +typedef enum { + PBR2_BNEC32 = 0x00, + PBR2_BLTC = 0x02, + PBR2_BLTUC = 0x03, +} nano_PBR2; + +typedef enum { + PBRI_BEQIC = 0x00, + PBRI_BBEQZC = 0x01, + PBRI_BGEIC = 0x02, + PBRI_BGEIUC = 0x03, + PBRI_BNEIC = 0x04, + PBRI_BBNEZC = 0x05, + PBRI_BLTIC = 0x06, + PBRI_BLTIUC = 0x07 +} nano_PBRI; + +typedef enum { + PGPW_ADDIU = 0x00, + PGPW_PGPD = 0X01, + PGPW_LW = 0X02, + PGPW_SW = 0X03 +} nano_PGPW; + +typedef enum { + POOL32aXF_4 = 0x04, + POOL32aXF_5 = 0x05, +} nano_POOL32Axf; + +typedef enum { + POOL16C00_NOT = 0x00, + POOL16C00_XOR = 0x04, + POOL16C00_AND = 0x08, + POOL16C00_OR = 0x0C, +} nano_POOL16C_00; + +#endif + +/*---------------------------------------------------------------*/ +/*--- end common_nanomips_defs.h ---*/ +/*---------------------------------------------------------------*/ diff --git a/VEX/priv/guest_nanomips_defs.h b/VEX/priv/guest_nanomips_defs.h new file mode 100644 index 0000000000..490ef4bc59 --- /dev/null +++ b/VEX/priv/guest_nanomips_defs.h @@ -0,0 +1,74 @@ + +/*---------------------------------------------------------------*/ +/*--- begin guest_nanomips_defs.h ---*/ +/*---------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. 
+ + Copyright (C) 2017-2018 RT-RK + mips-valgrind@rt-rk.com + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +/* Only to be used within the guest-mips directory. */ + +#ifndef __VEX_GUEST_NANOMIPS_DEFS_H +#define __VEX_GUEST_NANOMIPS_DEFS_H + +#include "libvex_basictypes.h" +#include "guest_generic_bb_to_IR.h" /* DisResult */ +#include "common_nanomips_defs.h" + +#if defined (_MIPSEL) + #define MIPS_IEND Iend_LE +#else + #define MIPS_IEND Iend_BE +#endif + +/*---------------------------------------------------------*/ +/*--- mips to IR conversion ---*/ +/*---------------------------------------------------------*/ + +/* Convert one nanoMIPS insn to IR. See the type DisOneInstrFn in + guest_generic_bb_to_IR.h. */ +extern DisResult disInstr_nanoMIPS ( IRSB* irbb, + Bool (*resteerOkFn) (void *, Addr), + Bool resteerCisOk, + void* callback_opaque, + const UChar* guest_code, + Long delta, + Addr guest_IP, + VexArch guest_arch, + const VexArchInfo* archinfo, + const VexAbiInfo* abiinfo, + VexEndness host_endness, + Bool sigill_diag ); + + +extern VexGuestLayout nanomipsGuest_layout; + +extern HWord nanomips_dirtyhelper_rdhwr ( UInt rd ); + +#endif + +/*---------------------------------------------------------------*/ +/*--- end guest_nanomips_defs.h ---*/ +/*---------------------------------------------------------------*/ diff --git a/VEX/priv/guest_nanomips_helpers.c b/VEX/priv/guest_nanomips_helpers.c new file mode 100644 index 0000000000..f1bceaba93 --- /dev/null +++ b/VEX/priv/guest_nanomips_helpers.c @@ -0,0 +1,127 @@ + +/*---------------------------------------------------------------*/ +/*--- begin guest_nanomips_helpers.c ---*/ +/*---------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2017-2018 RT-RK + mips-valgrind@rt-rk.com + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. 
+*/ + +#include "libvex_basictypes.h" +#include "libvex_emnote.h" +#include "libvex_guest_mips32.h" +#include "libvex_ir.h" +#include "libvex.h" + +#include "main_util.h" +#include "main_globals.h" +#include "guest_generic_bb_to_IR.h" +#include "guest_nanomips_defs.h" + +#if defined (__GNUC__) +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +#else +#define GCC_VERSION 0 +#endif + +/* This file contains helper functions for mips guest code. Calls to + these functions are generated by the back end. +*/ + +#define ALWAYSDEFD32(field) \ + { offsetof(VexGuestMIPS32State, field), \ + (sizeof ((VexGuestMIPS32State*)0)->field) } + +VexGuestLayout nanomipsGuest_layout = { + /* Total size of the guest state, in bytes. */ + .total_sizeB = sizeof(VexGuestMIPS32State), + /* Describe the stack pointer. */ + .offset_SP = offsetof(VexGuestMIPS32State, guest_r29), + .sizeof_SP = 4, + /* Describe the frame pointer. */ + .offset_FP = offsetof(VexGuestMIPS32State, guest_r30), + .sizeof_FP = 4, + /* Describe the instruction pointer. */ + .offset_IP = offsetof(VexGuestMIPS32State, guest_PC), + .sizeof_IP = 4, + /* Describe any sections to be regarded by Memcheck as + 'always-defined'. */ + .n_alwaysDefd = 8, + /* ? :( */ + .alwaysDefd = { + /* 0 */ ALWAYSDEFD32(guest_r0), + /* 1 */ ALWAYSDEFD32(guest_r1), + /* 2 */ ALWAYSDEFD32(guest_EMNOTE), + /* 3 */ ALWAYSDEFD32(guest_CMSTART), + /* 4 */ ALWAYSDEFD32(guest_CMLEN), + /* 5 */ ALWAYSDEFD32(guest_r29), + /* 6 */ ALWAYSDEFD32(guest_r31), + /* 7 */ ALWAYSDEFD32(guest_ULR) + } +}; + + +#define ASM_VOLATILE_RDHWR(opcode) \ + __asm__ __volatile__(".word 0x204001C0 | "#opcode" << 16 \n\t" \ + : "+r" (x) : : \ + ) + +HWord nanomips_dirtyhelper_rdhwr ( UInt rd ) +{ +#if defined(__nanomips__) + register HWord x __asm__("t4") = 0; + + switch (rd) { + case 0: /* x = CPUNum() */ + ASM_VOLATILE_RDHWR(0); /* rdhwr t4, $0 */ + break; + + case 1: /* x = SYNCI_Step() */ + ASM_VOLATILE_RDHWR(1); /* rdhwr t4, $1 */ + break; + + case 2: /* x = CC() */ + ASM_VOLATILE_RDHWR(2); /* rdhwr t4, $2 */ + break; + + case 3: /* x = CCRes() */ + ASM_VOLATILE_RDHWR(3); /* rdhwr t4, $3 */ + break; + + + default: + vassert(0); + break; + } + + return x; +#else + return 0; +#endif +} + + +/*---------------------------------------------------------------*/ +/*--- end guest_nanomips_helpers.c ---*/ +/*---------------------------------------------------------------*/ diff --git a/VEX/priv/guest_nanomips_toIR.c b/VEX/priv/guest_nanomips_toIR.c new file mode 100644 index 0000000000..1a6ed0d7f8 --- /dev/null +++ b/VEX/priv/guest_nanomips_toIR.c @@ -0,0 +1,3101 @@ + +/*--------------------------------------------------------------------*/ +/*--- begin guest_nanomips_toIR.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2017-2018 RT-RK + mips-valgrind@rt-rk.com + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +/* Translates nanoMIPS code to IR. */ + +#include "libvex_basictypes.h" +#include "libvex_ir.h" +#include "libvex.h" +#include "libvex_guest_mips32.h" + +#include "main_util.h" +#include "main_globals.h" +#include "guest_generic_bb_to_IR.h" +#include "guest_nanomips_defs.h" + +#define P16 0x4 + +#define DIP(format, args...) \ + if (vex_traceflags & VEX_TRACE_FE) \ + vex_printf(format, ## args) + +#define OFFB_PC offsetof(VexGuestMIPS32State, guest_PC) + +#define ILLEGAL_INSTRUCTON \ + dres->jk_StopHere = Ijk_SigILL; \ + dres->whatNext = Dis_StopHere; + +#define LLADDR_INVALID (mkU32(0xFFFFFFFF)) + +/* MOD: The IRSB* into which we're generating code. */ +static IRSB *irsb; + +/* CONST: The guest address for the instruction currently being + translated. */ +static Addr32 guest_PC_curr_instr; + +/* Do a endian load of a 16-bit word, regardless of the endianness of the + underlying host. */ +static inline UShort getUShort(const UChar * p) +{ + UShort w = 0; +#if defined (_MIPSEL) + w = (w << 8) | p[1]; + w = (w << 8) | p[0]; +#elif defined (_MIPSEB) + w = (w << 8) | p[0]; + w = (w << 8) | p[1]; +#endif + return w; +} + +/* Do a endian load of a 32-bit code word. */ +static inline UInt getUInt(const UChar * p) +{ + return (getUShort(p) << 16) | getUShort(p + 2); +} + +const UChar GPR3_list[] = { 16, 17, 18, 19, 4, 5, 6, 7 }; +const UChar GPR4_list[] = { 8, 9, 10, 11, 4, 5, 6, 7, 16, 17, 18, 19, + 20, 21, 22, 23 + }; +const UChar GPR4_zero_list[] = { 8, 9, 10, 0, 4, 5, 6, 7, 16, 17, 18, + 19, 20, 21, 22, 23 + }; +const UChar GPR3_src_store_list[] = { 0, 17, 18, 19, 4, 5, 6, 7 }; +const UChar GPR2_reg1_list[] = { 4, 5, 6, 7 }; +const UChar GPR2_reg2_list[] = { 5, 6, 7, 8 }; + +static UInt integerGuestRegOffset(UInt iregNo) +{ + /* Maybe we should use formula ??? 
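+      e.g. offsetof(VexGuestMIPS32State, guest_r0) + iregNo * sizeof(UInt),
+      assuming guest_r0..guest_r31 are contiguous, equally sized fields of
+      VexGuestMIPS32State; the explicit switch below avoids relying on that
+      layout assumption.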
*/ + switch (iregNo) { + case 0: + return offsetof(VexGuestMIPS32State, guest_r0); + + case 1: + return offsetof(VexGuestMIPS32State, guest_r1); + + case 2: + return offsetof(VexGuestMIPS32State, guest_r2); + + case 3: + return offsetof(VexGuestMIPS32State, guest_r3); + + case 4: + return offsetof(VexGuestMIPS32State, guest_r4); + + case 5: + return offsetof(VexGuestMIPS32State, guest_r5); + + case 6: + return offsetof(VexGuestMIPS32State, guest_r6); + + case 7: + return offsetof(VexGuestMIPS32State, guest_r7); + + case 8: + return offsetof(VexGuestMIPS32State, guest_r8); + + case 9: + return offsetof(VexGuestMIPS32State, guest_r9); + + case 10: + return offsetof(VexGuestMIPS32State, guest_r10); + + case 11: + return offsetof(VexGuestMIPS32State, guest_r11); + + case 12: + return offsetof(VexGuestMIPS32State, guest_r12); + + case 13: + return offsetof(VexGuestMIPS32State, guest_r13); + + case 14: + return offsetof(VexGuestMIPS32State, guest_r14); + + case 15: + return offsetof(VexGuestMIPS32State, guest_r15); + + case 16: + return offsetof(VexGuestMIPS32State, guest_r16); + + case 17: + return offsetof(VexGuestMIPS32State, guest_r17); + + case 18: + return offsetof(VexGuestMIPS32State, guest_r18); + + case 19: + return offsetof(VexGuestMIPS32State, guest_r19); + + case 20: + return offsetof(VexGuestMIPS32State, guest_r20); + + case 21: + return offsetof(VexGuestMIPS32State, guest_r21); + + case 22: + return offsetof(VexGuestMIPS32State, guest_r22); + + case 23: + return offsetof(VexGuestMIPS32State, guest_r23); + + case 24: + return offsetof(VexGuestMIPS32State, guest_r24); + + case 25: + return offsetof(VexGuestMIPS32State, guest_r25); + + case 26: + return offsetof(VexGuestMIPS32State, guest_r26); + + case 27: + return offsetof(VexGuestMIPS32State, guest_r27); + + case 28: + return offsetof(VexGuestMIPS32State, guest_r28); + + case 29: + return offsetof(VexGuestMIPS32State, guest_r29); + + case 30: + return offsetof(VexGuestMIPS32State, guest_r30); + + case 31: + return offsetof(VexGuestMIPS32State, guest_r31); + } + + vassert(0); + return 0; +} + +/* Add a statement to the list held by "irsb". */ +static void stmt(IRStmt * st) +{ + addStmtToIRSB(irsb, st); +} + +static IRExpr *mkU8(UInt i) +{ + vassert(i < 256); + return IRExpr_Const(IRConst_U8((UChar) i)); +} + +/* Create an expression node for a 32-bit integer constant. 
*/ +static IRExpr *mkU32(UInt i) +{ + return IRExpr_Const(IRConst_U32(i)); +} + +static IRExpr *mkU64(ULong i) +{ + return IRExpr_Const(IRConst_U64(i)); +} + +static void putPC(IRExpr * e) +{ + stmt(IRStmt_Put(OFFB_PC, e)); +} + +static void putIReg(UInt archreg, IRExpr * e) +{ + vassert(archreg < 32); + + if (archreg != 0) + stmt(IRStmt_Put(integerGuestRegOffset(archreg), e)); +} + +static IRExpr *getIReg(UInt iregNo) +{ + if (0 == iregNo) { + return mkU32(0x0); + } else { + IRType ty = Ity_I32; + vassert(iregNo < 32); + return IRExpr_Get(integerGuestRegOffset(iregNo), ty); + } +} + +static void putLLaddr(IRExpr * e) +{ + stmt(IRStmt_Put(offsetof(VexGuestMIPS32State, guest_LLaddr), e)); +} + +static IRExpr *getLLaddr(void) +{ + return IRExpr_Get(offsetof(VexGuestMIPS32State, guest_LLaddr), Ity_I32); +} + +static void putLLdata(IRExpr * e) +{ + stmt(IRStmt_Put(offsetof(VexGuestMIPS32State, guest_LLdata), e)); +} + +static IRExpr *getLLdata(void) +{ + return IRExpr_Get(offsetof(VexGuestMIPS32State, guest_LLdata), Ity_I32); +} + +static void putLLdata64(IRExpr * e) +{ + stmt(IRStmt_Put(offsetof(VexGuestMIPS32State, guest_LLdata64), e)); +} + +static IRExpr *getLLdata64(void) +{ + return IRExpr_Get(offsetof(VexGuestMIPS32State, guest_LLdata64), Ity_I64); +} + +static IRExpr *unop(IROp op, IRExpr * a) +{ + return IRExpr_Unop(op, a); +} + +static IRExpr *binop(IROp op, IRExpr * a1, IRExpr * a2) +{ + return IRExpr_Binop(op, a1, a2); +} + +/* Generate a new temporary of the given type. */ +static IRTemp newTemp(IRType ty) +{ + vassert(isPlausibleIRType(ty)); + return newIRTemp(irsb->tyenv, ty); +} + +static void assign(IRTemp dst, IRExpr * e) +{ + stmt(IRStmt_WrTmp(dst, e)); +} + +static void store(IRExpr * addr, IRExpr * data) +{ +#if defined (_MIPSEL) + stmt(IRStmt_Store(Iend_LE, addr, data)); +#elif defined (_MIPSEB) + stmt(IRStmt_Store(Iend_BE, addr, data)); +#endif +} + +static IRExpr *load(IRType ty, IRExpr * addr) +{ + IRExpr *load1 = NULL; +#if defined (_MIPSEL) + load1 = IRExpr_Load(Iend_LE, ty, addr); +#elif defined (_MIPSEB) + load1 = IRExpr_Load(Iend_BE, ty, addr); +#endif + return load1; +} + +static IRExpr *mkexpr(IRTemp tmp) +{ + return IRExpr_RdTmp(tmp); +} + +static UInt extend_sign(UInt value, UChar from_nbits) +{ + UChar shift = 32 - from_nbits; + return (UInt)((((Int) value) << shift) >> shift); +} + +static void ir_for_branch(DisResult *dres, IRExpr *guard, UChar length, + Int offset) +{ + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = Ijk_Boring; + stmt(IRStmt_Exit(guard, Ijk_Boring, + IRConst_U32(guest_PC_curr_instr + length + offset), + OFFB_PC)); + putPC(mkU32(guest_PC_curr_instr + length)); +} + +static void nano_plsu12(DisResult *dres, UInt cins) +{ + UChar rs = (cins >> 16) & 0x1F; + UChar rt = (cins >> 21) & 0x1F; + UShort u = cins & 0x0FFF; + + switch ((cins >> 12) & 0x0F) { + case PLSU12_LB: { /* lb[u12] */ + DIP("lb[u12] r%u %u(r%u)", rt, u, rs); + putIReg(rt, unop(Iop_8Sto32, + load(Ity_I8, binop(Iop_Add32, getIReg(rs), mkU32(u))))); + break; + } + + case PLSU12_LH: { /* lh[u12] */ + DIP("lh[u12] r%u %u(r%u)", rt, u, rs); + putIReg(rt, unop(Iop_16Sto32, + load(Ity_I16, binop(Iop_Add32, getIReg(rs), mkU32(u))))); + break; + } + + case PLSU12_LW: { /* lw[u12] */ + DIP("lw[u12] r%u %u(r%u)", rt, u, rs); + putIReg(rt, load(Ity_I32, binop(Iop_Add32, getIReg(rs), mkU32(u)))); + break; + } + + case PLSU12_LD: { /* ld[u12] */ + DIP("ld[u12] r%u %u(r%u)", rt, u, rs); + vassert(0); + break; + } + + case PLSU12_SB: { /* sb[u12] */ + DIP("sb[12] r%u %u(r%u)", rt, u, 
rs); + store(binop(Iop_Add32, getIReg(rs), mkU32(u)), unop(Iop_32to8, getIReg(rt))); + break; + } + + case PLSU12_SH: { /* sh[u12] */ + DIP("sh[u12] r%u %u(r%u)", rt, u, rs); + store(binop(Iop_Add32, getIReg(rs), mkU32(u)), unop(Iop_32to16, getIReg(rt))); + break; + } + + case PLSU12_SW: { /* sw[u12] */ + DIP("sw[u12] r%u, %u(r%u)", rt, u, rs); + store(binop(Iop_Add32, getIReg(rs), mkU32(u)), getIReg(rt)); + break; + } + + case PLSU12_SD: { /* sd[u12] */ + DIP("sd[u12] r%u, %u(r%u)", rt, u, rs); + vassert(0); + } + + case PLSU12_LBU: { /* lbu[u12] */ + DIP("lbu r%u, %u(r%u)", rt, u, rs); + putIReg(rt, unop(Iop_8Uto32, + load(Ity_I8, binop(Iop_Add32, getIReg(rs), mkU32(u))))); + break; + } + + case PLSU12_LHU: { /* lhu[u12] */ + DIP("lhu[u12] r%u %u(r%u)", rt, u, rs); + putIReg(rt, unop(Iop_16Uto32, + load(Ity_I16, binop(Iop_Add32, getIReg(rs), mkU32(u))))); + break; + } + + case PLSU12_LWC1: { /* lwc1[u12] */ + DIP("lwc1[u12] r%u %u(r%u)", rt, u, rs); + vassert(0); + break; + } + + case PLSU12_LDC1: { /* ldc1[u12] */ + DIP("ldc1[u12] r%u %u(r%u)", rt, u, rs); + vassert(0); + break; + } + + case PLSU12_PREF: { /* pref[u12] */ + DIP("pref[u12] r%u %u(r%u)", rt, u, rs); + break; + } + + case PLSU12_LWU: { /* lwu[u12] */ + DIP("lwu[u12] r%u %u(r%u)", rt, u, rs); + vassert(0); + break; + } + + case PLSU12_SWC1: { /* swc1[u12] */ + DIP("swc1[u12] r%u %u(r%u)", rt, u, rs); + vassert(0); + break; + } + + case PLSU12_SDC1: { /* sdc1[u12] */ + DIP("sdc1[u12] r%u %u(r%u)", rt, u, rs); + vassert(0); + break; + } + + default: + vassert(0); + } +} + +static void nano_pl32a0(DisResult *dres, UInt cins) +{ + UChar rd = (cins >> 11) & 0x1F; + UChar rs = (cins >> 16) & 0x1F; + UChar rt = (cins >> 21) & 0x1F; + IRTemp t1 = newTemp(Ity_I32); + IRTemp t2 = newTemp(Ity_I64); + + switch ((cins >> 3) & 0x07F) { + case _POOL32A0_PTRAP: { + if ((cins >> 10) & 0x01) { /* tne */ + DIP("tne r%u, r%u", rs, rt); + stmt(IRStmt_Exit(binop(Iop_CmpNE32, getIReg(rs), + getIReg(rt)), Ijk_SigTRAP, + IRConst_U32(guest_PC_curr_instr + 4), + OFFB_PC)); + } else { /* teq */ + DIP("teq r%u, r%u", rs, rt); + stmt(IRStmt_Exit(binop(Iop_CmpEQ32, getIReg(rs), + getIReg(rt)), Ijk_SigTRAP, + IRConst_U32(guest_PC_curr_instr + 4), + OFFB_PC)); + } + + break; + } + + case _POOL32A0_SEB: { /* seb */ + DIP("seb r%u, r%u", rs, rt); + putIReg(rt, unop(Iop_8Sto32, unop(Iop_32to8, getIReg(rs)))); + break; + } + + case _POOL32A0_SEH: { /* seh */ + DIP("seh r%u, r%u", rs, rt); + putIReg(rt, unop(Iop_16Sto32, unop(Iop_32to16, getIReg(rs)))); + break; + } + + case _POOL32A0_SLLV: { /* sllv */ + DIP("sllv r%u, r%u, r%u", rd, rs, rt); + assign(t1, binop(Iop_And32, getIReg(rt), mkU32(0x1f))); + putIReg(rd, binop(Iop_Shl32, getIReg(rs), unop(Iop_32to8, + mkexpr(t1)))); + break; + } + + case _POOL32A0_MUL32: { /* mul */ + DIP("mul[32] r%u, r%u, r%u", rd, rs, rt); + putIReg(rd, unop(Iop_64to32, binop(Iop_MullS32, getIReg(rt), + getIReg(rs)))); + break; + } + + case _POOL32A0_MUH: { /* muh */ + DIP("muh r%u, r%u, r%u", rd, rs, rt); + putIReg(rd, unop(Iop_64HIto32, binop(Iop_MullS32, getIReg(rt), + getIReg(rs)))); + break; + } + + case _POOL32A0_MULU: { /* mulu */ + DIP("mulu r%u, r%u, r%u", rd, rs, rt); + putIReg(rd, unop(Iop_64to32, binop(Iop_MullU32, getIReg(rt), + getIReg(rs)))); + break; + } + + case _POOL32A0_MUHU: { /* muhu */ + DIP("muhu r%u, r%u, r%u", rd, rs, rt); + putIReg(rd, unop(Iop_64HIto32, binop(Iop_MullU32, getIReg(rt), + getIReg(rs)))); + break; + } + + case _POOL32A0_DIV: { /* div */ + DIP("div r%u, r%u, r%u", rd, rs, rt); + putIReg(rd, 
binop(Iop_DivS32, getIReg(rs), getIReg(rt))); + break; + } + + case _POOL32A0_MOD: { /* mod */ + DIP("mod r%u, r%u, r%u", rd, rs, rt); + putIReg(rd, unop(Iop_64HIto32, binop(Iop_DivModS32to32, getIReg(rs), + getIReg(rt)))); + break; + } + + case _POOL32A0_DIVU: { /* divu */ + DIP("divu r%u, r%u, r%u", rd, rs, rt); + putIReg(rd, binop(Iop_DivU32, getIReg(rs), getIReg(rt))); + break; + } + + case _POOL32A0_MODU: { /* modu */ + DIP("modu r%u, r%u, r%u", rd, rs, rt); + putIReg(rd, unop(Iop_64HIto32, binop(Iop_DivModU32to32, getIReg(rs), + getIReg(rt)))); + break; + } + + case _POOL32A0_SRLV: { /* srlv */ + DIP("srlv r%u, r%u, r%u", rd, rs, rt); + assign(t1, binop(Iop_And32, getIReg(rt), mkU32(0x1f))); + putIReg(rd, binop(Iop_Shr32, + getIReg(rs), unop(Iop_32to8, mkexpr(t1)))); + break; + } + + case _POOL32A0_SRAV: { /* srav */ + DIP("srav r%u, r%u, r%u", rd, rs, rt); + assign(t1, binop(Iop_And32, getIReg(rt), mkU32(0x1f))); + putIReg(rd, binop(Iop_Sar32, + getIReg(rs), unop(Iop_32to8, mkexpr(t1)))); + break; + } + + case _POOL32A0_ROTRV: { /* rotrv */ + DIP("rotv r%u, r%u, r%u", rd, rs, rt); + assign(t1, binop(Iop_And32, getIReg(rt), mkU32(0x1f))); + assign(t2, binop(Iop_32HLto64, getIReg(rs), getIReg(rs))); + putIReg(rd, unop(Iop_64to32, + binop(Iop_Shr64, + mkexpr(t2), unop(Iop_32to8, mkexpr(t1))))); + break; + } + + case _POOL32A0_AND32: { /* and[32] */ + DIP("and[32] r%u, r%u, r%u", rd, rs, rt); + putIReg(rd, binop(Iop_And32, getIReg(rs), getIReg(rt))); + break; + } + + case _POOL32A0_ADD: { /* add */ + DIP("add r%u, r%u, r%u", rd, rs, rt); + // if overflows(sum, nbits=32): raise exception('OV') + putIReg(rd, binop(Iop_Add32, getIReg(rs), getIReg(rt))); + break; + } + + case _POOL32A0_ADDU32: { /* addu[32] */ + DIP("addu[32] r%u, r%u, r%u", rd, rs, rt); + putIReg(rd, binop(Iop_Add32, getIReg(rs), getIReg(rt))); + break; + } + + case _POOL32A0_SUB: { /* sub */ + DIP("sub r%u, r%u, r%u", rd, rs, rt); + // if overflows(result, nbits=32): raise exception('OV') + putIReg(rd, binop(Iop_Sub32, getIReg(rs), getIReg(rt))); + break; + } + + case _POOL32A0_SUBU32: { /* subu[32] */ + DIP("subu[32] r%u, r%u, r%u", rd, rs, rt); + putIReg(rd, binop(Iop_Sub32, getIReg(rs), getIReg(rt))); + break; + } + + case _POOL32A0_OR32: { /* or[32] */ + DIP("or[32] r%u, r%u, r%u", rd, rs, rt); + putIReg(rd, binop(Iop_Or32, getIReg(rs), getIReg(rt))); + break; + } + + case _POOL32A0_NOR: { /* nor */ + DIP("nor r%u, r%u, r%u", rd, rs, rt); + putIReg(rd, unop(Iop_Not32, binop(Iop_Or32, getIReg(rs), + getIReg(rt)))); + break; + } + + case _POOL32A0_XOR32: { /* xor[32] */ + DIP("xor[32] r%u, r%u, r%u", rd, rs, rt); + putIReg(rd, binop(Iop_Xor32, getIReg(rs), getIReg(rt))); + break; + } + + case _POOL32A0_SLT: { /* slt */ + DIP("slt r%u, r%u, r%u", rd, rs, rt); + putIReg(rd, unop(Iop_1Uto32, binop(Iop_CmpLT32S, getIReg(rs), + getIReg(rt)))); + break; + } + + case _POOL32A0_PSLTU: { /* p.sltu */ + if (rd == 0) { + vassert(0); + } else { /* sltu */ + DIP("sltu r%u, r%u, r%u", rd, rs, rt); + putIReg(rd, unop(Iop_1Uto32, binop(Iop_CmpLT32U, getIReg(rs), + getIReg(rt)))); + } + + break; + } + + case _POOL32A0_SOV: { /* sov */ + IRTemp t33 = newTemp(Ity_I32); + IRTemp t0 = newTemp(Ity_I32); + DIP("sov r%u, r%u, r%u", rd, rs, rt); + assign(t1, binop(Iop_Add32, getIReg(rs), getIReg(rt))); + assign(t33, binop(Iop_Add32, + binop(Iop_Sar32, getIReg(rs), mkU8(1)), + binop(Iop_Sar32, getIReg(rt), mkU8(1)))); + assign(t0, binop(Iop_And32, + binop(Iop_And32, + getIReg(rs), getIReg(rt)), mkU32(1))); + putIReg(rd, unop(Iop_1Uto32, + 
binop(Iop_CmpNE32, + binop(Iop_Sar32, mkexpr(t1), mkU8(1)), + binop(Iop_Add32, mkexpr(t33), mkexpr(t0))))); + // GPR[rd] = 1 if overflows(sum, nbits=32) else 0 + break; + } + + case _POOL32A0_PCMOVE: { /* p.cmove */ + if (cins & 0x400) { /* movn */ + DIP("movn r%u, r%u, r%u", rd, rs, rt); + putIReg(rd, IRExpr_ITE(binop(Iop_CmpNE32, getIReg(rt), mkU32(0x00)), + getIReg(rs), getIReg(rd))); + } else { /* movz */ + DIP("movz r%u, r%u, r%u", rd, rs, rt); + putIReg(rd, IRExpr_ITE(binop(Iop_CmpEQ32, getIReg(rt), mkU32(0x00)), + getIReg(rs), getIReg(rd))); + } + + break; + } + + case _POOL32A0_RDHWR: /* RDHWR */ + DIP("rdhwr r%u, r%u", rt, rs); + + if (rs == 29) { + putIReg(rt, IRExpr_Get(offsetof(VexGuestMIPS32State, guest_ULR), + Ity_I32)); + break; + } else if (rs <= 3) { + IRExpr** arg = mkIRExprVec_1(mkU32(rs)); + IRTemp val = newTemp(Ity_I32); + IRDirty *d = unsafeIRDirty_1_N(val, + 0, + "nanomips_dirtyhelper_rdhwr", + &nanomips_dirtyhelper_rdhwr, + arg); + stmt(IRStmt_Dirty(d)); + putIReg(rt, mkexpr(val)); + break; + } else { + vex_printf("Unsupported RDHWR variant"); + vassert(0); + } + + default: + vex_printf("Unrecognized _POOL32A0 instruction %08X", + (cins >> 3) & 0x07F); + vassert(0); + } +} + +static void nano_pplsx(DisResult *dres, UInt cins) +{ + UChar rd = (cins >> 11) & 0x1F; + UChar rs = (cins >> 16) & 0x1F; + UChar rt = (cins >> 21) & 0x1F; + + switch ((cins >> 7) & 0x0F) { + case LBX: { /* lbx */ + DIP("lbx r%u, %u(r%u)", rd, rs, rt); + putIReg(rd, unop(Iop_8Sto32, + load(Ity_I8, + binop(Iop_Add32, getIReg(rs), getIReg(rt))))); + break; + } + + case SBX: { /* sbx */ + DIP("sbx r%u %u(r%u)", rd, rs, rt); + store(binop(Iop_Add32, getIReg(rs), getIReg(rt)), + unop(Iop_32to8, getIReg(rd))); + break; + } + + case LBUX: { /* lbux */ + DIP("lbux r%u, %u(r%u)", rd, rs, rt); + putIReg(rd, unop(Iop_8Uto32, + load(Ity_I8, + binop(Iop_Add32, getIReg(rs), getIReg(rt))))); + break; + } + + case LHX: { + DIP("lhx r%u, %u(r%u)", rd, rs, rt); + putIReg(rd, unop(Iop_16Sto32, + load(Ity_I16, + binop(Iop_Add32, getIReg(rs), getIReg(rt))))); + break; + } + + case SHX: { + DIP("shx r%u %u(r%u)", rd, rs, rt); + store(binop(Iop_Add32, getIReg(rs), getIReg(rt)), unop(Iop_32to16, + getIReg(rd))); + break; + } + + case LHUX: { + DIP("lbux r%u, %u(r%u)", rd, rs, rt); + putIReg(rd, unop(Iop_16Uto32, + load(Ity_I16, + binop(Iop_Add32, getIReg(rs), getIReg(rt))))); + break; + } + + case LWX: { + DIP("lwx r%u, %u(r%u)", rd, rs, rt); + putIReg(rd, load(Ity_I32, binop(Iop_Add32, getIReg(rs), getIReg(rt)))); + break; + } + + case SWX: { + DIP("swx r%u %u(r%u)", rd, rs, rt); + store(binop(Iop_Add32, getIReg(rs), getIReg(rt)), getIReg(rd)); + break; + } + + default: + vassert(0); + break; + } +} + +static void nano_pplsxs(DisResult *dres, UInt cins) +{ + UChar rd = (cins >> 11) & 0x1F; + UChar rs = (cins >> 16) & 0x1F; + UChar rt = (cins >> 21) & 0x1F; + + switch ((cins >> 7) & 0x0F) { + case LHXS: { + DIP("lhxs r%u, %u(r%u)", rd, rs, rt); + putIReg(rd, unop(Iop_16Sto32, + load(Ity_I16, + binop(Iop_Add32, + binop(Iop_Shl32, getIReg(rs), mkU8(0x01)), + getIReg(rt))))); + break; + } + + case SHXS: { + DIP("shxs r%u %u(r%u)", rd, rs, rt); + store(binop(Iop_Add32, + binop(Iop_Shl32, getIReg(rs), mkU8(0x01)), + getIReg(rt)), + unop(Iop_32to16, getIReg(rd))); + break; + } + + case LHUXS: { + DIP("lbuxs r%u, %u(r%u)", rd, rs, rt); + putIReg(rd, unop(Iop_16Uto32, + load(Ity_I16, + binop(Iop_Add32, + binop(Iop_Shl32, getIReg(rs), mkU8(0x01)), + getIReg(rt))))); + break; + } + + case LWXS32: { + DIP("lwxs[32] r%u, 
r%u(r%u)", rd, rs, rt); + putIReg(rd, load(Ity_I32, + binop(Iop_Add32, + binop(Iop_Shl32, getIReg(rs), mkU8(0x02)), + getIReg(rt)))); + break; + } + + case SWXS: { + DIP("swxs r%u %u(r%u)", rd, rs, rt); + store(binop(Iop_Add32, + binop(Iop_Shl32, getIReg(rs), mkU8(0x02)), + getIReg(rt)), + getIReg(rd)); + break; + } + + default: + vassert(0); + break; + } +} + +static void nano_plsx(DisResult *dres, UInt cins) +{ + if ((cins >> 6) & 0x01) { + nano_pplsxs(dres, cins); + } else { + nano_pplsx(dres, cins); + } +} + +static void nano_pool32Axf_4(DisResult *dres, UInt cins) +{ + UChar rs = (cins >> 16) & 0x1F; + UChar rt = (cins >> 21) & 0x1F; + IRTemp t1; + + switch ((cins >> 9) & 0x7F) { + case nano_POOL32Axf4_CLO: { /* clo */ + DIP("clo r%u, r%u", rt, rs); + t1 = newTemp(Ity_I1); + assign(t1, binop(Iop_CmpEQ32, getIReg(rs), mkU32(0xffffffff))); + putIReg(rt, IRExpr_ITE(mkexpr(t1), + mkU32(0x00000020), + unop(Iop_Clz32, + unop(Iop_Not32, getIReg(rs))))); + break; + } + + case nano_POOL32Axf4_CLZ: { /* clz */ + DIP("clz r%u, r%u", rt, rs); + putIReg(rt, unop(Iop_Clz32, getIReg(rs))); + break; + } + } +} + +static void nano_p32Axf(DisResult *dres, UInt cins) +{ + switch ((cins >> 6) & 0x7) { + case POOL32aXF_4: + nano_pool32Axf_4(dres, cins); + break; + + case POOL32aXF_5: + vassert(0); + break; + + default: + vex_printf("Unrecognized pool32Axf instruction %08X\n", cins); + vassert(0); + break; + } +} + +static void nano_pool32a7(DisResult *dres, UInt cins) +{ + UChar rd = (cins >> 11) & 0x1F; + UChar rs = (cins >> 16) & 0x1F; + UChar rt = (cins >> 21) & 0x1F; + UChar u2 = (cins >> 9) & 0x03; + UChar shift = (cins >> 6) & 0x1F; + + switch ((cins >> 3) & 7) { + case _POOL32A7_PLSX: + nano_plsx(dres, cins); + break; + + case _POOL32A7_LSA: { /* lsa */ + DIP("lsa r%u r%u, r%u", rd, rs, rt); + putIReg(rd, binop(Iop_Add32, binop(Iop_Shl32, getIReg(rs), mkU8(u2)), + getIReg(rt))); + break; + } + + case _POOL32A7_EXTW: { /*extw*/ + DIP("extw r%u r%u, r%u, %u", rd, rs, rt, shift); + IRTemp t1 = newTemp(Ity_I64); + assign(t1, binop(Iop_32HLto64, getIReg(rt), getIReg(rs))); + putIReg(rd, unop(Iop_64to32, binop(Iop_Shr64, mkexpr(t1), + mkU8(shift)))); + break; + } + + case _POOL32A7_P32Axf: { + nano_p32Axf(dres, cins); + break; + } + + default: + vex_printf("Unrecognized _POOL32A7 instruction %08X", cins); + vassert(0); + } +} + +static void nano_p32a(DisResult *dres, UInt cins) +{ + switch (cins & 0x7) { + case P32A_POOL32A0: + nano_pl32a0(dres, cins); + break; + + case P32A_POOL32A7: + nano_pool32a7(dres, cins); + break; + + default: + vex_printf("Unrecognized P32A instruction %08X", cins); + vassert(0); + } +} + +static void nano_pbal(DisResult *dres, UInt cins) +{ + Int s = extend_sign((cins & 0x1FFFFFE) | ((cins & 1) << 25), 26); + + if (cins & 0x2000000) { /* BALC[32] */ + DIP("balc %0X", guest_PC_curr_instr + 4 + s); + putIReg(31, mkU32(guest_PC_curr_instr + 4)); + dres->jk_StopHere = Ijk_Call; + } else { /* BC[32] */ + DIP("bc %0X", guest_PC_curr_instr + 4 + s); + dres->jk_StopHere = Ijk_Boring; + } + + putPC(mkU32(guest_PC_curr_instr + 4 + s)); + dres->whatNext = Dis_StopHere; +} + +static void nano_ppsr(DisResult *dres, UInt cins) +{ + UInt u = cins & 0xFF8; + UChar count = (cins >> 16) & 0x0F; + UChar rt = (cins >> 21) & 0x1F; + UChar counter = 0; + Bool jr = True; + + switch (cins & 0x03) { + case 0x00: { /* save[32] */ + DIP("save %u, r%u-r%u", u, (rt & 0x1fu) | (rt & 0x10u), + ((rt + count - 1) & 0x1fu) | (rt & 0x10u)); + + while (counter != count) { + Bool use_gp = (cins & 0x04) && (counter 
+ 1 == count); + UChar this_rt = use_gp ? 28 : (UChar)((rt + counter) & 0x1f) + | (rt & 0x10); + Int offset = -((counter + 1) << 2); + store(binop(Iop_Add32, getIReg(29), mkU32(offset)), + getIReg(this_rt)); + counter++; + } + + putIReg(29, binop(Iop_Sub32, getIReg(29), mkU32(u))); + break; + } + + case 0x02: /* restore[32] */ + jr = False; //falls through common restore(.jrc) implementation + + case 0x03: { /* restore.jrc[32] */ + DIP("restore%s %u, r%u-r%u", jr ? ".jrc" : "", u, + ((rt + count - 1) & 0x1fu) | (rt & 0x10u), + (rt & 0x1fu) | (rt & 0x10u)); + + while (counter != count) { + Bool use_gp = (cins & 0x04) && (counter + 1 == count); + UChar this_rt = use_gp ? 28 : ((rt + counter) & 0x1F) | (rt & 0x10); + Int offset = u - ((counter + 1) << 2); + putIReg(this_rt,load(Ity_I32, binop(Iop_Add32, + getIReg(29), mkU32(offset)))); + // if this_rt == 29: raise UNPREDICTABLE() + counter++; + } + + putIReg(29, binop(Iop_Add32, getIReg(29), mkU32(u))); + + if (jr) { + putPC(getIReg(31)); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = Ijk_Ret; + } + + break; + } + + default: + vassert(0); + } +} + +static void nano_psrf(UInt cins) +{ + switch (cins & 0x03) { + case 0x00: { /* savef */ + vex_printf("Instruction savef is missing documentation.\n"); + vassert(0); + break; + } + + case 0x02: { /* restoref */ + vex_printf("Instruction restoref is missing documentation.\n"); + vassert(0); + break; + } + + default: + vassert(0); + } +} + +static void nano_psr(DisResult *dres, UInt cins) +{ + switch ((cins >> 20) & 0x1) { + case 0x00: /* pp.sr */ + nano_ppsr(dres, cins); + break; + + case 0x01: /* p.sr.f */ + nano_psrf(cins); + break; + + default: + vassert(0); + break; + } +} + +static void nano_pri(DisResult *dres, UInt cins) +{ + switch ((cins >> 19) & 3) { + case PRI_SIGRIE: + ILLEGAL_INSTRUCTON + break; + + case PRI_PSYSCALL: + if (cins & 0x40000) { /* HYPCALL */ + vex_printf("Instruction HYPCALL is missing documentation.\n"); + vassert(0); + } else { /* SYSCALL[32] */ + DIP("syscall %u", cins & 0x3FFFF); + dres->jk_StopHere = Ijk_Sys_syscall; + dres->whatNext = Dis_StopHere; + } + + break; + + case PRI_BREAK: /* BREAK[32] */ + DIP("break %u", cins & 0x7FFFF); + dres->jk_StopHere = Ijk_SigTRAP; + dres->whatNext = Dis_StopHere; + break; + + case PRI_SDBBP: + vex_printf("Instruction SDBBP is not supported.\n"); + vassert(0); + break; + + default: + vassert(0); + } +} + +static void nano_psll(UInt cins) +{ + UChar rt = (cins >> 21) & 0x1F; + UChar rs = (cins >> 16) & 0x1F; + UChar shift = cins & 0x1F; + + if (rt == 0 && shift == 0) { /* nop[32] */ + DIP("nop[32]"); + return; + } + + if (rt == 0 && shift == 3) { /* ehb */ + DIP("ehb"); + vassert(0); + return; + } + + if (rt == 0 && shift == 5) { /* pause */ + DIP("pause"); + vassert(0); + // pause_until_llbit_clears(); + return; + } + + if (rt == 0 && shift == 6) { /* sync */ + DIP("sync 0x%x", rs); + /* Just ignore it. 
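+         sync only imposes memory ordering and has no architecturally
+         visible data effect; since Valgrind serialises guest thread
+         execution, dropping it here is presumed safe.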
*/ + return; + } + + DIP("sll r%u, r%u, %u", rt, rs, shift); + putIReg(rt, binop(Iop_Shl32, getIReg(rs), mkU8(shift))); + return; +} + +static void nano_pshift(UInt cins) +{ + UChar rt = (cins >> 21) & 0x1F; + UChar rs = (cins >> 16) & 0x1F; + UChar shift = cins & 0x1F; + + switch ((cins >> 5) & 0xF) { + case PSLL: { /* p.sll */ + nano_psll(cins); + break; + } + + case SRL32: /* srl[32] */ + DIP("srl[32] r%u, r%u, %u", rt, rs, shift); + putIReg(rt, binop(Iop_Shr32, getIReg(rs), mkU8(shift))); + break; + + case SRA: /* sra */ + DIP("sra r%u, r%u, %u", rt, rs, shift); + putIReg(rt, binop(Iop_Sar32, getIReg(rs), mkU8(shift))); + break; + + case ROTR: /* rotr */ + DIP("rotr r%u, r%u, %u", rt, rs, shift); + IRTemp t1 = newTemp(Ity_I64); + assign(t1, binop(Iop_32HLto64, getIReg(rs), getIReg(rs))); + putIReg(rt, unop(Iop_64to32, binop(Iop_Shr64, mkexpr(t1), + mkU8(shift)))); + break; + + case DSLL: { /* dsll */ + DIP("dsll r%u, r%u, %u", rt, rs, shift); + vassert(0); + break; + } + + case DSLL32: { /* dsll32 */ + DIP("dsll32 r%u, r%u, %u", rt, rs, shift); + vassert(0); + break; + } + + case DSRL: { /* dsrl */ + DIP("dsrl r%u, r%u, %u", rt, rs, shift); + vassert(0); + break; + } + + case DSRL32: { /* dsrl32 */ + DIP("dsrl32 r%u, r%u, %u", rt, rs, shift); + vassert(0); + break; + } + + case DSRA: { /* dsra */ + DIP("dsra r%u, r%u, %u", rt, rs, shift); + vassert(0); + break; + } + + case DSRA32: { /* dsra32 */ + DIP("dsra32 r%u, r%u, %u", rt, rs, shift); + vassert(0); + break; + } + + case DROTR: { /* drotr */ + DIP("drotr r%u, r%u, %u", rt, rs, shift); + vassert(0); + break; + } + + case DROTR32: { /* drotr32 */ + DIP("drotr32 r%u, r%u, %u", rt, rs, shift); + vassert(0); + break; + } + + default: + vassert(0); + } +} + +static void nano_protx(UInt cins) +{ + UChar rt = (cins >> 21) & 0x1F; + UChar rs = (cins >> 16) & 0x1F; + UChar shift = cins & 0x1F; + UChar shiftx = ((cins >> 7) & 0xF) << 1; + UChar stripe = (cins & 0x40) ? 
1 : 0; + + switch ((cins >> 5) & 0x41) { + case 0x00: { /* rotx */ + int i; + IRTemp t0 = newTemp(Ity_I64); + IRTemp t1 = newTemp(Ity_I64); + IRTemp t2 = newTemp(Ity_I64); + IRTemp t3 = newTemp(Ity_I64); + IRTemp t4 = newTemp(Ity_I64); + IRTemp t5 = newTemp(Ity_I64); + IRTemp tmp = newTemp(Ity_I64); + IRTemp s = newTemp(Ity_I32); + DIP("rotx r%u, r%u, %u, %u, %u", rt, rs, shift, shiftx, stripe); + assign(t0, binop(Iop_Or64, getIReg(rs), binop(Iop_Shl64, + getIReg(rs), mkU8(32)))); + assign(t1, mkexpr(t0)); + + for (i = 0; i < 46; i++) { + assign(s, IRExpr_ITE(binop(Iop_And32, mkU32(i), mkU32(0x08)), + mkU32(shift), mkU32(shiftx))); + assign(s, IRExpr_ITE(binop(Iop_And32, mkU32(stripe), + binop(Iop_CmpNE32, mkU32(0x0), + binop(Iop_And32, + mkU32(i), mkU32(0x04)))), + unop(Iop_Not32, mkU32(s)), mkexpr(s))); + assign(tmp, binop(Iop_Or64, binop(Iop_And64, + binop(Iop_Shr64, mkexpr(t0), + mkU8(0x10)), + binop(Iop_Shl64, mkU64(0x01), + mkU8(i))), + binop(Iop_And64, mkexpr(t1), + unop(Iop_Not64, + binop(Iop_Shl64, mkU64(0x01), + mkU8(i)))))); + assign(t1, IRExpr_ITE(binop(Iop_And32, mkexpr(s), mkU32(0x10)), + mkexpr(tmp), + mkexpr(t1))); + + } + + assign(t2, mkexpr(t1)); + + for (i = 0; i < 38; i++) { + assign(s, IRExpr_ITE(binop(Iop_And32, mkU32(i), mkU32(0x04)), + mkU32(shift), mkU32(shiftx))); + assign(tmp, binop(Iop_Or64, + binop(Iop_And64, + binop(Iop_Shr64, mkexpr(t1), mkU8(0x08)), + binop(Iop_Shl64, mkU64(0x01), mkU8(i))), + binop(Iop_And64, mkexpr(t2), + unop(Iop_Not64, binop(Iop_Shl64, + mkU64(0x01), + mkU8(i)))))); + assign(t2, IRExpr_ITE(binop(Iop_And32, mkexpr(s), mkU32(0x08)), + mkexpr(tmp), + mkexpr(t2))); + + } + + assign(t3, mkexpr(t2)); + + for (i = 0; i < 34; i++) { + assign(s, IRExpr_ITE(binop(Iop_And32, mkU32(i), mkU32(0x02)), + mkU32(shift), mkU32(shiftx))); + assign(tmp, binop(Iop_Or64, + binop(Iop_And64, + binop(Iop_Shr64, mkexpr(t2), mkU8(0x04)), + binop(Iop_Shl64, mkU64(0x01), mkU8(i))), + binop(Iop_And64, mkexpr(t3), + unop(Iop_Not64, binop(Iop_Shl64, + mkU64(0x01), + mkU8(i)))))); + assign(t3, IRExpr_ITE(binop(Iop_And32, mkexpr(s), mkU32(0x04)), + mkexpr(tmp), + mkexpr(t3))); + + } + + assign(t4, mkexpr(t3)); + + for (i = 0; i < 32; i++) { + assign(s, IRExpr_ITE(binop(Iop_And32, mkU32(i), mkU32(0x01)), + mkU32(shift), mkU32(shiftx))); + assign(tmp, binop(Iop_Or64, + binop(Iop_And64, + binop(Iop_Shr64, mkexpr(t3), mkU8(0x02)), + binop(Iop_Shl64, mkU64(0x01), mkU8(i))), + binop(Iop_And64, mkexpr(t4), + unop(Iop_Not64, binop(Iop_Shl64, + mkU64(0x01), + mkU8(i)))))); + assign(t4, IRExpr_ITE(binop(Iop_And32, mkexpr(s), mkU32(0x02)), + mkexpr(tmp), + mkexpr(t4))); + + } + + assign(t5, mkexpr(t4)); + + for (i = 0; i < 32; i++) { + assign(tmp, binop(Iop_Or64, + binop(Iop_And64, + binop(Iop_Shr64, mkexpr(t4), mkU8(0x01)), + binop(Iop_Shl64, mkU64(0x01), mkU8(i))), + binop(Iop_And64, mkexpr(t5), + unop(Iop_Not64, binop(Iop_Shl64, + mkU64(0x01), + mkU8(i)))))); + assign(t4, IRExpr_ITE(binop(Iop_And32, mkexpr(shift), mkU32(0x02)), + mkexpr(tmp), + mkexpr(t5))); + + } + + putIReg(rt, mkexpr(t5)); + break; + } + + default: + vassert(0); + break; + } + +} + +static void nano_pins(UInt cins) +{ + UChar rt = (cins >> 21) & 0x1F; + UChar rs = (cins >> 16) & 0x1F; + UChar lsb = cins & 0x1F; + UChar msbd = (cins >> 6) & 0x1F; + + switch ((cins >> 5) & 0x41) { + case 0x00: { /* ins */ + UChar size = 1 + msbd - lsb; + DIP("ins r%u, r%u, %u, %u", rt, rs, lsb, size); + UInt mask = ((1 << size) - 1) << lsb; + putIReg(rt, binop(Iop_Or32, binop(Iop_And32, getIReg(rt), + mkU32(~mask)), + 
binop(Iop_And32, + binop(Iop_Shl32, + getIReg(rs), mkU8(lsb)), + mkU32(mask)))); + break; + } + + case 0x01: { /* dins */ + vassert(0); + break; + } + + case 0x40: { /* dinsm */ + vassert(0); + break; + } + + case 0x41: { /* dins */ + vassert(0); + break; + } + + default: + vassert(0); + } +} + +static void nano_pext(UInt cins) +{ + UChar rt = (cins >> 21) & 0x1F; + UChar rs = (cins >> 16) & 0x1F; + UChar lsb = cins & 0x1F; + UChar msbd = (cins >> 6) & 0x1F; + + switch ((cins >> 5) & 0x41) { + case 0x00: { /* ext */ + DIP("ext r%u, r%u, %u, %u", rt, rs, lsb, msbd + 1u); + + if (msbd + 1 + lsb > 32) vassert(0); + + putIReg(rt, binop(Iop_And32, binop(Iop_Shr32, getIReg(rs), mkU8(lsb)), + mkU32((1 << (msbd + 1)) - 1))); + break; + } + + case 0x01: { /* dextu */ + vassert(0); + break; + } + + case 0x40: { /* dextm */ + vassert(0); + break; + } + + case 0x41: { /* dext */ + vassert(0); + break; + } + + default: + vassert(0); + } +} + +static void nano_pool16c00(UInt cins) +{ + UChar rs = GPR3_list[(cins >> 4) & 0x07]; + UChar rt = GPR3_list[(cins >> 7) & 0x07]; + + switch (cins & 0x0C) { + case POOL16C00_NOT: { /* not[16] */ + DIP("not[16] r%u, r%u", rt, rs); + putIReg(rt, unop(Iop_Not32, getIReg(rs))); + break; + } + + case POOL16C00_XOR: { /* xor[16] */ + DIP("xor[16] r%u, r%u", rt, rs); + putIReg(rt, binop(Iop_Xor32, getIReg(rs), getIReg(rt))); + break; + } + + case POOL16C00_AND: { /* and[16] */ + DIP("and[16] r%u, r%u", rt, rs); + putIReg(rt, binop(Iop_And32, getIReg(rs), getIReg(rt))); + break; + } + + case POOL16C00_OR: { /* or[16] */ + DIP("or[16] r%u, r%u", rt, rs); + putIReg(rt, binop(Iop_Or32, getIReg(rs), getIReg(rt))); + break; + } + } +} + +static void nano_pu12(DisResult *dres, UInt cins) +{ + UChar rs = (cins >> 16) & 0x1F; + UChar rt = (cins >> 21) & 0x1F; + UShort u = cins & 0x0FFF; + + switch ((cins >> 12) & 0x0F) { + case PU12_ORI: { /* ori */ + DIP("ori r%u, r%u, %u", rt, rs, u); + putIReg(rt, binop(Iop_Or32, getIReg(rs), mkU32(u))); + break; + } + + case PU12_XORI: { /* xori */ + DIP("xori r%u, r%u, %u", rt, rs, u); + putIReg(rt, binop(Iop_Xor32, getIReg(rs), mkU32(u))); + break; + } + + case PU12_ANDI: { /* andi */ + DIP("andi r%u, r%u, %u", rt, rs, u); + putIReg(rt, binop(Iop_And32, getIReg(rs), mkU32(u))); + break; + } + + case PU12_PSR: /* p.sr */ + nano_psr(dres, cins); + break; + + case PU12_SLTI: { /* slti */ + DIP("slti r%u, r%u, %u", rt, rs, u); + putIReg(rt, unop(Iop_1Uto32, binop(Iop_CmpLT32S, getIReg(rs), + mkU32(u)))); + break; + } + + case PU12_SLTIU: { /* sltiu */ + DIP("sltiu r%u, r%u, %u", rt, rs, u); + putIReg(rt, unop(Iop_1Uto32, binop(Iop_CmpLT32U, getIReg(rs), + mkU32(u)))); + break; + } + + case PU12_SEQI: { /* seqi */ + DIP("seqi r%u, r%u, %u", rt, rs, u); + putIReg(rt, unop(Iop_1Uto32, binop(Iop_CmpEQ32, getIReg(rs), + mkU32(u)))); + break; + } + + case PU12_ADDIU_NEG: { /* addiu[neg] */ + DIP("addiu[neg] r%u, r%u, %u", rt, rs, u); + putIReg(rt, binop(Iop_Sub32, getIReg(rs), mkU32(u))); + break; + } + + case PU12_PSHIFT: /* p.shift */ + nano_pshift(cins); + break; + + case PU12_PROTX: /* p.rotx */ + nano_protx(cins); + break; + + case PU12_PINS: /* p.ins */ + nano_pins(cins); + break; + + case PU12_PEXT: /* p.ext */ + nano_pext(cins); + break; + + default: + vassert(0); + } +} + +static void nano_pbr1(DisResult *dres, UInt cins) +{ + UChar rs = (cins >> 16) & 0x1F; + UChar rt = (cins >> 21) & 0x1F; + Short s = (Short)((cins & 0x3FFE) | + ((cins & 1) << 14) | ((cins & 1) << 15)); + + switch ((cins >> 14) & 0x3) { + case PBR1_BEQC32: { /* BEQC[32] */ 
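+        /* beqc rt, rs, s: branch to PC + 4 + s when GPR[rt] == GPR[rs].
+           ir_for_branch emits the guarded side exit and sets the
+           fall-through PC to PC + 4. */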
+ DIP("beqc[32] r%u, r%u, %X", rt, rs, guest_PC_curr_instr + 4 + (Int)s); + ir_for_branch(dres, binop(Iop_CmpEQ32, getIReg(rt), getIReg(rs)), + 4, (Int)s); + break; + } + + case PBR1_PBR3A: { /* P.BR3A */ + vassert(0); + break; + } + + case PBR1_BGEC: { /* BGEC */ + DIP("bgec r%u, r%u, %X", rs, rt, guest_PC_curr_instr + 4 + (Int)s); + ir_for_branch(dres, binop(Iop_CmpLE32S, getIReg(rt), getIReg(rs)), + 4, (Int)s); + break; + } + + case PBR1_BGEUC: { /* bgeuc */ + DIP("bgeuc r%u, r%u, %X", rs, rt, guest_PC_curr_instr + 4 + (Int)s); + ir_for_branch(dres, binop(Iop_CmpLE32U, getIReg(rt), getIReg(rs)), + 4, (Int)s); + break; + } + + default: + vex_printf("Unsupported p.br1 instruction %08X", cins); + vassert(0); + } +} + +static void nano_pbr2(DisResult *dres, UInt cins) +{ + UChar rs = (cins >> 16) & 0x1F; + UChar rt = (cins >> 21) & 0x1F; + Short s = (Short)((cins & 0x3FFE) | + ((cins & 1) << 14) | ((cins & 1) << 15)); + + switch ((cins >> 14) & 0x3) { + case PBR2_BNEC32: { /* BNEC[32] */ + DIP("bnec r%u, r%u, %X", rt, rs, guest_PC_curr_instr + 4 + (Int)s); + ir_for_branch(dres, binop(Iop_CmpNE32, getIReg(rt), getIReg(rs)), + 4, (Int)s); + break; + } + + case PBR2_BLTC: { /* BLTC */ + DIP("bltc r%u, r%u, %X", rt, rs, guest_PC_curr_instr + 4 + (Int)s); + ir_for_branch(dres, binop(Iop_CmpLT32S, getIReg(rs), getIReg(rt)), + 4, (Int)s); + break; + } + + case PBR2_BLTUC: { /* BLTUC */ + DIP("bltuc r%u, r%u, %X", rt, rs, guest_PC_curr_instr + 4 + (Int)s); + ir_for_branch(dres, binop(Iop_CmpLT32U, getIReg(rs), getIReg(rt)), + 4, (Int)s); + break; + } + + default: + vex_printf("Unsupported p.br2 instruction %08X", cins); + vassert(0); + } +} + +static void nano_pbri(DisResult *dres, UInt cins) +{ + UChar rt = (cins >> 21) & 0x1F; + Int s = extend_sign((cins & 0x7FE) | ((cins & 0x01) << 11), 12); + UChar bit = (cins >> 11) & 0x3F; + UInt u = (cins >> 11) & 0x7F; + + switch ((cins >> 18) & 0x07) { + case PBRI_BEQIC: { + DIP("beqic r%u, %u, %0X", rt, u, guest_PC_curr_instr + 4 + s); + ir_for_branch(dres, binop(Iop_CmpEQ32, getIReg(rt), mkU32(u)), + 4, (Int)s); + break; + } + + case PBRI_BBEQZC: { + DIP("bbeqzc r%u, %u, %0X", rt, bit, guest_PC_curr_instr + 4 + s); + + if (bit >= 32) { + ILLEGAL_INSTRUCTON + return; + } + + ir_for_branch(dres, + binop(Iop_CmpEQ32, + binop(Iop_And32, + binop(Iop_Shr32, getIReg(rt), mkU8(bit)), + mkU32(1)), + mkU32(0)), 4, s); + break; + } + + case PBRI_BGEIC: { /* bgeic */ + DIP("bgeic r%u, %u, %0X", rt, u, guest_PC_curr_instr + 4 + s); + ir_for_branch(dres, binop(Iop_CmpLE32S, mkU32(u), getIReg(rt)), + 4, (Int)s); + break; + } + + case PBRI_BGEIUC: { /* bgeiuc */ + DIP("bgeiuc r%u, %u, %0X", rt, u, guest_PC_curr_instr + 4 + s); + ir_for_branch(dres, binop(Iop_CmpLE32U, mkU32(u), getIReg(rt)), + 4, (Int)s); + break; + } + + case PBRI_BNEIC: { + DIP("bneic r%u, %u, %0X", rt, u, guest_PC_curr_instr + 4 + s); + ir_for_branch(dres, + binop(Iop_CmpNE32, getIReg(rt), mkU32(u)), 4, s); + break; + } + + case PBRI_BBNEZC: { + DIP("bbnezc r%u, %u, %0X", rt, bit, guest_PC_curr_instr + 4 + s); + + if (bit >= 32) { + ILLEGAL_INSTRUCTON + return; + } + + ir_for_branch(dres, + binop(Iop_CmpNE32, + binop(Iop_And32, + binop(Iop_Shr32, getIReg(rt), mkU8(bit)), + mkU32(1)), + mkU32(0)), 4, s); + break; + } + + case PBRI_BLTIC: { + DIP("bltic r%u, %u, %0X", rt, u, guest_PC_curr_instr + 4 + s); + ir_for_branch(dres, binop(Iop_CmpLT32S, getIReg(rt), mkU32(u)), + 4, (Int)s); + break; + } + + case PBRI_BLTIUC: { + DIP("bltiuc r%u, %u, %0X", rt, u, guest_PC_curr_instr + 4 + s); + 
ir_for_branch(dres, binop(Iop_CmpLT32U, getIReg(rt), mkU32(u)), + 4, (Int)s); + break; + } + + default: + vex_printf("Unsupported p.bri instruction %08X", cins); + vassert(0); + } +} + +static void nano_pprefs9(DisResult *dres, UInt cins) +{ + UChar hint = (cins >> 21) & 0x1F; + UChar rs = (cins >> 16) & 0x1F; + UChar s = extend_sign((cins & 0xFF) | ((cins >> 7) & 0x100), 9); + + if (hint == 31) { /* synci */ + DIP("synci %u(r%u)", s, rs); + vassert(0); + } else { /* pref[s9] */ + DIP("pref[s9] %u, %u(r%u)", hint, s, rs); + vassert(0); + } +} + +static void nano_plss0(DisResult *dres, UInt cins) +{ + UChar rs = (cins >> 16) & 0x1F; + UChar rt = (cins >> 21) & 0x1F; + Int s = extend_sign(((cins >> 7) & 0x100) | (cins & 0xFF), 9); + + switch ((cins >> 11) & 0xf) { + case LBS9: { /* lb[s9] */ + DIP("lb[s9] r%u %d(r%u)", rt, s, rs); + putIReg(rt, unop(Iop_8Sto32, + load(Ity_I8, binop(Iop_Add32, getIReg(rs), + mkU32(s))))); + break; + } + + case LHS9: { /* lh[s9] */ + DIP("lh[s9] r%u %d(r%u)", rt, s, rs); + putIReg(rt, unop(Iop_16Sto32, + load(Ity_I16, binop(Iop_Add32, getIReg(rs), + mkU32(s))))); + break; + } + + case LWS9: { /* lw[s9] */ + DIP("lw[s9] r%u %d(r%u)", rt, s, rs); + putIReg(rt, load(Ity_I32, binop(Iop_Add32, getIReg(rs), mkU32(s)))); + break; + } + + case LDS9: { /* ld[s9] */ + DIP("ld[s9] r%u %d(r%u)", rt, s, rs); + vassert(0); + break; + } + + case SBS9: { /* sb[s9] */ + DIP("sb[s9] r%u %d(r%u)", rt, s, rs); + store(binop(Iop_Add32, getIReg(rs), mkU32(s)), unop(Iop_32to8, + getIReg(rt))); + break; + } + + case SHS9: { /* sh[s9] */ + DIP("sh[s9] r%u %d(r%u)", rt, s, rs); + store(binop(Iop_Add32, getIReg(rs), mkU32(s)), unop(Iop_32to16, + getIReg(rt))); + break; + } + + case SWS9: { /* sw[s9] */ + DIP("sw[s9] r%u %d(r%u)", rt, s, rs); + store(binop(Iop_Add32, getIReg(rs), mkU32(s)), getIReg(rt)); + break; + } + + case SDS9: { /* sd[s9] */ + DIP("sd[s9] r%u %d(r%u)", rt, s, rs); + vassert(0); + break; + } + + case LBUS9: { /* lbu[s9] */ + DIP("lbu[s9] r%u %d(r%u)", rt, s, rs); + putIReg(rt, unop(Iop_8Uto32, + load(Ity_I8, binop(Iop_Add32, getIReg(rs), + mkU32(s))))); + break; + } + + case LHUS9: { /* lhu[s9] */ + DIP("lhu[s9] r%u %d(r%u)", rt, s, rs); + putIReg(rt, unop(Iop_16Uto32, + load(Ity_I16, binop(Iop_Add32, getIReg(rs), + mkU32(s))))); + break; + } + + case LWC1S9: { /* lwc1[s9] */ + DIP("lwc1[s9] r%u %d(r%u)", rt, s, rs); + vassert(0); + break; + } + + case LDC1S9: { /* ldc1[s9] */ + DIP("ldc1[s9] r%u %d(r%u)", rt, s, rs); + vassert(0); + break; + } + + case PPREFS9: { /* p.pref[s9] pool */ + nano_pprefs9(dres, cins); + break; + } + + case LWUS9: { /* lwu[s9] */ + DIP("lwu[s9] r%u %d(r%u)", rt, s, rs); + vassert(0); + break; + } + + case SWC1S9: { /* swc1[s9] */ + DIP("swc1[s9] r%u %d(r%u)", rt, s, rs); + vassert(0); + break; + } + + case SDC1S9: { /* sdc1[s9] */ + DIP("sdc1[s9] r%u %d(r%u)", rt, s, rs); + vassert(0); + break; + } + } +} + +static void nano_pll(DisResult *dres, UInt cins) +{ + IRTemp t1, t2; + UChar rs = (cins >> 16) & 0x1F; + UChar rt = (cins >> 21) & 0x1F; + UInt s = extend_sign(((cins >> 7) & 0x100) | (cins & 0xFC), 9); + + switch (cins & 0x03) { + case LL: { + DIP("ll r%u %u(r%u)", rt, s, rs); + t1 = newTemp(Ity_I32); + t2 = newTemp(Ity_I32); + assign(t1, binop(Iop_Add32, getIReg(rs), mkU32(s))); + assign(t2, load(Ity_I32, mkexpr(t1))); + putLLaddr(mkexpr(t1)); + putLLdata(mkexpr(t2)); + putIReg(rt, mkexpr(t2)); + break; + } + + case LLWP: { + UChar ru = (cins >> 3) & 0x1F; + DIP("llwp r%u %u(r%u)", rt, s, rs); + if (rt == ru) vassert(0); + t1 = 
newTemp(Ity_I32); + t2 = newTemp(Ity_I64); + assign(t1, getIReg(rs)); + assign(t2, load(Ity_I64, mkexpr(t1))); + putLLaddr(mkexpr(t1)); + putLLdata64(mkexpr(t2)); + putIReg(rt, unop(Iop_64to32, mkexpr(t2))); + putIReg(ru, unop(Iop_64HIto32, mkexpr(t2))); + break; + } + + default: + vassert(0); + } +} + +static void nano_psc(DisResult *dres, UInt cins) +{ + IRTemp t1, t2, t3, t4, t5; + UChar rs = (cins >> 16) & 0x1F; + UChar rt = (cins >> 21) & 0x1F; + UInt s = extend_sign(((cins >> 7) & 0x100) | (cins & 0xFC), 9); + + switch (cins & 0x03) { + case SC: { + DIP("sc r%u %u(r%u)", rt, s, rs); + + t1 = newTemp(Ity_I32); + t2 = newTemp(Ity_I1); + t3 = newTemp(Ity_I32); + + assign(t1, binop(Iop_Add32, getIReg(rs), mkU32(s))); + + assign(t2, binop(Iop_CmpNE32, + mkexpr(t1), getLLaddr())); + assign(t3, getIReg(rt)); + putLLaddr(LLADDR_INVALID); + putIReg(rt, getIReg(0)); + + stmt(IRStmt_Exit(mkexpr(t2), Ijk_Boring, + IRConst_U32(guest_PC_curr_instr + 4), + OFFB_PC)); + + t4 = newTemp(Ity_I32); + t5 = newTemp(Ity_I32); + + assign(t5, getLLdata()); + + stmt(IRStmt_CAS(mkIRCAS(IRTemp_INVALID, t4, /* old_mem */ + MIPS_IEND, mkexpr(t1), /* addr */ + NULL, mkexpr(t5), /* expected value */ + NULL, mkexpr(t3) /* new value */))); + + putIReg(rt, unop(Iop_1Uto32, + binop(Iop_CmpEQ32, mkexpr(t4), mkexpr(t5)))); + break; + } + + case SCWP: { + UChar ru = (cins >> 3) & 0x1F; + DIP("scwp r%u %u(r%u)", rt, s, rs); + t1 = newTemp(Ity_I32); + t2 = newTemp(Ity_I1); + + IRTemp oldHi, oldLo, expHi, expLo; + oldHi = newTemp(Ity_I32); + oldLo = newTemp(Ity_I32); + expHi = newTemp(Ity_I32); + expLo = newTemp(Ity_I32); + + assign(t2, binop(Iop_CmpNE32, + getIReg(rs), getLLaddr())); + + putLLaddr(LLADDR_INVALID); + putIReg(rt, getIReg(0)); + + stmt(IRStmt_Exit(mkexpr(t2), Ijk_Boring, + IRConst_U32(guest_PC_curr_instr + 4), + OFFB_PC)); + + assign(expHi, unop(Iop_64HIto32, getLLdata64())); + assign(expLo, unop(Iop_64to32, getLLdata64())); + + + stmt(IRStmt_CAS(mkIRCAS(oldHi, oldLo, /* old_mem */ + MIPS_IEND, getIReg(rs), /* addr */ + mkexpr(expHi), mkexpr(expLo), /* expected value */ + getIReg(ru), getIReg(rt) /* new value */))); + + putIReg(rt, binop(Iop_And32, + unop(Iop_1Uto32, + binop(Iop_CmpEQ32, mkexpr(oldHi), mkexpr(expHi))), + unop(Iop_1Uto32, + binop(Iop_CmpEQ32, mkexpr(oldLo), mkexpr(expLo))))); + break; + } + + default: + vassert(0); + } +} + +static void nano_plss1(DisResult *dres, UInt cins) +{ + UChar rs = (cins >> 16) & 0x1F; + UChar rt = (cins >> 21) & 0x1F; + UInt s = extend_sign(((cins >> 7) & 0x100) | (cins & 0xFF), 9); + + switch ((cins >> 11) & 0x0F) { + case ASET_ACLER: { + vassert(0); + break; + } + + case UALH: { /* ualh */ + DIP("ualh r%u %u(r%u)", rt, s, rs); + putIReg(rt, unop(Iop_16Sto32, + load(Ity_I16, binop(Iop_Add32, getIReg(rs), mkU32(s))))); + break; + } + + case UASH: { /* uash */ + DIP("uash r%u %u(r%u)", rt, s, rs); + store(binop(Iop_Add32, getIReg(rs), mkU32(s)), unop(Iop_32to16, getIReg(rt))); + break; + } + + case CACHE: { /* cache */ + vassert(0); + break; + } + + case LWC2: { + vassert(0); + break; + } + + case SWC2: { + vassert(0); + break; + } + + case LDC2: { + vassert(0); + break; + } + + case SDC2: { + vassert(0); + break; + } + + case PLL: { + nano_pll(dres, cins); + break; + } + + case PSC: { + nano_psc(dres, cins); + break; + } + + case PLLD: { + vassert(0); + break; + } + + case PSCD: { + vassert(0); + break; + } + + default: + vex_printf("Unrecognized P.LS.S1 instruction %08X", cins); + vassert(0); + } +} + +static void nano_plswm(DisResult *dres, UInt cins) +{ + UChar rs 
= (cins >> 16) & 0x1F; + UChar rt = (cins >> 21) & 0x1F; + Int offset = extend_sign(((cins >> 7) & 0x100) | (cins & 0xFF), 9); + UChar count3 = (cins >> 12) & 0x07; + UChar count = count3 ? count3 : 8; + UChar counter = 0; + UChar rt_tmp; + Int offset_tmp; + + if ((cins >> 11) & 0x01) { /* swm */ + DIP("swm r%u, %d(r%u), %u", rt, offset, rs, count); + + while (counter != count) { + rt_tmp = rt ? (rt & 0x10) | ((rt + counter) & 0x1F) : 0; + offset_tmp = offset + (counter << 2); + store(binop(Iop_Add32, getIReg(rs), mkU32(offset_tmp)), + getIReg(rt_tmp)); + counter++; + } + } else { /* lwm */ + DIP("lwm r%u, %d(r%u), %u", rt, offset, rs, count); + + while (counter != count) { + rt_tmp = (rt & 0x10) | ((rt + counter) & 0x1F); + offset_tmp = offset + (counter << 2); + putIReg(rt_tmp, load(Ity_I32, binop(Iop_Add32, getIReg(rs), + mkU32(offset_tmp)))); + + if ((rt_tmp == rs) && (counter != count - 1)) { + // raise UNPREDICTABLE() + } + + counter++; + } + + } +} + +static void nano_plsuawm(DisResult *dres, UInt cins) +{ + UChar rs = (cins >> 16) & 0x1F; + UChar rt = (cins >> 21) & 0x1F; + UInt s = extend_sign(((cins >> 7) & 0x100) | (cins & 0xFF), 9); + UChar count3 = (cins >> 12) & 0x07; + UChar count = count3 ? count3 : 8; + UChar counter = 0; + UInt offset = extend_sign(s, 9); + UChar rt_tmp, offset_tmp; + + if ((cins >> 11) & 0x01) { /* swm */ + while (counter++ != count) { + rt_tmp = rt ? (rt & 0x10) | ((rt + counter) & 0x1F) : 0; + offset_tmp = offset + (counter << 2); + store(binop(Iop_Add32, getIReg(rs), mkU32(offset_tmp)), + getIReg(rt_tmp)); + } + } else { /* lwm */ + while (counter++ != count) { + rt_tmp = (rt & 0x10) | (rt + counter); + offset_tmp = offset + (counter << 2); + putIReg(rt_tmp, load(Ity_I32, binop(Iop_Add32, getIReg(rs), + mkU32(offset_tmp)))); + + if ((rt_tmp == rs) && (counter != count - 1)) { + vassert(0); + // raise UNPREDICTABLE() + } + } + + } +} + +static void nano_plss9(DisResult *dres, UInt cins) +{ + switch ((cins >> 8) & 0x7) { + case PLSS0: { /* p.ls.s0 */ + nano_plss0(dres, cins); + break; + } + + case PLSS1: { + nano_plss1(dres, cins); + break; + } + + case PLSE0: { + vassert(0); + break; + } + + case PLSWM: { + nano_plswm(dres, cins); + break; + } + + case PLSUAWM: { + nano_plsuawm(dres, cins); + break; + } + + case PLSDM: { + vassert(0); + break; + } + + case PLSUADM: { + vassert(0); + break; + } + } +} + +static void nano_p16a1(DisResult *dres, UShort cins) +{ + if (cins & 0x40) { /* ADDIU[R1.SP] */ + UChar rs = 29; + UChar rt = GPR3_list[(cins >> 7) & 0x07]; + UChar u = (cins & 0x3F) << 2; + DIP("ADDIU[R1.SP] r%u, %u", rt, u); + putIReg(rt, binop(Iop_Add32, getIReg(rs), mkU32(u))); + } else { + vassert(0); + } +} + +static void nano_p16a2(DisResult *dres, UShort cins) +{ + if (cins & 0x08) { /* P.ADDIU[RS5] */ + UChar rt = (cins >> 5) & 0x1F; + + if (rt != 0) { /* ADDIU[RS5] */ + Int s = extend_sign((cins & 0x07) | ((cins >> 1) & 0x08), 4); + DIP("addiu r%u, r%u, %d", rt, rt, s); + putIReg(rt, binop(Iop_Add32, getIReg(rt), mkU32(s))); + } else { + DIP("nop"); + } + } else { /* ADDIU[R2] */ + UChar rs = GPR3_list[(cins >> 4) & 0x07]; + UChar rt = GPR3_list[(cins >> 7) & 0x07]; + UChar u = (cins & 0x07) << 2; + DIP("addiu r%u, r%u, 0x%X", rt, rs, u); + putIReg(rt, binop(Iop_Add32, getIReg(rs), mkU32(u))); + } +} + +static void nano_p16ri(DisResult *dres, UShort cins) +{ + switch ((cins >> 3) & 0x03) { + case RI_PSYSCALL: { + if (cins & 0x4) { /* HYPCALL[16] */ + vex_printf("Instruction HYPCALL is missing documentation.\n"); + vassert(0); + } else { /* 
SYSCALL[16] */ + DIP("syscall %u", cins & 0x3u); + dres->jk_StopHere = Ijk_Sys_syscall; + dres->whatNext = Dis_StopHere; + } + + break; + } + + case RI_BREAK: { /* BREAK[16] */ + DIP("break %u", cins & 0x7u); + dres->jk_StopHere = Ijk_SigTRAP; + dres->whatNext = Dis_StopHere; + break; + } + + case RI_SDBBP: { + vex_printf("Instruction SDBBP is not supported.\n"); + vassert(0); + } + } +} + +static void nano_p16mv(DisResult *dres, UShort cins) +{ + UChar rs = cins & 0x1F; + UChar rt = (cins >> 5) & 0x1f; + + if (rt != 0) { + DIP("move r%u, r%u", rt, rs); + putIReg(rt, getIReg(rs)); + } else { + nano_p16ri(dres, cins); + } +} + +static void nano_p16shift(DisResult *dres, UShort cins) +{ + UChar rs = GPR3_list[(cins >> 4) & 0x07]; + UChar rt = GPR3_list[(cins >> 7) & 0x07]; + UChar shift = cins & 0x07; + + if (cins & 0x08) { /* slr[16] */ + DIP("slr r%u, r%u, %u ", rt, rs, shift); + putIReg(rt, binop(Iop_Shr32, getIReg(rs), mkU8(shift != 0 ? shift : 8))); + } else { /* sll[16] */ + DIP("sll r%u, r%u, %u ", rt, rs, shift); + putIReg(rt, binop(Iop_Shl32, getIReg(rs), mkU8(shift != 0 ? shift : 8))); + } +} + +static void nano_p16c(DisResult *dres, UShort cins) +{ + switch (cins & 0x03) { + case 0x00: { /* POOL16C_0 */ + nano_pool16c00(cins); + break; + } + + case 0x01: + case 0x03: { /* LWXS[16] */ + UChar rt = GPR3_list[(cins >> 7) & 0x07]; + UChar rs = GPR3_list[(cins >> 4) & 0x07]; + UChar rd = GPR3_list[(cins >> 1) & 0x07]; + DIP("lwxs[32] r%u, %u(r%u)", rd, rs, rt); + putIReg(rd, load(Ity_I32, + binop(Iop_Add32, binop(Iop_Shl32, getIReg(rs), + mkU8(0x02)), + getIReg(rt)))); + break; + } + + default: + vassert(0); + } +} + +static void nano_p16br(DisResult *dres, UShort cins) +{ + UChar u = (cins & 0x0f) << 1; + + if (0 == u) { + UChar rt = (cins >> 5) & 0x1F; + + if (cins & 0x10) { /* JALRC[16] */ + DIP("jalrc r%u", rt); + putIReg(31, mkU32(guest_PC_curr_instr + 2)); + dres->jk_StopHere = Ijk_Call; + } else { /* JRC */ + DIP("jrc r%u", rt); + dres->jk_StopHere = rt != 31 ? Ijk_Boring : Ijk_Ret; + } + + putPC(getIReg(rt)); + dres->whatNext = Dis_StopHere; + } else { + UChar rt = GPR3_list[(cins >> 7) & 0x07]; + UChar rs = GPR3_list[(cins >> 4) & 0x07]; + + if (rs < rt) { /* beqc[16] */ + DIP("beqc r%u, r%u, %X", rt, rs, guest_PC_curr_instr + 2 + u); + ir_for_branch(dres, binop(Iop_CmpEQ32, getIReg(rt), getIReg(rs)), + 2, (Int)u); + } else { /* bnec[16] */ + DIP("bnec r%u, r%u, %X", rt, rs, guest_PC_curr_instr + 2 + u); + ir_for_branch(dres, binop(Iop_CmpNE32, getIReg(rt), getIReg(rs)), + 2, (Int)u); + } + } +} + +static void nano_p16sr(DisResult *dres, UShort cins) +{ + UChar u = cins & 0xF0; + UChar count = cins & 0x0F; + UChar counter = 0; + UChar rt = cins & 0x200 ? 
31 : 30; + + if (cins & 0x100) { /* RESTORE.JRC[16] */ + DIP("restore.jrc %u, r%u-r%u", u, (rt & 0x1fu) | (rt & 0x10u), + ((rt + count - 1) & 0x1fu) | (rt & 0x10u)); + + while (counter != count) { + UChar this_rt = ((rt + counter) & 0x1F) | (rt & 0x10); + Int offset = u - ((counter + 1) << 2); + putIReg(this_rt, load(Ity_I32, binop(Iop_Add32, getIReg(29), + mkU32(offset)))); + // if this_rt == 29: raise UNPREDICTABLE() + counter++; + } + + putIReg(29, binop(Iop_Add32, getIReg(29), mkU32(u))); + putPC(getIReg(31)); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = Ijk_Ret; + } else { /* SAVE[16] */ + DIP("save %u, r%u-r%u", u, (rt & 0x1fu) | (rt & 0x10u), + ((rt + count - 1) & 0x1fu) | (rt & 0x10u)); + + while (counter != count) { + UChar this_rt = ((rt + counter) & 0x1f) | (rt & 0x10); + Int offset = -((counter + 1) << 2); + store(binop(Iop_Add32, getIReg(29), mkU32(offset)), getIReg(this_rt)); + counter++; + } + + putIReg(29, binop(Iop_Sub32, getIReg(29), mkU32(u))); + } +} + +static void nano_p16lb(DisResult *dres, UShort cins) +{ + UChar rt = GPR3_list[(cins >> 7) & 0x7]; + UChar rs = GPR3_list[(cins >> 4) & 0x7]; + UChar u = cins & 0x3; + + switch ((cins >> 2) & 0x3) { + case 0x0: /* LB[16] */ + DIP("lb[16] r%u %u(r%u)", rt, u, rs); + putIReg(rt, unop(Iop_8Sto32, + load(Ity_I8, binop(Iop_Add32, getIReg(rs), + mkU32(u))))); + break; + + case 0x1: /* SB[16] */ + rt = GPR3_src_store_list[(cins >> 7) & 0x7]; + DIP("sb[16] r%u %u(r%u)", rt, u, rs); + store(binop(Iop_Add32, getIReg(rs), mkU32(u)), unop(Iop_32to8, + getIReg(rt))); + break; + + case 0x2: /* LBU[16] */ + DIP("lbu[16] r%u %u(r%u)", rt, u, rs); + putIReg(rt, unop(Iop_8Uto32, + load(Ity_I8, binop(Iop_Add32, getIReg(rs), + mkU32(u))))); + break; + + default: + vex_printf("Unrecognized bytes %04x\n", cins); + vassert(0); + } +} + +static void nano_p16lh(DisResult *dres, UShort cins) +{ + UChar rt = GPR3_list[(cins >> 7) & 0x7]; + UChar rs = GPR3_list[(cins >> 4) & 0x7]; + UChar u = cins & 0x06; + + switch (cins & 0x09) { + case 0x0: /* LH[16] */ + DIP("lh[16] r%u %u(r%u)", rt, u, rs); + putIReg(rt, unop(Iop_16Sto32, + load(Ity_I16, binop(Iop_Add32, getIReg(rs), + mkU32(u))))); + break; + + case 0x1: /* SH[16] */ + rt = GPR3_src_store_list[(cins >> 7) & 0x7]; + DIP("sh[16] r%u %u(r%u)", rt, u, rs); + store(binop(Iop_Add32, getIReg(rs), mkU32(u)), unop(Iop_32to16, + getIReg(rt))); + break; + + case 0x8: /* LHU[16] */ + DIP("lhu[16] r%u %u(r%u)", rt, u, rs); + putIReg(rt, unop(Iop_16Uto32, + load(Ity_I16, binop(Iop_Add32, getIReg(rs), + mkU32(u))))); + break; + + default: + vex_printf("Unrecognized bytes %04x\n", cins); + vassert(0); + } +} + +static void nano_p164x4(DisResult *dres, UShort cins) +{ + UChar rt = GPR4_list[((cins >> 6) & 0x08) | ((cins >> 5) & 0x07)]; + UChar rs = GPR4_list[((cins >> 1) & 0x08) | (cins & 0x07)]; + UChar rd = rt; + + switch (cins & 0x108) { + case 0x00: { /* ADDU[4x4] */ + DIP("addu[4x4] r%u, r%u, r%u", rd, rs, rt); + putIReg(rd, binop(Iop_Add32, getIReg(rs), getIReg(rt))); + break; + } + + case 0x08: { /* MUL[4x4] */ + DIP("mul[4x4] r%u, r%u, r%u", rd, rs, rt); + putIReg(rd, binop(Iop_Mul32, getIReg(rs), getIReg(rt))); + break; + } + + default: + vassert(0); + } +} + +static void nano_pgpbh(DisResult *dres, UInt cins) +{ + UChar rt = (cins >> 21) & 0x1F; + UChar rs = 28; + UInt u = cins & 0x3FFFF; + + switch ((cins >> 18) & 7) { + case LBGP: { /* lb[gp] */ + DIP("lb[gp] r%u %u(r%u)", rt, u, rs); + putIReg(rt, unop(Iop_8Sto32, + load(Ity_I8, binop(Iop_Add32, getIReg(rs), + mkU32(u))))); + break; + } 
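+ /* The other handled cases in this pool follow the same pattern: a zero-extended 18-bit offset u added to r28 ($gp); the halfword forms clear the low offset bit. */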
+ + case SBGP: { /* sb[gp] */ + DIP("sb[gp] r%u %u(r%u)", rt, u, rs); + store(binop(Iop_Add32, getIReg(rs), mkU32(u)), unop(Iop_32to8, + getIReg(rt))); + break; + } + + case LBUGP: { /* lbu[gp] */ + DIP("lbu[gp] r%u %u(r%u)", rt, u, rs); + putIReg(rt, unop(Iop_8Uto32, + load(Ity_I8, binop(Iop_Add32, getIReg(rs), + mkU32(u))))); + break; + } + + case ADDIUGPB: { /* addiu[gp.b] */ + DIP("addiu r%u, r%u, 0x%X", rt, rs, u); + putIReg(rt, binop(Iop_Add32, getIReg(rs), mkU32(u))); + break; + } + + case PGPLH: { + if (cins & 0x01) { /* lhu[gp] */ + DIP("lhu[gp] r%u %u(r%u)", rt, u, rs); + putIReg(rt, unop(Iop_16Uto32, + load(Ity_I16, binop(Iop_Add32, getIReg(rs), + mkU32(u & 0x3FFFE))))); + } else { /* lh[gp] */ + DIP("lh[gp] r%u %u(r%u)", rt, u, rs); + putIReg(rt, unop(Iop_16Sto32, + load(Ity_I16, binop(Iop_Add32, getIReg(rs), + mkU32(u & 0x3FFFE))))); + } + + break; + } + + case PGPSH: { + if (cins & 0x01) { + vassert(0); + } else { /* sh[gp] */ + DIP("sh[gp] r%u %u(r%u)", rt, u, rs); + store(binop(Iop_Add32, getIReg(rs), mkU32(u & 0x3FFFE)), + unop(Iop_32to16, getIReg(rt))); + } + + break; + } + + case PGPCP1: { + vassert(0); + break; + } + + case PGPM64: { + vassert(0); + break; + } + } +} + +static void nano_pj(DisResult *dres, UInt cins) +{ + UChar rt = (cins >> 21) & 0x1F; + UChar rs = (cins >> 16) & 0x1F; + + switch ((cins >> 12) & 0x0F) { + case JALRC32: { /* JARLC[32] */ + DIP("jalrc[32] r%u, r%u", rt, rs); + putIReg(rt, mkU32(guest_PC_curr_instr + 4)); + putPC(getIReg(rs)); + dres->jk_StopHere = Ijk_Call; + dres->whatNext = Dis_StopHere; + break; + } + + case JALRCHB: { + DIP("jalrc.hb r%u r%u", rt, rs); + putIReg(rt, mkU32(guest_PC_curr_instr + 4)); + putPC(getIReg(rs)); + dres->jk_StopHere = Ijk_Call; + dres->whatNext = Dis_StopHere; + // clear_hazards() + break; + } + + case PBALRSC: { + if (rt == 0) { /* brsc */ + DIP("brsc r%u", rs); + IRTemp t1 = newTemp(Ity_I32); + assign(t1, binop(Iop_Add32, mkU32(guest_PC_curr_instr + 4), + binop(Iop_Shl32, getIReg(rs), mkU8(0x01)))); + putPC(mkexpr(t1)); + } else { /* balrsc */ + DIP("balrsc r%u, r%u", rs, rt); + IRTemp t1 = newTemp(Ity_I32); + assign(t1, binop(Iop_Add32, mkU32(guest_PC_curr_instr + 4), + binop(Iop_Shl32, getIReg(rs), mkU8(0x01)))); + putIReg(rt, mkU32(guest_PC_curr_instr + 4)); + putPC(mkexpr(t1)); + } + } + break; + } +} + +static void nano_pgpw(DisResult *dres, UInt cins) +{ + UChar rt = (cins >> 21) & 0x1F; + UChar rs = 28; + UInt u = cins & 0x1FFFFC; + + switch (cins & 0x03) { + case PGPW_ADDIU: { /* addiu[gp.w] */ + DIP("addiu[gp.w] r%u, r%u, %u", rt, rs, u); + putIReg(rt, binop(Iop_Add32, getIReg(rs), mkU32(u))); + break; + } + + case PGPW_LW: { /* lw[gp] */ + DIP("lw[gp] r%u, %u(r%u)", rt, u, rs); + putIReg(rt, load(Ity_I32, binop(Iop_Add32, getIReg(rs), mkU32(u)))); + break; + } + + case PGPW_SW: { /* sw[gp] */ + DIP("sw[gp] r%u, %u(r%u)", rt, u, rs); + store(binop(Iop_Add32, getIReg(rs), mkU32(u)), getIReg(rt)); + break; + } + } +} + +static void dis_nanoMIPS16(DisResult *dres, UShort cins) +{ + switch (cins >> 10) { + case P16A2: + nano_p16a2(dres, cins); + break; + + case BC16: { + Int s = extend_sign((cins & 0x3FE) | ((cins & 1) << 10), 11); + DIP("bc %0X", guest_PC_curr_instr + 2 + s); + putPC(mkU32(guest_PC_curr_instr + 2 + s)); + dres->jk_StopHere = Ijk_Boring; + dres->whatNext = Dis_StopHere; + break; + } + + case P16MV: + nano_p16mv(dres, cins); + break; + + case P16SHIFT: + nano_p16shift(dres, cins); + break; + + case P16C: + nano_p16c(dres, cins); + break; + + case P16BR: + nano_p16br(dres, cins); + break; 
+ break; + + case P16ADDU: { + UChar rt = GPR3_list[(cins >> 7) & 0x7]; + UChar rs = GPR3_list[(cins >> 4) & 0x7]; + UChar rd = GPR3_list[(cins >> 1) & 0x7]; + + if (cins & 1) { /* SUBU[16] */ + DIP("subu r%u, r%u, r%u", rd, rs, rt); + putIReg(rd, binop(Iop_Sub32, getIReg(rs), getIReg(rt))); + } else { /* ADDU[16] */ + DIP("addu r%u, r%u, r%u", rd, rs, rt); + putIReg(rd, binop(Iop_Add32, getIReg(rs), getIReg(rt))); + } + + break; + } + + case LI16: { + UChar rt = GPR3_list[(cins >> 7) & 0x07]; + UChar eu = cins & 0x7F; + Int s; + + if (eu == 127) s = -1; + else s = eu; + + DIP("li r%u, %d", rt, s); + putIReg(rt, mkU32(s)); + break; + } + + case BNEZC16: { + UChar rt = GPR3_list[(cins >> 7) & 0x7]; + Int s = (Char)((cins & 0x7E) | (cins << 7)); + DIP("bnezc r%u, %X", rt, guest_PC_curr_instr + 2 + s); + ir_for_branch(dres, binop(Iop_CmpNE32, getIReg(rt), mkU32(0)), 2, s); + break; + } + + case BEQZC16: { + UChar rt = GPR3_list[(cins >> 7) & 0x7]; + Int s = (Char)((cins & 0x7E) | (cins << 7)); + DIP("beqzc r%u, %X", rt, guest_PC_curr_instr + 2 + s); + ir_for_branch(dres, binop(Iop_CmpEQ32, getIReg(rt), mkU32(0)), 2, s); + break; + } + + case P16LB: + nano_p16lb(dres, cins); + break; + + case P16LH: + nano_p16lh(dres, cins); + break; + + case P16SR: + nano_p16sr(dres, cins); + break; + + case P16A1: + nano_p16a1(dres, cins); + break; + + case ANDI16: { /* ANDI[16] */ + UChar rt = GPR3_list[(cins >> 7) & 0x7]; + UChar rs = GPR3_list[(cins >> 4) & 0x7]; + UChar eu = cins & 0xF; + UInt u; + + if (eu == 12) u = 0x00FF; + else if (eu == 13) u = 0xFFFF; + else u = (UInt)eu; + + DIP("andi[16] r%u, r%u, %u", rt, rs, u); + putIReg(rt, binop(Iop_And32, getIReg(rs), mkU32(u))); + break; + } + + case LW16: { /* LW[16] */ + UChar rt = GPR3_list[(cins >> 7) & 0x7]; + UChar rs = GPR3_list[(cins >> 4) & 0x7]; + UChar u = (cins & 0x0F) << 2; + DIP("lw[16] r%u, %u(r%u)", rt, u, rs); + putIReg(rt, load(Ity_I32, binop(Iop_Add32, getIReg(rs), mkU32(u)))); + break; + } + + case LWSP: { /* LW[SP] */ + UChar rt = (cins >> 5) & 0x1F; + UChar rs = 29; + UChar u = (cins & 0x1F) << 2; + DIP("lw[SP] r%u, %u(r%u)", rt, u, rs); + putIReg(rt, load(Ity_I32, binop(Iop_Add32, getIReg(rs), mkU32(u)))); + break; + } + + case LWGP16: { /* LW[GP16] */ + UChar rt = GPR3_list[(cins >> 7) & 0x07]; + UChar rs = 28; + UInt u = (cins & 0x7F) << 2; + DIP("lw[GP16] r%u, %u(r%u)", rt, u, rs); + putIReg(rt, load(Ity_I32, binop(Iop_Add32, getIReg(rs), mkU32(u)))); + break; + } + + case LW4X4: { /* LW[4x4] */ + UChar rt = GPR4_list[((cins >> 6) & 0x08) | ((cins >> 5) & 0x07)]; + UChar rs = GPR4_list[((cins >> 1) & 0x08) | (cins & 0x07)]; + UChar u = (cins & 0x08) | ((cins >> 6) & 0x04); + DIP("lw[4x4] r%u, %u(r%u)", rt, u, rs); + putIReg(rt, load(Ity_I32, binop(Iop_Add32, getIReg(rs), mkU32(u)))); + break; + } + + case SW16: { /* SW[16] */ + UChar rt = GPR3_src_store_list[(cins >> 7) & 0x7]; + UChar rs = GPR3_list[(cins >> 4) & 0x7]; + UChar u = (cins & 0x0F) << 2; + DIP("sw[16] r%u, %u(r%u)", rt, u, rs); + store(binop(Iop_Add32, getIReg(rs), mkU32(u)), getIReg(rt)); + break; + } + + case SWSP: { /* SW[SP] */ + UChar rt = (cins >> 5) & 0x1F; + UChar rs = 29; + UChar u = (cins & 0x1F) << 2; + DIP("sw[SP] r%u, %u(r%u)", rt, u, rs); + store(binop(Iop_Add32, getIReg(rs), mkU32(u)), getIReg(rt)); + break; + } + + case SWGP16: { /* SW[GP16] */ + UChar rt = GPR3_src_store_list[(cins >> 7) & 0x07]; + UChar rs = 28; + UInt u = (cins & 0x7F) << 2; + DIP("sw[GP16] r%u, %u(r%u)", rt, u, rs); + store(binop(Iop_Add32, getIReg(rs), mkU32(u)), getIReg(rt)); + 
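/* The source register comes from GPR3_src_store_list rather than GPR3_list, presumably so that $zero can be encoded as the value to store. */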
break; + } + + case SW4X4: { /* SW[4x4] */ + UChar rt = GPR4_zero_list[((cins >> 6) & 0x08) | ((cins >> 5) & 0x07)]; + UChar rs = GPR4_list[((cins >> 1) & 0x08) | (cins & 0x07)]; + UChar u = (cins & 0x08) | ((cins >> 6) & 0x04); + DIP("sw[4x4] r%u, %u(r%u)", rt, u, rs); + store(binop(Iop_Add32, getIReg(rs), mkU32(u)), getIReg(rt)); + break; + } + + case P164X4: { /* P16.4X4 pool */ + nano_p164x4(dres, cins); + break; + } + + case MOVEP: { /* MOVEP */ + UChar rd1 = GPR2_reg1_list[((cins >> 2) & 0x02) | ((cins >> 8) & 0x01)]; + UChar rd2 = GPR2_reg2_list[((cins >> 2) & 0x02) | ((cins >> 8) & 0x01)]; + UChar rs1 = GPR4_zero_list[((cins >> 1) & 0x08) | (cins & 0x07)]; + UChar rs2 = GPR4_zero_list[((cins >> 6) & 0x08) | ((cins >> 5) & 0x07)]; + DIP("MOVEP r%u, r%u, r%u, r%u", rd1, rd2, rs1, rs2); + putIReg(rd1, getIReg(rs1)); + putIReg(rd2, getIReg(rs2)); + break; + } + + case MOVEPREV: { /* MOVEP[REV] */ + UChar rd1 = GPR4_list[((cins >> 1) & 0x08) | (cins & 0x07)]; + UChar rd2 = GPR4_list[((cins >> 6) & 0x08) | ((cins >> 5) & 0x07)]; + UChar rs1 = GPR2_reg1_list[((cins >> 2) & 0x02) | ((cins >> 8) & 0x01)]; + UChar rs2 = GPR2_reg2_list[((cins >> 2) & 0x02) | ((cins >> 8) & 0x01)]; + DIP("MOVEP r%u, r%u, r%u, r%u", rd1, rd2, rs1, rs2); + putIReg(rd1, getIReg(rs1)); + putIReg(rd2, getIReg(rs2)); + break; + } + + case BALC16: { + Int s = extend_sign((cins & 0x3FE) | ((cins & 0x1) << 10), 11); + DIP("balc %0X", guest_PC_curr_instr + 2 + s); + putIReg(31, mkU32(guest_PC_curr_instr + 2)); + putPC(mkU32(guest_PC_curr_instr + 2 + s)); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = Ijk_Call; + break; + } + + default: + vex_printf("Unsupported 16bit: %04X\n", cins); + vassert(0); + break; + } + + dres->len = 2; +} + +static void dis_nanoMIPS32(DisResult *dres, UInt cins) +{ + + switch (cins >> 26) { + case P_ADDIURI: { + UChar rt = (cins >> 21) & 0x1F; + + if (rt != 0) { + UChar rs = (cins >> 16) & 0x1F; + UInt u = cins & 0xFFFF; + DIP("addiu r%u, r%u, %u", rt, rs, u); + putIReg(rt, binop(Iop_Add32, getIReg(rs), mkU32(u))); + } else { + nano_pri(dres, cins); + } + + break; + } + + case ADDIUPC32: { + UChar rt = (cins >> 21) & 0x1F; + Int s = extend_sign((cins & 0x1FFFFE) | ((cins & 1) << 21), 22); + DIP("addiupc r%u, 0x%X", rt, guest_PC_curr_instr + 4 + s); + putIReg(rt, mkU32(guest_PC_curr_instr + 4 + s)); + break; + } + + case MOVE_BALC: { + Int s = extend_sign((cins & 0x1FFFFE) | ((cins & 1) << 21), 22); + UChar rt = GPR4_zero_list[((cins >> 21) & 0x07) + | ((cins >> 22) & 0x08)]; + UChar rd = (cins & 0x1000000) ? 
5 : 4; /* GPR1_list */ + DIP("move.balc r%u, r%u, %0X", rd, rt, guest_PC_curr_instr + 4 + s); + putIReg(rd, getIReg(rt)); + putIReg(31, mkU32(guest_PC_curr_instr + 4)); + putPC(mkU32(guest_PC_curr_instr + 4 + s)); + dres->jk_StopHere = Ijk_Call; + dres->whatNext = Dis_StopHere; + break; + } + + case PLSU12: + nano_plsu12(dres, cins); + break; + + case PGPW: + nano_pgpw(dres, cins); + break; + + case PU12: + nano_pu12(dres, cins); + break; + + case P32A: + nano_p32a(dres, cins); + break; + + case PGPBH: + nano_pgpbh(dres, cins); + break; + + case PBAL: + nano_pbal(dres, cins); + break; + + case PBR1: + nano_pbr1(dres, cins); + break; + + case PBR2: + nano_pbr2(dres, cins); + break; + + case PBRI: + nano_pbri(dres, cins); + break; + + case PLSS9: + nano_plss9(dres, cins); + break; + + case P_LUI: + if (cins & 0x02) { /* ALUIPC */ + UChar rt = (cins >> 21) & 0x1F; + UInt s = (cins & 0x1FF000) | ((cins & 0xFFC) << 19) | (cins << 31); + DIP("aluipc r%u %08X", rt, s); + putIReg(rt, binop(Iop_And32, mkU32(guest_PC_curr_instr + s + 4), + unop(Iop_Not32, mkU32(0xFFF)))); + } else { /* LUI */ + UChar rt = (cins >> 21) & 0x1F; + UInt s = (cins & 0x1FF000) | ((cins & 0xFFC) << 19) | (cins << 31); + DIP("lui r%u %08X", rt, s); + putIReg(rt, mkU32(s)); + } + + break; + + case PJ: { + nano_pj(dres, cins); + break; + } + + default: + vex_printf("Unsupported 32bit: %08X\n", cins); + vassert(0); + break; + } + + dres->len = 4; +} + +static void dis_nanoMIPS48(DisResult *dres, ULong cins) +{ + + UChar rt = (cins >> 37) & 0x1F; + UInt x = (UInt)cins; + + switch ((cins >> 32) & 0x1F) { + case P48I_LI: + DIP("li r%u, 0x%X", rt, x); + putIReg(rt, mkU32(x)); + break; + + case P48I_ADDIU: + DIP("addiu r%u, r%u, 0x%X", rt, rt, x); + putIReg(rt, binop(Iop_Add32, getIReg(rt), mkU32(x))); + break; + + case P48I_ADDIU_GP: + DIP("addiu r%u, r28, 0x%X", rt, x); + putIReg(rt, binop(Iop_Add32, getIReg(28), mkU32(x))); + break; + + case P48I_ADDIUPC: + DIP("addiupc r%u, 0x%X", rt, x); + putIReg(rt, mkU32(guest_PC_curr_instr + x + 6)); + break; + + case P48I_LWPC: + DIP("lwpc r%u, 0x%X", rt, x); + putIReg(rt, load(Ity_I32, mkU32(guest_PC_curr_instr + 6 + x))); + break; + + case P48I_SWPC: + DIP("swpc r%u, 0x%X", rt, x); + store(mkU32(guest_PC_curr_instr + 6 + x), getIReg(rt)); + break; + + default: + vex_printf("Unsupported 48bit: %012llX\n", cins); + vassert(0); + break; + } + + dres->len = 6; +} + +static Bool check_for_special_requests_nanoMIPS(DisResult *dres, + const UChar *code) +{ +/* 8000 c04d srl zero, zero, 13 + 8000 c05d srl zero, zero, 29 + 8000 c043 srl zero, zero, 3 + 8000 c053 srl zero, zero, 19 */ + const UInt word1 = 0x8000C04D; + const UInt word2 = 0x8000C05D; + const UInt word3 = 0x8000C043; + const UInt word4 = 0x8000C053; + if (getUInt(code + 0) == word1 && getUInt(code + 4) == word2 && + getUInt(code + 8) == word3 && getUInt(code + 12) == word4) { + /* Got a "Special" instruction preamble. Which one is it? 
*/ + if (getUInt(code + 16) == 0x218C6290 /* or t0, t0, t0 */ ) { + /* $a0 = client_request ( $a1 ) */ + DIP("a0 = client_request(a1)"); + dres->jk_StopHere = Ijk_ClientReq; + dres->whatNext = Dis_StopHere; + dres->len = 20; + return True; + } else if (getUInt(code + 16) == 0x21AD6A90 /* or t1, t1, t1 */ ) { + /* $a0 = guest_NRADDR */ + DIP("a0 = guest_NRADDR"); + putIReg(11, IRExpr_Get(offsetof(VexGuestMIPS32State, guest_NRADDR), + Ity_I32)); + dres->len = 20; + return True; + } else if (getUInt(code + 16) == 0x21CE7290 /* or t2, t2, t2 */ ) { + /* branch-and-link-to-noredir $25 */ + DIP("branch-and-link-to-noredir t9"); + putIReg(31, mkU32(guest_PC_curr_instr + 20)); + putPC(getIReg(25)); + dres->jk_StopHere = Ijk_NoRedir; + dres->whatNext = Dis_StopHere; + dres->len = 20; + return True; + } else if (getUInt(code + 16) == 0x21EF7A90 /* or t3, t3, t3 */ ) { + /* IR injection */ + DIP("IR injection"); +#if defined (_MIPSEL) + vex_inject_ir(irsb, Iend_LE); +#elif defined (_MIPSEB) + vex_inject_ir(irsb, Iend_BE); +#endif + + stmt(IRStmt_Put(offsetof(VexGuestMIPS32State, guest_CMSTART), + mkU32(guest_PC_curr_instr))); + stmt(IRStmt_Put(offsetof(VexGuestMIPS32State, guest_CMLEN), + mkU32(20))); + + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = Ijk_InvalICache; + dres->len = 20; + return True; + } + } + return False; +} + + +/*------------------------------------------------------------*/ +/*--- Disassemble a single instruction ---*/ +/*------------------------------------------------------------*/ + +/* Disassemble a single instruction into IR. The instruction is + located in host memory at guest_instr, and has guest IP of + guest_PC_curr_instr, which will have been set before the call + here. */ + + +/* Disassemble a single instruction into IR. The instruction + is located in host memory at &guest_code[delta]. */ +DisResult disInstr_nanoMIPS( IRSB* irsb_IN, + Bool (*resteerOkFn) ( void *, Addr ), + Bool resteerCisOk, + void* callback_opaque, + const UChar* guest_code_IN, + Long delta, + Addr guest_IP, + VexArch guest_arch, + const VexArchInfo* archinfo, + const VexAbiInfo* abiinfo, + VexEndness host_endness_IN, + Bool sigill_diag_IN ) +{ + DisResult dres; + const UChar *code; + vassert(guest_arch == VexArchNANOMIPS); + + /* Set result defaults. 
*/ + dres.whatNext = Dis_Continue; + dres.len = 0; + dres.continueAt = 0; + dres.jk_StopHere = Ijk_INVALID; + dres.hint = Dis_HintNone; + + irsb = irsb_IN; + guest_PC_curr_instr = (Addr32)guest_IP; + + code = guest_code_IN + delta; + + if (!check_for_special_requests_nanoMIPS(&dres, code)) { + UShort cins = getUShort(code); + nanoMIPSopcodes opcode = cins >> 10; + + if (opcode & P16) dis_nanoMIPS16(&dres, cins); + else if (opcode == P48I) { + ULong cinsl = (((ULong) cins ) << 32) | + (((ULong) getUShort(code + 4)) << 16) | + getUShort(code + 2); + dis_nanoMIPS48(&dres, cinsl); + } else { + UInt cinsi = (((UInt) cins ) << 16) | getUShort(code + 2); + dis_nanoMIPS32(&dres, cinsi); + } + } + + if ((dres.whatNext == Dis_Continue) || + (dres.whatNext == Dis_ResteerC) || + (dres.jk_StopHere == Ijk_Sys_syscall) || + (dres.jk_StopHere == Ijk_SigTRAP) || + (dres.jk_StopHere == Ijk_SigILL) || + (dres.jk_StopHere == Ijk_ClientReq) || + (dres.jk_StopHere == Ijk_NoRedir) || + (dres.jk_StopHere == Ijk_InvalICache)) { + putPC(mkU32(guest_PC_curr_instr + dres.len)); + } + + return dres; +} + + +/*--------------------------------------------------------------------*/ +/*--- end guest_nanomips_toIR.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/VEX/priv/host_nanomips_defs.c b/VEX/priv/host_nanomips_defs.c new file mode 100644 index 0000000000..eadac532bc --- /dev/null +++ b/VEX/priv/host_nanomips_defs.c @@ -0,0 +1,2080 @@ +/*---------------------------------------------------------------*/ +/*--- begin host_NANOMIPS_defs.c ---*/ +/*---------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2017-2018 RT-RK + mips-valgrind@rt-rk.com + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#include "libvex_basictypes.h" +#include "libvex.h" +#include "libvex_trc_values.h" + +#include "main_util.h" +#include "host_generic_regs.h" +#include "host_nanomips_defs.h" + +/* Register number for guest state pointer in host code. 
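(In the register-name table below, encoding 23 is s7.)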
*/ +#define GuestSP 23 + + +NANOMIPSInstr *NANOMIPSInstr_Imm(NANOMIPSImmOp op, HReg dst, HReg src, + UInt imm) +{ + NANOMIPSInstr *i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr)); + i->tag = NMin_Imm; + i->NMin.Imm.op = op; + i->NMin.Imm.dst = dst; + i->NMin.Imm.src = src; + i->NMin.Imm.imm = imm; + return i; +} + +NANOMIPSInstr *NANOMIPSInstr_Alu(NANOMIPSAluOp op, HReg dst, HReg srcL, + HReg srcR) +{ + NANOMIPSInstr *i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr)); + i->tag = NMin_Alu; + i->NMin.Alu.op = op; + i->NMin.Alu.dst = dst; + i->NMin.Alu.srcL = srcL; + i->NMin.Alu.srcR = srcR; + return i; +} + +NANOMIPSInstr *NANOMIPSInstr_Unary(NANOMIPSUnaryOp op, HReg dst, HReg src) +{ + NANOMIPSInstr *i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr)); + i->tag = NMin_Unary; + i->NMin.Unary.op = op; + i->NMin.Unary.dst = dst; + i->NMin.Unary.src = src; + return i; +} + +NANOMIPSInstr *NANOMIPSInstr_Cmp(NANOMIPSCondCode cond, HReg dst, HReg srcL, + HReg srcR) +{ + NANOMIPSInstr *i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr)); + i->tag = NMin_Cmp; + i->NMin.Cmp.dst = dst; + i->NMin.Cmp.srcL = srcL; + i->NMin.Cmp.srcR = srcR; + i->NMin.Cmp.cond = cond; + return i; +} + +NANOMIPSInstr *NANOMIPSInstr_Call(Addr target, + UInt argiregs, HReg guard, RetLoc rloc) +{ + UInt mask; + NANOMIPSInstr *i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr)); + i->tag = NMin_Call; + i->NMin.Call.target = target; + i->NMin.Call.argiregs = argiregs; + i->NMin.Call.guard = guard; + i->NMin.Call.rloc = rloc; + /* Only $4 ... $11 inclusive may be used as arg regs.*/ + mask = (1 << 4) | (1 << 5) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) + | (1 << 10) | (1 << 11); + vassert(0 == (argiregs & ~mask)); + vassert(is_sane_RetLoc(rloc)); + return i; +} + +NANOMIPSInstr *NANOMIPSInstr_XDirect(Addr64 dstGA, HReg address, Int offset, + HReg cond, Bool toFastEP) +{ + NANOMIPSInstr* i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr)); + i->tag = NMin_XDirect; + i->NMin.XDirect.dstGA = dstGA; + i->NMin.XDirect.addr = address; + i->NMin.XDirect.addr_offset = offset; + i->NMin.XDirect.cond = cond; + i->NMin.XDirect.toFastEP = toFastEP; + return i; +} + +NANOMIPSInstr *NANOMIPSInstr_XIndir(HReg dstGA, HReg address, Int offset, + HReg cond) +{ + NANOMIPSInstr* i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr)); + i->tag = NMin_XIndir; + i->NMin.XIndir.dstGA = dstGA; + i->NMin.XIndir.addr = address; + i->NMin.XIndir.addr_offset = offset; + i->NMin.XIndir.cond = cond; + return i; +} + +NANOMIPSInstr *NANOMIPSInstr_XAssisted(HReg dstGA, HReg address, Int offset, + HReg cond, IRJumpKind jk) +{ + NANOMIPSInstr* i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr)); + i->tag = NMin_XAssisted; + i->NMin.XAssisted.dstGA = dstGA; + i->NMin.XAssisted.addr = address; + i->NMin.XAssisted.addr_offset = offset; + i->NMin.XAssisted.cond = cond; + i->NMin.XAssisted.jk = jk; + return i; +} + +NANOMIPSInstr *NANOMIPSInstr_Load(UChar sz, HReg dst, + HReg addr, Int addr_offset) +{ + NANOMIPSInstr *i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr)); + i->tag = NMin_Load; + i->NMin.Load.sz = sz; + i->NMin.Load.addr = addr; + i->NMin.Load.addr_offset = addr_offset; + i->NMin.Load.dst = dst; + vassert(sz == 1 || sz == 2 || sz == 4); + return i; +} + +NANOMIPSInstr *NANOMIPSInstr_Store(UChar sz, HReg addr, Int addr_offset, + HReg src) +{ + NANOMIPSInstr *i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr)); + i->tag = NMin_Store; + i->NMin.Store.sz = sz; + i->NMin.Store.src = src; + i->NMin.Store.addr = addr; + i->NMin.Store.addr_offset = addr_offset; + vassert(sz == 1 || sz == 2 || sz == 4); + 
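/* The offset must fit the 12-bit unsigned immediate form used at emit time (see doMemAccess_IR below). */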
vassert(addr_offset < 0x1000); + return i; +} + +NANOMIPSInstr *NANOMIPSInstr_LoadL(UChar sz, HReg dst, + HReg addr, Int addr_offset) +{ + NANOMIPSInstr *i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr)); + i->tag = NMin_LoadL; + i->NMin.LoadL.sz = sz; + i->NMin.LoadL.addr = addr; + i->NMin.LoadL.addr_offset = addr_offset; + vassert(sz == 4); + return i; +} + +NANOMIPSInstr *NANOMIPSInstr_Cas(UChar sz, HReg oldLo, HReg oldHi, HReg addr, + HReg expdLo, HReg expdHi, + HReg dataLo, HReg dataHi) +{ + NANOMIPSInstr *i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr)); + i->tag = NMin_Cas; + i->NMin.Cas.sz = sz; + i->NMin.Cas.oldLo = oldLo; + i->NMin.Cas.addr = addr; + i->NMin.Cas.expdLo = expdLo; + i->NMin.Cas.dataLo = dataLo; + + vassert((sz == 4) || (sz == 8)); + + if (sz == 8) { + i->NMin.Cas.oldHi = oldHi; + i->NMin.Cas.expdHi = expdHi; + i->NMin.Cas.dataHi = dataHi; + } + return i; +} + +NANOMIPSInstr *NANOMIPSInstr_StoreC(UChar sz, HReg addr, Int addr_offset, + HReg src) +{ + NANOMIPSInstr *i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr)); + i->tag = NMin_StoreC; + i->NMin.StoreC.sz = sz; + i->NMin.StoreC.src = src; + i->NMin.StoreC.addr = addr; + i->NMin.StoreC.addr_offset = addr_offset; + vassert(sz == 4); + return i; +} + +NANOMIPSInstr *NANOMIPSInstr_MoveCond(NANOMIPSMoveCondOp op, HReg dst, + HReg src, HReg cond) +{ + NANOMIPSInstr *i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr)); + i->tag = NMin_MoveCond; + i->NMin.MoveCond.op = op; + i->NMin.MoveCond.dst = dst; + i->NMin.MoveCond.src = src; + i->NMin.MoveCond.cond = cond; + return i; +} + +NANOMIPSInstr *NANOMIPSInstr_EvCheck(HReg r_amCounter, + Int offset_amCounter, + HReg r_amFailAddr, + Int offset_amFailAddr) +{ + NANOMIPSInstr* i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr)); + i->tag = NMin_EvCheck; + i->NMin.EvCheck.r_amCounter = r_amCounter; + i->NMin.EvCheck.offset_amCounter = offset_amCounter; + i->NMin.EvCheck.r_amFailAddr = r_amFailAddr; + i->NMin.EvCheck.offset_amFailAddr = offset_amFailAddr; + return i; +} + +NANOMIPSInstr* NANOMIPSInstr_ProfInc ( void ) +{ + NANOMIPSInstr* i = LibVEX_Alloc_inline(sizeof(NANOMIPSInstr)); + i->tag = NMin_ProfInc; + return i; +} + +UInt ppHRegNANOMIPS(HReg r) +{ + static const HChar* regnames[32] = { + "zero", "at", "t4", "t5", "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", + "t0", "t1", "t2", "t3", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", + "t8", "t9", "k0", "k1", "gp", "sp", "fp", "ra" + }; + UChar r_no; + + if (hregIsVirtual(r)) { + return ppHReg(r); + } + + vassert(hregClass(r) == HRcInt32); + r_no = hregEncoding(r); + vassert(r_no < 32); + + return vex_printf("%s", regnames[r_no]); +} + +void ppNANOMIPSInstr(const NANOMIPSInstr* i) +{ + switch (i->tag) { + case NMin_Imm: + switch (i->NMin.Imm.op) { + case NMimm_SLL: + vex_printf("sll "); + break; + + case NMimm_SRL: + vex_printf("srl "); + break; + + case NMimm_LI: + vex_printf("LI "); + break; + + case NMimm_SRA: + vex_printf("sra "); + break; + + case NMimm_SGN: + vex_printf("SGN "); + break; + + case NMimm_ORI: + vex_printf("ori "); + break; + + case NMimm_XORI: + vex_printf("xori "); + break; + + case NMimm_ANDI: + vex_printf("andi "); + break; + + default: + vassert(0); + } + + ppHRegNANOMIPS(i->NMin.Imm.dst); + vex_printf(", "); + + if (i->NMin.Imm.op != NMimm_LI) { + ppHRegNANOMIPS(i->NMin.Imm.src); + vex_printf(", "); + } + + vex_printf("0x%X (%d)", i->NMin.Imm.imm, (Int)i->NMin.Imm.imm); + + break; + + case NMin_Alu: + + switch (i->NMin.Alu.op) { + case NMalu_SLL: + vex_printf("sllv "); + break; + + case NMalu_SRL: + 
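/* The NMalu shift ops take the shift amount from a register, hence the variable-shift (sllv/srlv/srav) mnemonics. */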
vex_printf("srlv "); + break; + + case NMalu_SRA: + vex_printf("srav "); + break; + + case NMalu_OR: + if (sameHReg(i->NMin.Alu.srcL, i->NMin.Alu.srcR)) + vex_printf("move "); + else + vex_printf("or "); + + break; + + case NMalu_XOR: + vex_printf("xor "); + break; + + case NMalu_AND: + vex_printf("and "); + break; + + case NMalu_ADD: + vex_printf("add "); + break; + + case NMalu_SUB: + vex_printf("sub "); + break; + + case NMalu_SLT: + vex_printf("slt "); + break; + + case NMalu_NOR: + vex_printf("nor "); + break; + + case NMalu_MUL: + vex_printf("mul "); + break; + + case NMalu_MULU: + vex_printf("mulu "); + break; + + case NMalu_MUH: + vex_printf("muh "); + break; + + case NMalu_MUHU: + vex_printf("muhu "); + break; + + case NMalu_DIV: + vex_printf("div "); + break; + + case NMalu_DIVU: + vex_printf("divu "); + break; + + case NMalu_MOD: + vex_printf("mod "); + break; + + case NMalu_MODU: + vex_printf("modu "); + break; + + default: + vassert(0); + } + + ppHRegNANOMIPS(i->NMin.Alu.dst); + vex_printf(", "); + ppHRegNANOMIPS(i->NMin.Alu.srcL); + + if ((i->NMin.Alu.op != NMalu_OR) || + !sameHReg(i->NMin.Alu.srcL, i->NMin.Alu.srcR)) { + vex_printf(", "); + ppHRegNANOMIPS(i->NMin.Alu.srcR); + } + + break; + + case NMin_Unary: + switch (i->NMin.Unary.op) { + case NMun_CLO: + vex_printf("clo"); + break; + + case NMun_CLZ: + vex_printf("clz"); + break; + + case NMun_NOP: + vex_printf("nop"); + break; + + default: + vassert(0); + } + if (i->NMin.Unary.op != NMun_NOP) + { + ppHRegNANOMIPS(i->NMin.Unary.dst); + vex_printf(","); + ppHRegNANOMIPS(i->NMin.Unary.src); + } + break; + case NMin_Cmp: + + switch (i->NMin.Cmp.cond) { + case NMcc_EQ: + vex_printf("EQ "); + break; + + case NMcc_NE: + vex_printf("NE "); + break; + + case NMcc_LTS: + vex_printf("LTS "); + break; + + case NMcc_LTU: + vex_printf("LTU "); + break; + + case NMcc_LES: + vex_printf("LES "); + break; + + case NMcc_LEU: + vex_printf("LEU "); + break; + + case NMcc_AL: + vex_printf("AL "); + break; + + case NMcc_NV: + vex_printf("NV "); + break; + + default: + vassert(0); + } + + ppHRegNANOMIPS(i->NMin.Cmp.dst); + vex_printf(", "); + ppHRegNANOMIPS(i->NMin.Cmp.srcL); + vex_printf(", "); + ppHRegNANOMIPS(i->NMin.Cmp.srcR); + + break; + + case NMin_Call: + vex_printf("CALL 0x%lX, #%X, ", i->NMin.Call.target, + i->NMin.Call.argiregs); + ppHRegNANOMIPS(i->NMin.Call.guard); + break; + + case NMin_XDirect: + vex_printf("(xDirect) "); + if (!hregIsInvalid(i->NMin.XDirect.cond)) { + vex_printf("beqc "); + ppHRegNANOMIPS(i->NMin.XDirect.cond); + vex_printf(", zero, 12; "); + } + vex_printf("LI a5, 0x%08lX; ", i->NMin.XDirect.dstGA); + vex_printf("sw a5, %d(", i->NMin.XDirect.addr_offset); + ppHRegNANOMIPS(i->NMin.XDirect.addr); + vex_printf("); LI a5, <%s>; ", i->NMin.XDirect.toFastEP ? 
+ "disp_cp_chain_me_to_fastEP" : "disp_cp_chain_me_to_slowEP"); + vex_printf("jalrc a5"); + break; + + case NMin_XIndir: + vex_printf("(xIndir) "); + if (!hregIsInvalid(i->NMin.XIndir.cond)) { + vex_printf("beqc "); + ppHRegNANOMIPS(i->NMin.XIndir.cond); + vex_printf(", zero, 16; "); + } + vex_printf("sw "); + ppHRegNANOMIPS(i->NMin.XIndir.dstGA); + vex_printf(", %d(", i->NMin.XIndir.addr_offset); + ppHRegNANOMIPS(i->NMin.XIndir.addr); + vex_printf("); LI a5, ; "); + vex_printf("jalrc a5"); + break; + + case NMin_XAssisted: + vex_printf("(xAssisted) "); + if (!hregIsInvalid(i->NMin.XAssisted.cond)) { + vex_printf("beqc "); + ppHRegNANOMIPS(i->NMin.XAssisted.cond); + vex_printf(", zero, 24; "); + } + vex_printf("sw "); + ppHRegNANOMIPS(i->NMin.XAssisted.dstGA); + vex_printf(", %d(", i->NMin.XAssisted.addr_offset); + ppHRegNANOMIPS(i->NMin.XAssisted.addr); + vex_printf("); move a5, $IRJumpKind_to_TRCVAL(%d)", + (Int)i->NMin.XAssisted.jk); + vex_printf("; LI a5, ; "); + vex_printf("jalrc a5"); + break; + + case NMin_EvCheck: + vex_printf("(evCheck) "); + vex_printf("lw a5, %d(", i->NMin.EvCheck.offset_amCounter); + ppHRegNANOMIPS(i->NMin.EvCheck.r_amCounter); + vex_printf("); addiu $9, $9, -1"); + vex_printf("; sw a5, %d(", i->NMin.EvCheck.offset_amCounter); + ppHRegNANOMIPS(i->NMin.EvCheck.r_amCounter); + vex_printf("); begc a5, zero, nofail;"); + vex_printf("lw a5, %d(", i->NMin.EvCheck.offset_amFailAddr); + ppHRegNANOMIPS(i->NMin.EvCheck.r_amFailAddr); + vex_printf("); jalrc a5; nofail:"); + break; + + case NMin_ProfInc: + vex_printf("(profInc) li a5, ($NotKnownYet); " + "lw a4, 0(a5); " + "addiu $a4, a4, 1; " + "sw $a4, 0(a5); " + "sltiu at, a4, 1; " + "lw a4, 4(a5); " + "addu a4, a4, at; " + "sw a4, 4(a5); " ); + break; + + case NMin_Load: + switch (i->NMin.Load.sz) { + case 1: + vex_printf("lb "); + break; + + case 2: + vex_printf("lh "); + break; + + case 4: + vex_printf("lw "); + break; + } + + ppHRegNANOMIPS(i->NMin.Load.dst); + vex_printf(", (%d)", i->NMin.Load.addr_offset); + ppHRegNANOMIPS(i->NMin.Load.addr); + break; + + case NMin_Store: + switch (i->NMin.Store.sz) { + case 1: + vex_printf("sb "); + break; + + case 2: + vex_printf("sh "); + break; + + case 4: + vex_printf("sw "); + break; + } + + ppHRegNANOMIPS(i->NMin.Store.src); + vex_printf(", (%d)", i->NMin.Store.addr_offset); + ppHRegNANOMIPS(i->NMin.Store.addr); + break; + + case NMin_Cas: + if (i->NMin.Cas.sz == 4){ + vex_printf("cas: \n"); + + vex_printf("ll "); + ppHRegNANOMIPS(i->NMin.Cas.oldLo); + vex_printf(", 0("); + ppHRegNANOMIPS(i->NMin.Cas.addr); + vex_printf("); "); + + vex_printf("bnec "); + ppHRegNANOMIPS(i->NMin.Cas.oldLo); + vex_printf(", "); + ppHRegNANOMIPS(i->NMin.Cas.expdLo); + vex_printf(", end; "); + + vex_printf("addiu "); + ppHRegNANOMIPS(i->NMin.Cas.oldLo); + vex_printf(", "); + ppHRegNANOMIPS(i->NMin.Cas.oldLo); + vex_printf(", 1; "); + + vex_printf("sc "); + ppHRegNANOMIPS(i->NMin.Cas.dataLo); + vex_printf(", 0("); + ppHRegNANOMIPS(i->NMin.Cas.addr); + vex_printf("); "); + + vex_printf("movn "); + ppHRegNANOMIPS(i->NMin.Cas.oldLo); + vex_printf(", "); + ppHRegNANOMIPS(i->NMin.Cas.expdLo); + vex_printf(", "); + ppHRegNANOMIPS(i->NMin.Cas.dataLo); + vex_printf("; end:"); + } + else{ + vassert(0); + } + break; + + case NMin_LoadL: + vex_printf("ll "); + ppHRegNANOMIPS(i->NMin.LoadL.dst); + vex_printf(", %d(", i->NMin.LoadL.addr_offset); + ppHRegNANOMIPS(i->NMin.LoadL.addr); + vex_printf("); "); + break; + + case NMin_StoreC: + vex_printf("sc "); + ppHRegNANOMIPS(i->NMin.StoreC.src); + vex_printf(", 
%d(", i->NMin.StoreC.addr_offset); + ppHRegNANOMIPS(i->NMin.StoreC.addr); + vex_printf("); "); + break; + + case NMin_MoveCond: + vassert(i->NMin.MoveCond.op == NMMoveCond_movn); + vex_printf("movn "); + ppHRegNANOMIPS(i->NMin.MoveCond.dst); + vex_printf(", "); + ppHRegNANOMIPS(i->NMin.MoveCond.src); + vex_printf(", "); + ppHRegNANOMIPS(i->NMin.MoveCond.cond); + break; + } +} + +/* --------- Helpers for register allocation. --------- */ + +void getRegUsage_NANOMIPSInstr(HRegUsage* u, const NANOMIPSInstr* i) +{ + initHRegUsage(u); + + switch (i->tag) { + case NMin_Imm: + addHRegUse(u, HRmWrite, i->NMin.Imm.dst); + + if (!hregIsInvalid(i->NMin.Imm.src)) + addHRegUse(u, HRmRead, i->NMin.Imm.src); + + return; + + case NMin_Alu: + addHRegUse(u, HRmRead, i->NMin.Alu.srcL); + addHRegUse(u, HRmRead, i->NMin.Alu.srcR); + addHRegUse(u, HRmWrite, i->NMin.Alu.dst); + + /* or Rd,Rs,Rs == mr Rd,Rs */ + if ((i->NMin.Alu.op == NMalu_OR) + && sameHReg(i->NMin.Alu.srcR, i->NMin.Alu.srcL)) { + u->isRegRegMove = True; + u->regMoveSrc = i->NMin.Alu.srcL; + u->regMoveDst = i->NMin.Alu.dst; + } + + return; + + case NMin_Cmp: + addHRegUse(u, HRmRead, i->NMin.Cmp.srcL); + addHRegUse(u, HRmRead, i->NMin.Cmp.srcR); + addHRegUse(u, HRmWrite, i->NMin.Cmp.dst); + return; + + case NMin_Unary: + addHRegUse(u, HRmRead, i->NMin.Unary.src); + addHRegUse(u, HRmWrite, i->NMin.Unary.dst); + return; + + case NMin_Call: { + UInt argir = i->NMin.Call.argiregs; + + if (!hregIsInvalid(i->NMin.Call.guard)) + addHRegUse(u, HRmRead, i->NMin.Call.guard); + + addHRegUse(u, HRmWrite, hregNANOMIPS_GPR1()); + addHRegUse(u, HRmWrite, hregNANOMIPS_GPR2()); + addHRegUse(u, HRmWrite, hregNANOMIPS_GPR3()); + addHRegUse(u, HRmWrite, hregNANOMIPS_GPR4()); + addHRegUse(u, HRmWrite, hregNANOMIPS_GPR5()); + addHRegUse(u, HRmWrite, hregNANOMIPS_GPR6()); + addHRegUse(u, HRmWrite, hregNANOMIPS_GPR7()); + addHRegUse(u, HRmWrite, hregNANOMIPS_GPR8()); + addHRegUse(u, HRmWrite, hregNANOMIPS_GPR9()); + addHRegUse(u, HRmWrite, hregNANOMIPS_GPR10()); + addHRegUse(u, HRmWrite, hregNANOMIPS_GPR11()); + addHRegUse(u, HRmWrite, hregNANOMIPS_GPR12()); + addHRegUse(u, HRmWrite, hregNANOMIPS_GPR13()); + addHRegUse(u, HRmWrite, hregNANOMIPS_GPR14()); + addHRegUse(u, HRmWrite, hregNANOMIPS_GPR15()); + addHRegUse(u, HRmWrite, hregNANOMIPS_GPR24()); + addHRegUse(u, HRmWrite, hregNANOMIPS_GPR25()); + + if (argir & (1 << 11)) addHRegUse(u, HRmRead, hregNANOMIPS_GPR11()); + + if (argir & (1 << 10)) addHRegUse(u, HRmRead, hregNANOMIPS_GPR10()); + + if (argir & (1 << 9)) addHRegUse(u, HRmRead, hregNANOMIPS_GPR9()); + + if (argir & (1 << 8)) addHRegUse(u, HRmRead, hregNANOMIPS_GPR8()); + + if (argir & (1 << 7)) addHRegUse(u, HRmRead, hregNANOMIPS_GPR7()); + + if (argir & (1 << 6)) addHRegUse(u, HRmRead, hregNANOMIPS_GPR6()); + + if (argir & (1 << 5)) addHRegUse(u, HRmRead, hregNANOMIPS_GPR5()); + + if (argir & (1 << 4)) addHRegUse(u, HRmRead, hregNANOMIPS_GPR4()); + + vassert(0 == (argir & ~((1 << 4) | (1 << 5) | (1 << 6) + | (1 << 7) | (1 << 8) | (1 << 9) | (1 << 10) + | (1 << 11)))); + return; + } + + /* XDirect/XIndir/XAssisted are also a bit subtle. They + conditionally exit the block. Hence we only need to list (1) + the registers that they read, and (2) the registers that they + write in the case where the block is not exited. (2) is + empty, hence only (1) is relevant here. 
*/ + case NMin_XDirect: + addHRegUse(u, HRmRead, i->NMin.XDirect.addr); + + if (!hregIsInvalid(i->NMin.XDirect.cond)) + addHRegUse(u, HRmRead, i->NMin.XDirect.cond); + + return; + + case NMin_XIndir: + addHRegUse(u, HRmRead, i->NMin.XIndir.dstGA); + addHRegUse(u, HRmRead, i->NMin.XIndir.addr); + + if (!hregIsInvalid(i->NMin.XDirect.cond)) + addHRegUse(u, HRmRead, i->NMin.XDirect.cond); + + return; + + case NMin_XAssisted: + addHRegUse(u, HRmRead, i->NMin.XAssisted.dstGA); + addHRegUse(u, HRmRead, i->NMin.XAssisted.addr); + + if (!hregIsInvalid(i->NMin.XAssisted.cond)) + addHRegUse(u, HRmRead, i->NMin.XAssisted.cond); + + return; + + case NMin_Load: + addHRegUse(u, HRmRead, i->NMin.Load.addr); + addHRegUse(u, HRmWrite, i->NMin.Load.dst); + return; + + case NMin_Store: + addHRegUse(u, HRmRead, i->NMin.Store.addr); + addHRegUse(u, HRmRead, i->NMin.Store.src); + return; + + case NMin_LoadL: + addHRegUse(u, HRmRead, i->NMin.LoadL.addr); + addHRegUse(u, HRmWrite, i->NMin.LoadL.dst); + return; + + case NMin_Cas: + if (i->NMin.Cas.sz == 4){ + addHRegUse(u, HRmWrite, i->NMin.Cas.oldLo); + addHRegUse(u, HRmRead, i->NMin.Cas.addr); + addHRegUse(u, HRmRead, i->NMin.Cas.expdLo); + addHRegUse(u, HRmModify, i->NMin.Cas.dataLo); + } else { + addHRegUse(u, HRmWrite, i->NMin.Cas.oldLo); + addHRegUse(u, HRmWrite, i->NMin.Cas.oldHi); + addHRegUse(u, HRmRead, i->NMin.Cas.addr); + addHRegUse(u, HRmRead, i->NMin.Cas.expdLo); + addHRegUse(u, HRmRead, i->NMin.Cas.expdHi); + addHRegUse(u, HRmModify, i->NMin.Cas.dataLo); + addHRegUse(u, HRmModify, i->NMin.Cas.dataHi); + } + return; + + case NMin_StoreC: + addHRegUse(u, HRmRead, i->NMin.StoreC.addr); + addHRegUse(u, HRmWrite, i->NMin.StoreC.src); + addHRegUse(u, HRmRead, i->NMin.StoreC.src); + return; + + case NMin_MoveCond: + addHRegUse(u, HRmWrite, i->NMin.MoveCond.dst); + addHRegUse(u, HRmRead, i->NMin.MoveCond.src); + addHRegUse(u, HRmRead, i->NMin.MoveCond.cond); + return; + + case NMin_EvCheck: + addHRegUse(u, HRmRead, i->NMin.EvCheck.r_amCounter); + addHRegUse(u, HRmRead, i->NMin.EvCheck.r_amFailAddr); + return; + + case NMin_ProfInc: + /* does not use any registers. 
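(The counter address is not known at emit time; the generated code loads a placeholder that is patched in later, so only fixed scratch registers are involved.)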
*/ + return; + + default: + ppNANOMIPSInstr(i); + vpanic("getRegUsage_NANOMIPSInstr"); + break; + } +} + +/* local helper */ +static void mapReg(HRegRemap * m, HReg * r) +{ + *r = lookupHRegRemap(m, *r); +} + +void mapRegs_NANOMIPSInstr(HRegRemap * m, NANOMIPSInstr * i) +{ + switch (i->tag) { + case NMin_Imm: + mapReg(m, &i->NMin.Imm.dst); + + if (!hregIsInvalid(i->NMin.Imm.src)) + mapReg(m, &i->NMin.Imm.src); + + break; + + case NMin_Alu: + mapReg(m, &i->NMin.Alu.srcL); + mapReg(m, &i->NMin.Alu.srcR); + mapReg(m, &i->NMin.Alu.dst); + return; + + case NMin_Cmp: + mapReg(m, &i->NMin.Cmp.srcL); + mapReg(m, &i->NMin.Cmp.srcR); + mapReg(m, &i->NMin.Cmp.dst); + return; + + case NMin_Unary: + mapReg(m, &i->NMin.Unary.src); + mapReg(m, &i->NMin.Unary.dst); + return; + + case NMin_Call: { + if (!hregIsInvalid(i->NMin.Call.guard)) + mapReg(m, &i->NMin.Call.guard); + + return; + } + + case NMin_XDirect: + mapReg(m, &i->NMin.XDirect.addr); + + if (!hregIsInvalid(i->NMin.XDirect.cond)) + mapReg(m, &i->NMin.XDirect.cond); + + return; + + case NMin_XIndir: + mapReg(m, &i->NMin.XIndir.dstGA); + mapReg(m, &i->NMin.XIndir.addr); + + if (!hregIsInvalid(i->NMin.XIndir.cond)) + mapReg(m, &i->NMin.XIndir.cond); + + return; + + case NMin_XAssisted: + mapReg(m, &i->NMin.XAssisted.dstGA); + mapReg(m, &i->NMin.XAssisted.addr); + + if (!hregIsInvalid(i->NMin.XAssisted.cond)) + mapReg(m, &i->NMin.XAssisted.cond); + + return; + + case NMin_Load: + mapReg(m, &i->NMin.Load.addr); + mapReg(m, &i->NMin.Load.dst); + return; + + case NMin_Store: + mapReg(m, &i->NMin.Store.addr); + mapReg(m, &i->NMin.Store.src); + return; + + case NMin_LoadL: + mapReg(m, &i->NMin.LoadL.addr); + mapReg(m, &i->NMin.LoadL.dst); + return; + + case NMin_Cas: + mapReg(m, &i->NMin.Cas.oldLo); + mapReg(m, &i->NMin.Cas.addr); + mapReg(m, &i->NMin.Cas.expdLo); + mapReg(m, &i->NMin.Cas.dataLo); + if (&i->NMin.Cas.sz){ + mapReg(m, &i->NMin.Cas.oldHi); + mapReg(m, &i->NMin.Cas.expdHi); + mapReg(m, &i->NMin.Cas.dataHi); + } + return; + + case NMin_StoreC: + mapReg(m, &i->NMin.StoreC.addr); + mapReg(m, &i->NMin.StoreC.src); + return; + + case NMin_MoveCond: + mapReg(m, &i->NMin.MoveCond.dst); + mapReg(m, &i->NMin.MoveCond.src); + mapReg(m, &i->NMin.MoveCond.cond); + return; + + case NMin_EvCheck: + /* We expect both amodes only to mention %ebp, so this is in + fact pointless, since %ebp isn't allocatable, but anyway.. */ + mapReg(m, &i->NMin.EvCheck.r_amCounter); + mapReg(m, &i->NMin.EvCheck.r_amFailAddr); + return; + + case NMin_ProfInc: + /* does not use any registers. */ + return; + + default: + ppNANOMIPSInstr(i); + vpanic("mapRegs_NANOMIPSInstr"); + break; + } +} + +/* Generate NANOMIPS spill/reload instructions under the direction of the + register allocator. 
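Both go through GuestStatePointer with a 12-bit offsetB, matching the Load/Store offset constraints above.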
*/ +void genSpill_NANOMIPS( /*OUT*/ HInstr** i1, /*OUT*/ HInstr** i2, HReg rreg, + Int offsetB, Bool mode64) +{ + vassert(offsetB >= 0); + vassert(offsetB < 0x1000); + vassert(!mode64); + vassert(!hregIsVirtual(rreg)); + vassert(hregClass(rreg) == HRcInt32); + *i2 = NULL; + *i1 = NANOMIPSInstr_Store(4, GuestStatePointer, offsetB, rreg); +} + +void genReload_NANOMIPS( /*OUT*/ HInstr ** i1, /*OUT*/ HInstr ** i2, HReg rreg, + Int offsetB, Bool mode64) +{ + vassert(offsetB >= 0); + vassert(offsetB < 0x1000); + vassert(!mode64); + vassert(!hregIsVirtual(rreg)); + vassert(hregClass(rreg) == HRcInt32); + *i2 = NULL; + *i1 = NANOMIPSInstr_Load(4, rreg, GuestStatePointer, offsetB); +} + +NANOMIPSInstr* genMove_NANOMIPS(HReg r_src, HReg r_dst) +{ + vassert(hregClass(r_dst) == hregClass(r_src)); + vassert(hregClass(r_src) == HRcInt32); + return NANOMIPSInstr_Alu(NMalu_OR, r_dst, r_src, r_src); +} + +/* --------- The NANOMIPS assembler --------- */ + +inline static UInt iregNo(HReg r) +{ + UInt n; + vassert(hregClass(r) == (HRcInt32)); + vassert(!hregIsVirtual(r)); + n = hregEncoding(r); + vassert(n <= 32); + return n; +} + +/* Emit 32bit instruction */ +static UChar *emit32(UChar * p, UInt w32) +{ +#if defined (_MIPSEB) + *p++ = toUChar((w32 >> 24) & 0x000000FF); + *p++ = toUChar((w32 >> 16) & 0x000000FF); + *p++ = toUChar((w32 >> 8) & 0x000000FF); + *p++ = toUChar(w32 & 0x000000FF); +#else + *p++ = toUChar((w32 >> 16) & 0x000000FF); + *p++ = toUChar((w32 >> 24) & 0x000000FF); + *p++ = toUChar(w32 & 0x000000FF); + *p++ = toUChar((w32 >> 8) & 0x000000FF); +#endif + return p; +} + +static UChar *mkFormNano2Regs12imm(UChar * p, UInt opc, UInt rt, UInt rs, + UInt opc2, UInt imm) +{ + UInt theInstr; + vassert(opc < 0x40); + vassert(rs < 0x20); + vassert(rt < 0x20); + vassert(opc2 < 0x10); + vassert(imm < 0x1000); + theInstr = ((opc << 26) | (rt << 21) | (rs << 16) | (opc2 << 12) | (imm)); + return emit32(p, theInstr); +} + +static UChar *mkFormNano2Regs16imm(UChar * p, UInt opc, UInt rt, UInt rs, + UShort imm) +{ + UInt theInstr; + vassert(opc < 0x40); + vassert(rs < 0x20); + vassert(rt < 0x20); + theInstr = ((opc << 26) | (rt << 21) | (rs << 16) | (imm)); + return emit32(p, theInstr); +} + +static UChar *mkFormNano1Reg(UChar * p, UInt opc, UInt rt, UInt opc2, + UInt imm) +{ + UInt theInstr; + vassert(opc < 0x40); + vassert(rt < 0x20); + + switch (opc) { + case 0x38: /* LUI */ + theInstr = ((opc << 26) | (rt << 21) | (imm & 0x1FF000) | + ((imm & 0x7FE00000) >> 19) | ((imm & 0x80000000) >> 31)); + return emit32(p, theInstr); + + default: + vassert(0); + } +} + +static UChar* mkFormNanoPShift(UChar * p, UInt rt, UInt rs, UInt opc2, + UInt imm) +{ + UInt theInstr; + vassert(rt < 0x20); + vassert(rs < 0x20); + vassert(opc2 < 0x10); + vassert(imm < 0x20); + + switch (opc2) { + case PSLL: /* SLL */ + case SRL32: /* SRL */ + case SRA: /* SRA */ + theInstr = ((PU12 << 26) | (rt << 21) | (rs << 16) | + (PU12_PSHIFT << 12) | (opc2 << 5) | (imm)); + return emit32(p, theInstr); + + default: + vassert(0); + } +} + +static UChar *mkFormNanoP32A0(UChar * p, UInt rt, UInt rs, UInt rd, UInt opc2) +{ + UInt theInstr; + vassert(rt < 0x20); + vassert(rs < 0x20); + vassert(rd < 0x20); + vassert(opc2 < 0x80); + + switch (opc2) { + case _POOL32A0_ADDU32: /* ADDU */ + case _POOL32A0_AND32: /* AND */ + case _POOL32A0_SUBU32: /* SUBU */ + case _POOL32A0_SLLV: /* SLLV */ + case _POOL32A0_SRLV: /* SRLV */ + case _POOL32A0_SRAV: /* SRAV */ + case _POOL32A0_XOR32: /* XOR */ + case _POOL32A0_SLT: /* SLT */ + case _POOL32A0_OR32: /* OR 
*/ + case _POOL32A0_NOR: /* NOR */ + case _POOL32A0_PSLTU: /* SLTU */ + case _POOL32A0_DIV: /* DIV */ + case _POOL32A0_DIVU: /* DIVU */ + case _POOL32A0_MOD: /* MOD */ + case _POOL32A0_MODU: /* MODU */ + case _POOL32A0_MUL32: /* MUL */ + case _POOL32A0_MULU: /* MULU */ + case _POOL32A0_MUH: /* MUH */ + case _POOL32A0_MUHU: /* MUHU */ + theInstr = ((P32A << 26) | (rt << 21) | (rs << 16) | (rd << 11) | + (opc2 << 3)); + return emit32(p, theInstr); + + case _POOL32A0_PCMOVE: /* MOVN */ + theInstr = ((P32A << 26) | (rt << 21) | (rs << 16) | (rd << 11) | + (1 << 10) | (opc2 << 3)); + return emit32(p, theInstr); + + default: + vassert(0); + } +} + +static UChar *mkFormNanoPU12(UChar * p, UInt rt, UInt rs, UInt opc2, UInt imm) +{ + UInt theInstr; + vassert(rt < 0x20); + vassert(rs < 0x20); + vassert(opc2 < 0x10); + vassert(imm < 0x1000); + + switch (opc2) { + case PU12_ANDI: /* ANDI */ + case PU12_ADDIU_NEG: /* ADDIU_NEG */ + case PU12_ORI: /* ORI */ + case PU12_SLTIU: /* SLTIU */ + case PU12_XORI: /* XORI */ + theInstr = ((PU12 << 26) | (rt << 21) | (rs << 16) | (opc2 << 12) | + (imm)); + return emit32(p, theInstr); + + default: + vassert(0); + } +} + +static UChar *mkFormNanoPBR1(UChar * p, UInt rt, UInt rs, UInt opc2, UInt imm) +{ + UInt theInstr; + vassert(rt < 0x20); + vassert(rs < 0x20); + vassert(opc2 < 0x04); + vassert(imm < 0x4000); + + theInstr = ((PBR1 << 26) | (rt << 21) | (rs << 16) | (opc2 << 14) | + (imm & 0x3FFE) | (imm >> 14)); + return emit32(p, theInstr); +} + +static UChar *mkFormNanoPBR2(UChar * p, UInt rt, UInt rs, UInt opc2, UInt imm) +{ + UInt theInstr; + vassert(rt < 0x20); + vassert(rs < 0x20); + vassert(opc2 < 0x04); + vassert(imm < 0x4000); + + theInstr = ((PBR2 << 26) | (rt << 21) | (rs << 16) | (opc2 << 14) | + (imm & 0x3FFE) | (imm >> 14)); + return emit32(p, theInstr); +} + +static UChar *mkFormNanoPLSS9(UChar * p, UInt rt, UInt rs, nanoPLSS9 opc, + UInt opc1, UInt opc2, UInt imm_ru) +{ + UInt theInstr; + vassert(rt < 0x20); + vassert(rs < 0x20); + vassert(opc < 0x04); + vassert(opc1 < 0x10); + vassert(opc2 < 0x02); + + switch (opc2){ + case LL: /* LL/SC */ + vassert(imm_ru < 0x4000); + theInstr = ((PLSS9 << 26) | (rt << 21) | (rs << 16) | (opc << 8) | + (opc1 << 11) | opc2 | (imm_ru & 0xFC) | ((imm_ru & 0x100) << 7)); + break; + case LLWP: /* LLWP/SCWP */ + vassert(imm_ru < 0x20); + theInstr = ((PLSS9 << 26) | (rt << 21) | (rs << 16) | (opc << 8) | + (opc1 << 11) | ( imm_ru << 3 ) | opc2); + break; + default: + vassert(0); + + } + return emit32(p, theInstr); +} + +static UChar *doMemAccess_IR(UChar *p, UChar sz, UChar r_dst, + HReg addr, Int addr_offset, Bool isLoad) +{ + UInt rA, opc2; + vassert(((UInt)addr_offset) < 0x1000); + rA = iregNo(addr); + opc2 = isLoad ? 0x00 : 0x01; + + switch (sz) { + case 1: + break; + + case 2: + opc2 = opc2 | 0x04; + break; + + case 4: + opc2 = opc2 | 0x08; + break; + + default: + vassert(0); + } + + p = mkFormNano2Regs12imm(p, 0x21, r_dst, rA, opc2, addr_offset); + return p; +} + +/* Load 32-bit immediate in exactely two 32-bit instructions even if it + could generate fewer. This is needed for generating fixed sized patchable + sequences. 
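Concretely this is always a lui/ori pair, even for immediates that mkLoadImm below could load with a single instruction.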
*/ +static inline UChar* mkLoadImm32_EXACTLY2(UChar* p, UInt r_dst, UInt imm) +{ + vassert(r_dst < 0x20); + /* lui r_dst, (imm >> 20) */ + p = mkFormNano1Reg(p, 0x38, r_dst, 0, imm); + /* ori r_dst, r_dst, (imm & 0xFFF) */ + p = mkFormNanoPU12(p, r_dst, r_dst, PU12_ORI, imm & 0xFFF); + return p; +} + +/* Load imm to r_dst */ +static UChar *mkLoadImm(UChar * p, UInt r_dst, UInt imm) +{ + + if (imm <= 0xFFFF) { + /* addiu[32] r_dst, 0, imm */ + p = mkFormNano2Regs16imm(p, 0x00, r_dst, 0, imm & 0xFFFF); + } else if (imm > 0xFFFFF000ULL) { + /* addiu[neg] r_dst, 0, imm */ + p = mkFormNano2Regs12imm(p, 0x20, r_dst, 0, 0x08, (~imm + 1) & 0xFFF); + } else { + /* lui r_dst, (imm >> 20) */ + p = mkFormNano1Reg(p, 0x38, r_dst, 0, imm); + imm &= 0xFFF; + + if (imm != 0) { + /* ori r_dst, r_dst, (imm & 0xFFF) */ + p = mkFormNanoPU12(p, r_dst, r_dst, PU12_ORI, imm & 0xFFF); + } + } + + return p; +} + +/* Emit an instruction into buf and return the number of bytes used. + Note that buf is not the insn's final place, and therefore it is + imperative to emit position-independent code. If the emitted + instruction was a profiler inc, set *is_profInc to True, else + leave it unchanged. */ +Int emit_NANOMIPSInstr ( /*MB_MOD*/Bool* is_profInc, + UChar* buf, Int nbuf, + const NANOMIPSInstr* i, + Bool mode64, + VexEndness endness_host, + const void* disp_cp_chain_me_to_slowEP, + const void* disp_cp_chain_me_to_fastEP, + const void* disp_cp_xindir, + const void* disp_cp_xassisted ) +{ + UChar *p = &buf[0]; + vassert(nbuf >= 32); + vassert(!mode64); + + switch (i->tag) { + case NMin_Imm: { + UInt r_dst = iregNo(i->NMin.Imm.dst); + UInt r_src = hregIsInvalid(i->NMin.Imm.src) ? + 0 : iregNo(i->NMin.Imm.src); + + switch (i->NMin.Imm.op) { + case NMimm_LI: + p = mkLoadImm(p, r_dst, i->NMin.Imm.imm); + break; + + case NMimm_SLL: + case NMimm_SRL: + case NMimm_SRA: + p = mkFormNanoPShift(p, r_dst, r_src, i->NMin.Imm.op, + i->NMin.Imm.imm); + break; + + case NMimm_SGN: + p = mkFormNanoPShift(p, r_dst, r_src, NMimm_SLL, + 32 - i->NMin.Imm.imm); + p = mkFormNanoPShift(p, r_dst, r_dst, NMimm_SRA, + 32 - i->NMin.Imm.imm); + break; + + case NMimm_ANDI: + case NMimm_ORI: + case NMimm_XORI: + p = mkFormNanoPU12(p, r_dst, r_src, i->NMin.Imm.op - 0x6, + i->NMin.Imm.imm); + break; + + default: + goto bad; + }; + + goto done; + } + + case NMin_Alu: { + UInt r_dst = iregNo(i->NMin.Alu.dst); + UInt r_srcL = iregNo(i->NMin.Alu.srcL); + UInt r_srcR = iregNo(i->NMin.Alu.srcR); + + switch (i->NMin.Alu.op) { + /* NMalu_ADD, NMalu_SUB, NMalu_AND, NMalu_OR, NMalu_NOR, + NMalu_XOR, NMalu_SLT */ + case NMalu_ADD: + /* addu[32] */ + p = mkFormNanoP32A0(p, r_srcL, r_srcR, r_dst, _POOL32A0_ADDU32); + break; + + case NMalu_SUB: + /* subu[32] */ + p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_SUBU32); + break; + + case NMalu_AND: + /* and */ + p = mkFormNanoP32A0(p, r_srcL, r_srcR, r_dst, _POOL32A0_AND32); + break; + + case NMalu_OR: + p = mkFormNanoP32A0(p, r_srcL, r_srcR, r_dst, _POOL32A0_OR32); + break; + + case NMalu_NOR: + /* nor */ + p = mkFormNanoP32A0(p, r_srcL, r_srcR, r_dst, _POOL32A0_NOR); + break; + + case NMalu_XOR: + /* xor */ + p = mkFormNanoP32A0(p, r_srcL, r_srcR, r_dst, _POOL32A0_XOR32); + break; + + case NMalu_SLT: + p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_SLT); + break; + + case NMalu_SLL: + /* sllv */ + p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_SLLV); + break; + + case NMalu_SRL: + /* srlv */ + p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_SRLV); + break; + + case NMalu_SRA: + 
/* srav */ + p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_SRAV); + break; + + case NMalu_DIV: + /* div */ + p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_DIV); + break; + + case NMalu_DIVU: + /* divu */ + p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_DIVU); + break; + + case NMalu_MOD: + /* mod */ + p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_MOD); + break; + + case NMalu_MODU: + /* modu */ + p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_MODU); + break; + + case NMalu_MUL: + /* mul */ + p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_MUL32); + break; + + case NMalu_MULU: + /* mulu */ + p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_MULU); + break; + + case NMalu_MUH: + /* muh */ + p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_MUH); + break; + + case NMalu_MUHU: + /* muhu */ + p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_MUHU); + break; + + default: + goto bad; + } + + goto done; + } + + case NMin_Unary: { + UInt r_dst = iregNo(i->NMin.Unary.dst); + UInt r_src = iregNo(i->NMin.Unary.src); + + switch (i->NMin.Unary.op) { + /* NMun_CLO, NMun_CLZ, NMun_NOP */ + case NMun_CLO: /* clo */ + p = mkFormNano2Regs16imm(p, 0x08, r_dst, r_src, 0x4B3F); + break; + + case NMun_CLZ: /* clz */ + p = mkFormNano2Regs16imm(p, 0x08, r_dst, r_src, 0x5B3F); + break; + + case NMun_NOP: /* nop (sll r0,r0,0) */ + p = mkFormNano2Regs16imm(p, 0x20, 0, 0, 0xC000); + break; + } + + goto done; + } + + case NMin_Cmp: { + UInt r_srcL = iregNo(i->NMin.Cmp.srcL); + UInt r_srcR = iregNo(i->NMin.Cmp.srcR); + UInt r_dst = iregNo(i->NMin.Cmp.dst); + + switch (i->NMin.Cmp.cond) { + case NMcc_EQ: + /* xor r_dst, r_srcL, r_srcR + sltiu r_dst, r_dst, 1 */ + p = mkFormNanoP32A0(p, r_srcL, r_srcR, r_dst, _POOL32A0_XOR32); + p = mkFormNanoPU12(p, r_dst, r_dst, PU12_SLTIU, 1); + break; + + case NMcc_NE: + /* xor r_dst, r_srcL, r_srcR + sltu r_dst, zero, r_dst */ + p = mkFormNanoP32A0(p, r_srcL, r_srcR, r_dst, _POOL32A0_XOR32); + p = mkFormNanoP32A0(p, r_dst, 0, r_dst, _POOL32A0_PSLTU); + break; + + case NMcc_LTS: + /* slt r_dst, r_srcL, r_srcR */ + p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_SLT); + break; + + case NMcc_LTU: + /* sltu r_dst, r_srcL, r_srcR */ + p = mkFormNanoP32A0(p, r_srcR, r_srcL, r_dst, _POOL32A0_PSLTU); + break; + + case NMcc_LES: + /* slt r_dst, r_srcR, r_srcL + xori r_dst, r_dst, 1 */ + p = mkFormNanoP32A0(p, r_srcL, r_srcR, r_dst, _POOL32A0_SLT); + p = mkFormNanoPU12(p, r_dst, r_dst, PU12_XORI, 1); + break; + + case NMcc_LEU: + /* sltu r_dst, r_srcR, r_srcL + xori r_dst, r_dst, 1 */ + p = mkFormNanoP32A0(p, r_srcL, r_srcR, r_dst, _POOL32A0_PSLTU); + p = mkFormNanoPU12(p, r_dst, r_dst, PU12_XORI, 1); + break; + + default: + goto bad; + } + + goto done; + } + + case NMin_Call: { + /* If this is conditional, create a conditional + jump over the rest of it. 
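+         (Summary of the sequence emitted below: when the call is guarded, the
+         return-value register $a0, and $a1 as well for a two-register result,
+         is first preloaded with 0x55555555 so a skipped call still leaves a
+         recognisable junk value; then "beqc guard, $zero, 12" branches over
+         the three remaining instructions, namely the two-instruction load of
+         the call target into r25 and the "jalrc r25".)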
*/ + if (!hregIsInvalid(i->NMin.Call.guard)) { + switch (i->NMin.Call.rloc.pri) { + case RLPri_2Int: + /* li $a0, 0x55555555 */ + p = mkLoadImm(p, 4, 0x55555555); + /* move $a1, $a0 */ + p = mkFormNanoP32A0(p, 0, 4, 5, _POOL32A0_OR32); + break; + + case RLPri_Int: + /* li $a1, 0x55555555 */ + p = mkLoadImm(p, 4, 0x55555555); + break; + + case RLPri_None: + break; + + default: + vassert(0); + } + + /* Skip 3 instructions + beqc $[cond], $0, 12 */ + p = mkFormNanoPBR1(p, iregNo(i->NMin.Call.guard), 0, + PBR1_BEQC32, 12); + } + + /* li $25, #target */ + p = mkLoadImm32_EXACTLY2(p, 25, i->NMin.Call.target); + /* jalrc $25 */ + p = mkFormNano2Regs16imm(p, 0x12, 31, 25, 0); + + goto done; + } + + case NMin_XDirect: { + /* NB: what goes on here has to be very closely coordinated + with the chainXDirect_NANOMIPS and + unchainXDirect_NANOMIPS below. */ + /* We're generating chain-me requests here, so we need to be + sure this is actually allowed -- no-redir translations + can't use chain-me's. Hence: */ + vassert(disp_cp_chain_me_to_slowEP != NULL); + vassert(disp_cp_chain_me_to_fastEP != NULL); + + /* Use ptmp for backpatching conditional jumps. */ + + /* If this is conditional, create a conditional + jump over the rest of it. */ + if (!hregIsInvalid(i->NMin.XDirect.cond)) { + /* Skip 6 instructions + beqc $[cond], $0, 24 */ + p = mkFormNanoPBR1(p, iregNo(i->NMin.XDirect.cond), 0, + PBR1_BEQC32, 24); + } + + /* Update the guest PC. */ + /* li r9, dstGA */ + /* sw r9, (offset)addr */ + p = mkLoadImm32_EXACTLY2(p, 9, i->NMin.XDirect.dstGA); + p = doMemAccess_IR(p, 4, 9, i->NMin.XDirect.addr, + i->NMin.XDirect.addr_offset, + False /* Store */); + /* --- FIRST PATCHABLE BYTE follows --- */ + /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're + calling to) backs up the return address, so as to find the + address of the first patchable byte. So: don't change the + number of instructions (3) below. */ + /* move r9, VG_(disp_cp_chain_me_to_{slowEP,fastEP}) */ + /* jr r9 */ + const void* disp_cp_chain_me + = i->NMin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP + : disp_cp_chain_me_to_slowEP; + p = mkLoadImm32_EXACTLY2(p, /*r*/ 9, + (Addr)disp_cp_chain_me); + + /* jalrc r9 */ + p = mkFormNano2Regs16imm(p, 0x12, 31, 9, 0); /* p += 4 */ + /* --- END of PATCHABLE BYTES --- */ + + goto done; + } + + case NMin_XIndir: { + /* We're generating transfers that could lead indirectly to a + chain-me, so we need to be sure this is actually allowed -- + no-redir translations are not allowed to reach normal + translations without going through the scheduler. That means + no XDirects or XIndirs out from no-redir translations. + Hence: */ + vassert(disp_cp_xindir != NULL); + + /* If this is conditional, create a conditional + jump over the rest of it. */ + if (!hregIsInvalid(i->NMin.XDirect.cond)) { + /* Skip 4 instructions + beqc $[cond], $0, 16 */ + p = mkFormNanoPBR1(p, iregNo(i->NMin.XIndir.cond), 0, + PBR1_BEQC32, 16); + } + + /* sw r-dstGA, amPC */ + p = doMemAccess_IR(p, 4, iregNo(i->NMin.XIndir.dstGA), + i->NMin.XIndir.addr, + i->NMin.XIndir.addr_offset, + False /* Store */); + + /* move r9, VG_(disp_cp_xindir) */ + p = mkLoadImm32_EXACTLY2(p, /*r*/ 9, + (Addr)disp_cp_xindir); + /* jalrc r9 */ + p = mkFormNano2Regs16imm(p, 0x12, 31, 9, 0); /* p += 4 */ + + goto done; + } + + case NMin_XAssisted: { + /* First off, if this is conditional, create a conditional jump + over the rest of it. Or at least, leave a space for it that + we will shortly fill in. 
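+         (Note: the guarded branch below has to step over the whole of the
+         rest of this case, i.e. the store of dstGA, the two-instruction load
+         of the TRC return code, the two-instruction load of
+         disp_cp_xassisted and the jalrc: six instructions, 24 bytes, which
+         is the offset actually passed to mkFormNanoPBR1.)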
*/ + + /* If this is conditional, create a conditional + jump over the rest of it. */ + if (!hregIsInvalid(i->NMin.XAssisted.cond)) { + /* Skip 4 instructions + beqc $[cond], $0, 12 */ + p = mkFormNanoPBR1(p, iregNo(i->NMin.XAssisted.cond), 0, + PBR1_BEQC32, 24); + } + /* sw r-dstGA, amPC */ + p = doMemAccess_IR(p, 4, iregNo(i->NMin.XAssisted.dstGA), + i->NMin.XAssisted.addr, + i->NMin.XAssisted.addr_offset, + False /* Store */); + + UInt trcval = 0; + + switch (i->NMin.XAssisted.jk) { + case Ijk_ClientReq: + trcval = VEX_TRC_JMP_CLIENTREQ; + break; + + case Ijk_Sys_syscall: + trcval = VEX_TRC_JMP_SYS_SYSCALL; + break; + + /* case Ijk_Sys_int128: + trcval = VEX_TRC_JMP_SYS_INT128; + break; + */ + + case Ijk_Yield: + trcval = VEX_TRC_JMP_YIELD; + break; + + case Ijk_EmWarn: + trcval = VEX_TRC_JMP_EMWARN; + break; + + case Ijk_EmFail: + trcval = VEX_TRC_JMP_EMFAIL; + break; + + /* case Ijk_MapFail: + trcval = VEX_TRC_JMP_MAPFAIL; + break; + */ + + case Ijk_NoDecode: + trcval = VEX_TRC_JMP_NODECODE; + break; + + case Ijk_InvalICache: + trcval = VEX_TRC_JMP_INVALICACHE; + break; + + case Ijk_NoRedir: + trcval = VEX_TRC_JMP_NOREDIR; + break; + + case Ijk_SigILL: + trcval = VEX_TRC_JMP_SIGILL; + break; + + case Ijk_SigTRAP: + trcval = VEX_TRC_JMP_SIGTRAP; + break; + + /* case Ijk_SigSEGV: + trcval = VEX_TRC_JMP_SIGSEGV; + break; + */ + + case Ijk_SigBUS: + trcval = VEX_TRC_JMP_SIGBUS; + break; + + case Ijk_SigFPE_IntDiv: + trcval = VEX_TRC_JMP_SIGFPE_INTDIV; + break; + + case Ijk_SigFPE_IntOvf: + trcval = VEX_TRC_JMP_SIGFPE_INTOVF; + break; + + case Ijk_Boring: + trcval = VEX_TRC_JMP_BORING; + break; + + /* We don't expect to see the following being assisted. + case Ijk_Ret: + case Ijk_Call: + fallthrough */ + default: + ppIRJumpKind(i->NMin.XAssisted.jk); + vpanic("emit_NANOMIPSInstr.NMin_XAssisted: unexpected jump" + "kind"); + } + + vassert(trcval != 0); + p = mkLoadImm32_EXACTLY2(p, /*r*/ GuestSP, trcval); + + /* move r9, VG_(disp_cp_xassisted) */ + p = mkLoadImm32_EXACTLY2(p, /*r*/ 9, + (ULong)(Addr)disp_cp_xassisted); + /* jalrc r9 */ + p = mkFormNano2Regs16imm(p, 0x12, 31, 9, 0); /* p += 4 */ + + goto done; + } + + case NMin_Load: + p = doMemAccess_IR(p, i->NMin.Load.sz, iregNo(i->NMin.Load.dst), + i->NMin.Load.addr, + i->NMin.Load.addr_offset, + True /* Load */); + goto done; + break; + + case NMin_Store: + p = doMemAccess_IR(p, i->NMin.Store.sz, iregNo(i->NMin.Store.src), + i->NMin.Store.addr, + i->NMin.Store.addr_offset, + False /* Store */); + goto done; + break; + + case NMin_LoadL: { + p = mkFormNanoPLSS9(p, iregNo(i->NMin.LoadL.dst), + iregNo(i->NMin.LoadL.addr), + PLSS1, PLL, LL, i->NMin.LoadL.addr_offset); + goto done; + break; + } + + case NMin_StoreC: { + p = mkFormNanoPLSS9(p, iregNo(i->NMin.StoreC.src), + iregNo(i->NMin.StoreC.addr), + PLSS1, PSC, PSC, i->NMin.StoreC.addr_offset); + goto done; + break; + } + + case NMin_Cas: { + UInt oldLo = iregNo(i->NMin.Cas.oldLo); + UInt oldHi = iregNo(i->NMin.Cas.oldHi); + UInt addr = iregNo(i->NMin.Cas.addr); + UInt expdLo = iregNo(i->NMin.Cas.expdLo); + UInt expdHi = iregNo(i->NMin.Cas.expdHi); + UInt dataLo = iregNo(i->NMin.Cas.dataLo); + UInt dataHi = iregNo(i->NMin.Cas.dataHi); + + vassert((i->NMin.Cas.sz == 4) || (i->NMin.Cas.sz == 8)); + + if (i->NMin.Cas.sz == 4) { + /* + * ll old, 0(addr) + * bnec old, expd, end + * addiu old, old, 1 + * sc data, 0(addr) + * movn old, expd, data + * end: + */ + p = mkFormNanoPLSS9(p, oldLo, addr, PLSS1, PLL, LL, 0); + p = mkFormNanoPBR2(p, oldLo, expdLo, PBR2_BNEC32, 12); + p = 
mkFormNano2Regs16imm(p, 0x00, oldLo, oldLo, 1); + p = mkFormNanoPLSS9(p, dataLo, addr, PLSS1, PSC, SC, 0); + p = mkFormNanoP32A0(p, dataLo, expdLo, oldLo, _POOL32A0_PCMOVE); + } else { + /* + * llwp oldLo, oldHi 0(addr) + * bnec oldLo, expdLo, end + * bnec oldHi, expdHi, end + * addiu oldLo, oldLo, 1 + * addiu oldHi, oldHi, 1 + * scwp dataLo, dataHi, 0(addr) + * movn oldLo, expdLo, dataLo + * movn oldHi, expdHi, dataHi + * end: + */ + p = mkFormNanoPLSS9(p, oldLo, addr, PLSS1, PLL, LLWP, oldHi); + p = mkFormNanoPBR2(p, oldLo, expdLo, PBR2_BNEC32, 24); + p = mkFormNanoPBR2(p, oldHi, expdHi, PBR2_BNEC32, 20); + p = mkFormNano2Regs16imm(p, 0x00, oldLo, oldLo, 1); + p = mkFormNano2Regs16imm(p, 0x00, oldHi, oldHi, 1); + p = mkFormNanoPLSS9(p, dataLo, addr, PLSS1, PSC, SCWP, dataHi); + p = mkFormNanoP32A0(p, dataLo, expdLo, oldLo, _POOL32A0_PCMOVE); + p = mkFormNanoP32A0(p, dataHi, expdHi, oldHi, _POOL32A0_PCMOVE); + } + goto done; + } + + case NMin_MoveCond: { + UInt r_dst = iregNo(i->NMin.MoveCond.dst); + UInt r_src = iregNo(i->NMin.MoveCond.src); + UInt r_cond = iregNo(i->NMin.MoveCond.cond); + + switch (i->NMin.MoveCond.op) { + case NMMoveCond_movn: { + p = mkFormNanoP32A0(p, r_cond, r_src, r_dst, _POOL32A0_PCMOVE); + break; + } + + default: + vassert(0); + } + + goto done; + } + + case NMin_EvCheck: { + /* This requires a 32-bit dec/test in 32 mode. */ + /* We generate: + lw r9, amCounter + addiu r9, r9, -1 + sw r9, amCounter + bgec r9, zero, nofail + lw r9, amFailAddr + jalrc r9 + nofail: + */ + UChar* p0 = p; + /* lw r9, amCounter */ + p = doMemAccess_IR(p, 4, /*r*/ 9, i->NMin.EvCheck.r_amCounter, + i->NMin.EvCheck.offset_amCounter, + True /* Load */); + /* addiu r9,r9,-1 */ + p = mkFormNanoPU12(p, 9, 9, PU12_ADDIU_NEG, 1); + /* sw r9, amCounter */ + p = doMemAccess_IR(p, 4, /*r*/ 9, i->NMin.EvCheck.r_amCounter, + i->NMin.EvCheck.offset_amCounter, + False /* Store */); + /* bgec r9, zero, nofail */ + p = emit32(p, 0x88098008); + /* lw r9, amFailAddr */ + p = doMemAccess_IR(p, sizeof(Addr), /*r*/ 9, + i->NMin.EvCheck.r_amFailAddr, + i->NMin.EvCheck.offset_amFailAddr, + True /* Load */); + /* jalrc[32] r9 */ + p = mkFormNano2Regs16imm(p, 0x12, 31, 9, 0); /* p += 4 */ + /* nofail: */ + /* Crosscheck */ + vassert(evCheckSzB_NANOMIPS() == (UChar*)p - (UChar*)p0); + goto done; + } + + case NMin_ProfInc: { + /* 32-bit: + li r9, 0x65556555 + lw r8, 0(r9) + addiu r8, r8, 1 # add least significant word + sw r8, 0(r9) + sltiu r1, r8, 1 # set carry-in bit + lw r8, 4(r9) + addu r8, r8, r1 + sw r8, 4(r9) */ + + /* li r9, 0x65556555 */ + p = mkLoadImm32_EXACTLY2(p, 9, 0x65556555); + + /* lw r8, 0(r9) */ + p = mkFormNano2Regs12imm(p, 0x21, 8, 9, 0x8, 0); + + /* addiu r8, r8, 1 */ + p = mkFormNano2Regs16imm(p, 0x00, 8, 8, 0x01); + + /* sw r8, 0(r9) */ + p = mkFormNano2Regs12imm(p, 0x21, 8, 9, 0x9, 0); + + /* sltiu r1, r8, 1 */ + p = mkFormNanoPU12(p, 1, 8, PU12_SLTIU, 1); + + /* lw r8, 4(r9) */ + p = mkFormNano2Regs12imm(p, 0x21, 8, 9, 0x8, 4); + + /* addu r8, r8, r1 */ + p = mkFormNanoP32A0(p, 8, 1, 8, _POOL32A0_ADDU32); + + /* sw r8, 0(r9) */ + p = mkFormNano2Regs12imm(p, 0x21, 8, 9, 0x9, 4); + + break; + } + + default: + goto bad; + } + +bad: + vex_printf("\n=> "); + ppNANOMIPSInstr(i); + vpanic("emit_NANOMIPSInstr"); +/* NOTREACHED */ done: + vassert(p - &buf[0] <= 128); + return p - &buf[0]; +} + +/* How big is an event check? See case for Min_EvCheck in + emit_MIPSInstr just above. That crosschecks what this returns, so + we can tell if we're inconsistent. 
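+   (The sequence generated by the NMin_EvCheck case above is: lw; addiu; sw;
+   bgec; lw; jalrc, i.e. six 32-bit instructions, so the function below
+   returns 6 * 4 = 24 bytes.)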
*/
+Int evCheckSzB_NANOMIPS(void)
+{
+   return 6 * 4;
+}
+
+VexInvalRange chainXDirect_NANOMIPS(VexEndness endness_host,
+                                    void* place_to_chain,
+                                    const void* disp_cp_chain_me_EXPECTED,
+                                    const void* place_to_jump_to)
+{
+   UInt tmp[3];
+   UInt* p = (UInt*)place_to_chain;
+   /* li r9, disp_cp_chain_me_EXPECTED */
+   mkLoadImm32_EXACTLY2((UChar*)tmp, 9, (Addr)disp_cp_chain_me_EXPECTED);
+   /* jalrc r9 */
+   mkFormNano2Regs16imm((UChar*)(tmp + 2), 0x12, 31, 9, 0);
+   vassert((tmp[0] == p[0]) && (tmp[1] == p[1]) && (tmp[2] == p[2]));
+   /* li r9, place_to_jump_to */
+   mkLoadImm32_EXACTLY2((UChar*)place_to_chain, 9, (Addr)place_to_jump_to);
+   VexInvalRange vir = {(HWord)place_to_chain, 8};
+   return vir;
+}
+
+/* NB: what goes on here has to be very closely coordinated with the
+   emitInstr case for XDirect, above. */
+VexInvalRange unchainXDirect_NANOMIPS ( VexEndness endness_host,
+                                        void* place_to_unchain,
+                                        const void* place_to_jump_to_EXPECTED,
+                                        const void* disp_cp_chain_me)
+{
+   UInt tmp[3];
+   UInt* p = (UInt*)place_to_unchain;
+   /* li r9, place_to_jump_to_EXPECTED */
+   mkLoadImm32_EXACTLY2((UChar*)tmp, 9, (Addr)place_to_jump_to_EXPECTED);
+   /* jalrc r9 */
+   mkFormNano2Regs16imm((UChar*)(tmp + 2), 0x12, 31, 9, 0);
+   vassert((tmp[0] == p[0]) && (tmp[1] == p[1]) && (tmp[2] == p[2]));
+   /* li r9, disp_cp_chain_me */
+   mkLoadImm32_EXACTLY2((UChar*)place_to_unchain, 9, (Addr)disp_cp_chain_me);
+   VexInvalRange vir = {(HWord)place_to_unchain, 8};
+   return vir;
+}
+
+/* Patch the counter address into a profile inc point, as previously
+   created by the NMin_ProfInc case of emit_NANOMIPSInstr. */
+VexInvalRange patchProfInc_NANOMIPS ( VexEndness endness_host,
+                                      void* place_to_patch,
+                                      const ULong* location_of_counter)
+{
+   UInt tmp[9];
+   UInt* p = (UInt*)place_to_patch;
+   /* Rebuild the expected instruction sequence in tmp, advancing through the
+      buffer after each instruction, so it can be compared word-for-word
+      against what is actually at place_to_patch. */
+   UChar* q = (UChar*)tmp;
+
+   vassert(endness_host == VexEndnessLE || endness_host == VexEndnessBE);
+   vassert(sizeof(ULong*) == 4);
+   vassert(0 == (3 & (HWord)p));
+
+   q = mkLoadImm32_EXACTLY2(q, 9, 0x65556555);
+   q = mkFormNano2Regs12imm(q, 0x21, 8, 9, 0x8, 0);
+   q = mkFormNano2Regs16imm(q, 0x00, 8, 8, 0x01);
+   q = mkFormNano2Regs12imm(q, 0x21, 8, 9, 0x9, 0);
+   q = mkFormNanoPU12(q, 1, 8, PU12_SLTIU, 1);
+   q = mkFormNano2Regs12imm(q, 0x21, 8, 9, 0x8, 4);
+   q = mkFormNanoP32A0(q, 8, 1, 8, _POOL32A0_ADDU32);
+   q = mkFormNano2Regs12imm(q, 0x21, 8, 9, 0x9, 4);
+
+   for (int i = 0; i < 9; i++)
+      vassert(tmp[i] == p[i]);
+
+   /* li r9, location_of_counter */
+   mkLoadImm32_EXACTLY2((UChar*)place_to_patch, 9, (Addr)location_of_counter);
+   VexInvalRange vir = {(HWord)place_to_patch, 8};
+   return vir;
+}
+
+const RRegUniverse* getRRegUniverse_NANOMIPS ( Bool mode64 )
+{
+   /* The real-register universe is a big constant, so we just want to
+      initialise it once.  rRegUniverse_MIPS_initted values: 0 = not initted,
+      1 = initted (nanoMIPS is 32-bit only). */
+   static RRegUniverse rRegUniverse_MIPS;
+   static UInt rRegUniverse_MIPS_initted = 0;
+   UInt gpr;
+
+   RRegUniverse* ru = &rRegUniverse_MIPS;
+
+   if (LIKELY(rRegUniverse_MIPS_initted == 1))
+      return ru;
+
+   vassert(!mode64);
+
+   RRegUniverse__init(ru);
+
+   /* Add the registers.  The initial segment of this array must be
+      those available for allocation by reg-alloc, and those that
+      follow are not available for allocation.
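+      For nanoMIPS the allocatable set is GPR16..GPR22, GPR12..GPR15 and
+      GPR24; everything listed after allocable_end (GPR0..GPR11, GPR23 which
+      holds the guest state pointer, GPR25, GPR29 and GPR31) is known to the
+      register allocator but never handed out.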
*/ + ru->allocable_start[HRcInt32] = ru->size; + + for (gpr = 16; gpr <= 22; gpr++) { + ru->regs[ru->size] = mkHReg(False, HRcInt32, gpr, ru->size); + ru->size++; + } + + for (gpr = 12; gpr <= 15; gpr++) { + ru->regs[ru->size] = mkHReg(False, HRcInt32, gpr, ru->size); + ru->size++; + } + + ru->regs[ru->size] = mkHReg(False, HRcInt32, 24, ru->size); + + ru->allocable_end[HRcInt32] = ru->size; + + ru->allocable = ++ru->size; + + for (gpr = 0; gpr <= 11; gpr++) { + ru->regs[ru->size] = mkHReg(False, HRcInt32, gpr, ru->size); + ru->size++; + } + + ru->regs[ru->size] = mkHReg(False, HRcInt32, 23, ru->size); + ru->size++; + ru->regs[ru->size] = mkHReg(False, HRcInt32, 25, ru->size); + ru->size++; + ru->regs[ru->size] = mkHReg(False, HRcInt32, 29, ru->size); + ru->size++; + ru->regs[ru->size] = mkHReg(False, HRcInt32, 31, ru->size); + ru->size++; + + rRegUniverse_MIPS_initted = 1; + + RRegUniverse__check_is_sane(ru); + return ru; +} + +/*---------------------------------------------------------------*/ +/*--- end host_NANOMIPS_defs.c ---*/ +/*---------------------------------------------------------------*/ diff --git a/VEX/priv/host_nanomips_defs.h b/VEX/priv/host_nanomips_defs.h new file mode 100644 index 0000000000..d1b49395ab --- /dev/null +++ b/VEX/priv/host_nanomips_defs.h @@ -0,0 +1,428 @@ + +/*---------------------------------------------------------------*/ +/*--- begin host_nanomips_defs.h ---*/ +/*---------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2017-2018 RT-RK + mips-valgrind@rt-rk.com + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#ifndef __VEX_HOST_NANOMIPS_DEFS_H +#define __VEX_HOST_NANOMIPS_DEFS_H + +#include "libvex_basictypes.h" +#include "libvex.h" /* VexArch */ +#include "host_generic_regs.h" /* HReg */ +#include "common_nanomips_defs.h" + +/* --------- Registers. 
--------- */ + +#define ST_IN static inline + +#define GPR(_enc, _ix) \ + mkHReg(False, HRcInt32, (_enc), (_ix)) + +ST_IN HReg hregNANOMIPS_GPR16(void) { + return GPR(16, 0); +} +ST_IN HReg hregNANOMIPS_GPR17(void) { + return GPR(17, 1); +} +ST_IN HReg hregNANOMIPS_GPR18(void) { + return GPR(18, 2); +} +ST_IN HReg hregNANOMIPS_GPR19(void) { + return GPR(19, 3); +} +ST_IN HReg hregNANOMIPS_GPR20(void) { + return GPR(20, 4); +} +ST_IN HReg hregNANOMIPS_GPR21(void) { + return GPR(21, 5); +} +ST_IN HReg hregNANOMIPS_GPR22(void) { + return GPR(22, 6); +} + +ST_IN HReg hregNANOMIPS_GPR12(void) { + return GPR(12, 7); +} +ST_IN HReg hregNANOMIPS_GPR13(void) { + return GPR(13, 8); +} +ST_IN HReg hregNANOMIPS_GPR14(void) { + return GPR(14, 9); +} +ST_IN HReg hregNANOMIPS_GPR15(void) { + return GPR(15, 10); +} +ST_IN HReg hregNANOMIPS_GPR24(void) { + return GPR(24, 11); +} + +ST_IN HReg hregNANOMIPS_GPR0(void) { + return GPR( 0, 12); +} +ST_IN HReg hregNANOMIPS_GPR1(void) { + return GPR( 1, 13); +} +ST_IN HReg hregNANOMIPS_GPR2(void) { + return GPR( 2, 14); +} +ST_IN HReg hregNANOMIPS_GPR3(void) { + return GPR( 3, 15); +} +ST_IN HReg hregNANOMIPS_GPR4(void) { + return GPR( 4, 16); +} +ST_IN HReg hregNANOMIPS_GPR5(void) { + return GPR( 5, 17); +} +ST_IN HReg hregNANOMIPS_GPR6(void) { + return GPR( 6, 18); +} +ST_IN HReg hregNANOMIPS_GPR7(void) { + return GPR( 7, 19); +} +ST_IN HReg hregNANOMIPS_GPR8(void) { + return GPR( 8, 20); +} +ST_IN HReg hregNANOMIPS_GPR9(void) { + return GPR( 9, 21); +} +ST_IN HReg hregNANOMIPS_GPR10(void) { + return GPR(10, 22); +} +ST_IN HReg hregNANOMIPS_GPR11(void) { + return GPR(11, 23); +} +ST_IN HReg hregNANOMIPS_GPR23(void) { + return GPR(23, 24); +} +ST_IN HReg hregNANOMIPS_GPR25(void) { + return GPR(25, 25); +} +ST_IN HReg hregNANOMIPS_GPR29(void) { + return GPR(29, 26); +} +ST_IN HReg hregNANOMIPS_GPR31(void) { + return GPR(31, 27); +} + +#undef ST_IN +#undef GPR + +#undef GuestStatePointer +#undef StackFramePointer +#undef StackPointer +#undef Zero + +#define GuestStatePointer hregNANOMIPS_GPR23() +#define StackFramePointer hregNANOMIPS_GPR30() +#define StackPointer hregNANOMIPS_GPR29() +#define Zero hregNANOMIPS_GPR0() + +/* Num registers used for function calls */ +/* a0, a1, a2, a3, a4, a5, a6, a7 */ +# define NANOMIPS_N_REGPARMS 8 + +typedef enum { + NMin_Imm, /* Operation with word and imm (fake insn). */ + NMin_Unary, /* Unary ops: clo, clz, neg and nop. */ + NMin_Alu, /* Binary ops: add/sub/and/or/xor/nor/mul/div. */ + NMin_Cmp, /* Word compare (fake insn). */ + NMin_Call, /* Call to address in register. */ + + /* The following 5 insns are mandated by translation chaining */ + NMin_XDirect, /* Direct transfer to GA. */ + NMin_XIndir, /* Indirect transfer to GA. */ + NMin_XAssisted, /* Assisted transfer to GA. */ + NMin_EvCheck, /* Event check. */ + NMin_ProfInc, /* 64-bit profile counter increment. */ + + NMin_Load, /* Sign-extending load a 8|16|32 bit value from mem. */ + NMin_Store, /* Store a 8|16|32 bit value to mem. */ + NMin_Cas, /* Compare and swap. */ + NMin_LoadL, /* Mips Load Linked Word - LL. */ + NMin_StoreC, /* Mips Store Conditional Word - SC. */ + NMin_MoveCond, /* Move Conditional. 
*/ +} NANOMIPSInstrTag; + +typedef enum { + NMimm_INVALID = -1, /* Invalid / unknown op */ + NMimm_SLL = 0x00, /* Shift left */ + NMimm_SRL = 0x02, /* Logic shift right */ + NMimm_LI = 0x03, /* Load immediate */ + NMimm_SRA = 0x04, /* Arithetic shift right */ + NMimm_SGN = 0x05, /* Sign extend from imm bits */ + NMimm_ORI = 0x06, /* Logical or */ + NMimm_XORI = 0x07, /* Logical xor */ + NMimm_ANDI = 0x08, /* Logical and */ +} NANOMIPSImmOp; + +typedef enum { + NMun_CLO, + NMun_CLZ, + NMun_NOP, +} NANOMIPSUnaryOp; + +typedef enum { + NMalu_INVALID = -1, + NMalu_SLL = NMimm_SLL, + NMalu_SRL = NMimm_SRL, + NMalu_SRA = NMimm_SRA, + NMalu_OR = NMimm_ORI, + NMalu_XOR = NMimm_XORI, + NMalu_AND = NMimm_ANDI, + NMalu_ADD, + NMalu_SUB, + NMalu_SLT, + NMalu_NOR, + NMalu_MUL, + NMalu_MULU, + NMalu_MUH, + NMalu_MUHU, + NMalu_DIV, + NMalu_DIVU, + NMalu_MOD, + NMalu_MODU, +} NANOMIPSAluOp; + +typedef enum { + NMcc_INVALID, /* Invalid or unknown condition */ + NMcc_EQ, /* equal */ + NMcc_NE, /* not equal */ + + NMcc_LTS, /* signed less than */ + NMcc_LTU, /* unsigned less than */ + + NMcc_LES, /* signed less than or equal */ + NMcc_LEU, /* unsigned less than or equal */ + + NMcc_AL, /* always (unconditional) */ + NMcc_NV, /* never (unconditional) */ +} NANOMIPSCondCode; + +typedef enum { + NMMoveCond_movn /* Move Conditional on Not Zero */ +} NANOMIPSMoveCondOp; + +typedef struct { + NANOMIPSInstrTag tag; + union { + struct { + NANOMIPSImmOp op; + HReg src; + HReg dst; + UInt imm; + } Imm; + struct { + NANOMIPSAluOp op; + HReg dst; + HReg srcL; + HReg srcR; + } Alu; + /* Clz, Clo, not, nop */ + struct { + NANOMIPSUnaryOp op; + HReg dst; + HReg src; + } Unary; + /* Word compare. Fake instruction, used for basic block ending. */ + struct { + HReg dst; + HReg srcL; + HReg srcR; + NANOMIPSCondCode cond; + } Cmp; + /* Pseudo-insn. Call target (an absolute address), on given + condition. */ + struct { + Addr target; + UInt argiregs; + HReg guard; + RetLoc rloc; /* Where the return value will be */ + } Call; + /* Update the guest EIP value, then exit requesting to chain + to it. May be conditional. */ + struct { + Addr dstGA; /* next guest address */ + HReg addr; /* Address register */ + Int addr_offset; /* Offset */ + HReg cond; /* Condition */ + Bool toFastEP; /* Chain to the slow or fast point? */ + } XDirect; + /* Boring transfer to a guest address not known at JIT time. + Not chainable. May be conditional. */ + struct { + HReg dstGA; + HReg addr; /* Address register */ + Int addr_offset; /* Offset */ + HReg cond; /* Condition */ + } XIndir; + /* Assisted transfer to a guest address, most general case. + Not chainable. May be conditional. */ + struct { + HReg dstGA; + HReg addr; /* Address register */ + Int addr_offset; /* Offset */ + HReg cond; /* Condition */ + IRJumpKind jk; /* Jump kind */ + } XAssisted; + struct { + HReg r_amCounter; + Int offset_amCounter; + HReg r_amFailAddr; + Int offset_amFailAddr; + } EvCheck; + struct { + /* No fields. The address of the counter to inc is + installed later, post-translation, by patching it in, + as it is not known at translation time. */ + } ProfInc; + /* Sign extending loads. Dst size is host word size */ + struct { + UChar sz; /* Must be 4 bytes for now. */ + HReg dst; /* Destionation register */ + HReg addr; /* Address register */ + Int addr_offset; /* Offset */ + } Load; + struct { + UChar sz; /* Must be 4 bytes for now. 
*/ + HReg addr; /* Address register */ + Int addr_offset; /* Offset */ + HReg src; /* Source register */ + } Store; + struct { + UChar sz; /* Must be 4 bytes for now. */ + HReg oldHi; + HReg oldLo; + HReg addr; + HReg expdHi; + HReg expdLo; + HReg dataHi; + HReg dataLo; + } Cas; + struct { + UChar sz; /* Must be 4 bytes for now. */ + HReg dst; /* Destination register */ + HReg addr; /* Address register */ + Int addr_offset; /* Offset */ + } LoadL; + struct { + UChar sz; /* Must be 4 bytes for now. */ + HReg addr; /* Address register */ + Int addr_offset; /* Offset */ + HReg src; /* Sorce register */ + } StoreC; + /* Conditional move. */ + struct { + NANOMIPSMoveCondOp op; + HReg dst; + HReg src; + HReg cond; + } MoveCond; + } NMin; +} NANOMIPSInstr; + +extern NANOMIPSInstr *NANOMIPSInstr_Imm(NANOMIPSImmOp, HReg, HReg, UInt); +extern NANOMIPSInstr *NANOMIPSInstr_Unary(NANOMIPSUnaryOp op, HReg dst, + HReg src); +extern NANOMIPSInstr *NANOMIPSInstr_Alu(NANOMIPSAluOp, HReg, HReg, HReg); +extern NANOMIPSInstr *NANOMIPSInstr_Cmp(NANOMIPSCondCode, HReg, HReg, HReg); +extern NANOMIPSInstr *NANOMIPSInstr_Call(Addr, UInt, HReg, RetLoc); +extern NANOMIPSInstr *NANOMIPSInstr_XDirect(Addr64 dstGA, HReg, Int, + HReg cond, Bool toFastEP); +extern NANOMIPSInstr *NANOMIPSInstr_XIndir(HReg dstGA, HReg, Int, + HReg cond); +extern NANOMIPSInstr *NANOMIPSInstr_XAssisted(HReg dstGA, HReg, Int, + HReg cond, IRJumpKind jk); +extern NANOMIPSInstr *NANOMIPSInstr_EvCheck(HReg, Int, HReg, Int); +extern NANOMIPSInstr *NANOMIPSInstr_ProfInc(void); +extern NANOMIPSInstr *NANOMIPSInstr_Load(UChar sz, HReg dst, HReg src, + Int addr_offset); +extern NANOMIPSInstr *NANOMIPSInstr_Store(UChar sz, HReg dst, Int addr_offset, + HReg src); +extern NANOMIPSInstr *NANOMIPSInstr_Cas(UChar sz, HReg oldLo, HReg oldHi, HReg addr, + HReg expdLo, HReg expdHi, + HReg dataLo, HReg dataHi); +extern NANOMIPSInstr *NANOMIPSInstr_LoadL(UChar sz, HReg dst, HReg src, + Int addr_offset); +extern NANOMIPSInstr *NANOMIPSInstr_StoreC(UChar sz, HReg dst, Int addr_offset, + HReg src); +extern NANOMIPSInstr *NANOMIPSInstr_MoveCond(NANOMIPSMoveCondOp op, HReg dst, + HReg src, HReg cond); +extern void ppNANOMIPSInstr(const NANOMIPSInstr *); +extern UInt ppHRegNANOMIPS(HReg); +extern void getRegUsage_NANOMIPSInstr (HRegUsage *, const NANOMIPSInstr *); +extern void mapRegs_NANOMIPSInstr (HRegRemap *, NANOMIPSInstr *); +extern void genSpill_NANOMIPS ( /*OUT*/ HInstr ** i1, /*OUT*/ HInstr ** i2, + HReg rreg, Int offset, Bool mode64); +extern void genReload_NANOMIPS( /*OUT*/ HInstr ** i1, /*OUT*/ HInstr ** i2, + HReg rreg, Int offset, Bool mode64); +extern NANOMIPSInstr* genMove_NANOMIPS(HReg from, HReg to); +extern HInstrArray *iselSB_NANOMIPS(const IRSB*, + VexArch, + const VexArchInfo*, + const VexAbiInfo*, + Int offs_Host_EvC_Counter, + Int offs_Host_EvC_FailAddr, + Bool chainingAllowed, + Bool addProfInc, + Addr max_ga); +extern Int emit_NANOMIPSInstr (/*MB_MOD*/Bool* is_profInc, + UChar* buf, Int nbuf, + const NANOMIPSInstr* i, + Bool mode64, + VexEndness endness_host, + const void* disp_cp_chain_me_to_slowEP, + const void* disp_cp_chain_me_to_fastEP, + const void* disp_cp_xindir, + const void* disp_cp_xassisted); +/* How big is an event check? This is kind of a kludge because it + depends on the offsets of host_EvC_FAILADDR and host_EvC_COUNTER, + and so assumes that they are both <= 128, and so can use the short + offset encoding. This is all checked with assertions, so in the + worst case we will merely assert at startup. 
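+   On nanoMIPS the counter and fail-address accesses use 12-bit offsets (see
+   doMemAccess_IR in host_nanomips_defs.c), and the check itself is six
+   32-bit instructions, so evCheckSzB_NANOMIPS returns 24.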
*/ +extern Int evCheckSzB_NANOMIPS (void); +/* Perform a chaining and unchaining of an XDirect jump. */ +extern VexInvalRange chainXDirect_NANOMIPS (VexEndness endness_host, + void* place_to_chain, + const void* disp_cp_chain_me_EXPECTED, + const void* place_to_jump_to); +extern VexInvalRange unchainXDirect_NANOMIPS(VexEndness endness_host, + void* place_to_unchain, + const void* place_to_jump_to_EXPECTED, + const void* disp_cp_chain_me); +/* Patch the counter location into an existing ProfInc point. */ +extern VexInvalRange patchProfInc_NANOMIPS (VexEndness endness_host, + void* place_to_patch, + const ULong* location_of_counter); +extern const RRegUniverse* getRRegUniverse_NANOMIPS (Bool mode64); + +#endif /* ndef __VEX_HOST_NANOMIPS_DEFS_H */ + +/*---------------------------------------------------------------*/ +/*--- end host-nanomips_defs.h ---*/ +/*---------------------------------------------------------------*/ diff --git a/VEX/priv/host_nanomips_isel.c b/VEX/priv/host_nanomips_isel.c new file mode 100644 index 0000000000..a76a5c67bd --- /dev/null +++ b/VEX/priv/host_nanomips_isel.c @@ -0,0 +1,1567 @@ + +/*---------------------------------------------------------------*/ +/*--- begin host_nanomips_isel.c ---*/ +/*---------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2017-2018 RT-RK + mips-valgrind@rt-rk.com + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#include "libvex_basictypes.h" +#include "libvex_ir.h" +#include "libvex.h" + +#include "main_util.h" +#include "main_globals.h" +#include "host_generic_regs.h" +#include "host_nanomips_defs.h" + +/*---------------------------------------------------------*/ +/*--- Register Usage Conventions ---*/ +/*---------------------------------------------------------*/ + +/* Integer Regs + ------------ + ZERO0 Reserved + GPR12:22 Allocateable + 23 GuestStatePointer + SP StackFramePointer + RA LinkRegister */ + +/* Host hwcaps */ +static UInt hwcaps_host = 0; + +/* GPR register class for NANOMIPS */ +#define HRcGPR HRcInt32 + +/*---------------------------------------------------------*/ +/*--- ISelEnv ---*/ +/*---------------------------------------------------------*/ + +/* This carries around: + + - A mapping from IRTemp to IRType, giving the type of any IRTemp we + might encounter. This is computed before insn selection starts, + and does not change. + + - A mapping from IRTemp to HReg. This tells the insn selector + which virtual register(s) are associated with each IRTemp + temporary. This is computed before insn selection starts, and + does not change. We expect this mapping to map precisely the + same set of IRTemps as the type mapping does. + + - vregmap holds the primary register for the IRTemp. 
+ - vregmapHI is only used for 64-bit integer-typed + IRTemps. It holds the identity of a second + 32-bit virtual HReg, which holds the high half + of the value. + + - The code array, that is, the insns selected so far. + + - A counter, for generating new virtual registers. + + - The host subarchitecture we are selecting insns for. + This is set at the start and does not change. + + - A Bool for indicating whether we may generate chain-me + instructions for control flow transfers, or whether we must use + XAssisted. + + - The maximum guest address of any guest insn in this block. + Actually, the address of the highest-addressed byte from any insn + in this block. Is set at the start and does not change. This is + used for detecting jumps which are definitely forward-edges from + this block, and therefore can be made (chained) to the fast entry + point of the destination, thereby avoiding the destination's + event check. + + Note, this is all (well, mostly) host-independent. +*/ + +typedef +struct { + /* Constant -- are set at the start and do not change. */ + IRTypeEnv* type_env; + + HReg* vregmap; + HReg* vregmapHI; + Int n_vregmap; + + UInt hwcaps; + + Bool chainingAllowed; + Addr64 max_ga; + + /* These are modified as we go along. */ + HInstrArray* code; + Int vreg_ctr; +} ISelEnv; + +static HReg lookupIRTemp(ISelEnv* env, IRTemp tmp) +{ + vassert(tmp < env->n_vregmap); + return env->vregmap[tmp]; +} + +static void lookupIRTemp64(HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp) +{ + vassert(tmp < env->n_vregmap); + vassert(!hregIsInvalid(env->vregmapHI[tmp])); + *vrLO = env->vregmap[tmp]; + *vrHI = env->vregmapHI[tmp]; +} + +static void addInstr(ISelEnv* env, NANOMIPSInstr* instr) +{ + addHInstr(env->code, instr); + + if (vex_traceflags & VEX_TRACE_VCODE) { + ppNANOMIPSInstr(instr); + vex_printf("\n"); + } +} + +static HReg newVRegI(ISelEnv* env) +{ + HReg reg = mkHReg(True /* virtual reg */, + HRcGPR, 0 /* enc */, env->vreg_ctr); + env->vreg_ctr++; + return reg; +} + +/*---------------------------------------------------------*/ +/*--- ISEL: Forward declarations ---*/ +/*---------------------------------------------------------*/ + +/* These are organised as iselXXX and iselXXX_wrk pairs. The + iselXXX_wrk do the real work, but are not to be called directly. + For each XXX, iselXXX calls its iselXXX_wrk counterpart, then + checks that all returned registers are virtual. You should not + call the _wrk version directly. +*/ + +/* Compute an I1/I8/I16/I32 into a GPR. */ +static HReg iselWordExpr_R_wrk(ISelEnv* env, IRExpr* e); +static HReg iselWordExpr_R(ISelEnv* env, IRExpr* e); + +/* Compute an I64 into a pair of GPRs. */ +static void iselInt64Expr_wrk(HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e); +static void iselInt64Expr(HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e); + +/*---------------------------------------------------------*/ +/*--- ISEL: Misc helpers ---*/ +/*---------------------------------------------------------*/ + +/* Make an int reg-reg move. 
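+   (Implemented below as NMalu_OR with both source operands equal, matching
+   genMove_NANOMIPS in host_nanomips_defs.c.)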
*/ +static inline NANOMIPSInstr *mk_iMOVds_RR(HReg r_dst, HReg r_src) +{ + vassert(hregClass(r_dst) == hregClass(r_src)); + vassert(hregClass(r_src) == HRcInt32); + return NANOMIPSInstr_Alu(NMalu_OR, r_dst, r_src, r_src); +} + +/* Extract sign-extended value from IRConst */ +static inline Int extractConst(IRConst *c) +{ + switch (c->tag) { + case Ico_U32: + return c->Ico.U32; + + case Ico_U16: + return (Int)(Short)c->Ico.U16; + + case Ico_U8: + return (Int)(Char)c->Ico.U8; + + default: + vpanic("NANOMIPSisel_extractConst() fails"); + } +} + +/*---------------------------------------------------------*/ +/*--- ISEL: Function call helpers ---*/ +/*---------------------------------------------------------*/ + +/* Used only in doHelperCall. See big comment in doHelperCall re + handling of register-parameter args. This function figures out + whether evaluation of an expression might require use of a fixed + register. If in doubt return True (safe but suboptimal). +*/ +static Bool mightRequireFixedRegs(IRExpr* e) +{ + switch (e->tag) { + case Iex_RdTmp: + case Iex_Const: + case Iex_Get: + return False; + + default: + return True; + } +} + +/* Do a complete function call. |guard| is a Ity_Bit expression + indicating whether or not the call happens. If guard==NULL, the + call is unconditional. |retloc| is set to indicate where the + return value is after the call. The caller (of this fn) must + generate code to add |stackAdjustAfterCall| to the stack pointer + after the call is done. */ + +static void doHelperCall(/*OUT*/ RetLoc* retloc, + ISelEnv* env, + IRExpr* guard, + IRCallee* cee, + IRType retty, + IRExpr** args ) +{ + HReg argregs[8]; + HReg tmpregs[8]; + Bool go_fast; + UInt n_args, i, argreg, nGSPTRs, argiregs; + HReg cond = INVALID_HREG; + + vassert((retty == Ity_INVALID) || + (retty == Ity_I32) || + (retty == Ity_I64) || + (retty == Ity_I8) || + (retty == Ity_I16)); + /* NANOMIPS P32 calling convention: up to eight registers ($a0 ... $a7) + are allowed to be used for passing integer arguments. */ + + /* The return type can be I{32,16,8}. + |args| may contain IRExpr_GSPTR(), in which case the value + in the guest state pointer register is passed as the + corresponding argument. */ + + *retloc = mk_RetLoc_INVALID(); + n_args = 0; + nGSPTRs = 0; + + for (i = 0; args[i]; i++) { + IRExpr* arg = args[i]; + + if (UNLIKELY(arg->tag == Iex_GSPTR)) { + nGSPTRs++; + } + + n_args++; + } + + vassert(nGSPTRs <= 1); + vassert(n_args <= NANOMIPS_N_REGPARMS); + + argregs[0] = hregNANOMIPS_GPR4(); + argregs[1] = hregNANOMIPS_GPR5(); + argregs[2] = hregNANOMIPS_GPR6(); + argregs[3] = hregNANOMIPS_GPR7(); + argregs[4] = hregNANOMIPS_GPR8(); + argregs[5] = hregNANOMIPS_GPR9(); + argregs[6] = hregNANOMIPS_GPR10(); + argregs[7] = hregNANOMIPS_GPR11(); + argiregs = 0; + tmpregs[0] = tmpregs[1] = tmpregs[2] = + tmpregs[3] = tmpregs[4] = tmpregs[5] = + tmpregs[6] = tmpregs[7] = INVALID_HREG; + + /* First decide which scheme (slow or fast) is to be used. First assume the + fast scheme, and select slow if any contraindications (wow) appear. */ + go_fast = True; + + if (guard) { + vassert(typeOfIRExpr(env->type_env, guard) == Ity_I1); + + if (guard->tag != Iex_Const || !guard->Iex.Const.con->Ico.U1) { + go_fast = False; + cond = iselWordExpr_R(env, guard); + } + } + + if (go_fast) { + for (i = 0; i < n_args; i++) { + if (mightRequireFixedRegs(args[i])) { + go_fast = False; + break; + } + } + } + + /* At this point the scheme to use has been established. Generate + code to get the arg values into the argument rregs. 
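+      In the fast (unconditional) scheme each argument is computed straight
+      into $a0..$a7.  In the slow (guarded) scheme the arguments are first
+      computed into temporaries and only copied into the argument registers
+      once they are all known, so that an argument whose evaluation needs
+      fixed registers cannot trash an argument register that has already been
+      loaded.  A 64-bit argument is aligned to an even-numbered argument
+      register and occupies a register pair.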
*/ + if (go_fast) { + argreg = 0; + + for (i = 0; i < n_args; i++) { + IRExpr* arg = args[i]; + IRType aTy = Ity_INVALID; + vassert(argreg < NANOMIPS_N_REGPARMS); + + if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) + aTy = typeOfIRExpr(env->type_env, arg); + + switch (aTy) { + case Ity_I1: + case Ity_I8: + case Ity_I16: + case Ity_I32: + argiregs |= (1 << (argreg + 4)); + addInstr(env, mk_iMOVds_RR(argregs[argreg], + iselWordExpr_R(env, arg))); + argreg++; + break; + + case Ity_I64: + if (argreg & 1) { + argreg++; + argiregs |= (1 << (argreg + 4)); + } + + vassert(argreg + 1 < NANOMIPS_N_REGPARMS); + + HReg rHi, rLo; + iselInt64Expr(&rHi, &rLo, env, arg); + argiregs |= (1 << (argreg + 4)); + addInstr(env, mk_iMOVds_RR(argregs[argreg++], rHi)); + argiregs |= (1 << (argreg + 4)); + addInstr(env, mk_iMOVds_RR(argregs[argreg], rLo)); + argreg++; + break; + + case Ity_INVALID: + default: + vassert(arg->tag == Iex_GSPTR); + addInstr(env, mk_iMOVds_RR(argregs[argreg], GuestStatePointer)); + argreg++; + break; + } + } + } else { + argreg = 0; + + for (i = 0; i < n_args; i++) { + IRExpr* arg = args[i]; + IRType aTy = Ity_INVALID; + vassert(argreg < NANOMIPS_N_REGPARMS); + + if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) + aTy = typeOfIRExpr(env->type_env, arg); + + switch (aTy) { + case Ity_I1: + case Ity_I8: + case Ity_I16: + case Ity_I32: + tmpregs[argreg] = iselWordExpr_R(env, arg); + argreg++; + break; + + case Ity_I64: { + HReg raHi, raLo; + + if (argreg & 1) { + argreg++; + } + + vassert(argreg + 1 < NANOMIPS_N_REGPARMS); + + iselInt64Expr(&raHi, &raLo, env, arg); + tmpregs[argreg] = raLo; + argreg++; + tmpregs[argreg] = raHi; + argreg++; + break; + } + + case Ity_INVALID: + default: + vassert(arg->tag == Iex_GSPTR); + tmpregs[argreg] = GuestStatePointer; + argreg++; + break; + } + + for (i = 0; i < argreg; i++) { + if (hregIsInvalid(tmpregs[i])) + continue; + + /* None of these insns, including any spill code that might + be generated, may alter the condition codes. */ + argiregs |= (1 << (i + 4)); + addInstr(env, mk_iMOVds_RR(argregs[i], tmpregs[i])); + } + } + } + + switch (retty) { + case Ity_INVALID: + *retloc = mk_RetLoc_simple(RLPri_None); + break; + + case Ity_I64: + *retloc = mk_RetLoc_simple(RLPri_2Int); + break; + + case Ity_I32: + case Ity_I16: + case Ity_I8: + *retloc = mk_RetLoc_simple(RLPri_Int); + break; + + default: + vassert(0); + } + + addInstr(env, NANOMIPSInstr_Call((Addr)cee->addr, argiregs, cond, *retloc)); +} + +/*---------------------------------------------------------*/ +/*--- ISEL: Integer expressions (64/32/16/8 bit) ---*/ +/*---------------------------------------------------------*/ + +/* Select insns for an integer-typed expression, and add them to the + code list. Return a reg holding the result. This reg will be a + virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you + want to modify it, ask for a new vreg, copy it in there, and modify + the copy. The register allocator will do its best to map both + vregs to the same real register, so the copies will often disappear + later in the game. + + This should handle expressions of 64, 32, 16 and 8-bit type. + All results are returned in a (mode64 ? 64bit : 32bit) register. + For 16- and 8-bit expressions, the upper (32/48/56 : 16/24) bits + are arbitrary, so you should mask or sign extend partial values + if necessary. +*/ +static HReg iselWordExpr_R(ISelEnv * env, IRExpr * e) +{ + HReg r = iselWordExpr_R_wrk(env, e); + /* sanity checks ... 
*/ + vassert(hregClass(r) == HRcGPR); + vassert(hregIsVirtual(r)); + return r; +} + +static HReg iselWordExpr_R_wrk(ISelEnv * env, IRExpr * e) +{ + IRType ty = typeOfIRExpr(env->type_env, e); + vassert(ty == Ity_I1 || ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32); + + switch (e->tag) { + case Iex_RdTmp: + return lookupIRTemp(env, e->Iex.RdTmp.tmp); + + case Iex_Load: { + HReg r_dst = newVRegI(env); + HReg r_addr = iselWordExpr_R(env, e->Iex.Load.addr); + addInstr(env, NANOMIPSInstr_Load(sizeofIRType(ty), r_dst, r_addr, 0)); + return r_dst; + } + + case Iex_Get: { + vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32); + HReg r_dst = newVRegI(env); + vassert((e->Iex.Get.offset < 0x1000) && (e->Iex.Get.offset >= 0)); + addInstr(env, NANOMIPSInstr_Load(sizeofIRType(ty), r_dst, + GuestStatePointer, + e->Iex.Get.offset)); + return r_dst; + } + + case Iex_Binop: { + NANOMIPSAluOp aluOp; + NANOMIPSCondCode ccOp; + + switch (e->Iex.Binop.op) { + case Iop_Add8: + case Iop_Add16: + case Iop_Add32: + aluOp = NMalu_ADD; + break; + + case Iop_Sub8: + case Iop_Sub16: + case Iop_Sub32: + aluOp = NMalu_SUB; + break; + + case Iop_And8: + case Iop_And16: + case Iop_And32: + aluOp = NMalu_AND; + break; + + case Iop_Or8: + case Iop_Or16: + case Iop_Or32: + aluOp = NMalu_OR; + break; + + case Iop_Xor8: + case Iop_Xor16: + case Iop_Xor32: + aluOp = NMalu_XOR; + break; + + case Iop_Shl32: + aluOp = NMalu_SLL; + break; + + case Iop_Shr32: + aluOp = NMalu_SRL; + break; + + case Iop_Sar32: + aluOp = NMalu_SRA; + break; + + case Iop_Mul32: + aluOp = NMalu_MULU; + break; + + case Iop_MullS8: + case Iop_MullS16: + aluOp = NMalu_MUL; + break; + + case Iop_DivS32: + aluOp = NMalu_DIV; + break; + + case Iop_DivU32: + aluOp = NMalu_DIVU; + break; + + default: + aluOp = NMalu_INVALID; + break; + } + + if (aluOp != NMalu_INVALID) { + HReg r_dst = newVRegI(env); + HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1); + + /* Optimization: If seccond argument is Const and + ALU operation can be converted to IMM operation */ + if ((aluOp <= NMalu_AND) && + (e->Iex.Binop.arg2->tag == Iex_Const)) { + + UInt val = extractConst(e->Iex.Binop.arg2->Iex.Const.con); + + if ((val < 0x20) || + ((val < 0x1000) && (aluOp >= NMalu_OR))) { + NANOMIPSImmOp immOp = (NANOMIPSImmOp)aluOp; + addInstr(env, NANOMIPSInstr_Imm(immOp, r_dst, r_srcL, + val)); + return r_dst; + } + } + + HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2); + addInstr(env, NANOMIPSInstr_Alu(aluOp, r_dst, r_srcL, r_srcR)); + return r_dst; + } + + switch (e->Iex.Binop.op) { + case Iop_CmpEQ32: + case Iop_CasCmpEQ32: + ccOp = NMcc_EQ; + break; + + case Iop_CmpNE32: + ccOp = NMcc_NE; + break; + + case Iop_CmpLT32S: + ccOp = NMcc_LTS; + break; + + case Iop_CmpLT32U: + ccOp = NMcc_LTU; + break; + + case Iop_CmpLE32S: + ccOp = NMcc_LES; + break; + + case Iop_CmpLE32U: + ccOp = NMcc_LEU; + break; + + default: + ccOp = NMcc_INVALID; + break; + } + + if (ccOp != NMcc_INVALID) { + HReg dst = newVRegI(env); + HReg r1 = iselWordExpr_R(env, e->Iex.Binop.arg1); + HReg r2 = iselWordExpr_R(env, e->Iex.Binop.arg2); + addInstr(env, NANOMIPSInstr_Cmp(ccOp, dst, r1, r2)); + return dst; + } + + switch (e->Iex.Binop.op) { + case Iop_MullU8: { + HReg r_dst = newVRegI(env); + HReg r_tmp = newVRegI(env); + HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1); + HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2); + addInstr(env, NANOMIPSInstr_Imm(NMimm_ANDI, r_dst, r_srcL, 0xFF)); + addInstr(env, NANOMIPSInstr_Imm(NMimm_ANDI, r_tmp, r_srcR, 0xFF)); + addInstr(env, 
NANOMIPSInstr_Alu(NMalu_MULU, r_dst, r_dst, r_tmp)); + return r_dst; + } + + case Iop_MullU16: { + HReg r_dst = newVRegI(env); + HReg r_tmp = newVRegI(env); + HReg r_mask = newVRegI(env); + HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1); + HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2); + addInstr(env, NANOMIPSInstr_Imm(NMimm_LI, r_mask, INVALID_HREG, + 0xFFFF)); + addInstr(env, NANOMIPSInstr_Alu(NMalu_AND, r_dst, r_srcL, r_mask)); + addInstr(env, NANOMIPSInstr_Alu(NMalu_AND, r_tmp, r_srcR, r_mask)); + addInstr(env, NANOMIPSInstr_Alu(NMalu_MULU, r_dst, r_dst, r_tmp)); + return r_dst; + } + + case Iop_8HLto16: + case Iop_16HLto32: { + HReg r_dst = newVRegI(env); + HReg r_tmp = newVRegI(env); + HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1); + HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2); + + switch (e->Iex.Binop.op) { + case Iop_8HLto16: + addInstr(env, NANOMIPSInstr_Imm(NMimm_SLL, r_tmp, r_srcL, 8)); + addInstr(env, NANOMIPSInstr_Imm(NMimm_ANDI, r_dst, r_srcR, + 0xFF)); + break; + + case Iop_16HLto32: { + HReg r_mask = newVRegI(env); + addInstr(env, NANOMIPSInstr_Imm(NMimm_LI, r_mask, + INVALID_HREG, 0xFFFF)); + addInstr(env, NANOMIPSInstr_Imm(NMimm_SLL, r_tmp, + r_srcL, 16)); + addInstr(env, NANOMIPSInstr_Alu(NMalu_AND, r_dst, r_srcR, + r_mask)); + } + break; + + default: + vassert(0); + } + + addInstr(env, NANOMIPSInstr_Alu(NMalu_OR, r_dst, r_dst, r_tmp)); + return r_dst; + } + + default: + break; + } + + vex_printf("Unimplemented binop "); + ppIROp(e->Iex.Binop.op); + vpanic("\n"); + + break; + } + + case Iex_Unop: { + IROp op_unop = e->Iex.Unop.op; + + switch (op_unop) { + case Iop_1Sto8: + case Iop_1Sto16: + case Iop_1Sto32: { + HReg r_dst = newVRegI(env); + HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg); + addInstr(env, NANOMIPSInstr_Imm(NMimm_SGN, r_dst, r_src, 1)); + return r_dst; + } + + case Iop_16to8: + case Iop_32to8: { + HReg r_dst = newVRegI(env); + HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg); + addInstr(env, NANOMIPSInstr_Imm(NMimm_SGN, r_dst, r_src, 8)); + return r_dst; + } + + case Iop_32to16: { + HReg r_dst = newVRegI(env); + HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg); + addInstr(env, NANOMIPSInstr_Imm(NMimm_SGN, r_dst, r_src, 16)); + return r_dst; + } + + case Iop_1Uto8: + case Iop_1Uto32: + case Iop_8Sto16: + case Iop_8Sto32: + case Iop_16Sto32: + return iselWordExpr_R(env, e->Iex.Unop.arg); + + case Iop_64to32: { + HReg rHi, rLo; + iselInt64Expr(&rHi, &rLo, env, e->Iex.Unop.arg); + return rLo; + } + + case Iop_64HIto32: { + HReg rHi, rLo; + iselInt64Expr(&rHi, &rLo, env, e->Iex.Unop.arg); + return rHi; + } + + case Iop_32to1: { + HReg r_dst = newVRegI(env); + HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg); + addInstr(env, NANOMIPSInstr_Imm(NMimm_ANDI, r_dst, r_src, 1)); + return r_dst; + } + + case Iop_8Uto16: + case Iop_8Uto32: { + HReg r_dst = newVRegI(env); + HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg); + addInstr(env, NANOMIPSInstr_Imm(NMimm_ANDI, r_dst, r_src, + 0xFF)); + return r_dst; + } + + case Iop_16Uto32: { + HReg r_dst = newVRegI(env); + HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg); + addInstr(env, NANOMIPSInstr_Imm(NMimm_LI, r_dst, INVALID_HREG, + 0xFFFF)); + addInstr(env, NANOMIPSInstr_Alu(NMalu_AND, r_dst, r_dst, r_src)); + return r_dst; + } + + case Iop_Not1: { + HReg r_dst = newVRegI(env); + HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg); + addInstr(env, NANOMIPSInstr_Imm(NMimm_XORI, r_dst, r_src, 1)); + return r_dst; + } + + case Iop_Not8: + case Iop_Not16: + case Iop_Not32: { + HReg r_dst = 
newVRegI(env); + HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg); + addInstr(env, NANOMIPSInstr_Alu(NMalu_NOR, r_dst, r_src, r_src)); + return r_dst; + } + + case Iop_32HIto16: { + HReg r_dst = newVRegI(env); + HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg); + addInstr(env, NANOMIPSInstr_Imm(NMimm_SRA, r_dst, r_src, 16)); + return r_dst; + } + + case Iop_16HIto8: { + HReg r_dst = newVRegI(env); + HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg); + addInstr(env, NANOMIPSInstr_Imm(NMimm_SRA, r_dst, r_src, 8)); + return r_dst; + } + + case Iop_CmpNEZ8: + case Iop_CmpNEZ16: + case Iop_CmpNEZ32: { + HReg r_dst = newVRegI(env); + HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg); + addInstr(env, NANOMIPSInstr_Cmp(NMcc_NE, r_dst, r_src, + Zero)); + return r_dst; + } + + case Iop_CmpwNEZ32: { + HReg r_dst = newVRegI(env); + HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg); + addInstr(env, NANOMIPSInstr_Cmp(NMcc_NE, r_dst, r_src, + Zero)); + addInstr(env, NANOMIPSInstr_Imm(NMimm_SGN, r_dst, r_dst, 1)); + return r_dst; + } + + case Iop_Clz32: { + HReg r_dst = newVRegI(env); + HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg); + addInstr(env, NANOMIPSInstr_Unary(NMun_CLZ, r_dst, r_src)); + return r_dst; + } + + case Iop_Left8: + case Iop_Left16: + case Iop_Left32: { + HReg r_dst = newVRegI(env); + HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg); + addInstr(env, NANOMIPSInstr_Alu(NMalu_SUB, r_dst, Zero, r_src)); + addInstr(env, NANOMIPSInstr_Alu(NMalu_OR, r_dst, r_dst, + r_src)); + return r_dst; + } + + default: + break; + } + + vex_printf("Unimplemented unop "); + ppIROp(e->Iex.Unop.op); + vpanic("\n"); + } + + case Iex_ITE: { + vassert(typeOfIRExpr(env->type_env, e->Iex.ITE.cond) == Ity_I1); + HReg r0 = iselWordExpr_R(env, e->Iex.ITE.iffalse); + HReg r1 = iselWordExpr_R(env, e->Iex.ITE.iftrue); + HReg r_cond = iselWordExpr_R(env, e->Iex.ITE.cond); + HReg r_dst = newVRegI(env); + addInstr(env, mk_iMOVds_RR(r_dst, r0)); + addInstr(env, NANOMIPSInstr_MoveCond(NMMoveCond_movn, r_dst, + r1, r_cond)); + return r_dst; + } + + case Iex_Const: { + HReg r_dst = newVRegI(env); + addInstr(env, NANOMIPSInstr_Imm(NMimm_LI, r_dst, INVALID_HREG, + extractConst(e->Iex.Const.con))); + return r_dst; + } + + case Iex_CCall: { + /* unimplemented yet */ + vassert(0); + } + + default: + break; + } + + ppIRExpr(e); + vpanic("iselWordExpr_R(NANOMIPS): cannot reduce tree"); +} + +/*---------------------------------------------------------*/ +/*--- ISEL: Integer expressions (64 bit) ---*/ +/*---------------------------------------------------------*/ + +/* Compute a 64-bit value into the register pair HI, LO. + HI and LO must not be changed by subsequent code emitted + by the caller. 
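+   (For example, Iop_MullS32 below yields the low word via NMalu_MUL and the
+   high word via NMalu_MUH, each into its own fresh virtual register.)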
*/ +static void iselInt64Expr(HReg * rHi, HReg * rLo, ISelEnv * env, IRExpr * e) +{ + iselInt64Expr_wrk(rHi, rLo, env, e); + vassert(hregClass(*rHi) == HRcInt32); + vassert(hregIsVirtual(*rHi)); + vassert(hregClass(*rLo) == HRcInt32); + vassert(hregIsVirtual(*rLo)); +} + +static void iselInt64Expr_wrk(HReg * rHi, HReg * rLo, ISelEnv * env, + IRExpr * e) +{ + vassert(e); + vassert(typeOfIRExpr(env->type_env, e) == Ity_I64); + + switch (e->tag) { + case Iex_RdTmp: + lookupIRTemp64(rHi, rLo, env, e->Iex.RdTmp.tmp); + return; + + case Iex_Load: { + HReg tLo = newVRegI(env); + HReg tHi = newVRegI(env); + HReg r_addr = iselWordExpr_R(env, e->Iex.Load.addr); + addInstr(env, NANOMIPSInstr_Load(4, tLo, r_addr, 0)); + addInstr(env, NANOMIPSInstr_Load(4, tHi, r_addr, 4)); + *rHi = tHi; + *rLo = tLo; + return; + } + + case Iex_Get: { + HReg tLo = newVRegI(env); + HReg tHi = newVRegI(env); + vassert((e->Iex.Get.offset < 0x1000 - 4) && (e->Iex.Get.offset >= 0)); + addInstr(env, NANOMIPSInstr_Load(4, tLo, GuestStatePointer, + e->Iex.Get.offset)); + addInstr(env, NANOMIPSInstr_Load(4, tHi, GuestStatePointer, + e->Iex.Get.offset + 4)); + *rHi = tHi; + *rLo = tLo; + return; + } + + case Iex_Binop: { + switch (e->Iex.Binop.op) { + case Iop_DivModS32to32: { + HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1); + HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2); + HReg tLo = newVRegI(env); + HReg tHi = newVRegI(env); + addInstr(env, NANOMIPSInstr_Alu(NMalu_DIV, tLo, r_srcL, r_srcR)); + addInstr(env, NANOMIPSInstr_Alu(NMalu_MOD, tHi, r_srcL, r_srcR)); + *rHi = tHi; + *rLo = tLo; + return; + } + + case Iop_DivModU32to32: { + HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1); + HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2); + HReg tLo = newVRegI(env); + HReg tHi = newVRegI(env); + addInstr(env, NANOMIPSInstr_Alu(NMalu_DIVU, tLo, r_srcL, r_srcR)); + addInstr(env, NANOMIPSInstr_Alu(NMalu_MODU, tHi, r_srcL, r_srcR)); + *rHi = tHi; + *rLo = tLo; + return; + } + + case Iop_MullS32: { + HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1); + HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2); + HReg tLo = newVRegI(env); + HReg tHi = newVRegI(env); + addInstr(env, NANOMIPSInstr_Alu(NMalu_MUL, tLo, r_srcL, r_srcR)); + addInstr(env, NANOMIPSInstr_Alu(NMalu_MUH, tHi, r_srcL, r_srcR)); + *rHi = tHi; + *rLo = tLo; + return; + } + + case Iop_MullU32: { + HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1); + HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2); + HReg tLo = newVRegI(env); + HReg tHi = newVRegI(env); + addInstr(env, NANOMIPSInstr_Alu(NMalu_MULU, tLo, r_srcL, r_srcR)); + addInstr(env, NANOMIPSInstr_Alu(NMalu_MUHU, tHi, r_srcL, r_srcR)); + *rHi = tHi; + *rLo = tLo; + return; + } + + case Iop_Shr64: { +#if defined (_MIPSEL) + HReg a0, a1, sa; + HReg a0tmp = newVRegI(env); + HReg a1tmp = newVRegI(env); + HReg a2 = newVRegI(env); + HReg a3 = newVRegI(env); + HReg a4 = newVRegI(env); + + iselInt64Expr(&a1, &a0, env, e->Iex.Binop.arg1); + sa = iselWordExpr_R(env, e->Iex.Binop.arg2); + + /* andi a2, %sa, 0x3f */ + addInstr(env, NANOMIPSInstr_Imm(NMimm_ANDI, a2, sa, 0x3f)); + /* nor a4, zero, a2 */ + addInstr(env, NANOMIPSInstr_Alu(NMalu_NOR, a4, Zero, a2)); + /* sll a3, a1, 1 */ + addInstr(env, NANOMIPSInstr_Imm(NMimm_SLL, a3, a1, 0x1)); + /* sllv a3, a3, a4 */ + addInstr(env, NANOMIPSInstr_Alu(NMalu_SLL, a3, a3, a4)); + /* srlv a0, a0, a2 */ + addInstr(env, NANOMIPSInstr_Alu(NMalu_SRL, a0tmp, a0, a2)); + /* andi a4, a2, 0x20 */ + addInstr(env, NANOMIPSInstr_Imm(NMimm_ANDI, a4, a2, 0x20)); + /* srlv 
a2, a1, a2 */ + addInstr(env, NANOMIPSInstr_Alu(NMalu_SRL, a2, a1, a2)); + /* or a0, a0, a3 */ + addInstr(env, NANOMIPSInstr_Alu(NMalu_OR, a0tmp, a0tmp, a3)); + /* move a1, a2 */ + addInstr(env, mk_iMOVds_RR(a1tmp, a2)); + /* movn a1, zero, a4 */ + addInstr(env, NANOMIPSInstr_MoveCond(NMMoveCond_movn, a1tmp, + Zero, a4)); + /* movn a0, a2, a4 */ + addInstr(env, NANOMIPSInstr_MoveCond(NMMoveCond_movn, a0tmp, + a2, a4)); + + *rHi = a1tmp; + *rLo = a0tmp; + return; +#elif defined (_MIPSEB) + /* 64-bit logical shift right based on what gcc generates: + : + nor v0, zero, a2 + sll a3, a0, 0x1 + sllv a3, a3, v0 + srlv v1, a1, a2 + andi v0, a2, 0x20 + or v1, a3, v1 + srlv a2, a0, a2 + movn v1, a2, v0 + movn a2, zero, v0 + jr ra + move v0, a2 + */ + /* unimplemented yet */ + vassert(0); + +#endif + } + + case Iop_32HLto64: + *rHi = iselWordExpr_R(env, e->Iex.Binop.arg1); + *rLo = iselWordExpr_R(env, e->Iex.Binop.arg2); + + return; + + case Iop_Or64: { + HReg rHi_srcL, rLo_srcL; + HReg rHi_srcR, rLo_srcR; + HReg rHi_dst = newVRegI(env); + HReg rLo_dst = newVRegI(env); + iselInt64Expr(&rHi_srcL, &rLo_srcL, env, e->Iex.Binop.arg1); + iselInt64Expr(&rHi_srcR, &rLo_srcR, env, e->Iex.Binop.arg2); + addInstr(env, NANOMIPSInstr_Alu(NMalu_OR, rHi_dst, rHi_srcL, + rHi_srcR)); + addInstr(env, NANOMIPSInstr_Alu(NMalu_OR, rLo_dst, rLo_srcL, + rLo_srcR)); + *rHi = rHi_dst; + *rLo = rLo_dst; + + return; + } + + default: + break; + } + + vex_printf("Unimplemented binop "); + ppIROp(e->Iex.Binop.op); + vpanic("\n"); + + break; + } + + case Iex_Unop: { + switch (e->Iex.Unop.op) { + case Iop_1Sto64: { + HReg rHi_dst = newVRegI(env); + HReg rLo_dst = newVRegI(env); + HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg); + addInstr(env, NANOMIPSInstr_Imm(NMimm_SGN, rLo_dst, r_src, 1)); + addInstr(env, mk_iMOVds_RR(rHi_dst, rLo_dst)); + *rHi = rHi_dst; + *rLo = rLo_dst; + return; + } + + default: + break; + } + + vex_printf("Unimplemented unop "); + ppIROp(e->Iex.Unop.op); + vpanic("\n"); + + break; + } + + default: + break; + } + + ppIRExpr(e); + vpanic("iselInt64Expr(NANOMIPS): cannot reduce tree"); +} + +/*---------------------------------------------------------*/ +/*--- ISEL: Statements ---*/ +/*---------------------------------------------------------*/ +static void iselStmt(ISelEnv * env, IRStmt * stmt) +{ + if (vex_traceflags & VEX_TRACE_VCODE) { + vex_printf("\n-- "); + ppIRStmt(stmt); + vex_printf("\n"); + } + + switch (stmt->tag) { + case Ist_Store: { + IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data); + HReg r_addr = iselWordExpr_R(env, stmt->Ist.Store.addr); + + if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32) { + HReg r_src = iselWordExpr_R(env, stmt->Ist.Store.data); + addInstr(env, NANOMIPSInstr_Store(sizeofIRType(tyd), + r_addr, 0, r_src)); + return; + } else if (tyd == Ity_I64) { + HReg vHi, vLo; + iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Store.data); + addInstr(env, NANOMIPSInstr_Store(4, r_addr, 0, vLo)); + addInstr(env, NANOMIPSInstr_Store(4, r_addr, 4, vHi)); + return; + } + + break; + } + + case Ist_Put: { + IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data); + vassert(stmt->Ist.Put.offset >= 0); + + if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) { + HReg r_src = iselWordExpr_R(env, stmt->Ist.Put.data); + vassert(stmt->Ist.Put.offset < 0x1000); + addInstr(env, NANOMIPSInstr_Store(sizeofIRType(ty), + GuestStatePointer, + stmt->Ist.Put.offset, r_src)); + return; + } else if (ty == Ity_I64) { + HReg vHi, vLo; + vassert(stmt->Ist.Put.offset < 0x1000 - 4); + 
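For Ity_I64 data, the two 32-bit halves produced by iselInt64Expr are written back with a pair of 4-byte stores at offset and offset + 4, matching the Ist_Store case above. A minimal standalone sketch of that layout (helper name invented here; little-endian byte order assumed), not code from the patch:

    #include <stdint.h>
    #include <string.h>

    /* Illustration only: a 64-bit value held as a (hi, lo) pair of 32-bit
       words is written as two 4-byte stores, the low word at 'offset' and
       the high word at 'offset + 4'. */
    static void put64_as_two_words(uint8_t *guest_state, uint32_t offset,
                                   uint32_t lo, uint32_t hi)
    {
        memcpy(guest_state + offset,     &lo, sizeof lo);  /* Store(4, GSP, offset,     vLo) */
        memcpy(guest_state + offset + 4, &hi, sizeof hi);  /* Store(4, GSP, offset + 4, vHi) */
    }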
iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Put.data); + addInstr(env, NANOMIPSInstr_Store(4, GuestStatePointer, + stmt->Ist.Put.offset, vLo)); + addInstr(env, NANOMIPSInstr_Store(4, GuestStatePointer, + stmt->Ist.Put.offset + 4, + vHi)); + return; + } + + break; + } + + case Ist_WrTmp: { + IRTemp tmp = stmt->Ist.WrTmp.tmp; + IRType ty = typeOfIRTemp(env->type_env, tmp); + + if (ty == Ity_I1 || ty == Ity_I8 || ty == Ity_I16 || + ty == Ity_I32) { + HReg r_dst = lookupIRTemp(env, tmp); + HReg r_src = iselWordExpr_R(env, stmt->Ist.WrTmp.data); + addInstr(env, mk_iMOVds_RR(r_dst, r_src)); + return; + } else if (ty == Ity_I64) { + HReg rHi, rLo, dstHi, dstLo; + iselInt64Expr(&rHi, &rLo, env, stmt->Ist.WrTmp.data); + lookupIRTemp64(&dstHi, &dstLo, env, tmp); + addInstr(env, mk_iMOVds_RR(dstHi, rHi)); + addInstr(env, mk_iMOVds_RR(dstLo, rLo)); + return; + } + + break; + } + + case Ist_Dirty: { + IRDirty *d = stmt->Ist.Dirty.details; + IRType retty = Ity_INVALID; + + if (d->tmp != IRTemp_INVALID) + retty = typeOfIRTemp(env->type_env, d->tmp); + + vassert((retty == Ity_INVALID) || + (retty == Ity_I32) || + (retty == Ity_I64) || + (retty == Ity_I8) || + (retty == Ity_I16)); + + /* Marshal args, do the call, clear stack, set the return value + to 0x555..555 if this is a conditional call that returns a + value and the call is skipped. */ + RetLoc rloc = mk_RetLoc_INVALID(); + doHelperCall(&rloc, env, d->guard, d->cee, retty, d->args); + vassert(is_sane_RetLoc(rloc)); + + /* Now figure out what to do with the returned value, if any. */ + switch (retty) { + case Ity_INVALID: { + vassert(d->tmp == IRTemp_INVALID); + vassert(rloc.pri == RLPri_None); + return; + } + + case Ity_I32: + case Ity_I16: + case Ity_I8: { + HReg r_dst = lookupIRTemp(env, d->tmp); + vassert(rloc.pri == RLPri_Int); + addInstr(env, mk_iMOVds_RR(r_dst, hregNANOMIPS_GPR4())); + return; + } + + case Ity_I64: { + HReg rHi = newVRegI(env); + HReg rLo = newVRegI(env); + HReg dstHi, dstLo; + vassert(rloc.pri == RLPri_2Int); + addInstr(env, mk_iMOVds_RR(rLo, hregNANOMIPS_GPR4())); + addInstr(env, mk_iMOVds_RR(rHi, hregNANOMIPS_GPR5())); + lookupIRTemp64(&dstHi, &dstLo, env, d->tmp); + addInstr(env, mk_iMOVds_RR(dstHi, rHi)); + addInstr(env, mk_iMOVds_RR(dstLo, rLo)); + return; + } + + default: + vassert(0); + } + + break; + } + + case Ist_LLSC: { + IRTemp res = stmt->Ist.LLSC.result; + IRType tyAddr = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.addr); + + if (tyAddr != Ity_I32) + break; + + if (stmt->Ist.LLSC.storedata == NULL) { + /* LL */ + HReg r_addr = iselWordExpr_R(env, stmt->Ist.LLSC.addr); + HReg r_dst = lookupIRTemp(env, res); + + addInstr(env, NANOMIPSInstr_LoadL(4, r_dst, r_addr, 0)); + return; + } else { + /* SC */ + HReg r_addr = iselWordExpr_R(env, stmt->Ist.LLSC.addr); + HReg r_src = iselWordExpr_R(env, stmt->Ist.LLSC.storedata); + HReg r_dst = lookupIRTemp(env, res); + + addInstr(env, mk_iMOVds_RR(r_dst, r_src)); + addInstr(env, NANOMIPSInstr_StoreC(4, r_addr, 0, r_dst)); + return; + } + break; + /* NOTREACHED */} + case Ist_CAS: + if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) { + IRCAS *cas = stmt->Ist.CAS.details; + HReg old = lookupIRTemp(env, cas->oldLo); + HReg addr = iselWordExpr_R(env, cas->addr); + HReg expd = iselWordExpr_R(env, cas->expdLo); + HReg data = iselWordExpr_R(env, cas->dataLo); + vassert(typeOfIRTemp(env->type_env, cas->oldLo) == Ity_I32); + addInstr(env, NANOMIPSInstr_Cas(4, old, old, addr, expd, expd, data, data)); + } + else { + IRCAS *cas = stmt->Ist.CAS.details; + HReg oldHi = lookupIRTemp(env, 
cas->oldHi); + HReg oldLo = lookupIRTemp(env, cas->oldLo); + HReg addr = iselWordExpr_R(env, cas->addr); + HReg expdHi = iselWordExpr_R(env, cas->expdHi); + HReg expdLo = iselWordExpr_R(env, cas->expdLo); + HReg dataHi = iselWordExpr_R(env, cas->dataHi); + HReg dataLo = iselWordExpr_R(env, cas->dataLo); + vassert(typeOfIRTemp(env->type_env, cas->oldLo) == Ity_I32); + addInstr(env, NANOMIPSInstr_Cas(8, oldLo, oldHi, addr, + expdLo, expdHi, dataLo, dataHi)); + } + return; + + case Ist_IMark: + case Ist_AbiHint: + case Ist_NoOp: + return; + + case Ist_Exit: { + Addr dst = extractConst(stmt->Ist.Exit.dst); + HReg cond = iselWordExpr_R(env, stmt->Ist.Exit.guard); + + switch (stmt->Ist.Exit.jk) { + /* case Ijk_Ret: */ + case Ijk_Boring: + case Ijk_Call: { + vassert(stmt->Ist.Exit.offsIP >= 0); + vassert(stmt->Ist.Exit.offsIP <= 0x1000); + + if (env->chainingAllowed) { + Bool toFastEP = (dst > (Addr)env->max_ga); + addInstr(env, NANOMIPSInstr_XDirect(dst, GuestStatePointer, + stmt->Ist.Exit.offsIP, + cond, toFastEP)); + } else { + HReg r = newVRegI(env); + addInstr(env, NANOMIPSInstr_Imm(NMimm_LI, r, INVALID_HREG, + dst)); + addInstr(env, NANOMIPSInstr_XAssisted(r, GuestStatePointer, + stmt->Ist.Exit.offsIP, + cond, Ijk_Boring)); + } + + return; + } + + case Ijk_ClientReq: + case Ijk_EmFail: + case Ijk_EmWarn: + case Ijk_NoDecode: + case Ijk_NoRedir: + case Ijk_SigBUS: + case Ijk_Yield: + case Ijk_SigTRAP: + case Ijk_SigFPE_IntDiv: + case Ijk_SigFPE_IntOvf: + case Ijk_Sys_syscall: + case Ijk_InvalICache: { + HReg r = newVRegI(env); + addInstr(env, NANOMIPSInstr_Imm(NMimm_LI, r, INVALID_HREG, + dst)); + vassert(stmt->Ist.Exit.offsIP >= 0); + vassert(stmt->Ist.Exit.offsIP <= 0x1000); + addInstr(env, NANOMIPSInstr_XAssisted(r, GuestStatePointer, + stmt->Ist.Exit.offsIP, + cond, stmt->Ist.Exit.jk)); + return; + } + + default: + vassert(0); + }; + + break; + } + + default: + break; + } + + vex_printf("stmt_fail tag: 0x%x\n", stmt->tag); + ppIRStmt(stmt); + vpanic("iselStmt:\n"); +} + + +/*---------------------------------------------------------*/ +/*--- ISEL: Basic block terminators (Nexts) ---*/ +/*---------------------------------------------------------*/ +static void iselNext(ISelEnv * env, + IRExpr * next, IRJumpKind jk, Int offsIP) +{ + if (vex_traceflags & VEX_TRACE_VCODE) { + vex_printf( "\n-- PUT(%d) = ", offsIP); + ppIRExpr( next ); + vex_printf( "; exit-"); + ppIRJumpKind(jk); + vex_printf( "\n"); + } + + /* Case: boring transfer to known address */ + if (next->tag == Iex_Const) { + IRConst* cdst = next->Iex.Const.con; + vassert(cdst->tag == Ico_U32); + + if (jk == Ijk_Boring || jk == Ijk_Call) { + vassert(offsIP >= 0); + vassert(offsIP < 0x1000); + + /* Boring transfer to known address */ + if (env->chainingAllowed) { + /* .. almost always true .. */ + /* Skip the event check at the dst if this is a forwards + edge. */ + Bool toFastEP + = (((Addr32)cdst->Ico.U32) > (Addr32)env->max_ga); + addInstr(env, NANOMIPSInstr_XDirect((Addr)cdst->Ico.U32, + GuestStatePointer, offsIP, + INVALID_HREG, toFastEP)); + } else { + /* .. very occasionally .. */ + /* We can't use chaining, so ask for an assisted transfer, + as that's the only alternative that is allowable. 
*/ + HReg r = iselWordExpr_R(env, next); + addInstr(env, NANOMIPSInstr_XAssisted(r, GuestStatePointer, offsIP, + INVALID_HREG, Ijk_Boring)); + } + + return; + } + } + + /* Case: call/return (==boring) transfer to any address */ + switch (jk) { + case Ijk_Boring: + case Ijk_Ret: + case Ijk_Call: { + HReg r = iselWordExpr_R(env, next); + vassert(offsIP >= 0); + vassert(offsIP < 0x1000); + + if (env->chainingAllowed) { + addInstr(env, NANOMIPSInstr_XIndir(r, GuestStatePointer, offsIP, + INVALID_HREG)); + } else { + addInstr(env, NANOMIPSInstr_XAssisted(r, GuestStatePointer, offsIP, + INVALID_HREG, Ijk_Boring)); + } + + return; + } + + default: + break; + } + + /* Case: assisted transfer to arbitrary address */ + switch (jk) { + /* Keep this list in sync with that for Ist_Exit above */ + case Ijk_ClientReq: + case Ijk_EmFail: + case Ijk_EmWarn: + case Ijk_NoDecode: + case Ijk_NoRedir: + case Ijk_SigBUS: + case Ijk_SigILL: + case Ijk_SigTRAP: + case Ijk_SigFPE_IntDiv: + case Ijk_SigFPE_IntOvf: + case Ijk_Sys_syscall: + case Ijk_InvalICache: { + HReg r = iselWordExpr_R(env, next); + vassert(offsIP >= 0); + vassert(offsIP < 0x1000); + addInstr(env, NANOMIPSInstr_XAssisted(r, GuestStatePointer, + offsIP, INVALID_HREG, jk)); + return; + } + + default: + break; + } + + vex_printf("\n-- PUT(%d) = ", offsIP); + ppIRExpr(next ); + vex_printf("; exit-"); + ppIRJumpKind(jk); + vex_printf("\n"); + vassert(0); /* are we expecting any other kind? */ +} + +/*---------------------------------------------------------*/ +/*--- Insn selector top-level ---*/ +/*---------------------------------------------------------*/ + +/* Translate an entire BB to NANOMIPS code. */ +HInstrArray *iselSB_NANOMIPS(const IRSB * bb, + VexArch arch_host, + const VexArchInfo * archinfo_host, + const VexAbiInfo * vbi, + Int offs_Host_EvC_Counter, + Int offs_Host_EvC_FailAddr, + Bool chainingAllowed, + Bool addProfInc, + Addr max_ga) +{ + Int i, j; + HReg hreg, hregHI; + ISelEnv *env; + hwcaps_host = archinfo_host->hwcaps; + /* sanity ... */ + vassert(arch_host == VexArchNANOMIPS); + /* Check that the host's endianness is as expected. */ + vassert(archinfo_host->endness == VexEndnessLE + || archinfo_host->endness == VexEndnessBE); + /* Make up an initial environment to use. */ + env = LibVEX_Alloc_inline(sizeof(ISelEnv)); + env->vreg_ctr = 0; + /* Set up output code array. */ + env->code = newHInstrArray(); + /* Copy BB's type env. */ + env->type_env = bb->tyenv; + /* Make up an IRTemp -> virtual HReg mapping. This doesn't + change as we go along. */ + env->n_vregmap = bb->tyenv->types_used; + env->vregmap = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg)); + env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg)); + env->hwcaps = hwcaps_host; + env->chainingAllowed = chainingAllowed; + env->max_ga = max_ga; + /* For each IR temporary, allocate a suitably-kinded virtual + register. */ + j = 0; + + for (i = 0; i < env->n_vregmap; i++) { + hregHI = hreg = INVALID_HREG; + + switch (bb->tyenv->types[i]) { + case Ity_I1: + case Ity_I8: + case Ity_I16: + case Ity_I32: + hreg = mkHReg(True, HRcInt32, 0, j++); + break; + + case Ity_I64: + hreg = mkHReg(True, HRcInt32, 0, j++); + hregHI = mkHReg(True, HRcInt32, 0, j++); + break; + + default: + ppIRType(bb->tyenv->types[i]); + vpanic("iselBB(nanomips): IRTemp type"); + break; + } + + env->vregmap[i] = hreg; + env->vregmapHI[i] = hregHI; + } + + env->vreg_ctr = j; + /* The very first instruction must be an event check. 
*/ + vassert(offs_Host_EvC_Counter >= 0); + vassert(offs_Host_EvC_FailAddr >= 0); + vassert(offs_Host_EvC_Counter < 0x1000); + vassert(offs_Host_EvC_FailAddr < 0x1000); + addInstr(env, NANOMIPSInstr_EvCheck(GuestStatePointer, + offs_Host_EvC_Counter, + GuestStatePointer, + offs_Host_EvC_FailAddr)); + + /* Possibly a block counter increment (for profiling). At this + point we don't know the address of the counter, so just pretend + it is zero. It will have to be patched later, but before this + translation is used, by a call to LibVEX_patchProfCtr. */ + if (addProfInc) { + addInstr(env, NANOMIPSInstr_ProfInc()); + } + + /* Ok, finally we can iterate over the statements. */ + for (i = 0; i < bb->stmts_used; i++) + iselStmt(env, bb->stmts[i]); + + iselNext(env, bb->next, bb->jumpkind, bb->offsIP); + /* record the number of vregs we used. */ + env->code->n_vregs = env->vreg_ctr; + return env->code; +} + +/*---------------------------------------------------------------*/ +/*--- end host_nanomips_isel.c ---*/ +/*---------------------------------------------------------------*/ diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c index 3cfe8c1cb2..eb77bfd6a0 100644 --- a/VEX/priv/main_main.c +++ b/VEX/priv/main_main.c @@ -56,6 +56,7 @@ #include "host_arm64_defs.h" #include "host_s390_defs.h" #include "host_mips_defs.h" +#include "host_nanomips_defs.h" #include "guest_generic_bb_to_IR.h" #include "guest_x86_defs.h" @@ -65,6 +66,7 @@ #include "guest_ppc_defs.h" #include "guest_s390_defs.h" #include "guest_mips_defs.h" +#include "guest_nanomips_defs.h" #include "host_generic_simd128.h" @@ -153,6 +155,13 @@ #define MIPS64ST(f) vassert(0) #endif +#if defined(VGA_nanomips) || defined(VEXMULTIARCH) +#define NANOMIPSFN(f) f +#define NANOMIPSST(f) f +#else +#define NANOMIPSFN(f) NULL +#define NANOMIPSST(f) vassert(0) +#endif /* This file contains the top level interface to the library. 
*/ @@ -518,6 +527,24 @@ IRSB* LibVEX_FrontEnd ( /*MOD*/ VexTranslateArgs* vta, vassert(sizeof( ((VexGuestMIPS64State*)0)->guest_NRADDR ) == 8); break; + case VexArchNANOMIPS: + preciseMemExnsFn + = NANOMIPSFN(guest_mips32_state_requires_precise_mem_exns); + disInstrFn = NANOMIPSFN(disInstr_nanoMIPS); + specHelper = NANOMIPSFN(guest_mips32_spechelper); + guest_layout = NANOMIPSFN(&mips32Guest_layout); + offB_CMSTART = offsetof(VexGuestMIPS32State,guest_CMSTART); + offB_CMLEN = offsetof(VexGuestMIPS32State,guest_CMLEN); + offB_GUEST_IP = offsetof(VexGuestMIPS32State,guest_PC); + szB_GUEST_IP = sizeof( ((VexGuestMIPS32State*)0)->guest_PC ); + vassert(vta->archinfo_guest.endness == VexEndnessLE + || vta->archinfo_guest.endness == VexEndnessBE); + vassert(0 == sizeof(VexGuestMIPS32State) % LibVEX_GUEST_STATE_ALIGN); + vassert(sizeof( ((VexGuestMIPS32State*)0)->guest_CMSTART) == 4); + vassert(sizeof( ((VexGuestMIPS32State*)0)->guest_CMLEN ) == 4); + vassert(sizeof( ((VexGuestMIPS32State*)0)->guest_NRADDR ) == 4); + break; + default: vpanic("LibVEX_Translate: unsupported guest insn set"); } @@ -843,6 +870,14 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, offB_HOST_EvC_FAILADDR = offsetof(VexGuestMIPS64State,host_EvC_FAILADDR); break; + case VexArchNANOMIPS: + preciseMemExnsFn + = NANOMIPSFN(guest_mips32_state_requires_precise_mem_exns); + guest_sizeB = sizeof(VexGuestMIPS32State); + offB_HOST_EvC_COUNTER = offsetof(VexGuestMIPS32State,host_EvC_COUNTER); + offB_HOST_EvC_FAILADDR = offsetof(VexGuestMIPS32State,host_EvC_FAILADDR); + break; + default: vpanic("LibVEX_Codegen: unsupported guest insn set"); } @@ -1000,6 +1035,23 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, || vta->archinfo_host.endness == VexEndnessBE); break; + case VexArchNANOMIPS: + mode64 = False; + rRegUniv = NANOMIPSFN(getRRegUniverse_NANOMIPS(mode64)); + getRegUsage + = CAST_TO_TYPEOF(getRegUsage) NANOMIPSFN(getRegUsage_NANOMIPSInstr); + mapRegs = CAST_TO_TYPEOF(mapRegs) NANOMIPSFN(mapRegs_NANOMIPSInstr); + genSpill = CAST_TO_TYPEOF(genSpill) NANOMIPSFN(genSpill_NANOMIPS); + genReload = CAST_TO_TYPEOF(genReload) NANOMIPSFN(genReload_NANOMIPS); + genMove = CAST_TO_TYPEOF(genMove) NANOMIPSFN(genMove_NANOMIPS); + ppInstr = CAST_TO_TYPEOF(ppInstr) NANOMIPSFN(ppNANOMIPSInstr); + ppReg = CAST_TO_TYPEOF(ppReg) NANOMIPSFN(ppHRegNANOMIPS); + iselSB = NANOMIPSFN(iselSB_NANOMIPS); + emit = CAST_TO_TYPEOF(emit) NANOMIPSFN(emit_NANOMIPSInstr); + vassert(vta->archinfo_host.endness == VexEndnessLE + || vta->archinfo_host.endness == VexEndnessBE); + break; + default: vpanic("LibVEX_Translate: unsupported host insn set"); } @@ -1240,6 +1292,11 @@ VexInvalRange LibVEX_Chain ( VexArch arch_host, place_to_chain, disp_cp_chain_me_EXPECTED, place_to_jump_to, True/*!mode64*/)); + case VexArchNANOMIPS: + NANOMIPSST(return chainXDirect_NANOMIPS(endness_host, + place_to_chain, + disp_cp_chain_me_EXPECTED, + place_to_jump_to)); default: vassert(0); } @@ -1297,6 +1354,11 @@ VexInvalRange LibVEX_UnChain ( VexArch arch_host, place_to_unchain, place_to_jump_to_EXPECTED, disp_cp_chain_me, True/*!mode64*/)); + case VexArchNANOMIPS: + NANOMIPSST(return unchainXDirect_NANOMIPS(endness_host, + place_to_unchain, + place_to_jump_to_EXPECTED, + disp_cp_chain_me)); default: vassert(0); } @@ -1325,6 +1387,8 @@ Int LibVEX_evCheckSzB ( VexArch arch_host ) MIPS32ST(cached = evCheckSzB_MIPS()); break; case VexArchMIPS64: MIPS64ST(cached = evCheckSzB_MIPS()); break; + case VexArchNANOMIPS: + NANOMIPSST(cached = evCheckSzB_NANOMIPS()); break; 
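LibVEX_evCheckSzB only reports the encoded size of the event check that NANOMIPSInstr_EvCheck plants at the head of each translation. Conceptually, that check decrements a counter kept in the guest state and diverts to a fail address once the counter goes negative, so the scheduler regains control periodically. A rough standalone model of the idea (the struct is a stand-in for the corresponding VexGuestMIPS32State fields; this is not the emitted nanoMIPS code):

    #include <stdint.h>

    typedef struct {
        int32_t  host_EvC_COUNTER;    /* decremented on each translation entry */
        uint32_t host_EvC_FAILADDR;   /* where to go when the budget runs out  */
    } EvCheckSlots;

    /* Returns the address to continue at: the translation body, or the
       dispatcher's fail address once the event counter is exhausted. */
    static uint32_t event_check(EvCheckSlots *gs, uint32_t translation_body)
    {
        if (--gs->host_EvC_COUNTER < 0)
            return gs->host_EvC_FAILADDR;
        return translation_body;
    }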
default: vassert(0); } @@ -1365,6 +1429,9 @@ VexInvalRange LibVEX_PatchProfInc ( VexArch arch_host, case VexArchMIPS64: MIPS64ST(return patchProfInc_MIPS(endness_host, place_to_patch, location_of_counter, True/*!mode64*/)); + case VexArchNANOMIPS: + NANOMIPSST(return patchProfInc_NANOMIPS(endness_host, place_to_patch, + location_of_counter)); default: vassert(0); } @@ -1447,6 +1514,7 @@ const HChar* LibVEX_ppVexArch ( VexArch arch ) case VexArchS390X: return "S390X"; case VexArchMIPS32: return "MIPS32"; case VexArchMIPS64: return "MIPS64"; + case VexArchNANOMIPS: return "NANOMIPS"; default: return "VexArch???"; } } @@ -1507,6 +1575,7 @@ static IRType arch_word_size (VexArch arch) { case VexArchX86: case VexArchARM: case VexArchMIPS32: + case VexArchNANOMIPS: case VexArchPPC32: return Ity_I32; @@ -2073,6 +2142,11 @@ static void check_hwcaps ( VexArch arch, UInt hwcaps ) invalid_hwcaps(arch, hwcaps, "Unsupported baseline\n"); } + case VexArchNANOMIPS: + if (hwcaps == 0) + return; + invalid_hwcaps(arch, hwcaps, "Unsupported baseline\n"); + default: vpanic("unknown architecture"); } diff --git a/VEX/pub/libvex.h b/VEX/pub/libvex.h index 9337a7c329..1d1979cb5d 100644 --- a/VEX/pub/libvex.h +++ b/VEX/pub/libvex.h @@ -58,7 +58,8 @@ typedef VexArchPPC64, VexArchS390X, VexArchMIPS32, - VexArchMIPS64 + VexArchMIPS64, + VexArchNANOMIPS, } VexArch; diff --git a/VEX/pub/libvex_basictypes.h b/VEX/pub/libvex_basictypes.h index 745b8d3df8..e3f1485d50 100644 --- a/VEX/pub/libvex_basictypes.h +++ b/VEX/pub/libvex_basictypes.h @@ -194,6 +194,10 @@ typedef unsigned long HWord; # define VEX_HOST_WORDSIZE 4 # define VEX_REGPARM(_n) /* */ +#elif defined(__nanomips__) && (__nanomips != 64) +# define VEX_HOST_WORDSIZE 4 +# define VEX_REGPARM(_n) /* */ + #else # error "Vex: Fatal: Can't establish the host architecture" #endif diff --git a/VEX/pub/libvex_guest_mips32.h b/VEX/pub/libvex_guest_mips32.h index 53efd1f313..29202a0760 100644 --- a/VEX/pub/libvex_guest_mips32.h +++ b/VEX/pub/libvex_guest_mips32.h @@ -190,6 +190,9 @@ typedef /* 1016 */ UInt guest_MSACSR; /* 1020 */ UInt _padding3; + + /* 1020 */ ULong guest_LLdata64; + /* 1028 */ ULong _padding4; } VexGuestMIPS32State; /*---------------------------------------------------------------*/ /*--- Utility functions for MIPS32 guest stuff. ---*/
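As a closing illustration of how this 32-bit backend decomposes 64-bit operations, the function below mirrors the instruction sequence selected for Iop_Shr64 in host_nanomips_isel.c above (variable names follow the register roles in its comments: a0/lo is the low word, a1/hi the high word, a2 the masked shift amount). It is a reading aid, not code taken from the patch:

    #include <stdint.h>

    /* 64-bit logical right shift built from 32-bit shifts only,
       valid for shift amounts 0..63. */
    static uint64_t shr64_via_32bit_ops(uint32_t lo, uint32_t hi, uint32_t sa)
    {
        uint32_t a2     = sa & 0x3f;        /* andi a2, sa, 0x3f  */
        uint32_t a4     = ~a2;              /* nor  a4, zero, a2  */
        uint32_t a3     = hi << 1;          /* sll  a3, a1, 1     */
        a3            <<= (a4 & 31);        /* sllv a3, a3, a4    */
        uint32_t lo_out = lo >> (a2 & 31);  /* srlv a0, a0, a2    */
        uint32_t wide   = a2 & 0x20;        /* andi a4, a2, 0x20  */
        uint32_t hi_shr = hi >> (a2 & 31);  /* srlv a2, a1, a2    */
        lo_out         |= a3;               /* or   a0, a0, a3    */
        uint32_t hi_out = hi_shr;           /* move a1, a2        */
        if (wide) {                         /* shift is 32..63    */
            hi_out = 0;                     /* movn a1, zero, a4  */
            lo_out = hi_shr;                /* movn a0, a2, a4    */
        }
        return ((uint64_t)hi_out << 32) | lo_out;
    }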