From e0a5f54cf991c373879f7450c255c7c9c3d0c967 Mon Sep 17 00:00:00 2001
From: Julian Seward <jseward@acm.org>
Date: Thu, 29 Sep 2005 11:09:56 +0000
Subject: [PATCH] Make ppc32-linux build again following aspacem merge. 
 Doesn't work, though: programs crash before reaching main.  I don't know why.

git-svn-id: svn://svn.valgrind.org/valgrind/trunk@4819
---
 coregrind/m_debugger.c                      |   1 +
 coregrind/m_dispatch/dispatch-ppc32.S       |   4 +-
 coregrind/m_libcproc.c                      |   2 +-
 coregrind/m_machine.c                       |  39 +-
 coregrind/m_main.c                          |  42 +-
 coregrind/m_sigframe/sigframe-ppc32-linux.c |  11 +-
 coregrind/m_syswrap/syswrap-ppc32-linux.c   | 457 +++-----------------
 coregrind/m_trampoline.S                    |  56 ++-
 coregrind/m_translate.c                     |   4 +-
 coregrind/pub_core_machine.h                |   2 +-
 coregrind/vki_unistd-ppc32-linux.h          |   2 +-
 11 files changed, 165 insertions(+), 455 deletions(-)

diff --git a/coregrind/m_debugger.c b/coregrind/m_debugger.c
index 6b46355db0..5ead4ce2f6 100644
--- a/coregrind/m_debugger.c
+++ b/coregrind/m_debugger.c
@@ -36,6 +36,7 @@
 #include "pub_core_libcprint.h"
 #include "pub_core_libcproc.h"
 #include "pub_core_libcsignal.h"
+#include "pub_core_libcassert.h"   // I_die_here
 #include "pub_core_options.h"
 
 #define WIFSTOPPED(status) (((status) & 0xff) == 0x7f)
diff --git a/coregrind/m_dispatch/dispatch-ppc32.S b/coregrind/m_dispatch/dispatch-ppc32.S
index 501e1efe46..78ac228db5 100644
--- a/coregrind/m_dispatch/dispatch-ppc32.S
+++ b/coregrind/m_dispatch/dispatch-ppc32.S
@@ -92,8 +92,8 @@ VG_(run_innerloop):
 
 	/* set host AltiVec control word to the default mode expected 
 	   by VEX-generated code. */
-        lis     3,VG_(have_altivec_ppc)@ha
-        lwz     3,VG_(have_altivec_ppc)@l(3)
+        lis     3,VG_(have_altivec_ppc32)@ha
+        lwz     3,VG_(have_altivec_ppc32)@l(3)
         cmplwi  3,0
         beq     L1
         /* generate vector {0x0,0x0,0x0,0x00010000} */
diff --git a/coregrind/m_libcproc.c b/coregrind/m_libcproc.c
index 424cd6382a..c851fc2c0a 100644
--- a/coregrind/m_libcproc.c
+++ b/coregrind/m_libcproc.c
@@ -397,7 +397,7 @@ Int VG_(getegid) ( void )
    platform. */
 Int VG_(getgroups)( Int size, UInt* list )
 {
-#  if defined(VGP_x86_linux)
+#  if defined(VGP_x86_linux) || defined(VGP_ppc32_linux)
    Int    i;
    SysRes sres;
    UShort list16[32];
diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c
index 79955aec21..f143f00c50 100644
--- a/coregrind/m_machine.c
+++ b/coregrind/m_machine.c
@@ -215,29 +215,30 @@ Bool VG_(thread_stack_next)(ThreadId* tid, Addr* stack_min, Addr* stack_max)
 // Architecture specifics
 
 #if defined(VGA_ppc32)
-// PPC: what is the cache line size (for dcbz etc) ?  This info is
-// harvested on Linux at startup from the AT_SYSINFO entries.  0 means
-// not-yet-set.
+/* PPC: what is the cache line size (for dcbz etc) ?  This info is
+   harvested on Linux at startup from the AT_SYSINFO entries.  0 means
+   not-yet-set. */
 Int VG_(cache_line_size_ppc32) = 0;
-// Altivec enabled?  Harvested on startup from the AT_HWCAP entry
-Int VG_(have_altivec_ppc) = 0;
+
+/* Altivec enabled?  Harvested on startup from the AT_HWCAP entry. */
+Int VG_(have_altivec_ppc32) = 0;
 #endif
 
-// X86: set to 1 if the host is able to do {ld,st}mxcsr (load/store
-// the SSE control/status register.  For most modern CPUs this will be
-// 1.  It is set to 1, if possible, by m_translate.getArchAndArchInfo.
-// The value is read by m_dispatch.dispatch-x86.S, which is why it
-// is an Int rather than a Bool.
-//
-// Ugly hack: this has to start as 0 and be set to 1 in the normal
-// case, rather than the other way round, because the dispatch
-// loop needs it, and it runs before the first translation is 
-// made.  Yet it is the act of making that first translation which
-// causes getArchAndArchInfo to set this value to its final value.
-// So it is necessary to start this value off at 0 as only that
-// guarantees that the dispatch loop will not SIGILL on its first
-// attempt.
+
 #if defined(VGA_x86)
+/* X86: set to 1 if the host is able to do {ld,st}mxcsr (load/store
+   the SSE control/status register).  For most modern CPUs this will be
+   1.  It is set to 1, if possible, by m_translate.getArchAndArchInfo.
+   The value is read by m_dispatch.dispatch-x86.S, which is why it is
+   an Int rather than a Bool.
+
+   Ugly hack: this has to start as 0 and be set to 1 in the normal
+   case, rather than the other way round, because the dispatch loop
+   needs it, and it runs before the first translation is made.  Yet it
+   is the act of making that first translation which causes
+   getArchAndArchInfo to set this value to its final value.  So it is
+   necessary to start this value off at 0 as only that guarantees that
+   the dispatch loop will not SIGILL on its first attempt. */
 Int VG_(have_mxcsr_x86) = 0;
 #endif
 
diff --git a/coregrind/m_main.c b/coregrind/m_main.c
index 8ba61143b1..66beba3278 100644
--- a/coregrind/m_main.c
+++ b/coregrind/m_main.c
@@ -142,7 +142,8 @@ static HChar** setup_client_env ( HChar** origenv, const HChar* toolname)
    }
    VG_(free)(preload_tool_path);
 
-   VG_(debugLog)(1, "main", "preload_string = %s\n", preload_string);
+   VG_(debugLog)(2, "main", "preload_string:\n");
+   VG_(debugLog)(2, "main", "  \"%s\"\n", preload_string);
 
    /* Count the original size of the env */
    envc = 0;
@@ -546,11 +547,11 @@ Addr setup_client_stack( void*  init_sp,
          case AT_HWCAP:
 #           if defined(VGP_ppc32_linux)
             /* Acquire altivecness info */
-            VG_(debugLog)(1, "main", "PPC32 hwcaps: 0x%x\n", 
+            VG_(debugLog)(2, "main", "PPC32 hwcaps: 0x%x\n", 
                                      (UInt)auxv->u.a_val);
             if (auxv->u.a_val & 0x10000000)
                VG_(have_altivec_ppc32) = 1;
-            VG_(debugLog)(1, "main", "PPC32 AltiVec support: %u\n", 
+            VG_(debugLog)(2, "main", "PPC32 AltiVec support: %u\n", 
                                      VG_(have_altivec_ppc32));
 #           endif
             break;
@@ -562,7 +563,7 @@ Addr setup_client_stack( void*  init_sp,
             /* acquire cache info */
             if (auxv->u.a_val > 0) {
                VG_(cache_line_size_ppc32) = auxv->u.a_val;
-               VG_(debugLog)(1, "main", 
+               VG_(debugLog)(2, "main", 
                                 "PPC32 cache line size %u (type %u)\n", 
                                 (UInt)auxv->u.a_val, (UInt)auxv->a_type );
             }
@@ -1846,7 +1847,7 @@ void shutdown_actions_NORETURN( ThreadId tid,
 static Addr* get_seg_starts ( /*OUT*/Int* n_acquired )
 {
    Addr* starts;
-   Int   n_starts, r;
+   Int   n_starts, r = 0;
 
    n_starts = 1;
    while (True) {
@@ -2094,7 +2095,7 @@ Int main(Int argc, HChar **argv, HChar **envp)
 
       VG_(debugLog)(2, "main",
                        "Client info: "
-                       "entry=%p client_SP=%p brkbase=%p\n",
+                       "initial_IP=%p initial_SP=%p brk_base=%p\n",
                        (void*)initial_client_IP, 
                        (void*)initial_client_SP,
                        (void*)VG_(brk_base) );
@@ -2208,7 +2209,7 @@ Int main(Int argc, HChar **argv, HChar **envp)
    //   p: setup_file_descriptors()  [for 'VG_(fd_xxx_limit)']
    //--------------------------------------------------------------
    VG_(debugLog)(1, "main", "Process Valgrind's command line options, "
-                            " setup logging\n");
+                            "setup logging\n");
    logging_to_fd = process_cmd_line_options(client_auxv, toolname);
 
    //--------------------------------------------------------------
@@ -2734,7 +2735,8 @@ void* memset(void *s, int c, size_t n) {
 */
 
 /* The kernel hands control to _start, which extracts the initial
-   stack pointer and calls onwards to _start_in_C.  This also switches the new stack.  */
+   stack pointer and calls onwards to _start_in_C.  This also switches
+   to the new stack.  */
 #if defined(VGP_x86_linux)
 asm("\n"
     "\t.globl _start\n"
@@ -2769,6 +2771,30 @@ asm("\n"
     "\tcall  _start_in_C\n"
     "\thlt\n"
 );
+#elif defined(VGP_ppc32_linux)
+asm("\n"
+    "\t.globl _start\n"
+    "\t.type _start,@function\n"
+    "_start:\n"
+    /* set up the new stack in r16 */
+    "\tlis 16,vgPlain_interim_stack@ha\n"
+    "\tla  16,vgPlain_interim_stack@l(16)\n"
+    "\tlis    17,("VG_STRINGIFY(VG_STACK_GUARD_SZB)" >> 16)\n"
+    "\tori 17,17,("VG_STRINGIFY(VG_STACK_GUARD_SZB)" & 0xFFFF)\n"
+    "\tlis    18,("VG_STRINGIFY(VG_STACK_ACTIVE_SZB)" >> 16)\n"
+    "\tori 18,18,("VG_STRINGIFY(VG_STACK_ACTIVE_SZB)" & 0xFFFF)\n"
+    "\tadd 16,17,16\n"
+    "\tadd 16,18,16\n"
+    "\trlwinm 16,16,0,0,27\n"
+    /* now r16 = &vgPlain_interim_stack + VG_STACK_GUARD_SZB +
+       VG_STACK_ACTIVE_SZB rounded down to the nearest 16-byte
+       boundary.  And r1 is the original SP.  Set the SP to r16 and
+       call _start_in_C, passing it the initial SP. */
+    "\tmr 3,1\n"
+    "\tmr 1,16\n"
+    "\tbl _start_in_C\n"
+    "\ttrap\n"
+);
 #else
 #error "_start: needs implementation on this platform"
 #endif
diff --git a/coregrind/m_sigframe/sigframe-ppc32-linux.c b/coregrind/m_sigframe/sigframe-ppc32-linux.c
index 22af92a50a..3432687f76 100644
--- a/coregrind/m_sigframe/sigframe-ppc32-linux.c
+++ b/coregrind/m_sigframe/sigframe-ppc32-linux.c
@@ -171,7 +171,7 @@ void stack_mcontext ( struct vki_mcontext *mc,
              (Addr)&mc->mc_pad, sizeof(mc->mc_pad) );
    /* invalidate any translation of this area */
    VG_(discard_translations)( (Addr64)(Addr)&mc->mc_pad, 
-                              sizeof(mc->mc_pad) );   
+                              sizeof(mc->mc_pad), "stack_mcontext" );   
 
    /* set the signal handler to return to the trampoline */
    SET_SIGNAL_LR(tst, (Addr) &mc->mc_pad[0]);
@@ -493,17 +493,16 @@ void stack_mcontext ( struct vki_mcontext *mc,
 static Bool extend ( ThreadState *tst, Addr addr, SizeT size )
 {
    ThreadId tid = tst->tid;
-   Segment *stackseg = NULL;
+   NSegment *stackseg = NULL;
 
    if (VG_(extend_stack)(addr, tst->client_stack_szB)) {
-      stackseg = VG_(find_segment)(addr);
+      stackseg = VG_(am_find_nsegment)(addr);
       if (0 && stackseg)
 	 VG_(printf)("frame=%p seg=%p-%p\n",
-		     addr, stackseg->addr, stackseg->addr+stackseg->len);
+		     addr, stackseg->start, stackseg->end);
    }
 
-   if (stackseg == NULL 
-       || (stackseg->prot & (VKI_PROT_READ|VKI_PROT_WRITE)) == 0) {
+   if (stackseg == NULL || !stackseg->hasR || !stackseg->hasW) {
       VG_(message)(
          Vg_UserMsg,
          "Can't extend stack to %p during signal delivery for thread %d:",
diff --git a/coregrind/m_syswrap/syswrap-ppc32-linux.c b/coregrind/m_syswrap/syswrap-ppc32-linux.c
index 0bac456af9..52a916eb73 100644
--- a/coregrind/m_syswrap/syswrap-ppc32-linux.c
+++ b/coregrind/m_syswrap/syswrap-ppc32-linux.c
@@ -58,71 +58,46 @@
    Note.  Why is this stuff here?
    ------------------------------------------------------------------ */
 
-/* 
-   Allocate a stack for this thread.
-   They're allocated lazily, but never freed.
- */
-#define FILL 0xdeadbeef
+/* Allocate a stack for this thread.  They're allocated lazily, and
+   never freed. */
 
-// Valgrind's stack size, in words.
-#define STACK_SIZE_W      16384
+/* Allocate a stack for this thread, if it doesn't already have one.
+   Returns the initial stack pointer value to use, or 0 if allocation
+   failed. */
 
-static UWord* allocstack(ThreadId tid)
+static Addr allocstack ( ThreadId tid )
 {
-   ThreadState *tst = VG_(get_ThreadState)(tid);
-   UWord *sp;
+   ThreadState* tst = VG_(get_ThreadState)(tid);
+   VgStack*     stack;
+   Addr         initial_SP;
 
-   if (tst->os_state.valgrind_stack_base == 0) {
-      void *stk = VG_(mmap)(0, STACK_SIZE_W * sizeof(UWord) + VKI_PAGE_SIZE,
-                            VKI_PROT_READ|VKI_PROT_WRITE,
-                            VKI_MAP_PRIVATE|VKI_MAP_ANONYMOUS,
-                            SF_VALGRIND,
-                            -1, 0);
-
-      if (stk != (void *)-1) {
-         VG_(mprotect)(stk, VKI_PAGE_SIZE, VKI_PROT_NONE); /* guard page */
-         tst->os_state.valgrind_stack_base = ((Addr)stk) + VKI_PAGE_SIZE;
-         tst->os_state.valgrind_stack_szB  = STACK_SIZE_W * sizeof(UWord);
-      } else 
-         return (UWord*)-1;
-   }
+   /* Either the stack_base and stack_init_SP are both zero (in which
+      case a stack hasn't been allocated) or they are both non-zero,
+      in which case it has. */
 
-   for (sp = (UWord*) tst->os_state.valgrind_stack_base;
-        sp < (UWord*)(tst->os_state.valgrind_stack_base + 
-                       tst->os_state.valgrind_stack_szB); 
-        sp++)
-      *sp = FILL;
-   /* sp is left at top of stack */
+   if (tst->os_state.valgrind_stack_base == 0)
+      vg_assert(tst->os_state.valgrind_stack_init_SP == 0);
 
-   if (0)
-      VG_(printf)("stack for tid %d at %p (%x); sp=%p\n",
-                  tid, tst->os_state.valgrind_stack_base, 
-                  *(UWord*)(tst->os_state.valgrind_stack_base), sp);
+   if (tst->os_state.valgrind_stack_base != 0)
+      vg_assert(tst->os_state.valgrind_stack_init_SP != 0);
 
-   vg_assert(VG_IS_16_ALIGNED(sp));
+   /* If no stack is present, allocate one. */
 
-   return sp;
-}
+   if (tst->os_state.valgrind_stack_base == 0) {
+      stack = VG_(am_alloc_VgStack)( &initial_SP );
+      if (stack) {
+         tst->os_state.valgrind_stack_base    = (Addr)stack;
+         tst->os_state.valgrind_stack_init_SP = initial_SP;
+      }
+   }
 
-/* NB: this is identical the the amd64 version. */
-/* Return how many bytes of this stack have not been used */
-SSizeT VG_(stack_unused)(ThreadId tid)
-{
-   ThreadState *tst = VG_(get_ThreadState)(tid);
-   UWord* p;
-   
-   for (p = (UWord*)tst->os_state.valgrind_stack_base; 
-        p && (p < (UWord*)(tst->os_state.valgrind_stack_base +
-                           tst->os_state.valgrind_stack_szB)); 
-        p++)
-      if (*p != FILL)
-         break;
-   
    if (0)
-      VG_(printf)("p=%p %x tst->os_state.valgrind_stack_base=%p\n",
-                  p, *p, tst->os_state.valgrind_stack_base);
-   
-   return ((Addr)p) - tst->os_state.valgrind_stack_base;
+      VG_(printf)( "stack for tid %d at %p; init_SP=%p\n",
+                   tid, 
+                   (void*)tst->os_state.valgrind_stack_base, 
+                   (void*)tst->os_state.valgrind_stack_init_SP );
+                  
+   return tst->os_state.valgrind_stack_init_SP;
 }
 
 
@@ -162,6 +137,7 @@ static void run_a_thread_NORETURN ( Word tidW )
       ( * VG_(address_of_m_main_shutdown_actions_NORETURN) ) (tid, src);
 
    } else {
+
       VG_(debugLog)(1, "syswrap-ppc32-linux", 
                        "run_a_thread_NORETURN(tid=%lld): "
                           "not last one standing\n",
@@ -254,21 +230,27 @@ asm(
 );
 
 
-/*
-   Allocate a stack for the main thread, and run it all the way to the
-   end.  
- */
+/* Allocate a stack for the main thread, and run it all the way to the
+   end.  Although we already have a working VgStack
+   (VG_(interim_stack)) it's better to allocate a new one, so that
+   overflow detection works uniformly for all threads.
+*/
 void VG_(main_thread_wrapper_NORETURN)(ThreadId tid)
 {
    VG_(debugLog)(1, "syswrap-ppc32-linux", 
                     "entering VG_(main_thread_wrapper_NORETURN)\n");
 
-   UWord* sp = allocstack(tid);
+   Addr sp = allocstack(tid);
 
    /* make a stack frame */
    sp -= 16;
+   sp &= ~0xF;
    *(UWord *)sp = 0;
 
+   /* If we can't even allocate the first thread's stack, we're hosed.
+      Give up. */
+   vg_assert2(sp != 0, "Cannot allocate main thread's stack.");
+
    /* shouldn't be any other threads around yet */
    vg_assert( VG_(count_living_threads)() == 1 );
 
@@ -426,7 +408,7 @@ static SysRes do_clone ( ThreadId ptid,
    ThreadState* ctst = VG_(get_ThreadState)(ctid);
    ULong        word64;
    UWord*       stack;
-   Segment*     seg;
+   NSegment*    seg;
    SysRes       res;
    vki_sigset_t blockall, savedmask;
 
@@ -435,7 +417,7 @@ static SysRes do_clone ( ThreadId ptid,
    vg_assert(VG_(is_running_thread)(ptid));
    vg_assert(VG_(is_valid_tid)(ctid));
 
-   stack = allocstack(ctid);
+   stack = (UWord*)allocstack(ctid);
    if (stack == NULL) {
       res = VG_(mk_SysRes_Error)( VKI_ENOMEM );
       goto out;
@@ -486,14 +468,14 @@ static SysRes do_clone ( ThreadId ptid,
       memory mappings and try to derive some useful information.  We
       assume that esp starts near its highest possible value, and can
       only go down to the start of the mmaped segment. */
-   seg = VG_(find_segment)(sp);
-   if (seg) {
+   seg = VG_(am_find_nsegment)(sp);
+   if (seg && seg->kind != SkResvn) {
       ctst->client_stack_highest_word = (Addr)VG_PGROUNDUP(sp);
-      ctst->client_stack_szB  = ctst->client_stack_highest_word - seg->addr;
+      ctst->client_stack_szB = ctst->client_stack_highest_word - seg->start;
 
       if (debug)
 	 VG_(printf)("\ntid %d: guessed client stack range %p-%p\n",
-		     ctid, seg->addr, VG_PGROUNDUP(sp));
+		     ctid, seg->start, VG_PGROUNDUP(sp));
    } else {
       VG_(message)(Vg_UserMsg, "!? New thread %d starts with R1(%p) unmapped\n",
 		   ctid, sp);
@@ -526,6 +508,7 @@ static SysRes do_clone ( ThreadId ptid,
 
    VG_(sigprocmask)(VKI_SIG_SETMASK, &savedmask, NULL);
 
+  out:
    if (res.isError) {
       /* clone failed */
       VG_(cleanup_thread)(&ctst->arch);
@@ -582,330 +565,6 @@ static SysRes do_fork_clone( ThreadId tid,
 }
 
 
-/* ---------------------------------------------------------------------
-   LDT/GDT simulation
-   ------------------------------------------------------------------ */
-#warning "Do we need all this LDT/GDT garbage on ppc32?  Surely not."
-
-/* Details of the LDT simulation
-   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-  
-   When a program runs natively, the linux kernel allows each *thread*
-   in it to have its own LDT.  Almost all programs never do this --
-   it's wildly unportable, after all -- and so the kernel never
-   allocates the structure, which is just as well as an LDT occupies
-   64k of memory (8192 entries of size 8 bytes).
-
-   A thread may choose to modify its LDT entries, by doing the
-   __NR_modify_ldt syscall.  In such a situation the kernel will then
-   allocate an LDT structure for it.  Each LDT entry is basically a
-   (base, limit) pair.  A virtual address in a specific segment is
-   translated to a linear address by adding the segment's base value.
-   In addition, the virtual address must not exceed the limit value.
-
-   To use an LDT entry, a thread loads one of the segment registers
-   (%cs, %ss, %ds, %es, %fs, %gs) with the index of the LDT entry (0
-   .. 8191) it wants to use.  In fact, the required value is (index <<
-   3) + 7, but that's not important right now.  Any normal instruction
-   which includes an addressing mode can then be made relative to that
-   LDT entry by prefixing the insn with a so-called segment-override
-   prefix, a byte which indicates which of the 6 segment registers
-   holds the LDT index.
-
-   Now, a key constraint is that valgrind's address checks operate in
-   terms of linear addresses.  So we have to explicitly translate
-   virtual addrs into linear addrs, and that means doing a complete
-   LDT simulation.
-
-   Calls to modify_ldt are intercepted.  For each thread, we maintain
-   an LDT (with the same normally-never-allocated optimisation that
-   the kernel does).  This is updated as expected via calls to
-   modify_ldt.
-
-   When a thread does an amode calculation involving a segment
-   override prefix, the relevant LDT entry for the thread is
-   consulted.  It all works.
-
-   There is a conceptual problem, which appears when switching back to
-   native execution, either temporarily to pass syscalls to the
-   kernel, or permanently, when debugging V.  Problem at such points
-   is that it's pretty pointless to copy the simulated machine's
-   segment registers to the real machine, because we'd also need to
-   copy the simulated LDT into the real one, and that's prohibitively
-   expensive.
-
-   Fortunately it looks like no syscalls rely on the segment regs or
-   LDT being correct, so we can get away with it.  Apart from that the
-   simulation is pretty straightforward.  All 6 segment registers are
-   tracked, although only %ds, %es, %fs and %gs are allowed as
-   prefixes.  Perhaps it could be restricted even more than that -- I
-   am not sure what is and isn't allowed in user-mode.
-*/
-
-//.. /* Translate a struct modify_ldt_ldt_s to a VexGuestX86SegDescr, using
-//..    the Linux kernel's logic (cut-n-paste of code in
-//..    linux/kernel/ldt.c).  */
-//.. 
-//.. static
-//.. void translate_to_hw_format ( /* IN  */ vki_modify_ldt_t* inn,
-//.. 			      /* OUT */ VexGuestX86SegDescr* out,
-//..                                         Int oldmode )
-//.. {
-//..    UInt entry_1, entry_2;
-//..    vg_assert(8 == sizeof(VexGuestX86SegDescr));
-//.. 
-//..    if (0)
-//..       VG_(printf)("translate_to_hw_format: base %p, limit %d\n", 
-//..                   inn->base_addr, inn->limit );
-//.. 
-//..    /* Allow LDTs to be cleared by the user. */
-//..    if (inn->base_addr == 0 && inn->limit == 0) {
-//..       if (oldmode ||
-//..           (inn->contents == 0      &&
-//..            inn->read_exec_only == 1   &&
-//..            inn->seg_32bit == 0      &&
-//..            inn->limit_in_pages == 0   &&
-//..            inn->seg_not_present == 1   &&
-//..            inn->useable == 0 )) {
-//..          entry_1 = 0;
-//..          entry_2 = 0;
-//..          goto install;
-//..       }
-//..    }
-//.. 
-//..    entry_1 = ((inn->base_addr & 0x0000ffff) << 16) |
-//..              (inn->limit & 0x0ffff);
-//..    entry_2 = (inn->base_addr & 0xff000000) |
-//..              ((inn->base_addr & 0x00ff0000) >> 16) |
-//..              (inn->limit & 0xf0000) |
-//..              ((inn->read_exec_only ^ 1) << 9) |
-//..              (inn->contents << 10) |
-//..              ((inn->seg_not_present ^ 1) << 15) |
-//..              (inn->seg_32bit << 22) |
-//..              (inn->limit_in_pages << 23) |
-//..              0x7000;
-//..    if (!oldmode)
-//..       entry_2 |= (inn->useable << 20);
-//.. 
-//..    /* Install the new entry ...  */
-//..   install:
-//..    out->LdtEnt.Words.word1 = entry_1;
-//..    out->LdtEnt.Words.word2 = entry_2;
-//.. }
-//.. 
-//.. 
-//.. /*
-//..  * linux/kernel/ldt.c
-//..  *
-//..  * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
-//..  * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
-//..  */
-//.. 
-//.. /*
-//..  * read_ldt() is not really atomic - this is not a problem since
-//..  * synchronization of reads and writes done to the LDT has to be
-//..  * assured by user-space anyway. Writes are atomic, to protect
-//..  * the security checks done on new descriptors.
-//..  */
-//.. static
-//.. Int read_ldt ( ThreadId tid, UChar* ptr, UInt bytecount )
-//.. {
-//..    Int    err;
-//..    UInt   i, size;
-//..    UChar* ldt;
-//.. 
-//..    if (0)
-//..       VG_(printf)("read_ldt: tid = %d, ptr = %p, bytecount = %d\n",
-//..                   tid, ptr, bytecount );
-//.. 
-//..    vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
-//..    vg_assert(8 == sizeof(VexGuestX86SegDescr));
-//.. 
-//..    ldt = (Char*)(VG_(threads)[tid].arch.vex.guest_LDT);
-//..    err = 0;
-//..    if (ldt == NULL)
-//..       /* LDT not allocated, meaning all entries are null */
-//..       goto out;
-//.. 
-//..    size = VEX_GUEST_X86_LDT_NENT * sizeof(VexGuestX86SegDescr);
-//..    if (size > bytecount)
-//..       size = bytecount;
-//.. 
-//..    err = size;
-//..    for (i = 0; i < size; i++)
-//..       ptr[i] = ldt[i];
-//.. 
-//..   out:
-//..    return err;
-//.. }
-//.. 
-//.. 
-//.. static
-//.. Int write_ldt ( ThreadId tid, void* ptr, UInt bytecount, Int oldmode )
-//.. {
-//..    Int error;
-//..    VexGuestX86SegDescr* ldt;
-//..    vki_modify_ldt_t* ldt_info; 
-//.. 
-//..    if (0)
-//..       VG_(printf)("write_ldt: tid = %d, ptr = %p, "
-//..                   "bytecount = %d, oldmode = %d\n",
-//..                   tid, ptr, bytecount, oldmode );
-//.. 
-//..    vg_assert(8 == sizeof(VexGuestX86SegDescr));
-//..    vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
-//.. 
-//..    ldt      = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_LDT;
-//..    ldt_info = (vki_modify_ldt_t*)ptr;
-//.. 
-//..    error = -VKI_EINVAL;
-//..    if (bytecount != sizeof(vki_modify_ldt_t))
-//..       goto out;
-//.. 
-//..    error = -VKI_EINVAL;
-//..    if (ldt_info->entry_number >= VEX_GUEST_X86_LDT_NENT)
-//..       goto out;
-//..    if (ldt_info->contents == 3) {
-//..       if (oldmode)
-//..          goto out;
-//..       if (ldt_info->seg_not_present == 0)
-//..          goto out;
-//..    }
-//.. 
-//..    /* If this thread doesn't have an LDT, we'd better allocate it
-//..       now. */
-//..    if (ldt == (HWord)NULL) {
-//..       ldt = VG_(alloc_zeroed_x86_LDT)();
-//..       VG_(threads)[tid].arch.vex.guest_LDT = (HWord)ldt;
-//..    }
-//.. 
-//..    /* Install the new entry ...  */
-//..    translate_to_hw_format ( ldt_info, &ldt[ldt_info->entry_number], oldmode );
-//..    error = 0;
-//.. 
-//..   out:
-//..    return error;
-//.. }
-//.. 
-//.. 
-//.. Int VG_(sys_modify_ldt) ( ThreadId tid,
-//..                           Int func, void* ptr, UInt bytecount )
-//.. {
-//..    Int ret = -VKI_ENOSYS;
-//.. 
-//..    switch (func) {
-//..    case 0:
-//..       ret = read_ldt(tid, ptr, bytecount);
-//..       break;
-//..    case 1:
-//..       ret = write_ldt(tid, ptr, bytecount, 1);
-//..       break;
-//..    case 2:
-//..       VG_(unimplemented)("sys_modify_ldt: func == 2");
-//..       /* god knows what this is about */
-//..       /* ret = read_default_ldt(ptr, bytecount); */
-//..       /*UNREACHED*/
-//..       break;
-//..    case 0x11:
-//..       ret = write_ldt(tid, ptr, bytecount, 0);
-//..       break;
-//..    }
-//..    return ret;
-//.. }
-//.. 
-//.. 
-//.. Int VG_(sys_set_thread_area) ( ThreadId tid,
-//..                                vki_modify_ldt_t* info )
-//.. {
-//..    Int idx;
-//..    VexGuestX86SegDescr* gdt;
-//.. 
-//..    vg_assert(8 == sizeof(VexGuestX86SegDescr));
-//..    vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
-//.. 
-//..    if (info == NULL)
-//..       return -VKI_EFAULT;
-//.. 
-//..    gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_GDT;
-//.. 
-//..    /* If the thread doesn't have a GDT, allocate it now. */
-//..    if (!gdt) {
-//..       gdt = VG_(alloc_zeroed_x86_GDT)();
-//..       VG_(threads)[tid].arch.vex.guest_GDT = (HWord)gdt;
-//..    }
-//.. 
-//..    idx = info->entry_number;
-//.. 
-//..    if (idx == -1) {
-//..       /* Find and use the first free entry. */
-//..       for (idx = 0; idx < VEX_GUEST_X86_GDT_NENT; idx++) {
-//..          if (gdt[idx].LdtEnt.Words.word1 == 0 
-//..              && gdt[idx].LdtEnt.Words.word2 == 0)
-//..             break;
-//..       }
-//.. 
-//..       if (idx == VEX_GUEST_X86_GDT_NENT)
-//..          return -VKI_ESRCH;
-//..    } else if (idx < 0 || idx >= VEX_GUEST_X86_GDT_NENT) {
-//..       return -VKI_EINVAL;
-//..    }
-//.. 
-//..    translate_to_hw_format(info, &gdt[idx], 0);
-//.. 
-//..    VG_TRACK( pre_mem_write, Vg_CoreSysCall, tid,
-//..              "set_thread_area(info->entry)",
-//..              (Addr) & info->entry_number, sizeof(unsigned int) );
-//..    info->entry_number = idx;
-//..    VG_TRACK( post_mem_write, Vg_CoreSysCall, tid,
-//..              (Addr) & info->entry_number, sizeof(unsigned int) );
-//.. 
-//..    return 0;
-//.. }
-//.. 
-//.. 
-//.. Int VG_(sys_get_thread_area) ( ThreadId tid,
-//..                                vki_modify_ldt_t* info )
-//.. {
-//..    Int idx;
-//..    VexGuestX86SegDescr* gdt;
-//.. 
-//..    vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
-//..    vg_assert(8 == sizeof(VexGuestX86SegDescr));
-//.. 
-//..    if (info == NULL)
-//..       return -VKI_EFAULT;
-//.. 
-//..    idx = info->entry_number;
-//.. 
-//..    if (idx < 0 || idx >= VEX_GUEST_X86_GDT_NENT)
-//..       return -VKI_EINVAL;
-//.. 
-//..    gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_GDT;
-//.. 
-//..    /* If the thread doesn't have a GDT, allocate it now. */
-//..    if (!gdt) {
-//..       gdt = VG_(alloc_zeroed_x86_GDT)();
-//..       VG_(threads)[tid].arch.vex.guest_GDT = (HWord)gdt;
-//..    }
-//.. 
-//..    info->base_addr = ( gdt[idx].LdtEnt.Bits.BaseHi << 24 ) |
-//..                      ( gdt[idx].LdtEnt.Bits.BaseMid << 16 ) |
-//..                      gdt[idx].LdtEnt.Bits.BaseLow;
-//..    info->limit = ( gdt[idx].LdtEnt.Bits.LimitHi << 16 ) |
-//..                    gdt[idx].LdtEnt.Bits.LimitLow;
-//..    info->seg_32bit = gdt[idx].LdtEnt.Bits.Default_Big;
-//..    info->contents = ( gdt[idx].LdtEnt.Bits.Type >> 2 ) & 0x3;
-//..    info->read_exec_only = ( gdt[idx].LdtEnt.Bits.Type & 0x1 ) ^ 0x1;
-//..    info->limit_in_pages = gdt[idx].LdtEnt.Bits.Granularity;
-//..    info->seg_not_present = gdt[idx].LdtEnt.Bits.Pres ^ 0x1;
-//..    info->useable = gdt[idx].LdtEnt.Bits.Sys;
-//..    info->reserved = 0;
-//.. 
-//..    return 0;
-//.. }
-
-
-
 /* ---------------------------------------------------------------------
    More thread stuff
    ------------------------------------------------------------------ */
@@ -1451,14 +1110,16 @@ PRE(sys_clone)
 
    if (ARG1 & VKI_CLONE_PARENT_SETTID) {
       PRE_MEM_WRITE("clone(parent_tidptr)", ARG3, sizeof(Int));
-      if (!VG_(is_addressable)(ARG3, sizeof(Int), VKI_PROT_WRITE)) {
+      if (!VG_(am_is_valid_for_client)(ARG3, sizeof(Int), 
+                                             VKI_PROT_WRITE)) {
          SET_STATUS_Failure( VKI_EFAULT );
          return;
       }
    }
    if (ARG1 & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID)) {
       PRE_MEM_WRITE("clone(child_tidptr)", ARG5, sizeof(Int));
-      if (!VG_(is_addressable)(ARG5, sizeof(Int), VKI_PROT_WRITE)) {
+      if (!VG_(am_is_valid_for_client)(ARG5, sizeof(Int), 
+                                             VKI_PROT_WRITE)) {
          SET_STATUS_Failure( VKI_EFAULT );
          return;
       }
@@ -2068,11 +1729,7 @@ const SyscallTableEntry ML_(syscall_table)[] = {
 //..    //   (__NR_reboot,            sys_reboot),            // 88 */Linux
 //..    //   (__NR_readdir,           old_readdir),           // 89 -- superseded
 
-<<<<<<< .working
-   LINXY(__NR_mmap,              sys_mmap2),                  // 90
-=======
-   GENX_(__NR_mmap,              sys_mmap2),                  // 90
->>>>>>> .merge-right.r4787
+   LINX_(__NR_mmap,              sys_mmap2),                  // 90
    GENXY(__NR_munmap,            sys_munmap),                 // 91
 //..    GENX_(__NR_truncate,          sys_truncate),          // 92
    GENX_(__NR_ftruncate,         sys_ftruncate),         // 93
@@ -2198,11 +1855,7 @@ const SyscallTableEntry ML_(syscall_table)[] = {
    GENX_(__NR_vfork,             sys_fork),              // 189
    GENXY(__NR_ugetrlimit,        sys_getrlimit),         // 190
 //__NR_readahead      // 191 ppc/Linux only?
-<<<<<<< .working
-   LINXY(__NR_mmap2,             sys_mmap2),             // 192
-=======
-   GENX_(__NR_mmap2,             sys_mmap2),             // 192
->>>>>>> .merge-right.r4787
+   LINX_(__NR_mmap2,             sys_mmap2),             // 192
 //..    GENX_(__NR_truncate64,        sys_truncate64),        // 193
 //..    GENX_(__NR_ftruncate64,       sys_ftruncate64),       // 194
 //..    
diff --git a/coregrind/m_trampoline.S b/coregrind/m_trampoline.S
index 5db984472f..2f9d46d264 100644
--- a/coregrind/m_trampoline.S
+++ b/coregrind/m_trampoline.S
@@ -36,6 +36,9 @@
    Replacements for some functions to do with vsyscalls and signals.
    This code runs on the simulated CPU.
 */
+	
+/*---------------------- x86-linux ----------------------*/
+#if defined(VGP_x86_linux)
 
 #	define UD2_16     ud2 ; ud2 ; ud2 ; ud2 ;ud2 ; ud2 ; ud2 ; ud2
 #	define UD2_64     UD2_16   ; UD2_16   ; UD2_16   ; UD2_16
@@ -45,9 +48,6 @@
 
 	/* a leading page of unexecutable code */
 	UD2_PAGE
-	
-/*---------------- x86-linux ----------------*/
-#if defined(VGP_x86_linux)
 
 .global VG_(trampoline_stuff_start)
 VG_(trampoline_stuff_start):
@@ -77,11 +77,28 @@ VG_(x86_linux_REDIR_FOR__dl_sysinfo_int80):
 .global VG_(trampoline_stuff_end)
 VG_(trampoline_stuff_end):
 
+	/* and a trailing page of unexecutable code */
+	UD2_PAGE
+
+#	undef UD2_16
+#	undef UD2_64
+#	undef UD2_256
+#	undef UD2_1024
+#	undef UD2_PAGE
 	
-/*---------------- amd64-linux ----------------*/
+/*---------------------- amd64-linux ----------------------*/
 #else
 #if defined(VGP_amd64_linux)
 
+#	define UD2_16     ud2 ; ud2 ; ud2 ; ud2 ;ud2 ; ud2 ; ud2 ; ud2
+#	define UD2_64     UD2_16   ; UD2_16   ; UD2_16   ; UD2_16
+#	define UD2_256    UD2_64   ; UD2_64   ; UD2_64   ; UD2_64
+#	define UD2_1024   UD2_256  ; UD2_256  ; UD2_256  ; UD2_256
+#	define UD2_PAGE   UD2_1024 ; UD2_1024 ; UD2_1024 ; UD2_1024  
+
+	/* a leading page of unexecutable code */
+	UD2_PAGE
+
 .global VG_(trampoline_stuff_start)
 VG_(trampoline_stuff_start):
 
@@ -108,11 +125,28 @@ VG_(amd64_linux_REDIR_FOR_vtime):
 .global VG_(trampoline_stuff_end)
 VG_(trampoline_stuff_end):
 
+	/* and a trailing page of unexecutable code */
+	UD2_PAGE
+
+#	undef UD2_16
+#	undef UD2_64
+#	undef UD2_256
+#	undef UD2_1024
+#	undef UD2_PAGE
 
 /*---------------- ppc32-linux ----------------*/
 #else
 #if defined(VGP_ppc32_linux)
 
+#	define UD2_16     trap ; trap ; trap; trap
+#	define UD2_64     UD2_16   ; UD2_16   ; UD2_16   ; UD2_16
+#	define UD2_256    UD2_64   ; UD2_64   ; UD2_64   ; UD2_64
+#	define UD2_1024   UD2_256  ; UD2_256  ; UD2_256  ; UD2_256
+#	define UD2_PAGE   UD2_1024 ; UD2_1024 ; UD2_1024 ; UD2_1024  
+
+	/* a leading page of unexecutable code */
+	UD2_PAGE
+
 .global VG_(trampoline_stuff_start)
 VG_(trampoline_stuff_start):
 
@@ -138,6 +172,11 @@ VG_(ppc32_linux_REDIR_FOR_strlen):
 .global VG_(trampoline_stuff_end)
 VG_(trampoline_stuff_end):
 
+#	undef UD2_16
+#	undef UD2_64
+#	undef UD2_256
+#	undef UD2_1024
+#	undef UD2_PAGE
 
 /*---------------- unknown ----------------*/
 #else
@@ -147,15 +186,6 @@ VG_(trampoline_stuff_end):
 #endif
 #endif
 
-	/* and a trailing page of unexecutable code */
-	UD2_PAGE
-
-#	undef UD2_16
-#	undef UD2_64
-#	undef UD2_256
-#	undef UD2_1024
-#	undef UD2_PAGE
-
 
 /* Let the linker know we don't need an executable stack */
 .section .note.GNU-stack,"",@progbits
diff --git a/coregrind/m_translate.c b/coregrind/m_translate.c
index 3793eb739b..a32127efdb 100644
--- a/coregrind/m_translate.c
+++ b/coregrind/m_translate.c
@@ -115,8 +115,8 @@ static Bool getArchAndArchInfo( /*OUT*/VexArch*     vex_arch,
 
 #elif defined(VGA_ppc32)
    *vex_arch    = VexArchPPC32;
-   vai->subarch = VG_(have_altivec_ppc) ? VexSubArchPPC32_AV
-                                        : VexSubArchPPC32_noAV;
+   vai->subarch = VG_(have_altivec_ppc32) ? VexSubArchPPC32_AV
+                                          : VexSubArchPPC32_noAV;
    vai->ppc32_cache_line_szB = VG_(cache_line_size_ppc32);
    return True;
 
diff --git a/coregrind/pub_core_machine.h b/coregrind/pub_core_machine.h
index 0e4d8e930f..2046fe6768 100644
--- a/coregrind/pub_core_machine.h
+++ b/coregrind/pub_core_machine.h
@@ -84,7 +84,7 @@
 // entries.
 extern Int VG_(cache_line_size_ppc32);
 // Altivec enabled?  Harvested on startup from the AT_HWCAP entry
-extern Int VG_(have_altivec_ppc);
+extern Int VG_(have_altivec_ppc32);
 #endif
 
 // X86: set to 1 if the host is able to do {ld,st}mxcsr (load/store
diff --git a/coregrind/vki_unistd-ppc32-linux.h b/coregrind/vki_unistd-ppc32-linux.h
index 380adfb1b4..a7f576912c 100644
--- a/coregrind/vki_unistd-ppc32-linux.h
+++ b/coregrind/vki_unistd-ppc32-linux.h
@@ -55,7 +55,7 @@
 #define __NR_setuid			 23
 #define __NR_getuid			 24
 //#define __NR_stime			 25
-//#define __NR_ptrace			 26
+#define __NR_ptrace			 26
 #define __NR_alarm			 27
 //#define __NR_oldfstat			 28
 #define __NR_pause			 29
-- 
2.47.3