--- /dev/null
+
+/*--------------------------------------------------------------------*/
+/*--- Startup: create initial process image on AIX5 ---*/
+/*--- initimg-aix5.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2006-2006 OpenWorks LLP
+ info@open-works.co.uk
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "pub_core_basics.h"
+#include "pub_core_vki.h"
+#include "pub_core_vkiscnums.h"
+#include "pub_core_debuglog.h"
+#include "pub_core_libcbase.h"
+#include "pub_core_libcassert.h"
+#include "pub_core_libcfile.h"
+#include "pub_core_libcproc.h"
+#include "pub_core_libcprint.h"
+#include "pub_core_clientstate.h"
+#include "pub_core_aspacemgr.h"
+#include "pub_core_mallocfree.h"
+#include "pub_core_machine.h"
+#include "pub_core_ume.h"
+#include "pub_core_options.h"
+#include "pub_core_threadstate.h" /* ThreadArchState */
+#include "pub_core_tooliface.h" /* VG_TRACK */
+#include "pub_core_trampoline.h" /* VG_(ppc32_aix5_do_preloads_then_start_client) */
+#include "pub_core_syscall.h" // VG_(do_syscall1)
+#include "pub_core_initimg.h" /* self */
+
+#include "simple_huffman.c"
+
+#if !defined(VGP_ppc32_aix5) && !defined(VGP_ppc64_aix5)
+#error "This should only be compiled on AIX"
+#endif
+
+
+static void diagnose_load_failure ( void );
+
+/* --- Create the client's initial memory image. --- */
+
+/* Build the AIX5 "preload page" in client memory and return it inside a
+   ClientInitImgInfo.
+
+   toolname is used to form the name of the tool-specific preload
+   object.  argv, envp, clstack_top and clstack_max_size are not
+   consulted by this implementation.
+
+   In the returned structure only .preloadpage and .intregs37 (set to
+   0) are written here; the original comment says intregs37 is filled
+   in in m_main.c.
+
+   Failure to find the core preload object, or to allocate the client
+   page(s), is reported through VG_(err_config_error). */
+ClientInitImgInfo
+   VG_(setup_client_initial_image)(
+      /*IN*/ HChar** argv,
+      /*IN*/ HChar** envp,
+      /*IN*/ HChar*  toolname,
+      /*IN*/ Addr    clstack_top,
+      /*IN*/ SizeT   clstack_max_size
+   )
+{
+   ClientInitImgInfo ciii;
+
+   /* Set up an AIX5PreloadPage structure with the names of
+
+         $VALGRIND_LIB/PLATFORM/vgpreload_core.so
+         $VALGRIND_LIB/PLATFORM/vgpreload_TOOL.so, if it exists
+         xxx in "LD_PRELOAD=xxx", if it exists
+
+      The client is started by running (on the simulator, of course)
+      VG_(ppc{32,64}_aix5_do_preloads_then_start_client), which uses
+      __loadx/_kload to load these .so's.  When the preloading is
+      done, various guest registers are restored to what they are
+      really supposed to be at client startup, so these values too are
+      stored in the AIX5PreloadPage.  Finally, we jump to the client's
+      entry point address.
+   */
+   const HChar* _so               = ".so";
+   const HChar* vgpreload_        = "vgpreload_";
+   const HChar* vgpreload_core_so = "vgpreload_core.so";
+   const HChar* errmsg_str
+                   = "valgrind: FATAL: core/tool/LD_PRELOAD= "
+                     "preload failed.\n";
+   Int    plcore_len, pltool_len, ld_pre_len, errmsg_len;
+   HChar  *plcore_str, *pltool_str, *ld_pre_str;
+   HChar  *ld_pre_env;
+   Bool   have_tool_so, have_ld_pre;
+
+   AIX5PreloadPage* pp;
+   UChar*           pc;
+   Int              szB, szPG;
+   SysRes           sres;
+
+   /* --- Name of the tool preload object. --- */
+   vg_assert( toolname );
+   pltool_len = VG_(strlen)( VG_(libdir) )
+                + 1 /*slash*/
+                + VG_(strlen)(VG_PLATFORM)
+                + 1 /*slash*/
+                + VG_(strlen)( vgpreload_ )
+                + VG_(strlen)( toolname )
+                + VG_(strlen)( _so )
+                + 1 /*NUL*/;
+   vg_assert(pltool_len > 0);
+   pltool_str = VG_(malloc)( pltool_len );
+   pltool_str[0] = 0;
+   VG_(strcat)( pltool_str, VG_(libdir) );
+   VG_(strcat)( pltool_str, "/" );
+   VG_(strcat)( pltool_str, VG_PLATFORM );
+   VG_(strcat)( pltool_str, "/" );
+   VG_(strcat)( pltool_str, vgpreload_ );
+   VG_(strcat)( pltool_str, toolname );
+   VG_(strcat)( pltool_str, _so );
+   vg_assert( pltool_str[pltool_len-1] == 0);
+   vg_assert( VG_(strlen)(pltool_str) == pltool_len-1 );
+
+   /* --- Name of the core preload object. --- */
+   plcore_len = VG_(strlen)( VG_(libdir) )
+                + 1 /*slash*/
+                + VG_(strlen)(VG_PLATFORM)
+                + 1 /*slash*/
+                + VG_(strlen)( vgpreload_core_so )
+                + 1 /*NUL*/;
+   vg_assert(plcore_len > 0);
+   plcore_str = VG_(malloc)( plcore_len );
+   plcore_str[0] = 0;
+   VG_(strcat)( plcore_str, VG_(libdir) );
+   VG_(strcat)( plcore_str, "/" );
+   VG_(strcat)( plcore_str, VG_PLATFORM );
+   VG_(strcat)( plcore_str, "/" );
+   VG_(strcat)( plcore_str, vgpreload_core_so );
+   vg_assert( plcore_str[plcore_len-1] == 0 );
+   vg_assert( VG_(strlen)(plcore_str) == plcore_len-1 );
+
+   errmsg_len = VG_(strlen)( errmsg_str )
+                + 1 /*NUL*/;
+
+   /* --- Private copy of a non-empty LD_PRELOAD, if there is one. --- */
+   ld_pre_env = VG_(getenv)("LD_PRELOAD");
+   if (ld_pre_env && VG_(strlen)(ld_pre_env) > 0) {
+      have_ld_pre = True;
+      ld_pre_len  = VG_(strlen)(ld_pre_env) + 1/*NUL*/;
+      ld_pre_str  = VG_(malloc)( ld_pre_len );
+      ld_pre_str[0] = 0;
+      VG_(strcat)( ld_pre_str, ld_pre_env );
+      vg_assert( ld_pre_str[ld_pre_len-1] == 0);
+      vg_assert( VG_(strlen)( ld_pre_str ) == ld_pre_len - 1 );
+   } else {
+      have_ld_pre = False;
+      ld_pre_len  = 0;
+      ld_pre_str  = NULL;
+   }
+
+   VG_(debugLog)(1, "initimg", "plcore_str = '%s'\n", plcore_str );
+   VG_(debugLog)(1, "initimg", "pltool_str = '%s'\n", pltool_str );
+   VG_(debugLog)(1, "initimg", "ld_pre_str = '%s'\n", ld_pre_str );
+
+   /* The core preload is mandatory; the tool preload is optional. */
+   if (0 != VG_(access)(plcore_str, True,False,True))
+      VG_(err_config_error)("Can't find core preload "
+                            "(vgpreload_core.so)");
+
+   have_tool_so = 0 == VG_(access)(pltool_str, True,False,True);
+
+   /* Figure out how much space is needed for an AIX5PreloadPage
+      followed by the (up to) three preload strings and the error
+      message. */
+
+   vg_assert((sizeof(AIX5PreloadPage) % 4) == 0); /* paranoia */
+
+   szB = sizeof(AIX5PreloadPage) + plcore_len
+                                 + (have_tool_so ? pltool_len : 0)
+                                 + (have_ld_pre ? ld_pre_len : 0)
+                                 + errmsg_len;
+   szPG = VG_PGROUNDUP(szB+1) / VKI_PAGE_SIZE;
+   VG_(debugLog)(2, "initimg", "preload page size: %d bytes, %d pages\n", szB, szPG);
+
+   vg_assert(szB > 0);
+   vg_assert(szB < szPG * VKI_PAGE_SIZE);
+
+   /* We'll need szPG pages of anonymous, rw-, client space (needs w
+      so we can write it here) */
+   sres = VG_(am_mmap_anon_float_client)
+             ( szPG * VKI_PAGE_SIZE, VKI_PROT_READ|VKI_PROT_WRITE);
+   if (sres.isError)
+      VG_(err_config_error)("Can't allocate client page(s) "
+                            "for preload info");
+   pp = (AIX5PreloadPage*)sres.res;
+
+   VG_(debugLog)(2, "initimg", "preload page allocation succeeded at %p\n", pp);
+
+   /* Zero out the initial structure. */
+   VG_(memset)(pp, 0, sizeof(AIX5PreloadPage));
+
+   /* Lay the strings out immediately after the structure, recording
+      each one's offset from the start of the page. */
+   pc = (UChar*)pp;
+   pc += sizeof(AIX5PreloadPage);
+   VG_(memcpy)(pc, plcore_str, plcore_len);
+   pp->off_preloadcorename = pc - (UChar*)pp;
+   pc += plcore_len;
+   if (have_tool_so) {
+      VG_(memcpy)(pc, pltool_str, pltool_len);
+      pp->off_preloadtoolname = pc - (UChar*)pp;
+      pc += pltool_len;
+   }
+   if (have_ld_pre) {
+      VG_(memcpy)(pc, ld_pre_str, ld_pre_len);
+      pp->off_ld_preloadname = pc - (UChar*)pp;
+      pc += ld_pre_len;
+   }
+
+   /* The error message is the last item written, so check that it
+      (and hence everything before it) fits inside the mapping. */
+   vg_assert(pc + errmsg_len <= ((UChar*)pp) + szPG * VKI_PAGE_SIZE);
+
+   VG_(memcpy)(pc, errmsg_str, errmsg_len);
+   pp->off_errmsg = pc - (UChar*)pp;
+   pp->len_errmsg = errmsg_len - 1; /* -1: skip terminating NUL */
+
+   /* All three strings now live in the preload page; release the
+      temporary heap copies. */
+   VG_(free)(plcore_str);
+   VG_(free)(pltool_str);
+   if (have_ld_pre)
+      VG_(free)(ld_pre_str);
+
+   /* Fill in all the other preload page fields that we can right
+      now. */
+#  if defined(VGP_ppc32_aix5)
+   vg_assert(__NR_AIX5___loadx != __NR_AIX5_UNKNOWN);
+   pp->nr_load = __NR_AIX5___loadx;
+#  else /* defined(VGP_ppc64_aix5) */
+   vg_assert(__NR_AIX5_kload != __NR_AIX5_UNKNOWN);
+   pp->nr_load = __NR_AIX5_kload;
+#  endif
+
+   vg_assert(__NR_AIX5_kwrite != __NR_AIX5_UNKNOWN);
+   pp->nr_kwrite = __NR_AIX5_kwrite;   /* kwrite */
+
+   vg_assert(__NR_AIX5__exit != __NR_AIX5_UNKNOWN);
+   pp->nr__exit = __NR_AIX5__exit;    /* _exit */
+
+   pp->p_diagnose_load_failure = &diagnose_load_failure;
+
+   ciii.preloadpage = pp;
+   ciii.intregs37   = 0; /* filled in in m_main.c */
+   return ciii;
+}
+
+
+/* --- Finalise the initial image and register state. --- */
+
+/* Scratch buffer of VKI_PAGE_SIZE bytes.  VG_(finalise_thread1state)
+   decompresses the launcher's compressed page into it and then copies
+   the result back over the compressed original. */
+static UChar unz_page[VKI_PAGE_SIZE];
+
+/* Checksum the len bytes starting at addr: two running sums, each kept
+   modulo 65521, combined as (high sum << 16) + low sum.  The low sum
+   starts at 1 and the high sum at 0, so an empty range yields 1. */
+static UInt compute_adler32 ( void* addr, UWord len )
+{
+   const UInt   modulus = 65521;
+   const UChar* bytes   = (const UChar*)addr;
+   UInt         lo      = 1;
+   UInt         hi      = 0;
+   UWord        i;
+
+   for (i = 0; i < len; i++) {
+      /* lo accumulates the bytes, hi accumulates the successive
+         values of lo. */
+      lo = (lo + bytes[i]) % modulus;
+      hi = (hi + lo) % modulus;
+   }
+   return (hi << 16) + lo;
+}
+
+/* Establish the initial guest register state for thread 1.
+
+   In order, this function:
+   - initialises arch->vex and zeroes arch->vex_shadow;
+   - loads GPR0..31, CIA, LR, CTR, CR and XER from ciii.intregs37;
+   - adjusts the argc/argv registers to skip Valgrind's own arguments;
+   - stashes r2..r10 and the real entry point in ciii.preloadpage and
+     redirects CIA/GPR3 to the preload trampoline;
+   - decompresses ciii.compressed_page via unz_page and copies the
+     result back in place;
+   - tells the tool the registers were written, and records the
+     current brk in VG_(brk_base) / VG_(brk_limit).
+
+   The order matters: ciii.intregs37 must be fully consumed before the
+   decompression step (see the comment at that step). */
+void VG_(finalise_thread1state)( /*MOD*/ThreadArchState* arch,
+ ClientInitImgInfo ciii )
+{
+ UInt adler32_act;
+ SysRes sres;
+ /* On AIX we get a block of 37 words telling us the initial state
+ for (GPR0 .. GPR31, PC, CR, LR, CTR, XER), and we start with all
+ the other registers zeroed. */
+
+# if defined(VGP_ppc32_aix5)
+
+ vg_assert(0 == sizeof(VexGuestPPC32State) % 8);
+
+ /* Zero out the initial state, and set up the simulated FPU in a
+ sane way. */
+ LibVEX_GuestPPC32_initialise(&arch->vex);
+
+ /* Zero out the shadow area. */
+ VG_(memset)(&arch->vex_shadow, 0, sizeof(VexGuestPPC32State));
+
+# else /* defined(VGP_ppc64_aix5) */
+
+ vg_assert(0 == sizeof(VexGuestPPC64State) % 8);
+
+ /* Zero out the initial state, and set up the simulated FPU in a
+ sane way. */
+ LibVEX_GuestPPC64_initialise(&arch->vex);
+
+ /* Zero out the shadow area. */
+ VG_(memset)(&arch->vex_shadow, 0, sizeof(VexGuestPPC64State));
+
+# endif
+
+ /* ciii.intregs37 contains the integer register state as it needs
+ to be at client startup. These values are supplied by the
+ launcher. The 37 regs are the initial values for:
+ GPR0 .. GPR31, PC, CR, LR, CTR, XER. */
+
+ /* Put essential stuff into the new state. */
+ arch->vex.guest_GPR0 = (UWord)ciii.intregs37[0];
+ arch->vex.guest_GPR1 = (UWord)ciii.intregs37[1];
+ arch->vex.guest_GPR2 = (UWord)ciii.intregs37[2];
+ arch->vex.guest_GPR3 = (UWord)ciii.intregs37[3];
+ arch->vex.guest_GPR4 = (UWord)ciii.intregs37[4];
+ arch->vex.guest_GPR5 = (UWord)ciii.intregs37[5];
+ arch->vex.guest_GPR6 = (UWord)ciii.intregs37[6];
+ arch->vex.guest_GPR7 = (UWord)ciii.intregs37[7];
+ arch->vex.guest_GPR8 = (UWord)ciii.intregs37[8];
+ arch->vex.guest_GPR9 = (UWord)ciii.intregs37[9];
+ arch->vex.guest_GPR10 = (UWord)ciii.intregs37[10];
+ arch->vex.guest_GPR11 = (UWord)ciii.intregs37[11];
+ arch->vex.guest_GPR12 = (UWord)ciii.intregs37[12];
+ arch->vex.guest_GPR13 = (UWord)ciii.intregs37[13];
+ arch->vex.guest_GPR14 = (UWord)ciii.intregs37[14];
+ arch->vex.guest_GPR15 = (UWord)ciii.intregs37[15];
+ arch->vex.guest_GPR16 = (UWord)ciii.intregs37[16];
+ arch->vex.guest_GPR17 = (UWord)ciii.intregs37[17];
+ arch->vex.guest_GPR18 = (UWord)ciii.intregs37[18];
+ arch->vex.guest_GPR19 = (UWord)ciii.intregs37[19];
+ arch->vex.guest_GPR20 = (UWord)ciii.intregs37[20];
+ arch->vex.guest_GPR21 = (UWord)ciii.intregs37[21];
+ arch->vex.guest_GPR22 = (UWord)ciii.intregs37[22];
+ arch->vex.guest_GPR23 = (UWord)ciii.intregs37[23];
+ arch->vex.guest_GPR24 = (UWord)ciii.intregs37[24];
+ arch->vex.guest_GPR25 = (UWord)ciii.intregs37[25];
+ arch->vex.guest_GPR26 = (UWord)ciii.intregs37[26];
+ arch->vex.guest_GPR27 = (UWord)ciii.intregs37[27];
+ arch->vex.guest_GPR28 = (UWord)ciii.intregs37[28];
+ arch->vex.guest_GPR29 = (UWord)ciii.intregs37[29];
+ arch->vex.guest_GPR30 = (UWord)ciii.intregs37[30];
+ arch->vex.guest_GPR31 = (UWord)ciii.intregs37[31];
+
+ /* Words 32..36 are PC, CR, LR, CTR, XER in that order; CR and XER
+ are installed further down through the LibVEX helper functions. */
+ arch->vex.guest_CIA = (UWord)ciii.intregs37[32+0];
+ arch->vex.guest_LR = (UWord)ciii.intregs37[32+2];
+ arch->vex.guest_CTR = (UWord)ciii.intregs37[32+3];
+
+# if defined(VGP_ppc32_aix5)
+
+ LibVEX_GuestPPC32_put_CR( (UWord)ciii.intregs37[32+1], &arch->vex );
+ LibVEX_GuestPPC32_put_XER( (UWord)ciii.intregs37[32+4], &arch->vex );
+
+ /* Set the cache line size (KLUDGE) */
+ VG_(machine_ppc32_set_clszB)( 128 );
+
+# else /* defined(VGP_ppc64_aix5) */
+
+ LibVEX_GuestPPC64_put_CR( (UWord)ciii.intregs37[32+1], &arch->vex );
+ LibVEX_GuestPPC64_put_XER( (UWord)ciii.intregs37[32+4], &arch->vex );
+
+ /* Set the cache line size (KLUDGE) */
+ VG_(machine_ppc64_set_clszB)( 128 );
+
+# endif
+
+ /* Fix up the client's command line. Its argc/v/envp is in r3/4/5
+ (32-bit AIX) or r14/15/16 (64-bit AIX), but that is for the
+ Valgrind invocation as a whole. Hence we need to decrement argc
+ and advance argv to step over the args for Valgrind, and the
+ name of the Valgrind tool exe bogusly inserted by the launcher
+ (hence the "+1"). */
+
+# if defined(VGP_ppc32_aix5)
+
+ vg_assert(arch->vex.guest_GPR3 >= 1 + VG_(args_for_valgrind).used);
+ arch->vex.guest_GPR3 -= (1 + VG_(args_for_valgrind).used);
+ arch->vex.guest_GPR4 += sizeof(UWord) * (1 + VG_(args_for_valgrind).used);
+
+# else /* defined(VGP_ppc64_aix5) */
+
+ vg_assert(arch->vex.guest_GPR14 >= 1 + VG_(args_for_valgrind).used);
+ arch->vex.guest_GPR14 -= (1 + VG_(args_for_valgrind).used);
+ arch->vex.guest_GPR15 += sizeof(UWord) * (1 + VG_(args_for_valgrind).used);
+
+# endif
+
+ /* At this point the guest register state is correct for client
+ startup. However, that's not where we want to start; in fact we
+ want to start at VG_(ppc{32,64}_aix5_do_preloads_then_start_client),
+ passing it ciii.preloadpage in r3. This will load the core/tool
+ preload .so's, then restore r2-r10 from what's stashed in the
+ preloadpage, and then start the client really. Hence: */
+
+ /* Save r2-r10 and the client start point in preloadpage */
+ ciii.preloadpage->r2 = (ULong)arch->vex.guest_GPR2;
+ ciii.preloadpage->r3 = (ULong)arch->vex.guest_GPR3;
+ ciii.preloadpage->r4 = (ULong)arch->vex.guest_GPR4;
+ ciii.preloadpage->r5 = (ULong)arch->vex.guest_GPR5;
+ ciii.preloadpage->r6 = (ULong)arch->vex.guest_GPR6;
+ ciii.preloadpage->r7 = (ULong)arch->vex.guest_GPR7;
+ ciii.preloadpage->r8 = (ULong)arch->vex.guest_GPR8;
+ ciii.preloadpage->r9 = (ULong)arch->vex.guest_GPR9;
+ ciii.preloadpage->r10 = (ULong)arch->vex.guest_GPR10;
+ ciii.preloadpage->client_start = (ULong)arch->vex.guest_CIA;
+
+
+# if defined(VGP_ppc32_aix5)
+
+ /* Set up to start at VG_(ppc32_aix5_do_preloads_then_start_client) */
+ arch->vex.guest_CIA = (UWord)&VG_(ppc32_aix5_do_preloads_then_start_client);
+
+# else /* defined(VGP_ppc64_aix5) */
+
+ /* Set up to start at VG_(ppc64_aix5_do_preloads_then_start_client) */
+ arch->vex.guest_CIA = (UWord)&VG_(ppc64_aix5_do_preloads_then_start_client);
+
+# endif
+
+ /* r3 carries the preload page address to the trampoline; the
+ client's own r3 was saved in the preload page just above. */
+ arch->vex.guest_GPR3 = (UWord)ciii.preloadpage;
+
+ /* The rest of the preloadpage fields will already have been filled
+ in by VG_(setup_client_initial_image). So we're done. */
+
+ /* Finally, decompress the page compressed by the launcher. We
+ can't do this any earlier, because the page is (effectively)
+ decompressed in place, which trashes ciii.intregs37. So we have
+ to wait till this point, at which we're done with ciii.intregs37
+ (to be precise, with what it points at). */
+ VG_(debugLog)(1, "initimg", "decompressing page at %p\n",
+ (void*)ciii.compressed_page);
+ vg_assert(VG_IS_PAGE_ALIGNED(ciii.compressed_page));
+
+ /* Decompress into the static scratch page first; the result is
+ copied back over the compressed data below. */
+ Huffman_Uncompress( (void*)ciii.compressed_page, unz_page,
+ VKI_PAGE_SIZE, VKI_PAGE_SIZE );
+ adler32_act = compute_adler32(unz_page, VKI_PAGE_SIZE);
+
+ /* NOTE(review): the actual and expected checksums are only logged
+ here, never compared -- a mismatch does not stop startup. Confirm
+ whether that is intentional. */
+ VG_(debugLog)(1, "initimg",
+ "decompress done, adler32s: act 0x%x, exp 0x%x\n",
+ adler32_act, ciii.adler32_exp );
+
+ VG_(memcpy)((void*)ciii.compressed_page, unz_page, VKI_PAGE_SIZE);
+
+ VG_(debugLog)(1, "initimg", "copy back done\n");
+
+ /* Tell the tool that we just wrote to the registers. */
+ VG_TRACK( post_reg_write, Vg_CoreStartup, /*tid*/1, /*offset*/0,
+ sizeof(VexGuestArchState));
+
+ /* Determine the brk limit. */
+ VG_(debugLog)(1, "initimg", "establishing current brk ..\n");
+ vg_assert(__NR_AIX5_sbrk != __NR_AIX5_UNKNOWN);
+ sres = VG_(do_syscall1)(__NR_AIX5_sbrk, 0);
+ vg_assert(sres.err == 0); /* assert no error */
+ VG_(brk_base) = VG_(brk_limit) = sres.res;
+ VG_(debugLog)(1, "initimg", ".. brk = %p\n", (void*)VG_(brk_base));
+}
+
+
+/* --- Diagnose preload failures. --- */
+
+/* This is a nasty but effective kludge. The address of the following
+ function is put into the preload page. So, if a preload failure
+ happens, we call here to get helpful info printed out (the call
+ site is in m_trampoline.S). This is a dirty hack (1) because
+ diagnose_load_failure runs on the simulated CPU, not the real one
+ and (2) because it induces a libc dependency. Oh well. */
+
+/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
+#include <stdlib.h>
+#include <sys/ldr.h>
+/* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
+
+/* Print, via VG_(debugLog), whatever loader diagnostics are available
+   after a preload failure.  Takes no arguments and returns nothing;
+   its address is stored in the preload page by
+   VG_(setup_client_initial_image).
+
+   In a 64-bit build this only prints an apology.  In a 32-bit build it
+   calls loadquery(L_GETMESSAGES) and prints each message string it
+   returns. */
+static void diagnose_load_failure ( void )
+{
+#  define NBUF 1024
+   /* The message buffer is walked below as a NULL-terminated array of
+      string pointers, so declare it as an array of pointers: that
+      gives it pointer alignment while keeping its size at NBUF
+      bytes. */
+   UChar* msgs[NBUF / sizeof(UChar*)];
+   VG_(debugLog)(0, "initimg", "Diagnosing load failure\n");
+   if (sizeof(void*) == 8) {
+      VG_(debugLog)(0, "initimg", "Can't safely do loadquery() "
+                                  "in 64-bit mode.  Sorry.\n");
+      /* because this requires dynamic linking to be working (IIRC)
+         and it isn't; the tool file's dynamic linking was never done,
+         because it was loaded by the bootstrap stub, which simply did
+         sys_kload() but didn't make usla do the relevant
+         relocations. */
+   } else {
+      UInt i;
+      Int  r;
+      /* Start from an all-NULL array so that nothing uninitialised is
+         ever interpreted as a string pointer. */
+      VG_(memset)(msgs, 0, sizeof(msgs));
+      r = loadquery(L_GETMESSAGES, msgs, sizeof(msgs));
+      VG_(debugLog)(0, "initimg", "loadquery returned %d (0 = success)\n", r);
+      /* Only trust the buffer contents if the query succeeded, and
+         never walk past the end of the array. */
+      if (r == 0) {
+         for (i = 0; i < sizeof(msgs)/sizeof(msgs[0]) && msgs[i]; i++)
+            VG_(debugLog)(0, "initimg", "\"%s\"\n", msgs[i]);
+      }
+      VG_(debugLog)(0, "initimg", "Use /usr/sbin/execerror to make "
+                                  "sense of above string(s)\n");
+      VG_(debugLog)(0, "initimg", "See also comments at the bottom of\n");
+      VG_(debugLog)(0, "initimg", "coregrind/m_initimg/"
+                                  "initimg-aix5.c (in Valgrind sources)\n");
+   }
+#  undef NBUF
+}
+
+/* Take the strings that this prints out and feed them
+ to /usr/sbin/execerror. For example, it might print
+
+ (ld 3 1 __libc_freeres /foo/bar/ppc32-aix5/vgpreload_core.so
+
+ in which case
+
+ $ execerror xyzzy \
+ "(ld 3 1 __libc_freeres /foo/bar/ppc32-aix5/vgpreload_core.so"
+
+ gets you
+
+ Could not load program xyzzy:
+ rtld: 0712-001 Symbol __libc_freeres was referenced
+ from module /foo/bar/ppc32-aix5/vgpreload_core.so(),
+ but a runtime definition
+ of the symbol was not found.
+*/
+
+/*--------------------------------------------------------------------*/
+/*--- initimg-aix5.c ---*/
+/*--------------------------------------------------------------------*/
--- /dev/null
+/*************************************************************************
+* Name: huffman.c
+* Author: Marcus Geelnard
+* Description: Huffman coder/decoder implementation.
+* Reentrant: Yes
+* $Id: huffman.c,v 1.6 2004/12/14 18:59:40 marcus256 Exp $
+*
+* This is a very straight forward implementation of a Huffman coder and
+* decoder.
+*
+* Primary flaws with this primitive implementation are:
+* - Slow bit stream implementation
+* - Fairly slow decoding (slower than encoding)
+* - Maximum tree depth of 32 (the coder aborts if any code exceeds a
+* size of 32 bits). If I'm not mistaking, this should not be possible
+* unless the input buffer is larger than 2^32 bytes, which is not
+* supported by the coder anyway (max 2^32-1 bytes can be specified with
+* an unsigned 32-bit integer).
+*
+* On the other hand, there are a few advantages of this implementation:
+* - The Huffman tree is stored in a very compact form, requiring only
+* 12 bits per symbol (for 8 bit symbols), meaning a maximum of 384
+* bytes overhead.
+* - The Huffman coder does quite well in situations where the data is
+* noisy, in which case most dictionary based coders run into problems.
+*
+* Possible improvements (probably not worth it):
+* - Partition the input data stream into blocks, where each block has
+* its own Huffman tree. With variable block sizes, it should be
+* possible to find locally optimal Huffman trees, which in turn could
+* reduce the total size.
+* - Allow for a few different predefined Huffman trees, which could
+* reduce the size of a block even further.
+*-------------------------------------------------------------------------
+* Copyright (c) 2003-2004 Marcus Geelnard
+*
+* This software is provided 'as-is', without any express or implied
+* warranty. In no event will the authors be held liable for any damages
+* arising from the use of this software.
+*
+* Permission is granted to anyone to use this software for any purpose,
+* including commercial applications, and to alter it and redistribute it
+* freely, subject to the following restrictions:
+*
+* 1. The origin of this software must not be misrepresented; you must not
+* claim that you wrote the original software. If you use this software
+* in a product, an acknowledgment in the product documentation would
+* be appreciated but is not required.
+*
+* 2. Altered source versions must be plainly marked as such, and must not
+* be misrepresented as being the original software.
+*
+* 3. This notice may not be removed or altered from any source
+* distribution.
+*
+* Marcus Geelnard
+* marcus.geelnard at home.se
+*************************************************************************/
+
+/* Modified May 06 by Julian Seward for use in Valgrind.
+ - changed integral types to V's versions (UInt, UChar etc)
+ - added initialisation in _Huffman_WriteBits, as described in
+ comment in that function.
+*/
+
+/*************************************************************************
+* Types used for Huffman coding
+*************************************************************************/
+
+/* One entry of the symbol table used by both coder and decoder. */
+typedef struct {
+ UInt Symbol; /* the byte value this entry describes */
+ UInt Count; /* occurrences in the input (set by _Huffman_Hist) */
+ UInt Code; /* code assigned to the symbol, held in the low Bits bits */
+ UInt Bits; /* length of Code in bits */
+} huff_sym_t;
+
+/* Cursor into a byte buffer that is read or written one bit at a
+   time. */
+typedef struct {
+ UChar *BytePtr; /* byte currently being read/written */
+ UInt BitPos; /* bit index in *BytePtr, 0..7; 0 is the top (0x80) bit */
+} huff_bitstream_t;
+
+
+
+/*************************************************************************
+* INTERNAL FUNCTIONS *
+*************************************************************************/
+
+
+/*************************************************************************
+* _Huffman_InitBitstream() - Initialize a bitstream.
+*************************************************************************/
+
+/* Point stream at the first bit (bit position 0) of the first byte of
+   buf. */
+static void _Huffman_InitBitstream( huff_bitstream_t *stream,
+                                    UChar *buf )
+{
+  stream->BitPos  = 0;
+  stream->BytePtr = buf;
+}
+
+
+/*************************************************************************
+* _Huffman_ReadBits() - Read bits from a bitstream.
+*************************************************************************/
+
+/* Read the next `bits` bits from stream, most significant bit first,
+   and return them as the low-order bits of the result.  The stream
+   position is advanced past the bits read.  No bounds checking is
+   done on the underlying buffer. */
+static UInt _Huffman_ReadBits( huff_bitstream_t *stream,
+                               UInt bits )
+{
+  UChar *cursor   = stream->BytePtr;
+  UInt  bitpos    = stream->BitPos;
+  UInt  value     = 0;
+  UInt  remaining = bits;
+
+  while( remaining > 0 )
+  {
+    /* Shift in the bit at the current position; bit position 0 is the
+       top (0x80) bit of the byte. */
+    value = (value << 1) | ((*cursor >> (7 - bitpos)) & 1);
+
+    /* Step to the next bit, moving to the next byte after bit 7. */
+    bitpos = (bitpos + 1) & 7;
+    if( bitpos == 0 )
+    {
+      cursor ++;
+    }
+    remaining --;
+  }
+
+  /* Write the updated position back. */
+  stream->BytePtr = cursor;
+  stream->BitPos  = bitpos;
+
+  return value;
+}
+
+
+/*************************************************************************
+* _Huffman_WriteBits() - Write bits to a bitstream.
+*************************************************************************/
+
+/* Append the low `bits` bits of x to stream, most significant of those
+   bits first, and advance the stream position.  bits must be in the
+   range 0..32; bits == 0 writes nothing.  No bounds checking is done
+   on the underlying buffer. */
+static void _Huffman_WriteBits( huff_bitstream_t *stream, UInt x,
+                                UInt bits )
+{
+  UInt  bit, count;
+  UChar *buf;
+  UInt  mask;
+
+  /* Nothing to append.  Returning here also avoids evaluating a shift
+     by (bits-1) when bits is zero. */
+  if( bits == 0 )
+  {
+    return;
+  }
+
+  /* Get current stream state */
+  buf = stream->BytePtr;
+  bit = stream->BitPos;
+
+  /* Append bits.  The mask is built from an unsigned 1 so that
+     selecting bit 31 (bits == 32) is well defined. */
+  mask = ((UInt)1) << (bits-1);
+  for( count = 0; count < bits; ++ count )
+  {
+    /* If we're starting a new byte, zero it out, so that the
+       resulting byte sequence looks completely defined from
+       Valgrind's point of view.  If this doesn't happen then the
+       last byte in the stream may look partially undefined. */
+    if (bit == 0)
+      *buf = 0;
+    *buf = (*buf & (0xff^(1<<(7-bit)))) +
+           ((x & mask ? 1 : 0) << (7-bit));
+    x <<= 1;
+    bit = (bit+1) & 7;
+    if( !bit )
+    {
+      ++ buf;
+    }
+  }
+
+  /* Store new stream state */
+  stream->BytePtr = buf;
+  stream->BitPos  = bit;
+}
+
+
+/*************************************************************************
+* _Huffman_Hist() - Calculate (sorted) histogram for a block of data.
+*************************************************************************/
+
+/* Build a byte-value histogram of in[0 .. size-1] in sym[0 .. 255] and
+   sort it so that the most frequent symbol comes first.  Code and Bits
+   of every entry are reset to 0.  Entries with equal counts keep their
+   original (ascending symbol value) relative order. */
+static void _Huffman_Hist( UChar *in, huff_sym_t *sym,
+                           UInt size )
+{
+  UInt       k, remaining;
+  Int        swaps;
+  huff_sym_t tmp;
+
+  /* Clear/init histogram */
+  for( k = 0; k < 256; ++ k )
+  {
+    sym[k].Symbol = k;
+    sym[k].Count  = 0;
+    sym[k].Code   = 0;
+    sym[k].Bits   = 0;
+  }
+
+  /* Build histogram.  The counter has the same unsigned type as size,
+     so every byte is counted whatever the value of size. */
+  for( remaining = size; remaining; -- remaining )
+  {
+    sym[ *in ++ ].Count ++;
+  }
+
+  /* Sort histogram - most frequent symbol first (bubble sort) */
+  do
+  {
+    swaps = 0;
+    for( k = 0; k < 255; ++ k )
+    {
+      if( sym[k].Count < sym[k+1].Count )
+      {
+        tmp      = sym[k];
+        sym[k]   = sym[k+1];
+        sym[k+1] = tmp;
+        swaps    = 1;
+      }
+    }
+  }
+  while( swaps );
+}
+
+
+/*************************************************************************
+* _Huffman_MakeTree() - Generate a Huffman tree.
+*************************************************************************/
+
+/* Recursively build the code tree for sym[first .. last] (sorted most
+   frequent first), writing the tree description to stream and storing
+   each leaf's code and code length in its sym entry.
+
+   code/bits are the code prefix, and its length, for this subtree.
+   Per node the stream receives: 1 + 8 symbol bits for a leaf;
+   otherwise 0, then for each of the two branches a 1 followed by the
+   branch's own description, or a 0 if the branch is empty. */
+static void _Huffman_MakeTree( huff_sym_t *sym, huff_bitstream_t *stream,
+                               UInt code, UInt bits, UInt first,
+                               UInt last )
+{
+  UInt idx, total, weight_a, weight_b;
+
+  /* Leaf: emit the symbol and record its code. */
+  if( first == last )
+  {
+    _Huffman_WriteBits( stream, 1, 1 );
+    _Huffman_WriteBits( stream, sym[first].Symbol, 8 );
+    sym[first].Code = code;
+    sym[first].Bits = bits;
+    return;
+  }
+
+  /* Interior node marker. */
+  _Huffman_WriteBits( stream, 0, 1 );
+
+  /* Total count over the whole interval. */
+  total = 0;
+  for( idx = first; idx <= last; ++ idx )
+  {
+    total += sym[idx].Count;
+  }
+
+  /* Grow branch a from the front until it holds at least half of the
+     total (rounded up), always leaving at least one entry for
+     branch b.  On exit idx is the first entry of branch b. */
+  weight_a = 0;
+  idx = first;
+  while( weight_a < ((total+1)>>1) && idx < last )
+  {
+    weight_a += sym[idx].Count;
+    ++ idx;
+  }
+
+  /* Branch a: entries first .. idx-1, next code bit 0. */
+  if( weight_a > 0 )
+  {
+    _Huffman_WriteBits( stream, 1, 1 );
+    _Huffman_MakeTree( sym, stream, (code<<1)+0, bits+1,
+                       first, idx-1 );
+  }
+  else
+  {
+    /* Empty branch */
+    _Huffman_WriteBits( stream, 0, 1 );
+  }
+
+  /* Branch b: entries idx .. last, next code bit 1. */
+  weight_b = total - weight_a;
+  if( weight_b > 0 )
+  {
+    _Huffman_WriteBits( stream, 1, 1 );
+    _Huffman_MakeTree( sym, stream, (code<<1)+1, bits+1,
+                       idx, last );
+  }
+  else
+  {
+    /* Empty branch */
+    _Huffman_WriteBits( stream, 0, 1 );
+  }
+}
+
+
+/*************************************************************************
+* _Huffman_RecoverTree() - Recover a Huffman tree from a bitstream.
+*************************************************************************/
+
+/* Recursively rebuild the symbol table from the tree description
+   written by _Huffman_MakeTree.
+
+   sym    - table to fill; it must have room for 256 entries (as it
+            does at the call site in Huffman_Uncompress).
+   stream - bitstream positioned at this subtree's description.
+   code   - code prefix for this subtree; bits is its length.
+   symnum - in/out: number of table entries filled so far.
+
+   A description containing more than 256 leaves is malformed; the
+   surplus leaves are read from the stream but not stored, so the
+   table is never overrun. */
+static void _Huffman_RecoverTree( huff_sym_t *sym,
+                                  huff_bitstream_t *stream, UInt code, UInt bits,
+                                  UInt *symnum )
+{
+  UInt symbol;
+
+  /* Is this a leaf node? */
+  if( _Huffman_ReadBits( stream, 1 ) )
+  {
+    /* Get symbol from tree description */
+    symbol = _Huffman_ReadBits( stream, 8 );
+
+    /* Store code info in symbol array, unless the table is already
+       full (only possible with a malformed description) */
+    if( *symnum < 256 )
+    {
+      sym[*symnum].Symbol = symbol;
+      sym[*symnum].Code   = code;
+      sym[*symnum].Bits   = bits;
+
+      /* Increase symbol counter */
+      *symnum = *symnum + 1;
+    }
+
+    return;
+  }
+
+  /* Non-empty branch? */
+  if( _Huffman_ReadBits( stream, 1 ) )
+  {
+    /* Create branch a */
+    _Huffman_RecoverTree( sym, stream, (code<<1)+0, bits+1,
+                          symnum );
+  }
+
+  /* Non-empty branch? */
+  if( _Huffman_ReadBits( stream, 1 ) )
+  {
+    /* Create branch b */
+    _Huffman_RecoverTree( sym, stream, (code<<1)+1, bits+1,
+                          symnum );
+  }
+}
+
+
+
+
+/*************************************************************************
+* PUBLIC FUNCTIONS *
+*************************************************************************/
+
+
+/*************************************************************************
+* Huffman_Compress() - Compress a block of data using a Huffman coder.
+* in - Input (uncompressed) buffer.
+* out - Output (compressed) buffer. This buffer must be 384 bytes
+* larger than the input buffer.
+* insize - Number of input bytes.
+* The function returns the size of the compressed data.
+*************************************************************************/
+/* Compress insize bytes from in into out.
+
+   The output is the tree description followed by the code of each
+   input byte in order.  Returns the number of bytes written to out,
+   or 0 if insize is 0 or if any symbol was given a code longer than
+   32 bits (in the latter case out may already hold a partial tree
+   description). */
+static
+Int Huffman_Compress( UChar *in, UChar *out,
+                      UInt insize )
+{
+  huff_sym_t       sym[ 256 ], tmp;
+  huff_bitstream_t stream;
+  UInt             k, total_bytes, swaps, symbol, last_symbol;
+
+  /* Do we have anything to compress? */
+  if( insize < 1 ) return 0;
+
+  /* Initialize bitstream */
+  _Huffman_InitBitstream( &stream, out );
+
+  /* Calculate and sort histogram for input data */
+  _Huffman_Hist( in, sym, insize );
+
+  /* Find number of used symbols.  insize >= 1 guarantees at least one
+     non-zero count, so this scan stops at index 0 at the latest. */
+  for( last_symbol = 255; sym[last_symbol].Count == 0; -- last_symbol );
+
+  /* Special case: In order to build a correct tree, we need at least
+     two symbols (otherwise we get zero-bit representations). */
+  if( last_symbol == 0 ) ++ last_symbol;
+
+  /* Build Huffman tree */
+  _Huffman_MakeTree( sym, &stream, 0, 0, 0, last_symbol );
+
+  /* Was any code > 32 bits? (we do not handle that at present).
+     All 256 entries are checked, including the last one. */
+  for( k = 0; k < 256; ++ k )
+  {
+    if( sym[k].Bits > 32 )
+    {
+      return 0;
+    }
+  }
+
+  /* Sort histogram - first symbol first (bubble sort).  Afterwards
+     sym[] can be indexed directly by byte value. */
+  do
+  {
+    swaps = 0;
+    for( k = 0; k < 255; ++ k )
+    {
+      if( sym[k].Symbol > sym[k+1].Symbol )
+      {
+        tmp      = sym[k];
+        sym[k]   = sym[k+1];
+        sym[k+1] = tmp;
+        swaps    = 1;
+      }
+    }
+  }
+  while( swaps );
+
+  /* Encode input stream */
+  for( k = 0; k < insize; ++ k )
+  {
+    symbol = in[ k ];
+    _Huffman_WriteBits( &stream, sym[symbol].Code,
+                        sym[symbol].Bits );
+  }
+
+  /* Calculate size of output data; a partly filled final byte counts
+     as a whole byte. */
+  total_bytes = (Int)(stream.BytePtr - out);
+  if( stream.BitPos > 0 )
+  {
+    ++ total_bytes;
+  }
+
+  return total_bytes;
+}
+
+
+
+/*************************************************************************
+* Huffman_Uncompress() - Uncompress a block of data using a Huffman
+* decoder.
+* in - Input (compressed) buffer.
+* out - Output (uncompressed) buffer. This buffer must be large
+* enough to hold the uncompressed data.
+* insize - Number of input bytes.
+* outsize - Number of output bytes.
+*************************************************************************/
+/* Decode outsize bytes from the compressed data at in into out.
+
+   insize is only tested against zero; input reads are not otherwise
+   bounded by it.  If the recovered tree contains no symbols the
+   function returns without writing anything to out.  For an output
+   position whose bits match no code, nothing is stored and the write
+   position does not advance. */
+static
+void Huffman_Uncompress( UChar *in, UChar *out,
+                         UInt insize, UInt outsize )
+{
+  huff_sym_t       sym[ 256 ], tmp;
+  huff_bitstream_t stream;
+  UInt             k, m, symbol_count, swaps;
+  UChar            *buf;
+  UInt             bits, delta_bits, new_bits, code;
+
+  /* Do we have anything to decompress? */
+  if( insize < 1 ) return;
+
+  /* Initialize bitstream */
+  _Huffman_InitBitstream( &stream, in );
+
+  /* Clear tree/histogram */
+  for( k = 0; k < 256; ++ k )
+  {
+    sym[k].Bits = 0x7fffffff;
+  }
+
+  /* Recover Huffman tree */
+  symbol_count = 0;
+  _Huffman_RecoverTree( sym, &stream, 0, 0, &symbol_count );
+
+  /* An empty tree can decode nothing.  Returning here also keeps the
+     "symbol_count-1" bound below from wrapping around. */
+  if( symbol_count == 0 ) return;
+
+  /* Sort histogram - shortest code first (bubble sort) */
+  do
+  {
+    swaps = 0;
+    for( k = 0; k < symbol_count-1; ++ k )
+    {
+      if( sym[k].Bits > sym[k+1].Bits )
+      {
+        tmp      = sym[k];
+        sym[k]   = sym[k+1];
+        sym[k+1] = tmp;
+        swaps    = 1;
+      }
+    }
+  }
+  while( swaps );
+
+  /* Decode input stream */
+  buf = out;
+  for( k = 0; k < outsize; ++ k )
+  {
+    /* Search tree for matching code.  Candidates are tried shortest
+       code first; extra bits are pulled from the stream only when the
+       next candidate is longer than what has been read so far.  Both
+       sides of the comparison are left-aligned in 32 bits. */
+    bits = 0;
+    code = 0;
+    for( m = 0; m < symbol_count; ++ m )
+    {
+      delta_bits = sym[m].Bits - bits;
+      if( delta_bits )
+      {
+        new_bits = _Huffman_ReadBits( &stream, delta_bits );
+        code = code | (new_bits << (32-bits-delta_bits));
+        bits = sym[m].Bits;
+      }
+      if( code == (sym[m].Code << (32-sym[m].Bits)) )
+      {
+        *buf ++ = (UChar) sym[m].Symbol;
+        break;
+      }
+    }
+  }
+}