]> git.ipfire.org Git - thirdparty/valgrind.git/commitdiff
Add exp-bbv to the tool-suite. I'm seeing a couple of amd64-linux test
authorNicholas Nethercote <njn@valgrind.org>
Tue, 14 Jul 2009 01:39:54 +0000 (01:39 +0000)
committerNicholas Nethercote <njn@valgrind.org>
Tue, 14 Jul 2009 01:39:54 +0000 (01:39 +0000)
failures, but they can be fixed up in-repo.  This resolves bug 198395.

git-svn-id: svn://svn.valgrind.org/valgrind/trunk@10444

75 files changed:
Makefile.am
NEWS
configure.in
docs/xml/manual.xml
docs/xml/valgrind-manpage.xml
exp-bbv/Makefile.am [new file with mode: 0644]
exp-bbv/bbv_main.c [new file with mode: 0644]
exp-bbv/docs/Makefile.am [new file with mode: 0644]
exp-bbv/docs/bbv-manual.xml [new file with mode: 0644]
exp-bbv/tests/Makefile.am [new file with mode: 0644]
exp-bbv/tests/amd64-linux/Makefile.am [new file with mode: 0644]
exp-bbv/tests/amd64-linux/clone_test.S [new file with mode: 0644]
exp-bbv/tests/amd64-linux/clone_test.post.exp [new file with mode: 0644]
exp-bbv/tests/amd64-linux/clone_test.stderr.exp [new file with mode: 0644]
exp-bbv/tests/amd64-linux/clone_test.vgtest [new file with mode: 0644]
exp-bbv/tests/amd64-linux/complex_rep.S [new file with mode: 0644]
exp-bbv/tests/amd64-linux/complex_rep.stderr.exp [new file with mode: 0644]
exp-bbv/tests/amd64-linux/complex_rep.vgtest [new file with mode: 0644]
exp-bbv/tests/amd64-linux/filter_stderr [new file with mode: 0644]
exp-bbv/tests/amd64-linux/fldcw_check.S [new file with mode: 0644]
exp-bbv/tests/amd64-linux/fldcw_check.stderr.exp [new file with mode: 0644]
exp-bbv/tests/amd64-linux/fldcw_check.vgtest [new file with mode: 0644]
exp-bbv/tests/amd64-linux/ll.S [new file with mode: 0644]
exp-bbv/tests/amd64-linux/ll.post.exp [new file with mode: 0644]
exp-bbv/tests/amd64-linux/ll.stderr.exp [new file with mode: 0644]
exp-bbv/tests/amd64-linux/ll.stdout.exp [new file with mode: 0644]
exp-bbv/tests/amd64-linux/ll.vgtest [new file with mode: 0644]
exp-bbv/tests/amd64-linux/million.S [new file with mode: 0644]
exp-bbv/tests/amd64-linux/million.post.exp [new file with mode: 0644]
exp-bbv/tests/amd64-linux/million.stderr.exp [new file with mode: 0644]
exp-bbv/tests/amd64-linux/million.vgtest [new file with mode: 0644]
exp-bbv/tests/amd64-linux/rep_prefix.S [new file with mode: 0644]
exp-bbv/tests/amd64-linux/rep_prefix.stderr.exp [new file with mode: 0644]
exp-bbv/tests/amd64-linux/rep_prefix.vgtest [new file with mode: 0644]
exp-bbv/tests/filter_bb [new file with mode: 0644]
exp-bbv/tests/filter_stderr [new file with mode: 0644]
exp-bbv/tests/logo.include [new file with mode: 0644]
exp-bbv/tests/logo.lzss_new [new file with mode: 0644]
exp-bbv/tests/ppc32-linux/Makefile.am [new file with mode: 0644]
exp-bbv/tests/ppc32-linux/filter_stderr [new file with mode: 0644]
exp-bbv/tests/ppc32-linux/ll.S [new file with mode: 0644]
exp-bbv/tests/ppc32-linux/ll.post.exp [new file with mode: 0644]
exp-bbv/tests/ppc32-linux/ll.stderr.exp [new file with mode: 0644]
exp-bbv/tests/ppc32-linux/ll.stdout.exp [new file with mode: 0644]
exp-bbv/tests/ppc32-linux/ll.vgtest [new file with mode: 0644]
exp-bbv/tests/ppc32-linux/million.S [new file with mode: 0644]
exp-bbv/tests/ppc32-linux/million.post.exp [new file with mode: 0644]
exp-bbv/tests/ppc32-linux/million.stderr.exp [new file with mode: 0644]
exp-bbv/tests/ppc32-linux/million.vgtest [new file with mode: 0644]
exp-bbv/tests/x86-linux/Makefile.am [new file with mode: 0644]
exp-bbv/tests/x86-linux/clone_test.S [new file with mode: 0644]
exp-bbv/tests/x86-linux/clone_test.post.exp [new file with mode: 0644]
exp-bbv/tests/x86-linux/clone_test.stderr.exp [new file with mode: 0644]
exp-bbv/tests/x86-linux/clone_test.vgtest [new file with mode: 0644]
exp-bbv/tests/x86-linux/filter_stderr [new file with mode: 0644]
exp-bbv/tests/x86-linux/ll.S [new file with mode: 0644]
exp-bbv/tests/x86-linux/ll.post.exp [new file with mode: 0644]
exp-bbv/tests/x86-linux/ll.stderr.exp [new file with mode: 0644]
exp-bbv/tests/x86-linux/ll.stdout.exp [new file with mode: 0644]
exp-bbv/tests/x86-linux/ll.vgtest [new file with mode: 0644]
exp-bbv/tests/x86/Makefile.am [new file with mode: 0644]
exp-bbv/tests/x86/complex_rep.S [new file with mode: 0644]
exp-bbv/tests/x86/complex_rep.stderr.exp [new file with mode: 0644]
exp-bbv/tests/x86/complex_rep.vgtest [new file with mode: 0644]
exp-bbv/tests/x86/filter_stderr [new file with mode: 0644]
exp-bbv/tests/x86/fldcw_check.S [new file with mode: 0644]
exp-bbv/tests/x86/fldcw_check.stderr.exp [new file with mode: 0644]
exp-bbv/tests/x86/fldcw_check.vgtest [new file with mode: 0644]
exp-bbv/tests/x86/million.S [new file with mode: 0644]
exp-bbv/tests/x86/million.post.exp [new file with mode: 0644]
exp-bbv/tests/x86/million.stderr.exp [new file with mode: 0644]
exp-bbv/tests/x86/million.vgtest [new file with mode: 0644]
exp-bbv/tests/x86/rep_prefix.S [new file with mode: 0644]
exp-bbv/tests/x86/rep_prefix.stderr.exp [new file with mode: 0644]
exp-bbv/tests/x86/rep_prefix.vgtest [new file with mode: 0644]

index 5c8f043da18bb0999460261672c2d6f4c68ae731..9e99f993fbd97c14ed66a0fa69a97bd8932af8a4 100644 (file)
@@ -12,7 +12,8 @@ TOOLS =               memcheck \
                helgrind \
                drd
 
-EXP_TOOLS =    exp-ptrcheck
+EXP_TOOLS =    exp-ptrcheck \
+               exp-bbv
 
 # DDD: once all tools work on Darwin, TEST_TOOLS and TEST_EXP_TOOLS can be
 # replaced with TOOLS and EXP_TOOLS.
@@ -27,7 +28,7 @@ else
                lackey \
                none
 
-  TEST_EXP_TOOLS =
+  TEST_EXP_TOOLS = exp-bbv
 endif
 
 # Put docs last because building the HTML is slow and we want to get
diff --git a/NEWS b/NEWS
index 21db12c5d88b180bcffaabb52085628d6c4a5594..993a17da8c998d0bf7cc91869b4e3a61a787b8a0 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -29,6 +29,8 @@ Release 3.5.0 (???)
 
 * XXX: something about improved Wine support?
 
+* XXX: exp-bbv has been added...
+
 * A new Memcheck client request VALGRIND_COUNT_LEAK_BLOCKS has been added.
   It is similar to VALGRIND_COUNT_LEAKS but counts blocks instead of bytes.
   [XXX: consider adding VALGRIND_COUNT_LEAK_BYTES as a synonym and
index a3d22013eb279163a0749be0ca2a8c13fdf8cae8..c4f33062db2763ecfa459ea15340528020392cd4 100644 (file)
@@ -1900,6 +1900,13 @@ AC_CONFIG_FILES([
    drd/docs/Makefile
    drd/scripts/download-and-build-splash2
    drd/tests/Makefile
+   exp-bbv/Makefile
+   exp-bbv/docs/Makefile
+   exp-bbv/tests/Makefile
+   exp-bbv/tests/x86/Makefile
+   exp-bbv/tests/x86-linux/Makefile
+   exp-bbv/tests/amd64-linux/Makefile
+   exp-bbv/tests/ppc32-linux/Makefile
 ])
 AC_OUTPUT
 
index 727570a62fe5f59d6175485a783b362563b1b1bd..53cee09add8a1454e8658c61fb4f223c81cdfe8f 100644 (file)
@@ -38,6 +38,8 @@
       xmlns:xi="http://www.w3.org/2001/XInclude" />
   <xi:include href="../../exp-ptrcheck/docs/pc-manual.xml" parse="xml"  
       xmlns:xi="http://www.w3.org/2001/XInclude" />
+  <xi:include href="../../exp-bbv/docs/bbv-manual.xml" parse="xml"  
+      xmlns:xi="http://www.w3.org/2001/XInclude" />      
   <xi:include href="../../none/docs/nl-manual.xml" parse="xml"  
       xmlns:xi="http://www.w3.org/2001/XInclude" />
   <xi:include href="../../lackey/docs/lk-manual.xml" parse="xml"  
index e45d72a78829f6a96f8992fdeb790899398991fb..7163eecdf7ed57972bf8c9451256e437bb27aa08 100644 (file)
@@ -259,6 +259,14 @@ leaks.</para>
 
 </refsect1>
 
+<refsect1 id="bbv-options">
+<title>BBV Options</title>
+
+<xi:include href="../../exp-bbv/docs/bbv-manual.xml" 
+            xpointer="bbv.opts.list"
+            xmlns:xi="http://www.w3.org/2001/XInclude" />
+
+</refsect1>
 
 
 <refsect1 id="lackey-options">
@@ -271,7 +279,6 @@ leaks.</para>
 </refsect1>
 
 
-
 <refsect1 id="see_also">
 <title>See Also</title>
 
diff --git a/exp-bbv/Makefile.am b/exp-bbv/Makefile.am
new file mode 100644 (file)
index 0000000..367d0d4
--- /dev/null
@@ -0,0 +1,37 @@
+include $(top_srcdir)/Makefile.tool.am
+
+#----------------------------------------------------------------------------
+# exp-bbv-<platform>
+#----------------------------------------------------------------------------
+
+noinst_PROGRAMS  = exp-bbv-@VGCONF_ARCH_PRI@-@VGCONF_OS@
+if VGCONF_HAVE_PLATFORM_SEC
+noinst_PROGRAMS += exp-bbv-@VGCONF_ARCH_SEC@-@VGCONF_OS@
+endif
+
+BBV_SOURCES_COMMON = bbv_main.c
+
+exp_bbv_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES      = $(BBV_SOURCES_COMMON)
+exp_bbv_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CPPFLAGS     = \
+       $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
+exp_bbv_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS       = \
+       $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
+exp_bbv_@VGCONF_ARCH_PRI@_@VGCONF_OS@_DEPENDENCIES = \
+       $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_PRI_CAPS@)
+exp_bbv_@VGCONF_ARCH_PRI@_@VGCONF_OS@_LDADD        = \
+       $(TOOL_LDADD_@VGCONF_PLATFORM_PRI_CAPS@)
+exp_bbv_@VGCONF_ARCH_PRI@_@VGCONF_OS@_LDFLAGS      = \
+       $(TOOL_LDFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
+if VGCONF_HAVE_PLATFORM_SEC
+exp_bbv_@VGCONF_ARCH_SEC@_@VGCONF_OS@_SOURCES      = $(BBV_SOURCES_COMMON)
+exp_bbv_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CPPFLAGS     = \
+       $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
+exp_bbv_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS       = \
+       $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
+exp_bbv_@VGCONF_ARCH_SEC@_@VGCONF_OS@_DEPENDENCIES = \
+       $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_SEC_CAPS@)
+exp_bbv_@VGCONF_ARCH_SEC@_@VGCONF_OS@_LDADD        = \
+       $(TOOL_LDADD_@VGCONF_PLATFORM_SEC_CAPS@)
+exp_bbv_@VGCONF_ARCH_SEC@_@VGCONF_OS@_LDFLAGS      = \
+       $(TOOL_LDFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
+endif
diff --git a/exp-bbv/bbv_main.c b/exp-bbv/bbv_main.c
new file mode 100644 (file)
index 0000000..b5db191
--- /dev/null
@@ -0,0 +1,633 @@
+//--------------------------------------------------------------------*/
+//--- BBV: a SimPoint basic block vector generator      bbv_main.c ---*/
+//--------------------------------------------------------------------*/
+
+/*
+   This file is part of BBV, a Valgrind tool for generating SimPoint
+   basic block vectors.
+
+   Copyright (C) 2006-2009 Vince Weaver
+      vince _at_ csl.cornell.edu
+
+   pcfile code is Copyright (C) 2006-2009 Oriol Prat
+      oriol.prat _at _ bsc.es
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+
+#include "pub_tool_basics.h"
+#include "pub_tool_tooliface.h"
+#include "pub_tool_options.h"    /* command line options */
+
+#include "pub_tool_vki.h"        /* vki_stat */
+#include "pub_tool_libcbase.h"   /* VG_(strlen) */
+#include "pub_tool_libcfile.h"   /* VG_(write) */
+#include "pub_tool_libcprint.h"  /* VG_(printf) */
+#include "pub_tool_libcassert.h" /* VG_(exit) */
+#include "pub_tool_mallocfree.h" /* plain_free */
+#include "pub_tool_machine.h"    /* VG_(fnptr_to_fnentry) */
+#include "pub_tool_debuginfo.h"  /* VG_(get_fnname) */
+
+#include "pub_tool_oset.h"       /* ordered set stuff */
+
+   /* instruction special cases: bit flags OR-ed together by get_inst_type() */
+#define REP_INSTRUCTION   0x1
+#define FLDCW_INSTRUCTION 0x2
+
+   /* interval variables: instructions per interval, set by --interval-size */
+#define DEFAULT_GRAIN_SIZE 100000000  /* 100 million by default */
+static Int interval_size=DEFAULT_GRAIN_SIZE;
+
+   /* filenames: clo_* hold the option values, the others the expanded names */
+static UChar *clo_bb_out_file="bb.out.%p";
+static UChar *clo_pc_out_file="pc.out.%p";
+static UChar *pc_out_file=NULL;   /* filled in by dumpPcFile() */
+static UChar *bb_out_file=NULL;   /* filled in by bbv_post_clo_init() */
+
+
+   /* output parameters */
+static Bool instr_count_only=False;   /* --instr-count-only: no per-interval records */
+static Bool generate_pc_file=False;   /* set when --pc-out-file is given */
+
+   /* write buffer shared by dumpPcFile(), handle_overflow() and bbv_fini() */
+static UChar buf[1024];
+
+   /* Global values */
+static OSet* instr_info_table;  /* table that holds the basic block info */
+static Int block_num=1;         /* global next block number */
+static Int current_thread=0;    /* index into bbv_thread; set by bbv_thread_called() */
+static Int allocated_threads=1; /* number of entries allocated in bbv_thread */
+struct thread_info *bbv_thread=NULL;  /* per-thread state array, indexed by thread number */
+
+   /* Per-thread variables (one entry of the bbv_thread array) */
+struct thread_info {
+   ULong dyn_instr;         /* Instructions counted in the current interval */
+   ULong total_instr;       /* Total retired instruction count   */
+   Addr last_rep_addr;      /* address of the last rep instruction seen */
+   ULong rep_count;         /* helper calls for the rep instruction in progress */
+   ULong global_rep_count;  /* sum of the rep_count values closed out so far */
+   ULong unique_rep_count;  /* number of times close_out_reps() has run */
+   ULong fldcw_count;       /* fldcw count */
+   Int bbtrace_fd;          /* file descriptor; -1 until the trace file is opened */
+};
+
+#define FUNCTION_NAME_LENGTH 20   /* size of the BB_info.fn_name buffer */
+   /* One record per instrumented block address, stored in instr_info_table */
+struct BB_info {
+   Addr       BB_addr;           /* used as key, must be first           */
+   Int        n_instrs;          /* instructions in the basic block      */
+   Int        block_num;         /* unique block identifier              */
+   Int        *inst_counter;     /* per-thread instruction counts, indexed by thread number */
+   Bool       is_entry;          /* is this block a function entry point */
+   UChar      fn_name[FUNCTION_NAME_LENGTH];  /* Function block is in    */
+};
+
+
+   /* dump the optional PC file, which contains basic block number to */
+   /*   instruction address and function name mappings                */
+static void dumpPcFile(void)
+{
+   struct BB_info   *bb_elem;
+   Int              pctrace_fd;
+   SysRes           sres;
+      /* Expand the --pc-out-file value (clo_pc_out_file) into the real file name */
+   pc_out_file =
+          VG_(expand_file_name)("--pc-out-file", clo_pc_out_file);
+      /* Create/truncate the file; on failure report it and exit with status 1 */
+   sres = VG_(open)(pc_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
+                              VKI_S_IRUSR|VKI_S_IWUSR|VKI_S_IRGRP|VKI_S_IWGRP);
+   if (sr_isError(sres)) {
+      VG_UMSG("Error: cannot create pc file %s\n", pc_out_file);
+      VG_(exit)(1);
+   } else {
+      pctrace_fd = sr_Res(sres);
+   }
+
+      /* Loop through the table, writing one line per basic block: */
+      /*    F:<block number>:<address in hex>:<function name>      */
+   VG_(OSetGen_ResetIter)(instr_info_table);
+   while ( (bb_elem = VG_(OSetGen_Next)(instr_info_table)) ) {
+      VG_(write)(pctrace_fd,"F",1);
+      VG_(sprintf)( buf,":%d:%x:%s\n",
+                       bb_elem->block_num,
+                       (Int)bb_elem->BB_addr,  /* NOTE(review): Int cast presumably drops the upper half of 64-bit addresses -- confirm */
+                       bb_elem->fn_name);
+      VG_(write)(pctrace_fd, (void*)buf, VG_(strlen)(buf));
+   }
+
+   VG_(close)(pctrace_fd);
+}
+
+static Int open_tracefile(Int thread_num)
+{
+   SysRes  sres;
+   UChar temp_string[2048];
+
+      /* For thread 1, don't append any thread number  */
+      /* This lets the single-thread case not have any */
+      /* extra values appended to the file name.       */
+   if (thread_num==1) {
+      VG_(strncpy)(temp_string,bb_out_file,2047);
+   }
+   else {
+      VG_(sprintf)(temp_string,"%s.%d",bb_out_file,thread_num);
+   }
+
+   sres = VG_(open)(temp_string, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
+                              VKI_S_IRUSR|VKI_S_IWUSR|VKI_S_IRGRP|VKI_S_IWGRP);
+
+   if (sr_isError(sres)) {
+      VG_UMSG("Error: cannot create bb file %s\n",temp_string);
+      VG_(exit)(1);
+   }
+
+   return sr_Res(sres);
+}
+   /* Emit one interval record for the current thread once it has run more than interval_size instructions */
+static void handle_overflow(void)
+{
+   struct BB_info *bb_elem;
+
+   if (bbv_thread[current_thread].dyn_instr > interval_size) {
+
+      if (!instr_count_only) {
+
+            /* If our output fd hasn't been opened, open it */
+         if (bbv_thread[current_thread].bbtrace_fd < 0) {
+            bbv_thread[current_thread].bbtrace_fd=open_tracefile(current_thread);
+         }
+
+           /* put an entry to the bb.out file: "T", then ":<block>:<count>   " */
+           /* for every block this thread touched, then a newline             */
+         VG_(write)(bbv_thread[current_thread].bbtrace_fd,"T",1);
+
+         VG_(OSetGen_ResetIter)(instr_info_table);
+         while ( (bb_elem = VG_(OSetGen_Next)(instr_info_table)) ) {
+            if ( bb_elem->inst_counter[current_thread] != 0 ) {
+               VG_(sprintf)( buf,":%d:%d   ",
+                         bb_elem->block_num,
+                         bb_elem->inst_counter[current_thread]);
+               VG_(write)(bbv_thread[current_thread].bbtrace_fd,
+                          (void*)buf, VG_(strlen)(buf));
+               bb_elem->inst_counter[current_thread] = 0;  /* restart the count for the next interval */
+            }
+         }
+
+         VG_(write)(bbv_thread[current_thread].bbtrace_fd,"\n",1);
+      }
+
+      bbv_thread[current_thread].dyn_instr -= interval_size;  /* carry the excess into the next interval */
+   }
+}
+
+   /* Fold the current thread's pending rep_count into its totals and reset it */
+static void close_out_reps(void)
+{
+   bbv_thread[current_thread].global_rep_count+=bbv_thread[current_thread].rep_count;
+   bbv_thread[current_thread].unique_rep_count++;
+   bbv_thread[current_thread].rep_count=0;
+}
+
+   /* Generic function to get called each instruction (helper call inserted by bbv_instrument) */
+static VG_REGPARM(1) void per_instruction_BBV(struct BB_info *bbInfo)
+{
+   Int n_instrs=1;
+
+   tl_assert(bbInfo);
+
+      /* we finished rep but didn't clear out count */
+   if (bbv_thread[current_thread].rep_count) {
+      n_instrs++;           /* the finished rep instruction is counted once, here */
+      close_out_reps();
+   }
+      /* credit the instruction(s) to this block and to the thread totals */
+   bbInfo->inst_counter[current_thread]+=n_instrs;
+
+   bbv_thread[current_thread].total_instr+=n_instrs;
+   bbv_thread[current_thread].dyn_instr +=n_instrs;
+
+   handle_overflow();
+}
+
+   /* Function to get called if instruction has a rep prefix; addr is the instruction's address */
+static VG_REGPARM(1) void per_instruction_BBV_rep(Addr addr)
+{
+      /* handle back-to-back rep instructions */
+   if (bbv_thread[current_thread].last_rep_addr!=addr) {
+      if (bbv_thread[current_thread].rep_count) {
+         close_out_reps();
+         bbv_thread[current_thread].total_instr++;   /* count the previous rep instruction once; */
+         bbv_thread[current_thread].dyn_instr++;     /* no per-block counter is updated on this path */
+      }
+      bbv_thread[current_thread].last_rep_addr=addr;
+   }
+      /* one more helper call for the rep instruction at addr */
+   bbv_thread[current_thread].rep_count++;
+
+}
+
+   /* Function to call if our instruction is a fldcw instruction; same */
+   /* accounting as per_instruction_BBV plus the fldcw counter          */
+static VG_REGPARM(1) void per_instruction_BBV_fldcw(struct BB_info *bbInfo)
+{
+   Int n_instrs=1;
+   tl_assert(bbInfo);
+
+      /* we finished rep but didn't clear out count */
+   if (bbv_thread[current_thread].rep_count) {
+      n_instrs++;           /* the finished rep instruction is counted once, here */
+      close_out_reps();
+   }
+
+      /* count fldcw instructions */
+   bbv_thread[current_thread].fldcw_count++;
+
+   bbInfo->inst_counter[current_thread]+=n_instrs;
+
+   bbv_thread[current_thread].total_instr+=n_instrs;
+   bbv_thread[current_thread].dyn_instr +=n_instrs;
+
+   handle_overflow();
+}
+
+   /* Check if the instruction pointed to is one that needs */
+   /*   special handling.  If so, set a bit in the return   */
+   /*   value indicating what type.                         */
+static Int get_inst_type(Int len, Addr addr)
+{
+   int result=0;
+      /* len = instruction length in bytes, addr = address of its first byte; result stays 0 on non-x86 builds */
+#if defined(VGA_x86) || defined(VGA_amd64)
+
+   unsigned char *inst_pointer;
+   unsigned char inst_byte;
+   int i,possible_rep;
+
+   /* rep prefixed instructions are counted as one instruction on */
+   /*     x86 processors and must be handled as a special case    */
+
+   /* Also, the rep prefix is re-used as part of the opcode for   */
+   /*     SSE instructions.  So we need to specifically check for */
+   /*     the following: movs, cmps, scas, lods, stos, ins, outs  */
+
+   inst_pointer=(unsigned char *)addr;
+   i=0;
+   inst_byte=0;
+   possible_rep=0;
+      /* skip over prefix bytes; inst_byte ends up as the first non-prefix byte */
+   while (i<len) {
+
+      inst_byte=*inst_pointer;
+
+      if ( (inst_byte == 0x67) ||            /* address-size override prefix */
+           (inst_byte == 0x66) ||            /* operand-size override prefix */
+           (inst_byte == 0x48) ) {           /* REX.W prefix (amd64) */
+      } else if ( (inst_byte == 0xf2) ||     /* repne prefix  */
+                  (inst_byte == 0xf3) ) {    /* rep prefix    */
+         possible_rep=1;
+      } else {
+         break;                              /* other byte, exit */
+      }
+
+      i++;
+      inst_pointer++;
+   }
+      /* NOTE(review): 0xa4..0xaf also spans 0xa8/0xa9 (test) -- presumably harmless, confirm */
+   if ( possible_rep &&
+        ( ( (inst_byte >= 0xa4) &&     /* movs,cmps,scas */
+            (inst_byte <= 0xaf) ) ||   /* lods,stos      */
+          ( (inst_byte >= 0x6c) &&
+            (inst_byte <= 0x6f) ) ) ) {  /* ins,outs       */
+
+      result|=REP_INSTRUCTION;
+   }
+
+   /* fldcw instructions are double-counted by the hardware       */
+   /*     performance counters on pentium 4 processors so it is   */
+   /*     useful to have that count when doing validation work.   */
+
+   inst_pointer=(unsigned char *)addr;
+   if (len>1) {
+         /* FLDCW detection (only the unprefixed encoding is recognised) */
+         /* opcode is 0xd9/5, ie 1101 1001 oo10 1mmm */
+      if ((*inst_pointer==0xd9) &&
+          (*(inst_pointer+1)<0xb0) &&  /* excludes register forms 0xe8-0xef (fld1 ... fldz) */
+          ( (*(inst_pointer+1) & 0x38) == 0x28)) {
+         result|=FLDCW_INSTRUCTION;
+      }
+   }
+
+#endif
+   return result;
+}
+
+
+
+   /* Our instrumentation function: copies sbIn, adding a helper call before each instruction */
+   /*    sbIn = super block to translate */
+   /*    layout = guest layout           */
+   /*    gWordTy = size of guest word    */
+   /*    hWordTy = size of host word     */
+static IRSB* bbv_instrument ( VgCallbackClosure* closure,
+                             IRSB* sbIn, VexGuestLayout* layout,
+                             VexGuestExtents* vge,
+                             IRType gWordTy, IRType hWordTy )
+{
+   Int      i,n_instrs=1;
+   IRSB     *sbOut;
+   IRStmt   *st;
+   struct BB_info  *bbInfo;
+   Addr     origAddr,ourAddr;   /* Addr, not Addr64: &origAddr is used as the OSet key (see BB_addr) */
+   IRDirty  *di;
+   IRExpr   **argv, *arg1;
+   Int      regparms,opcode_type;
+
+      /* We don't handle a host/guest word size mismatch */
+   if (gWordTy != hWordTy) {
+      VG_(tool_panic)("host/guest word size mismatch");
+   }
+
+      /* Set up SB */
+   sbOut = deepCopyIRSBExceptStmts(sbIn);
+
+      /* Copy verbatim any IR preamble preceding the first IMark */
+   i = 0;
+   while ( (i < sbIn->stmts_used) && (sbIn->stmts[i]->tag!=Ist_IMark)) {
+      addStmtToIRSB( sbOut, sbIn->stmts[i] );
+      i++;
+   }
+
+      /* Get the first statement; i must still be in range or stmts[i] is invalid */
+   tl_assert(i < sbIn->stmts_used);
+   st = sbIn->stmts[i];
+
+      /* double check we are at a Mark statement */
+   tl_assert(Ist_IMark == st->tag);
+
+   origAddr=(Addr)st->Ist.IMark.addr;
+
+      /* Get the BB_info */
+   bbInfo = VG_(OSetGen_Lookup)(instr_info_table, &origAddr);
+
+   if (bbInfo==NULL) {
+
+         /* BB never translated before (at this address, at least;          */
+         /* could have been unloaded and then reloaded elsewhere in memory) */
+
+         /* allocate and initialize a new basic block structure */
+      bbInfo=VG_(OSetGen_AllocNode)(instr_info_table, sizeof(struct BB_info));
+      bbInfo->BB_addr = origAddr;
+      bbInfo->n_instrs = n_instrs;
+      bbInfo->inst_counter=VG_(calloc)("bbv_instrument",
+                                       allocated_threads,
+                                       sizeof(Int));
+
+         /* assign a unique block number */
+      bbInfo->block_num=block_num;
+      block_num++;
+         /* get function name and entry point information */
+      VG_(get_fnname)(origAddr,bbInfo->fn_name,FUNCTION_NAME_LENGTH);
+      bbInfo->is_entry=VG_(get_fnname_if_entry)(origAddr, bbInfo->fn_name,
+                                                FUNCTION_NAME_LENGTH);
+         /* insert structure into table */
+      VG_(OSetGen_Insert)( instr_info_table, bbInfo );
+   }
+
+      /* Iterate through the basic block, putting the original   */
+      /* instructions in place, plus putting a call to one of    */
+      /* the per_instruction_BBV* helpers for each instruction   */
+
+      /* This is less efficient than only instrumenting the BB   */
+      /* But it gives proper results given the fact that         */
+      /* valgrind uses superblocks (not basic blocks) by default */
+
+
+   while(i < sbIn->stmts_used) {
+      st=sbIn->stmts[i];
+
+      if (st->tag == Ist_IMark) {
+
+         ourAddr = (Addr)st->Ist.IMark.addr;
+
+         opcode_type=get_inst_type(st->Ist.IMark.len,ourAddr);
+
+         regparms=1;
+         arg1= mkIRExpr_HWord( (HWord)bbInfo);
+         argv= mkIRExprVec_1(arg1);
+
+            /* rep helper takes the instruction address, the others take bbInfo */
+         if (opcode_type&REP_INSTRUCTION) {
+            arg1= mkIRExpr_HWord(ourAddr);
+            argv= mkIRExprVec_1(arg1);
+            di= unsafeIRDirty_0_N( regparms, "per_instruction_BBV_rep",
+                                VG_(fnptr_to_fnentry)( &per_instruction_BBV_rep ),
+                                argv);
+         }
+         else if (opcode_type&FLDCW_INSTRUCTION) {
+            di= unsafeIRDirty_0_N( regparms, "per_instruction_BBV_fldcw",
+                                VG_(fnptr_to_fnentry)( &per_instruction_BBV_fldcw ),
+                                argv);
+         }
+         else {
+            di= unsafeIRDirty_0_N( regparms, "per_instruction_BBV",
+                                VG_(fnptr_to_fnentry)( &per_instruction_BBV ),
+                                argv);
+         }
+
+
+            /* Insert our call */
+         addStmtToIRSB( sbOut,  IRStmt_Dirty(di));
+      }
+
+         /* Insert the original instruction */
+      addStmtToIRSB( sbOut, st );
+
+      i++;
+   }
+
+   return sbOut;
+}
+   /* Grow the per-thread array and every block's inst_counter from old_number to new_number entries */
+static struct thread_info *allocate_new_thread(struct thread_info *old,
+                                     Int old_number, Int new_number)
+{
+   struct thread_info *temp;
+   struct BB_info   *bb_elem;
+   Int i;
+
+   temp=VG_(realloc)("bbv_main.c allocate_threads",
+                     old,
+                     new_number*sizeof(struct thread_info));
+
+      /* init the new thread */
+      /* We loop in case the new thread is not contiguous */
+   for(i=old_number;i<new_number;i++) {
+      temp[i].last_rep_addr=0;
+      temp[i].dyn_instr=0;
+      temp[i].total_instr=0;
+      temp[i].global_rep_count=0;
+      temp[i].unique_rep_count=0;
+      temp[i].rep_count=0;
+      temp[i].fldcw_count=0;
+      temp[i].bbtrace_fd=-1;   /* trace file is opened lazily */
+   }
+      /* expand the inst_counter on all allocated basic blocks */
+   VG_(OSetGen_ResetIter)(instr_info_table);
+   while ( (bb_elem = VG_(OSetGen_Next)(instr_info_table)) ) {
+      bb_elem->inst_counter =
+                    VG_(realloc)("bbv_main.c inst_counter",
+                                 bb_elem->inst_counter,
+                                 new_number*sizeof(Int));
+      for(i=old_number;i<new_number;i++) {
+         bb_elem->inst_counter[i]=0;
+      }
+   }
+      /* caller must store the returned (possibly moved) array */
+   return temp;
+}
+   /* start_client_code callback: make room for tid if needed and make it the current thread */
+static void bbv_thread_called ( ThreadId tid, ULong nDisp )
+{
+   if (tid >= allocated_threads) {
+      bbv_thread=allocate_new_thread(bbv_thread,allocated_threads,tid+1);
+      allocated_threads=tid+1;
+   }
+   current_thread=tid;
+}
+
+
+
+
+/*--------------------------------------------------------------------*/
+/*--- Setup                                                        ---*/
+/*--------------------------------------------------------------------*/
+   /* Runs after option parsing: expand the bb output file name and set the VEX chase threshold */
+static void bbv_post_clo_init(void)
+{
+   bb_out_file =
+          VG_(expand_file_name)("--bb-out-file", clo_bb_out_file);
+
+      /* Try a closer approximation of basic blocks  */
+      /* This is the same as the command line option */
+      /* --vex-guest-chase-thresh=0                  */
+   VG_(clo_vex_control).guest_chase_thresh = 0;
+}
+
+   /* Parse the command line options; returns True if arg was one of ours, False otherwise */
+static Bool bbv_process_cmd_line_option(Char* arg)
+{
+   if VG_INT_CLO       (arg, "--interval-size",    interval_size) {}
+   else if VG_STR_CLO  (arg, "--bb-out-file",      clo_bb_out_file) {}
+   else if VG_STR_CLO  (arg, "--pc-out-file",      clo_pc_out_file) {
+      generate_pc_file = True;   /* naming a pc file also enables writing it */
+   }
+   else if VG_XACT_CLO (arg, "--instr-count-only", instr_count_only, True) {}
+   else {
+      return False;
+   }
+
+   return True;
+}
+   /* Print the help text for the options handled by bbv_process_cmd_line_option */
+static void bbv_print_usage(void)
+{
+   VG_(printf) ("   --bb-out-file=<file>  filename for basic block vector info\n");
+   VG_(printf) ("   --pc-out-file=<file>  filename for basic block addresses and function names\n");
+   VG_(printf) ("   --interval-size=<num> interval size\n");
+   VG_(printf) ("   --instr-count-only    only print total instruction count\n");
+}
+   /* Print the debugging-option help text; this tool has no such options */
+static void bbv_print_debug_usage(void)
+{
+   VG_(printf)("    (none)\n");
+}
+   /* Tool exit hook: optionally dump the PC file, then report a summary for every thread that ran */
+static void bbv_fini(Int exitcode)
+{
+   Int i;
+
+   if (generate_pc_file) {
+      dumpPcFile();
+   }
+
+   for(i=0;i<allocated_threads;i++) {
+
+      if (bbv_thread[i].total_instr!=0) {
+            /* --interval-size=0 must not divide by zero: report 0 intervals instead */
+         VG_(sprintf)(buf,"\n\n"
+                          "# Thread %d\n"
+                          "#   Total intervals: %d (Interval Size %d)\n"
+                          "#   Total instructions: %lld\n"
+                          "#   Total reps: %lld\n"
+                          "#   Unique reps: %lld\n"
+                          "#   Total fldcw instructions: %lld\n\n",
+                i,
+                interval_size ? (Int)(bbv_thread[i].total_instr/(ULong)interval_size) : 0,
+                interval_size,
+                bbv_thread[i].total_instr,
+                bbv_thread[i].global_rep_count,
+                bbv_thread[i].unique_rep_count,
+                bbv_thread[i].fldcw_count);
+
+            /* Print results to display */
+         VG_UMSG("%s", buf);
+
+            /* open the output file if it hasn't already */
+         if (bbv_thread[i].bbtrace_fd < 0) {
+            bbv_thread[i].bbtrace_fd=open_tracefile(i);
+         }
+            /* Also print to results file */
+         VG_(write)(bbv_thread[i].bbtrace_fd,(void*)buf,VG_(strlen)(buf));
+         VG_(close)(bbv_thread[i].bbtrace_fd);
+      }
+   }
+}
+   /* Tool registration: set details, install callbacks, create the block table and thread array */
+static void bbv_pre_clo_init(void)
+{
+   VG_(details_name)            ("exp-bbv");
+   VG_(details_version)         (NULL);
+   VG_(details_description)     ("a SimPoint basic block vector generator");
+   VG_(details_copyright_author)(
+      "Copyright (C) 2006-2009 Vince Weaver");
+   VG_(details_bug_reports_to)  (VG_BUGS_TO);
+
+   VG_(basic_tool_funcs)          (bbv_post_clo_init,
+                                   bbv_instrument,
+                                   bbv_fini);
+
+   VG_(needs_command_line_options)(bbv_process_cmd_line_option,
+                                   bbv_print_usage,
+                                   bbv_print_debug_usage);
+      /* bbv_thread_called keeps current_thread up to date */
+   VG_(track_start_client_code)( bbv_thread_called );
+
+      /* key is at offset 0 of struct BB_info (BB_addr); no compare function is supplied */
+   instr_info_table = VG_(OSetGen_Create)(/*keyOff*/0,
+                                          NULL,
+                                          VG_(malloc), "bbv.1", VG_(free));
+      /* the table must exist first: allocate_new_thread iterates over it */
+   bbv_thread=allocate_new_thread(bbv_thread,0,allocated_threads);
+}
+
+VG_DETERMINE_INTERFACE_VERSION(bbv_pre_clo_init)
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/exp-bbv/docs/Makefile.am b/exp-bbv/docs/Makefile.am
new file mode 100644 (file)
index 0000000..734dc54
--- /dev/null
@@ -0,0 +1,2 @@
+EXTRA_DIST = bbv-manual.xml
+
diff --git a/exp-bbv/docs/bbv-manual.xml b/exp-bbv/docs/bbv-manual.xml
new file mode 100644 (file)
index 0000000..a699a5f
--- /dev/null
@@ -0,0 +1,345 @@
+<?xml version="1.0"?> <!-- -*- sgml -*- -->
+<!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+  "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+
+<chapter id="bbv-manual" xreflabel="BBV">
+  <title>BBV: a Basic Block Vector generation tool</title>
+
+<para>To use this tool, you must specify
+<computeroutput>--tool=exp-bbv</computeroutput> on the Valgrind
+command line.</para>
+
+<sect1 id="bbv-manual.background" xreflabel="BBV Background">
+<title>Basic Block Profiling and SimPoint</title>
+
+<para>
+   A Basic Block Vector (BBV) is a list of all basic blocks entered
+   during program execution, and a count of how many times each
+   block was run (a basic block is a section of code
+   with only one entry point and one exit point).
+</para>
+
+<para>
+   This tool was written to generate basic block vectors
+   for use with the SimPoint analysis tool 
+   (http://www.cse.ucsd.edu/~calder/simpoint/).
+   The SimPoint methodology enables speeding up architectural 
+   simulations by only running a small portion of a program
+   and then extrapolating total behavior from this
+   small portion.  Most programs exhibit phase-based behavior, which
+   means that at various times during execution a program will encounter 
+   intervals of time where the code behaves similarly to a previous
+   interval.  If you can detect these intervals and group them together, 
+   an approximation of the total program behavior can be obtained
+   by only simulating a bare minimum number of intervals, and then scaling 
+   the results.
+</para>
+
+<para>
+  In computer architecture research, running a 
+  benchmark on a cycle-accurate simulator can cause slowdowns on the order
+  of 1000 times, making it take days, weeks, or even longer to run full
+  benchmarks.  By utilizing SimPoint this can be reduced significantly
+  while still retaining reasonable accuracy, usually in the 5-10% range.
+</para>
+
+<para>
+   A more complete introduction to how SimPoint works can be 
+   found in the paper "Automatically Characterizing Large Scale 
+   Program Behavior" by T. Sherwood, E. Perelman, G. Hamerly, and 
+   B. Calder.  
+</para>
+
+</sect1>
+
+<sect1 id="bbv-manual.quickstart" xreflabel="Quick Start">
+<title>Using Basic Block Vectors to create SimPoints</title>
+
+<para>
+   To quickly create a basic block vector file, you will call Valgrind
+   like this:
+   <computeroutput>valgrind --tool=exp-bbv /bin/ls</computeroutput>
+   In this case we are running on the "ls" program, but this
+   can be any executable.  By default a file called 
+   <computeroutput>bb.out.PID</computeroutput> will be created,
+   where PID is replaced by the process ID of the running process.
+   This file is the basic block vector.  For long-running programs
+   this file can be quite large, so it might be wise to compress
+   it with gzip or some other compression program.
+</para>   
+
+<para>
+   To create actual SimPoint results, you will need the
+   SimPoint utility, available from the SimPoint webpage
+   (http://www.cse.ucsd.edu/~calder/simpoint/).
+   Assuming you have downloaded SimPoint 3.2 and compiled it,
+   create SimPoint results with a command like the following:
+      
+   <computeroutput>./SimPoint.3.2/bin/simpoint -inputVectorsGzipped \
+           -loadFVFile bb.out.1234.gz \
+          -k 5 -saveSimpoints results.simpts \
+          -saveSimpointWeights results.weights
+   </computeroutput>
+   where bb.out.1234.gz is your compressed basic block vector file
+   generated by Valgrind exp-bbv.
+</para>
+
+<para>   
+   The SimPoint utility does random linear projection using 15 dimensions,
+   then does k-means clustering to calculate which intervals are 
+   of interest.  In this example we specify 5 intervals with the 
+   -k 5 option.   
+</para>   
+   
+<para>   
+   The outputs from the SimPoint run are the 
+   <computeroutput>results.simpts</computeroutput>
+   and <computeroutput>results.weights</computeroutput> files.
+   The first holds the 5 most relevant intervals of the program.
+   The second holds the weight to scale each interval by when
+   extrapolating full-program behavior.  The intervals and the weights
+   can be used in conjunction with a simulator that supports
+   fast-forwarding; you fast-forward to the interval of interest,
+   collect stats for the desired interval length, then use
+   statistics gathered in conjunction with the weights to 
+   calculate your results.
+</para> 
+   
+</sect1>
+
+<sect1 id="bbv-manual.usage" xreflabel="BBV Usage">
+<title>BBV Command Line Options</title>
+
+<para>
+   BBV has various options that control the behavior of the plugin:
+<!-- start of xi:include in the manpage -->
+<variablelist id="bbv.opts.list">
+
+  <varlistentry id="opt.interval-size" xreflabel="--interval-size">
+      <term>
+        <option><![CDATA[--interval-size=<number> [default: 100000000] ]]></option>
+      </term>
+      <listitem>
+      <para>
+         This option selects the size of the interval to use.  
+         The default is 100 
+         million instructions, which is a commonly used value.  
+         Other sizes can be used; smaller intervals can help programs
+         with finer-grained phases.  However, a smaller interval size
+         can lead to accuracy issues due to warm-up effects.
+         (When fast-forwarding, the various architectural features
+         will be uninitialized, and it will take some number
+         of instructions before they "warm up" to the state a 
+         full simulation would be at without the fast-forwarding.
+         Large interval sizes tend to mitigate this.)
+      </para>
+      </listitem>
+  </varlistentry>
+
+  <varlistentry id="opt.instr-count-only" xreflabel="--instr-count-only">
+     <term>
+        <option><![CDATA[--instr-count-only [default: no] ]]></option>
+     </term>
+     <listitem>
+        <para>
+           This option tells the tool to only display instruction 
+           count totals, and to not generate the
+           actual BBV file.  This is useful for debugging, and for
+           gathering instruction count info without generating
+           the large BBV files.
+        </para>
+     </listitem>
+   </varlistentry>
+  
+  <varlistentry id="opt.bb-out-file" xreflabel="--bb-out-file">
+     <term>
+        <option><![CDATA[--bb-out-file=<name> [default: bb.out.%p] ]]></option>
+     </term>
+     <listitem>
+        <para>
+           This option selects the name of the basic block file.  Default is 
+           bb.out.%p.   The
+           <option>%p</option> and <option>%q</option> format specifiers can be
+           used to embed the process ID and/or the contents of an environment
+           variable in the name, as is the case for the core option
+           <option>--log-file</option>.
+        </para>
+     </listitem>
+  </varlistentry>
+
+  <varlistentry id="opt.pc-out-file" xreflabel="--pc-out-file">
+     <term>
+        <option><![CDATA[--pc-out-file=<name> [default: pc.out.%p] ]]></option>
+     </term>
+     <listitem>
+        <para>
+           This option selects the name of the PC file.  
+           This file holds program counter addresses
+           and function name info for the various basic blocks.
+           This can be used in conjunction
+           with the bbv file to fast-forward via function names
+           instead of just instruction counts.
+           The default filename is pc.out.%p.  The
+           <option>%p</option> and <option>%q</option> format specifiers can be
+           used to embed the process ID and/or the contents of an environment
+           variable in the name, as is the case for the core option
+           <option>--log-file</option>.
+
+        </para>
+     </listitem>
+   </varlistentry>
+</variablelist>
+<!-- end of xi:include in the manpage -->
+
+</para>
+
+</sect1>
+
+<sect1 id="bbv-manual.fileformat" xreflabel="BBV File Format">
+<title>Basic Block Vector File Format</title>
+
+<para>  
+  The Basic Block Vector is dumped at fixed intervals.  This
+  is commonly done every 100 million instructions; the 
+  <computeroutput>--interval-size</computeroutput> option can be 
+  used to change this.
+</para>
+
+<para>
+  The output file looks like this:
+</para>
+
+<programlisting><![CDATA[
+T:45:1024 :189:99343
+T:11:78573 :15:1353  :56:1
+T:18:45 :12:135353 :56:78 :314:4324263]]></programlisting>
+
+<para>
+  Each new interval starts with a T.   Every entry on the line is a colon,
+  then a unique number identifying the basic block, then
+  another colon, then the frequency (which is scaled
+  by the number of instructions in the basic block).
+</para>
+
+<para>
+  The entry count is multiplied by the number of instructions that are 
+  in the basic block, in order to weigh the count so that instructions in 
+  small Basic Blocks aren't counted as more important than instructions 
+  in large Basic Blocks.
+</para>
+
+</sect1>
+
+<sect1 id="bbv-manual.implementation" xreflabel="Implementation">
+<title>Implementation</title>
+
+<para>
+   Valgrind provides all of the information necessary to create
+   BBV files.  In the current implementation, all instructions
+   are instrumented.  This is slower (by approximately a factor
+   of two) than a method that instruments at the basic-block level, 
+   but there are some complications (especially with rep prefix
+   detection) that make that method more difficult.
+</para>
+  
+<para>
+   Valgrind actually provides instrumentation at a super-block level.
+   A super-block has one entry point but unlike basic-blocks can
+   have multiple exit points.  Once a branch occurs into the middle
+   of a block, it is split into a new basic-block.  Because
+   Valgrind cannot produce "true" basic blocks, the generated
+   BBV vectors will be different than those generated by other tools.
+   In practice this does not seem to affect the accuracy of the
+   SimPoint results.  We do internally force the
+   <computeroutput>--vex-guest-chase-thresh=0</computeroutput>
+   option to Valgrind which forces a more basic-block like
+   behavior.
+</para>
+
+<para>
+   When a super block is run for the first time, it is instrumented
+   with our BBV routine.  This adds a call to our instruction
+   counting function for each original instruction.
+   The current superblock is looked up in an Ordered Set to find 
+   a structure that holds block-specific statistics (the entry point 
+   address is the key used for the lookup).  We increment the 
+   instruction count for this superblock and
+   also update the master instruction count.
+   If the master count overflows the interval size 
+   then we print out the basic block statistics for the current interval
+   to disk, and then reset all the superblock counters to zero.
+</para>
+
+<para>
+   On the x86 and amd64 architectures the code takes special
+   care with rep-prefixed string instructions.  This is because 
+   actual hardware counts a rep-prefixed instruction 
+   as one instruction, while a naive Valgrind implementation
+   would count it as many (possibly hundreds, thousands or even millions)
+   of instructions.  We have special code to handle
+   this properly, which makes the results match hardware performance
+   counter results.
+</para>   
+   
+<para>
+   The exp-bbv tool also counts the fldcw instruction.  This
+   instruction is used on x86 machines when converting numbers
+   from floating point to integer (among other uses).
+   On Pentium 4 systems the retired instruction performance
+   counter counts this instruction as two
+   instructions (all other known processors only count it as one).
+   This can affect results when using SimPoint on Pentium 4 systems,
+   so we provide the count for use in mitigating this at analysis time.
+</para>
+
+</sect1>
+
+<sect1 id="bbv-manual.threadsupport" xreflabel="BBV Threaded Support">
+<title>Threaded Executable Support</title>
+
+<para>
+   BBV supports threaded programs.  When a program has multiple threads,
+   an additional BBV file is created for each thread (each additional
+   file is the specified filename with the thread number
+   appended at the end).
+</para>
+
+<para>
+   There is no official method of using SimPoint with
+   threaded workloads.  The most common method is to run
+   SimPoint on each thread's results independently, and use 
+   some method of deterministic execution to try to match the
+   original workload.  This should be possible with current
+   exp-bbv.
+</para>
+
+</sect1>
+
+<sect1 id="bbv-manual.validation" xreflabel="BBV Validation">
+<title>Validation</title>
+
+<para>
+   This plugin has been tested on x86, amd64, and ppc32 platforms.
+   An earlier version of the plugin was tested in detail using
+   hardware performance counters; this work is described in a paper 
+   from the HiPEAC'08 conference, "Using Dynamic Binary Instrumentation 
+   to Generate Multi-Platform SimPoints: Methodology and Accuracy" by
+   V.M. Weaver and S.A. McKee.
+</para>
+</sect1>
+<sect1 id="bbv-manual.performance" xreflabel="BBV Performance">
+<title>Performance</title>
+
+<para>
+  Using this program slows down execution by roughly a factor of 40
+  over native execution.  This varies depending on the machine
+  used and the benchmark being run.
+  On the SPEC CPU 2000 benchmarks running on a 3.4GHz Pentium D 
+  processor, the slowdown ranges from 24x (mcf) to 340x (vortex.2).
+</para>
+
+</sect1>
+
+</chapter>
diff --git a/exp-bbv/tests/Makefile.am b/exp-bbv/tests/Makefile.am
new file mode 100644 (file)
index 0000000..efd5914
--- /dev/null
@@ -0,0 +1,29 @@
+# exp-bbv regression tests: this directory plus per-architecture / per-platform subdirectories.
+include $(top_srcdir)/Makefile.tool-tests.am
+
+SUBDIRS = .
+
+# Platform-specific tests: a subdirectory is added only when its VGCONF_* conditional is true
+if VGCONF_ARCHS_INCLUDE_X86
+SUBDIRS += x86
+endif
+if VGCONF_PLATFORMS_INCLUDE_X86_LINUX
+SUBDIRS += x86-linux
+endif
+if VGCONF_PLATFORMS_INCLUDE_AMD64_LINUX
+SUBDIRS += amd64-linux
+endif
+if VGCONF_PLATFORMS_INCLUDE_PPC32_LINUX
+SUBDIRS += ppc32-linux
+endif
+# Unlike SUBDIRS, the distribution list names every subdirectory unconditionally.
+DIST_SUBDIRS = x86 x86-linux amd64-linux ppc32-linux .
+# Shared data files; amd64-linux/ll.S pulls in ../logo.include with .include.
+EXTRA_DIST = \
+          logo.include logo.lzss_new
+       
+check_PROGRAMS = 
+       
+AM_CFLAGS   += $(AM_FLAG_M3264_PRI)
+AM_CXXFLAGS += $(AM_FLAG_M3264_PRI)
+
diff --git a/exp-bbv/tests/amd64-linux/Makefile.am b/exp-bbv/tests/amd64-linux/Makefile.am
new file mode 100644 (file)
index 0000000..65ef300
--- /dev/null
@@ -0,0 +1,36 @@
+include $(top_srcdir)/Makefile.tool-tests.am
+
+dist_noinst_SCRIPTS = filter_stderr
+
+check_PROGRAMS = \
+       million rep_prefix ll fldcw_check complex_rep clone_test
+
+EXTRA_DIST = \
+          clone_test.stderr.exp \
+          clone_test.post.exp \
+          clone_test.vgtest \
+          complex_rep.stderr.exp \
+          complex_rep.vgtest \
+          fldcw_check.stderr.exp \
+          fldcw_check.vgtest \
+          ll.stderr.exp \
+          ll.stdout.exp \
+          ll.post.exp \
+          ll.vgtest \
+          million.stderr.exp \
+          million.post.exp \
+          million.vgtest \
+          rep_prefix.stderr.exp \
+          rep_prefix.vgtest 
+
+AM_CCASFLAGS += -ffreestanding
+# The tests define _start themselves, so link without the C start files and default libraries.
+LDFLAGS += -nostartfiles -nodefaultlibs
+# One assembly source per program; without <prog>_SOURCES Automake would look for <prog>.c.
+clone_test_SOURCES = clone_test.S
+complex_rep_SOURCES = complex_rep.S
+fldcw_check_SOURCES = fldcw_check.S
+ll_SOURCES = ll.S
+million_SOURCES = million.S
+rep_prefix_SOURCES = rep_prefix.S
+
diff --git a/exp-bbv/tests/amd64-linux/clone_test.S b/exp-bbv/tests/amd64-linux/clone_test.S
new file mode 100644 (file)
index 0000000..10a2df3
--- /dev/null
@@ -0,0 +1,95 @@
+            # count for ~1 million instructions thread 1
+            # count for ~2 million instructions thread 2
+            # count for additional 500 thousand each before exit
+            
+       .globl _start   
+_start:        
+
+       #################################################
+        # 1000 instructions in initial thread           #
+       #################################################
+       
+       xor     %rax,%rax
+       mov     $499,%rcx               # load counter
+initial_loop:  
+       dec     %rcx                    # repeat count times
+       jnz     initial_loop
+
+
+       #####################################################
+       # Spawn a thread!                                   #
+       #####################################################
+clone:
+       mov    $56,%rax                 # clone syscall
+       
+       # Note, the raw clone syscall differs from the glibc clone() wrapper
+       
+#      int clone (flags, stack_pointer,parent_tidptr,child_tidptr,tls)
+
+
+                                       # Flags in 
+                                       #/usr/include/bits/sched.h
+                                       # CLONE_THREAD 0x10000
+                                       # CLONE_SIGHAND 0x800
+                                       # CLONE_VM      0x100
+                                       # above must be called together
+                                       # Below required for Valgrind
+                                       # CLONE_FS       0x200
+                                       # CLONE_FILES    0x400
+
+       mov    $0x10f00,%rdi            # flags: the five CLONE_* values above OR'd together
+       
+
+       mov    $(new_stack+4096),%rsi                   # new stack (top of the 4096-byte area)
+
+       
+
+       mov    $0,%rdx          # third argument (parent_tidptr in the signature above) = 0
+
+       syscall
+       
+       cmp   $0,%rax           # are we in new thread?
+       jz    thread2           # if so, jump to thread2
+
+
+       ###############################################
+       # thread1                                     #
+       ###############################################
+
+thread1:
+
+       mov     $499997,%rcx            # load counter
+thread1_loop:  
+       dec     %rcx                    # repeat count times
+       jnz     thread1_loop
+
+       xor     %rdi,%rdi               # we return 0
+       jmp    exit
+       
+thread2:       
+       mov     $999997,%rcx            # load counter
+thread2_loop:  
+       dec     %rcx                    # repeat count times
+       jnz     thread2_loop    
+       
+       mov    $5,%rdi                  # we return 5
+       
+       
+       #================================
+       # Exit
+       #================================
+exit:
+
+       # count an additional 500 thousand instructions (250000 iterations of 2)
+
+       mov     $250000,%rcx            # load counter
+exit_loop:     
+       dec     %rcx                    # repeat count times
+       jnz     exit_loop       
+
+actual_exit:
+       mov     $60,%rax                # put exit syscall number (60) in rax
+       syscall
+
+.bss
+.lcomm new_stack,4096
diff --git a/exp-bbv/tests/amd64-linux/clone_test.post.exp b/exp-bbv/tests/amd64-linux/clone_test.post.exp
new file mode 100644 (file)
index 0000000..55bcf61
--- /dev/null
@@ -0,0 +1,58 @@
+T 4    996    5    2    3    98991   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 1001    2    3    98994   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+
+
+# Thread 1
+#   Total intervals: 15 (Interval Size 100000)
+#   Total instructions: 1501007
+#   Total reps: 0
+#   Unique reps: 0
+#   Total fldcw instructions: 0
+
+T 2    3    99996   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 99996    4   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 99998    2   
+
+
+# Thread 2
+#   Total intervals: 25 (Interval Size 100000)
+#   Total instructions: 2500001
+#   Total reps: 0
+#   Unique reps: 0
+#   Total fldcw instructions: 0
+
diff --git a/exp-bbv/tests/amd64-linux/clone_test.stderr.exp b/exp-bbv/tests/amd64-linux/clone_test.stderr.exp
new file mode 100644 (file)
index 0000000..6a917a2
--- /dev/null
@@ -0,0 +1,12 @@
+# Thread 1
+#   Total intervals: 15 (Interval Size 100000)
+#   Total instructions: 1501007
+#   Total reps: 0
+#   Unique reps: 0
+#   Total fldcw instructions: 0
+# Thread 2
+#   Total intervals: 25 (Interval Size 100000)
+#   Total instructions: 2500001
+#   Total reps: 0
+#   Unique reps: 0
+#   Total fldcw instructions: 0
diff --git a/exp-bbv/tests/amd64-linux/clone_test.vgtest b/exp-bbv/tests/amd64-linux/clone_test.vgtest
new file mode 100644 (file)
index 0000000..9f5cd4d
--- /dev/null
@@ -0,0 +1,5 @@
+prog: clone_test
+vgopts: --interval-size=100000 --bb-out-file=clone_test.out.bb --pc-out-file=clone_test.out.pc
+post:  cat clone_test.out.bb clone_test.out.bb.2 | ../filter_bb
+cleanup: rm clone_test.out.bb clone_test.out.bb.2
+
diff --git a/exp-bbv/tests/amd64-linux/complex_rep.S b/exp-bbv/tests/amd64-linux/complex_rep.S
new file mode 100644 (file)
index 0000000..80b8c8c
--- /dev/null
@@ -0,0 +1,58 @@
+# When trying (and failing) to instrument at the basic block level
+# I thought up a lot of corner-cases in the rep code.  This tries
+# to catch some of them
+
+# Performance counters give us 8207 insns
+#    11 + 8*1024 + 3 = 8206
+
+       .globl _start   
+_start:        
+       cld                             # we want these to happen forward
+
+       mov    $0xfeb131978,%rax        # value to store (stosb only writes %al)
+
+       # test back-to-back rep/stosb's
+
+       mov     $1024,%rcx
+       mov     $buffer1, %rdi          # set destination
+       rep     stosb                   # store 1024 times
+       rep     stosb                   # should store 0 times  
+       rep     stosb                   # should store 0 times
+
+       
+       # test stosb where cx is 0
+       
+       xor    %rcx,%rcx
+       mov    $buffer1, %rdi           # set destination
+       rep    stosb                    # should not store at all
+       
+       # test rep inside of a loop
+       
+       mov    $1024, %rbx
+rep_loop:      
+
+       mov    $1024,%rcx
+       mov    $buffer1, %rdi           # set destination
+       rep    stosb
+       
+       mov    $1024,%rcx
+       mov    $buffer1, %rdi           # set destination
+       rep    stosb
+
+       dec    %rbx
+       jnz    rep_loop
+       
+       
+       #================================
+       # Exit
+       #================================
+exit:
+       mov     $60,%rax
+       xor     %rdi,%rdi               # we return 0
+       syscall                         # and exit
+
+
+.bss
+
+.lcomm buffer1,        16384
+
diff --git a/exp-bbv/tests/amd64-linux/complex_rep.stderr.exp b/exp-bbv/tests/amd64-linux/complex_rep.stderr.exp
new file mode 100644 (file)
index 0000000..ceabe14
--- /dev/null
@@ -0,0 +1,6 @@
+# Thread 1
+#   Total intervals: 0 (Interval Size 100000)
+#   Total instructions: 8206
+#   Total reps: 2100228
+#   Unique reps: 2052
+#   Total fldcw instructions: 0
diff --git a/exp-bbv/tests/amd64-linux/complex_rep.vgtest b/exp-bbv/tests/amd64-linux/complex_rep.vgtest
new file mode 100644 (file)
index 0000000..ef5ac30
--- /dev/null
@@ -0,0 +1,4 @@
+prog: complex_rep
+vgopts: --interval-size=100000 --bb-out-file=complex_rep.out.bb
+cleanup: rm complex_rep.out.bb
+
diff --git a/exp-bbv/tests/amd64-linux/filter_stderr b/exp-bbv/tests/amd64-linux/filter_stderr
new file mode 100644 (file)
index 0000000..616ce05
--- /dev/null
@@ -0,0 +1,4 @@
+#! /bin/sh
+
+../filter_stderr
+
diff --git a/exp-bbv/tests/amd64-linux/fldcw_check.S b/exp-bbv/tests/amd64-linux/fldcw_check.S
new file mode 100644 (file)
index 0000000..cfca2d0
--- /dev/null
@@ -0,0 +1,129 @@
+
+.globl _start
+
+_start:
+        # This code tests for the fldcw "load floating point control word"
+       #   instruction.  On most x86 processors the retired_instruction
+       #   performance counter counts this as one instruction.  However,
+       #   on Pentium 4 systems it counts as two.  Therefore this can
+       #   affect BBV results on such a system.
+       # fldcw is most often used to set the rounding mode when doing
+       #   floating point to integer conversions
+       
+       # It is encoded as "d9 /5" which means
+       #   1101 1001 xx10 1yyy
+       # Where xx is the "mod" which will be 00, 01, or 10 indicating offset
+       #   and yyy is the register field
+
+        # these are instructions with similar encodings to fldcw
+       # that can cause false positives if the test isn't explicit enough
+similar:       
+        fld1                                   # d9 e8
+       fldl2t                          # d9 e9
+       fldl2e                          # d9 ea
+       fldpi                           # d9 eb
+       fldlg2                          # d9 ec
+       fldln2                          # d9 ed
+       fldz                            # d9 ee
+
+       # check some varied ways of calling fldcw
+
+       # offset on stack
+stack: 
+       sub     $8,%rsp                 # allocate space on stack
+       fnstcw  2(%rsp)         
+       fldcw   2(%rsp)         
+       add     $8,%rsp                 # restore stack
+
+       # 64-bit register
+sixtyfour_reg: 
+       fnstcw  cw
+       mov     $cw,%rax
+       fldcw   0(%rax)                 # rax
+       mov     $cw,%rbx
+       fldcw   0(%rbx)                 # rbx
+       mov     $cw,%rcx        
+       fldcw   0(%rcx)                 # rcx
+       mov     $cw,%rdx                 
+       fldcw   0(%rdx)                 # rdx
+
+       # 32-bit register
+thirtytwo_reg: 
+       fnstcw  cw
+       mov     $cw,%eax
+       fldcw   0(%eax)                 # eax
+       mov     $cw,%ebx
+       fldcw   0(%ebx)                 # ebx
+       mov     $cw,%ecx        
+       fldcw   0(%ecx)                 # ecx
+       mov     $cw,%edx                 
+       fldcw   0(%edx)                 # edx
+       
+       # register + 8-bit offset
+eight_bit:     
+       mov     $cw,%eax
+       sub     $32,%eax
+       
+       fldcw   32(%eax)                # eax + 8 bit offset
+       mov     %eax,%ebx
+       fldcw   32(%ebx)                # ebx + 8 bit offset    
+       mov     %eax,%ecx
+       fldcw   32(%ecx)                # ecx + 8 bit offset            
+       mov     %eax,%edx
+       fldcw   32(%edx)                # edx + 8 bit offset
+       
+       # register + 32-bit offset
+thirtytwo_bit: 
+       mov     $cw,%eax
+       sub     $30000,%eax
+       
+       fldcw   30000(%eax)             # eax + 32 bit offset
+       mov     %eax,%ebx
+       fldcw   30000(%ebx)             # ebx + 32 bit offset   
+       mov     %eax,%ecx
+       fldcw   30000(%ecx)             # ecx + 32 bit offset           
+       mov     %eax,%edx
+       fldcw   30000(%edx)             # edx + 32 bit offset                   
+
+       # check an fp/integer conversion
+       # in a loop to give a bigger count
+
+       mov     $1024,%rcx
+big_loop:
+
+       fldl    three                   # load value onto fp stack
+       fnstcw  saved_cw                # store control word to mem
+       movzwl  saved_cw, %eax          # load cw from mem, zero extending
+       movb    $12, %ah                # set cw for "round to zero"
+       movw    %ax, cw                 # store back to memory (16-bit store, so %ax not %rax)
+       fldcw   cw                      # load new rounding mode
+       fistpl  result                  # save stack value as integer to mem
+       fldcw   saved_cw                # restore old cw
+       
+       loop    big_loop                # loop to make the count more obvious
+
+       movl    result, %ebx            # sanity check to see if the
+       cmp     $3,%rbx                 # result is the expected one
+       je      exit
+       
+print_error:
+       mov     $1,%rax                 # write syscall
+       mov     $1,%rdi                 # stdout
+       mov     $error,%rsi             # string        
+       mov     $22,%rdx                # length of string
+       syscall
+       
+exit:  
+       xor     %rdi, %rdi              # return 0
+       mov     $60, %rax               # SYSCALL_EXIT
+       syscall
+       
+
+
+.data
+saved_cw:      .long 0
+cw:    .long   0
+result: .long  0
+three: .long   0                       # a floating point 3.0
+       .long   1074266112
+error: .asciz  "Error!  Wrong result!\n"
diff --git a/exp-bbv/tests/amd64-linux/fldcw_check.stderr.exp b/exp-bbv/tests/amd64-linux/fldcw_check.stderr.exp
new file mode 100644 (file)
index 0000000..9e7d33d
--- /dev/null
@@ -0,0 +1,6 @@
+# Thread 1
+#   Total intervals: 0 (Interval Size 10000)
+#   Total instructions: 9270
+#   Total reps: 0
+#   Unique reps: 0
+#   Total fldcw instructions: 2053
diff --git a/exp-bbv/tests/amd64-linux/fldcw_check.vgtest b/exp-bbv/tests/amd64-linux/fldcw_check.vgtest
new file mode 100644 (file)
index 0000000..f9bbae9
--- /dev/null
@@ -0,0 +1,4 @@
+prog: fldcw_check
+vgopts: --interval-size=10000 --bb-out-file=fldcw_check.out.bb
+cleanup: rm fldcw_check.out.bb
+
diff --git a/exp-bbv/tests/amd64-linux/ll.S b/exp-bbv/tests/amd64-linux/ll.S
new file mode 100644 (file)
index 0000000..95c5d3b
--- /dev/null
@@ -0,0 +1,631 @@
+#
+#  linux_logo in x86_64 assembly language
+#    based on the code from ll_asm-0.36
+#
+#  By Vince Weaver <vince _at_ deater.net>
+#
+# Modified to remove non-deterministic system calls
+# And to avoid reading from /proc
+#
+
+               
+.include "../logo.include"
+
+# offsets into the results returned by the uname syscall
+.equ U_SYSNAME,0
+.equ U_NODENAME,65
+.equ U_RELEASE,65*2
+.equ U_VERSION,(65*3)
+.equ U_MACHINE,(65*4)
+.equ U_DOMAINNAME,65*5
+
+# offset into the results returned by the sysinfo syscall
+.equ S_TOTALRAM,32
+
+# Syscalls
+.equ SYSCALL_EXIT,    60
+.equ SYSCALL_READ,     0
+.equ SYSCALL_WRITE,    1
+.equ SYSCALL_OPEN,     2
+.equ SYSCALL_CLOSE,    3
+.equ SYSCALL_SYSINFO, 99
+.equ SYSCALL_UNAME,   63
+
+#
+.equ STDIN,0
+.equ STDOUT,1
+.equ STDERR,2
+
+       .globl _start   
+_start:        
+       #=========================
+       # PRINT LOGO
+       #=========================
+
+# LZSS decompression algorithm implementation
+# by Stephan Walter 2002, based on LZSS.C by Haruhiko Okumura 1989
+# optimized some more by Vince Weaver
+
+       # we used to fill the buffer with FREQUENT_CHAR
+       # but, that only gains us one byte of space in the lzss image.
+       # the lzss algorithm does automatic RLE... pretty clever
+       # so we compress with NUL as FREQUENT_CHAR and it is pre-done for us
+
+       mov     $(N-F), %ebp            # R
+
+       mov     $logo, %esi             # %esi points to logo (for lodsb)
+
+       mov     $out_buffer, %edi       # point to out_buffer
+       push    %rdi                    # save this value for later
+
+       xor     %ecx, %ecx
+
+decompression_loop:    
+       lodsb                   # load in a byte of flags
+
+       mov     $0xff, %bh      # re-load top as a hackish 8-bit counter
+       mov     %al, %bl        # move in the flags
+
+test_flags:
+       cmp     $logo_end, %esi # have we reached the end?
+       je      done_logo       # ! if so, exit
+
+       shr     $1, %ebx        # shift bottom bit into carry flag
+       jc      discrete_char   # ! if set, we jump to discrete char
+
+offset_length:
+       lodsw                   # get match_length and match_position
+       mov %eax,%edx           # copy to edx
+                               # no need to mask dx, as we do it
+                               # by default in output_loop
+       
+       shr $(P_BITS),%eax      
+       add $(THRESHOLD+1),%al
+       mov %al,%cl             # cl = (ax >> P_BITS) + THRESHOLD + 1
+                                 #                       (=match_length)
+               
+output_loop:
+       and     $POSITION_MASK,%dh      # mask position so it wraps in text_buf
+       mov     text_buf(%rdx), %al     # load byte from text_buf[]
+       inc     %edx                    # advance pointer in text_buf
+store_byte:    
+       stosb                           # store it to the output buffer
+       
+       mov     %al, text_buf(%rbp)     # store also to text_buf[r]
+       inc     %ebp                    # r++
+       and     $(N-1), %bp             # mask r
+
+       loop    output_loop             # repeat until ecx (bytes left) is 0
+       
+       or      %bh,%bh                 # ! if 0 we shifted through 8 and must
+       jnz     test_flags              # re-load flags
+       
+       jmp     decompression_loop
+
+discrete_char:
+       lodsb                           # load a literal byte
+       inc     %ecx                    # ecx 0 -> 1 so the byte is output once
+                                       # (ecx is zero here: cleared at start,
+                                       #  and every "loop" exits with ecx == 0)
+                                       
+       jmp     store_byte              # and cleverly store it
+
+
+# end of LZSS code
+
+done_logo:
+
+       pop     %rbp                    # get out_buffer and keep in bp
+       mov     %ebp,%ecx               # move out_buffer to ecx
+
+       call    write_stdout            # print the logo
+
+       #
+       #  Setup
+       #
+setup:
+       mov     $strcat,%edx            # use rdx as call pointer (smaller op)
+
+       
+       #==========================
+       # PRINT VERSION
+       #==========================
+       
+#      push    $SYSCALL_UNAME          # uname syscall
+#      pop     %rax                    # in 3 bytes    
+       mov     $uname_info,%edi        # arg of the disabled uname call; edi is reloaded below
+#      syscall                         # do syscall
+
+       mov     %ebp,%edi               # point %edi to out_buffer
+               
+       mov     $(uname_info+U_SYSNAME),%esi    # os-name from uname "Linux"
+       call    *%rdx                   # call strcat
+
+       mov     $ver_string,%esi                # source is " Version "
+       call    *%rdx                           # call strcat
+       push    %rsi                            # save our .txt pointer
+       
+       mov     $(uname_info+U_RELEASE),%esi    # version from uname "2.6.29"
+       call    *%rdx                           # call strcat
+       
+       pop     %rsi                    # restore .txt pointer
+                                       # source is ", Compiled "
+       call    *%rdx                   # call strcat
+       push    %rsi                    # save .txt pointer (now at " Processor") for later
+
+       mov     $(uname_info+U_VERSION),%esi    # compiled date
+       call    *%rdx                   # call strcat
+
+       mov     %ebp,%ecx               # move out_buffer to ecx
+
+       mov     $0xa,%ax                # store linefeed on end
+       stosw                           # and zero                        
+
+       call    *%rdx                   # strcat from just past the version string (a NUL): adds nothing
+       
+       call    center_and_print        # center and print
+
+       #===============================
+       # Middle-Line
+       #===============================
+middle_line:           
+       #=========
+       # Load /proc/cpuinfo into buffer
+       #=========
+
+       push    %rdx                    # save call pointer
+
+#      push    $SYSCALL_OPEN           # load 5 [ open() ]
+#      pop     %rax                    # in 3 bytes
+       
+#      mov     $cpuinfo,%edi           # '/proc/cpuinfo'
+#      xor     %esi,%esi               # 0 = O_RDONLY <bits/fcntl.h>
+#      cdq                             # clear edx in clever way
+#      syscall                         # syscall.  fd in eax.  
+                                       # we should check that eax>=0
+                                       
+#      mov     %eax,%edi               # save our fd
+       
+#      xor     %eax,%eax               # SYSCALL_READ make== 0
+
+       mov     $disk_buffer,%esi
+
+#      mov     $16,%dh                 # 4096 is maximum size of proc file #)
+                                       # we load sneakily by knowing
+                                       # 16<<8 = 4096. be sure edx clear
+
+#      syscall
+
+#      push    $SYSCALL_CLOSE          # close (to be correct)
+#      pop     %rax
+#      syscall                 
+
+       #=============
+       # Number of CPUs
+       #=============
+number_of_cpus:
+
+       xor     %ebx,%ebx               # chip count (kept doubled, see below)
+       
+                                       # $disk_buffer still in %rsi
+bogo_loop:     
+       mov     (%rsi), %eax            # load 4 bytes into eax
+       inc     %esi                    # increment pointer
+       
+       cmp     $0,%al                  # check for end of file
+       je      done_bogo
+       
+       cmp     $('o'<<24+'g'<<16+'o'<<8+'b'),%eax      
+                                       # "bogo" in little-endian
+                                       
+       jne     bogo_loop               # ! if not equal, keep going
+       add     $2,%ebx                 # otherwise, we have a bogo
+                                       # 2 times too for future magic
+       jmp     bogo_loop
+
+done_bogo:
+       lea     one-6(%rbx,%rbx,2), %esi        
+                                       # Load into esi
+                                       # [one]+((num_cpus-1)*6)
+                                       #
+                                       # the above multiplies by three
+                                       # esi = one-6+(ebx+(ebx*2))
+                                       # and we double-incremented ebx 
+                                       # earlier
+        
+       mov     %ebp,%edi               # move output buffer to edi
+
+       pop     %rdx                    # restore call pointer
+       call    *%rdx                   # copy it (call strcat)
+
+       mov     $' ',%al                # print a space
+       stosb
+
+       push %rbx                       # save doubled chip count
+       push %rdx                       # store strcat pointer
+
+       #=========
+       # MHz
+       #=========
+print_mhz:
+       mov     $('z'<<24+'H'<<16+'M'<<8+' '),%ebx      
+                                       # find ' MHz' and grab up to .
+                                       # we are little endian
+       mov     $'.',%ah                # terminator char for find_string
+
+       # below is same as "sub $(strcat-find_string),%edx"
+       # gas won't let us force the one-byte constant
+       .byte 0x83,0xEA,strcat-find_string   
+       
+       call    *%rdx                   # call find_string (rdx now points at it)
+
+       mov     %ebx,%eax               # clever way to get MHz in, sadly
+       ror     $8,%eax                 # not any smaller than a mov
+       stosl                           # append the four bytes "MHz "
+
+       #=========
+       # Chip Name
+       #=========
+chip_name:     
+       mov     $('e'<<24+'m'<<16+'a'<<8+'n'),%ebx      
+                                       # find 'name\t: ' and grab up to a space
+                                       # we are little endian
+       mov     $' ',%ah                # terminator char: stop at first space
+       call    *%rdx                   # call find_string
+       stosb                           # store the char find_string stopped on
+       call    skip_spaces             # re-enter find_string's copy loop: next word
+       
+       pop     %rdx                    # restore strcat pointer
+       pop     %rbx                    # restore chip count
+       pop     %rsi                    # restore .txt pointer saved earlier
+                               
+       call    *%rdx                   # ' Processor'
+       cmpb    $2,%bl                  # count is doubled: 2 means one chip
+       jne     print_s
+       inc     %rsi                    # ! if singular, skip the s
+print_s:
+        call    *%rdx                   # 's, '
+       
+        push    %rsi                    # save .txt pointer (now at "M RAM, ")
+       push    %rdx                    # save strcat pointer
+                       
+       #========
+       # RAM
+       #========
+
+#      push    %rdi    
+#      push    $SYSCALL_SYSINFO        # sysinfo() syscall
+#      pop     %rax    
+#      mov     $sysinfo_buff,%edi
+#      syscall
+#      pop     %rdi
+
+       # The following has to be a 64 bit load, to support
+       # RAM > 4GB
+       mov     (sysinfo_buff+S_TOTALRAM),%rax  # size in bytes of RAM
+       shr     $20,%rax                # divide by 1024*1024 to get M
+       adc     $0, %eax                # round up if the last bit shifted out was set
+
+       call num_to_ascii               # append the decimal digits at edi
+       
+       pop  %rdx                       # restore strcat pointer
+       
+       pop     %rsi                    # print 'M RAM, '
+       call    *%rdx                   # call strcat
+
+       push    %rsi                    # save .txt pointer (now at " Bogomips Total")
+       
+       #========
+       # Bogomips
+       #========
+       
+       mov     $('s'<<24+'p'<<16+'i'<<8+'m'),%ebx              
+                                       # find 'mips\t: ' and grab up to \n
+       mov     $0xa,%ah
+       call    find_string
+
+       pop     %rsi                    # bogo total follows RAM 
+
+       call    *%rdx                   # call strcat
+
+       push    %rsi
+
+       mov     %ebp,%ecx               # point ecx to out_buffer
+
+       push    %rcx
+       call    center_and_print        # center and print
+
+       #=================================
+       # Print Host Name
+       #=================================
+last_line:
+       mov     %ebp,%edi               # point to output_buffer
+       
+       mov     $(uname_info+U_NODENAME),%esi   # host name from uname()
+       call    *%rdx                   # call strcat
+
+       pop     %rcx                    # ecx is unchanged
+       call    center_and_print        # center and print
+       
+       pop     %rcx                    # (.txt) pointer to default_colors
+       
+       call    write_stdout
+
+       #================================
+       # Exit
+       #================================
+exit:
+       push    $SYSCALL_EXIT           # Put exit syscall in rax
+       pop     %rax
+
+       xor     %edi,%edi               # Make return value $0
+       syscall
+
+
+       #=================================
+       # FIND_STRING 
+       #=================================
+       #   ah is char to end at
+       #   ebx is 4-char ascii string to look for
+       #   edi points at output buffer
+
+find_string:
+                                       
+       mov     $disk_buffer-1,%esi     # look in cpuinfo buffer (inc below starts at [0])
+find_loop:
+       inc     %esi
+       cmpb    $0, (%rsi)              # are we at EOF?
+       je      done                    # ! if so, done
+
+       cmp     (%rsi), %ebx            # do the 4 bytes here match the pattern?
+       jne     find_loop               # ! if not, loop
+       
+                                       # ! if we get this far, we matched
+
+find_colon:
+       lodsb                           # repeat till we find colon
+       cmp     $0,%al                  # give up at end of buffer
+       je      done
+       cmp     $':',%al
+       jne     find_colon
+
+skip_spaces:           
+       lodsb                           # skip spaces
+       cmp     $0x20,%al               # (some cpuinfo fields have many of them)
+       je      skip_spaces
+       
+store_loop:     
+       cmp     $0,%al                  # end of buffer: return without terminating
+       je      done
+       cmp     %ah,%al                 # is it the caller's end char?
+       je      almost_done             # ! if so, finish
+       cmp     $'\n',%al               # end of line also finishes
+       je      almost_done
+       stosb                           # ! if not store and continue
+       lodsb
+       
+       jmp     store_loop
+        
+almost_done:    
+       movb     $0, (%rdi)             # NUL-terminate the output; edi is not advanced
+done:
+       ret
+
+
+       #================================
+       # strcat
+       #================================
+
+strcat:
+       lodsb                           # load a byte from [rsi], rsi++
+       stosb                           # store it to [rdi], rdi++
+       cmp     $0,%al                  # was it the terminating NUL?
+       jne     strcat                  # ! if not loop
+       dec     %edi                    # back up onto the NUL so the next append overwrites it
+       ret                             # rsi is left just past the source's NUL
+
+       #==============================
+       # center_and_print
+       #==============================
+       # string to center in ecx
+
+center_and_print:
+       push    %rdx                    # save strcat pointer
+       push    %rcx                    # save the string pointer
+       inc     %edi                    # move to a clear buffer
+       push    %rdi                    # save start of the escape sequence
+
+       mov     $('['<<8+27),%ax        # we want to output ^[[
+       stosw
+
+       cdq                             # edx = 0 (sign-extends eax, which is positive here)
+       
+str_loop2:                             # find end of string    
+       inc     %edx
+       cmpb    $0,(%rcx,%rdx)          # repeat till we find zero
+       jne     str_loop2
+       
+       push    $81                     # one added to cheat, we don't
+                                       # count the trailing '\n'
+       pop     %rax
+       
+       cmp     %eax,%edx               # is the length below 81?
+       jl      not_too_big             # ! if so, center normally
+       push    $80                     # else clamp length to 80, which
+       pop     %rdx                    # makes the padding come out as 0
+       
+not_too_big:                   
+       sub     %edx,%eax               # subtract size from 81
+       
+       shr     %eax                    # then divide by 2
+       
+       call    num_to_ascii            # print number of spaces
+       mov     $'C',%al                # tack a 'C' on the end
+                                       # ah is zero from num_to_ascii
+       stosw                           # store C and a NULL
+       pop  %rcx                       # pop the pointer to ^[[xC
+       
+       call write_stdout               # write to the screen
+       
+done_center:
+       pop  %rcx                       # restore string pointer
+                                       # and trickily print the real string
+
+       pop %rdx                        # restore strcat pointer, fall into write_stdout
+
+       #================================
+       # WRITE_STDOUT
+       #================================
+       # ecx has string (NUL terminated, assumed non-empty)
+       # rax,rdi,rsi trashed (syscall also clobbers rcx,r11); rdx preserved
+write_stdout:
+       push    %rdx                    # preserve caller's rdx (strcat pointer)
+       push    $SYSCALL_WRITE          # put SYSCALL_WRITE (1) in rax
+       pop     %rax                    # in 3 bytes of code
+       
+       cdq                             # edx = 0 (sign-extends eax, which is 1)
+       
+       lea     1(%rdx),%edi            # put 1 in edi (stdout)
+                                       # in 3 bytes of code
+
+       mov     %ecx,%esi               # rsi = buffer to write
+       
+str_loop1:
+       inc     %edx                    # edx = length so far (starts at 1)
+       cmpb    $0,(%rcx,%rdx)          # repeat till zero
+       jne     str_loop1
+
+       syscall                         # write(1, string, length)
+       pop     %rdx
+       ret
+
+       ##############################
+       # num_to_ascii
+       ##############################
+       # eax = value to print (rbx, rcx, rdx trashed; ah = 0 on return)
+       # edi points to where we want it
+       
+num_to_ascii:
+       push    $10
+       pop     %rbx            # rbx = 10, the divisor
+       xor     %ecx,%ecx       # clear ecx (digit counter)
+div_by_10:
+       cdq                     # edx = sign of eax (0 for non-negative values)
+       div     %ebx            # eax = quotient, edx = remainder (one digit)
+       push    %rdx            # save digit for later
+       inc     %ecx            # add to length counter
+       or      %eax,%eax       # was Q zero?
+       jnz     div_by_10       # ! if not divide again
+       
+write_out:
+       pop     %rax            # restore in reverse order
+       add     $0x30, %al      # convert to ASCII
+       stosb                   # save digit
+       loop    write_out       # loop till all ecx digits are stored
+       ret
+
+#===========================================================================
+#      section .data
+#===========================================================================
+.data
+
+ver_string:    .ascii  " Version \0"
+compiled_string:       .ascii  ", Compiled \0"
+processor:             .ascii  " Processor\0"
+s_comma:               .ascii  "s, \0"
+ram_comma:     .ascii  "M RAM, \0"
+bogo_total:    .ascii  " Bogomips Total\n\0"
+
+default_colors:        .ascii "\033[0m\n\n\0"
+
+cpuinfo:       .ascii  "/proc/cpuinfo\0"
+
+
+one:   .ascii  "One\0\0\0"
+two:   .ascii  "Two\0\0\0"
+three: .ascii  "Three\0"
+four:  .ascii  "Four\0"
+
+.include       "../logo.lzss_new"
+
+disk_buffer:
+.ascii "processor      : 0\n"
+.ascii "vendor_id      : GenuineIntel\n"
+.ascii "cpu family     : 15\n"
+.ascii "model          : 6\n"
+.ascii "model name     : Intel(R) Xeon(TM) CPU 3.46GHz\n"
+.ascii "stepping       : 4\n"
+.ascii "cpu MHz                : 3200.000\n"
+.ascii "cache size     : 2048 KB\n"
+.ascii "physical id    : 0\n"
+.ascii "siblings       : 2\n"
+.ascii "core id                : 0\n"
+.ascii "cpu cores      : 2\n"
+.ascii "apicid         : 0\n"
+.ascii "initial apicid : 0\n"
+.ascii "fpu            : yes\n"
+.ascii "fpu_exception  : yes\n"
+.ascii "cpuid level    : 6\n"
+.ascii "wp             : yes\n"
+.ascii "flags          : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx lm constant_tsc pebs bts pni dtes64 monitor ds_cpl vmx est cid cx16 xtpr pdcm lahf_lm tpr_shadow\n"
+.ascii "bogomips       : 6934.38\n"
+.ascii "clflush size   : 64\n"
+.ascii "cache_alignment        : 128\n"
+.ascii "address sizes  : 36 bits physical, 48 bits virtual\n"
+.ascii "power management:\n"
+.ascii "\n"
+.ascii "processor      : 1\n"
+.ascii "vendor_id      : GenuineIntel\n"
+.ascii "cpu family     : 15\n"
+.ascii "model          : 6\n"
+.ascii "model name     : Intel(R) Xeon(TM) CPU 3.46GHz\n"
+.ascii "stepping       : 4\n"
+.ascii "cpu MHz                : 3200.000\n"
+.ascii "cache size     : 2048 KB\n"
+.ascii "physical id    : 1\n"
+.ascii "siblings       : 2\n"
+.ascii "core id                : 0\n"
+.ascii "cpu cores      : 2\n"
+.ascii "apicid         : 4\n"
+.ascii "initial apicid : 4\n"
+.ascii "fpu            : yes\n"
+.ascii "fpu_exception  : yes\n"
+.ascii "cpuid level    : 6\n"
+.ascii "wp             : yes\n"
+.ascii "flags          : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx lm constant_tsc pebs bts pni dtes64 monitor ds_cpl vmx est cid cx16 xtpr pdcm lahf_lm tpr_shadow\n"
+.ascii "bogomips       : 6934.13\n"
+.ascii "clflush size   : 64\n"
+.ascii "cache_alignment        : 128\n"
+.ascii "address sizes  : 36 bits physical, 48 bits virtual\n"
+.ascii "power management:\n\0"
+
+uname_info:
+.ascii "Linux\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+
+.ascii "domori\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+
+.ascii "2.6.29\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+
+.ascii "#1 SMP Mon May 4 09:51:54 EDT 2009\0\0"
+.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+
+.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+
+.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+
+sysinfo_buff:
+.long 0,0,0,0,0,0,0,0,2048*1024*1024,0,0,0,0,0,0,0
+
+
+#============================================================================
+#      section .bss
+#============================================================================
+.bss
+
+.lcomm  text_buf, (N+F-1)
+.lcomm out_buffer,16384
diff --git a/exp-bbv/tests/amd64-linux/ll.post.exp b/exp-bbv/tests/amd64-linux/ll.post.exp
new file mode 100644 (file)
index 0000000..5417125
--- /dev/null
@@ -0,0 +1,54 @@
+T:1:10   :7:10   :5:38   :2:44   :8:65   :9:662   :4:119   :6:2   :3:51   
+T:7:5   :5:16   :2:18   :8:52   :9:858   :4:35   :6:1   :3:15   
+T:7:5   :5:16   :2:18   :8:52   :9:858   :4:35   :6:1   :3:15   
+T:7:5   :5:14   :2:16   :8:91   :9:863   :4:7   :6:1   :3:3   
+T:7:5   :5:12   :2:14   :8:78   :9:880   :4:7   :6:1   :3:3   
+T:7:5   :5:6   :2:8   :8:52   :9:928   :6:1   
+T:7:5   :5:10   :2:10   :8:65   :9:909   :6:1   
+T:7:5   :5:14   :2:18   :8:117   :9:845   :6:1   
+T:5:8   :2:8   :8:52   :9:932   
+T:7:5   :5:8   :2:10   :8:65   :9:911   :6:1   
+T:5:8   :2:8   :8:52   :9:932   
+T:7:5   :5:6   :2:8   :8:52   :9:928   :6:1   
+T:5:6   :2:6   :8:39   :9:949   
+T:7:5   :5:6   :2:8   :8:52   :9:928   :6:1   
+T:5:4   :2:4   :8:26   :9:966   
+T:7:5   :5:12   :2:14   :8:78   :9:880   :4:7   :6:1   :3:3   
+T:5:6   :2:6   :8:39   :9:949   
+T:7:5   :5:8   :2:10   :8:65   :9:911   :6:1   
+T:7:5   :5:14   :2:16   :8:91   :9:863   :4:7   :6:1   :3:3   
+T:5:8   :2:8   :8:52   :9:932   
+T:7:5   :5:10   :2:12   :8:78   :9:894   :6:1   
+T:7:5   :5:10   :2:12   :8:74   :9:898   :6:1   
+T:5:12   :2:12   :8:82   :9:894   
+T:7:5   :5:8   :2:8   :8:39   :9:390   :4:7   :6:1   :3:3   :10:3   :11:9   :12:527   
+T:12:1000   
+T:12:1000   
+T:12:1000   
+T:12:1000   
+T:12:1000   
+T:12:1000   
+T:12:1000   
+T:15:5   :18:2   :19:3   :20:2   :21:3   :22:4   :16:281   :17:10   :12:687   :13:1   :14:2   
+T:23:1   :32:7   :34:351   :33:176   :16:3   :17:2   :24:10   :25:195   :26:4   :27:3   :30:4   :31:11   :11:9   :12:204   :13:2   :14:4   :28:9   :29:5   
+T:34:666   :33:334   
+T:34:667   :33:333   
+T:34:665   :33:333   :35:2   
+T:34:667   :33:333   
+T:34:667   :33:333   
+T:34:666   :33:334   
+T:34:666   :33:332   :35:2   
+T:34:357   :33:178   :36:4   :37:8   :38:4   :40:258   :39:173   :16:16   :17:2   
+T:49:6   :50:2   :51:4   :52:2   :53:1   :54:6   :56:3   :38:4   :40:333   :39:225   :41:39   :42:26   :43:15   :44:46   :45:46   :46:40   :47:60   :48:6   :16:88   :17:4   :28:9   :55:18   :29:17   
+T:57:4   :38:4   :40:591   :39:395   :16:4   :17:2   
+T:40:600   :39:400   
+T:58:2   :59:4   :40:453   :39:303   :41:18   :42:12   :43:6   :44:16   :45:16   :46:14   :47:21   :48:2   :16:68   :17:2   :24:10   :25:53   
+
+
+# Thread 1
+#   Total intervals: 45 (Interval Size 1000)
+#   Total instructions: 45639
+#   Total reps: 0
+#   Unique reps: 0
+#   Total fldcw instructions: 0
+
diff --git a/exp-bbv/tests/amd64-linux/ll.stderr.exp b/exp-bbv/tests/amd64-linux/ll.stderr.exp
new file mode 100644 (file)
index 0000000..3e75445
--- /dev/null
@@ -0,0 +1,6 @@
+# Thread 1
+#   Total intervals: 45 (Interval Size 1000)
+#   Total instructions: 45639
+#   Total reps: 0
+#   Unique reps: 0
+#   Total fldcw instructions: 0
diff --git a/exp-bbv/tests/amd64-linux/ll.stdout.exp b/exp-bbv/tests/amd64-linux/ll.stdout.exp
new file mode 100644 (file)
index 0000000..61cd23c
--- /dev/null
@@ -0,0 +1,17 @@
+\e[0;1;37;47m#################################################################\e[0;30;47m#####\e[1;37m#########\e[1;37;40m
+\e[0;1;37;47m################################################################\e[0;30;47m#######\e[1;37m########\e[1;37;40m
+\e[0;1;37;47m###################\e[31m#\e[37m############################################\e[0;30;47m##\e[1;37mO\e[0;30;47m#\e[1;37mO\e[0;30;47m##\e[1;37m########\e[1;37;40m
+\e[0;1;37;47m##\e[0;30;47m######\e[1;37m##########\e[31m##\e[0;30;47m#\e[1;37m###########################################\e[0;30;47m#\e[1;33m#####\e[0;30;47m#\e[1;37m########\e[1;37;40m
+\e[0;1;37;47m####\e[0;30;47m##\e[1;37m#############\e[0;30;47m#\e[1;37m##########################################\e[0;30;47m##\e[1;37m##\e[33m###\e[37m##\e[0;30;47m##\e[1;37m######\e[1;37;40m
+\e[0;1;37;47m####\e[0;30;47m##\e[1;37m#########\e[31m###\e[37m###\e[0;30;47m###\e[1;37m#\e[0;30;47m####\e[1;37m###\e[0;30;47m###\e[1;37m####\e[0;30;47m###\e[1;37m##\e[0;30;47m#####\e[1;37m#\e[0;30;47m######\e[1;37m#####\e[0;30;47m#\e[1;37m##########\e[0;30;47m##\e[1;37m#####\e[1;37;40m
+\e[0;1;37;47m####\e[0;30;47m##\e[1;37m########\e[31m#\e[37m##\e[31m#\e[0;30;47m#\e[1;37m###\e[0;30;47m###\e[1;37m####\e[0;30;47m##\e[1;37m##\e[0;30;47m##\e[1;37m#####\e[0;30;47m##\e[1;37m####\e[0;30;47m##\e[1;37m###\e[0;30;47m##\e[1;37m#######\e[0;30;47m#\e[1;37m############\e[0;30;47m##\e[1;37m####\e[1;37;40m
+\e[0;1;37;47m####\e[0;30;47m##\e[1;37m#######\e[31m#\e[37m###\e[31m#\e[0;30;47m#\e[1;37m###\e[0;30;47m##\e[1;37m#####\e[0;30;47m##\e[1;37m##\e[0;30;47m##\e[1;37m#####\e[0;30;47m##\e[1;37m######\e[0;30;47m###\e[1;37m#########\e[0;30;47m#\e[1;37m############\e[0;30;47m###\e[1;37m###\e[1;37;40m
+\e[0;1;37;47m####\e[0;30;47m##\e[1;37m##########\e[31m##\e[0;30;47m#\e[1;37m###\e[0;30;47m##\e[1;37m#####\e[0;30;47m##\e[1;37m##\e[0;30;47m##\e[1;37m#####\e[0;30;47m##\e[1;37m######\e[0;30;47m###\e[1;37m########\e[33m##\e[0;30;47m#\e[1;37m###########\e[0;30;47m##\e[1;33m#\e[37m###\e[1;37;40m
+\e[0;1;37;47m####\e[0;30;47m##\e[1;37m#######\e[0;30;47m#\e[1;37m#\e[31m##\e[0;30;47m#\e[1;37m####\e[0;30;47m##\e[1;37m#####\e[0;30;47m##\e[1;37m##\e[0;30;47m##\e[1;37m#####\e[0;30;47m##\e[1;37m#####\e[0;30;47m##\e[1;37m#\e[0;30;47m##\e[1;37m#####\e[33m######\e[0;30;47m#\e[1;37m#######\e[30m#\e[33m######\e[37m#\e[1;37;40m
+\e[0;1;37;47m####\e[0;30;47m##\e[1;37m######\e[0;30;47m##\e[1;37m#\e[31m##\e[0;30;47m#\e[1;37m#\e[0;30;47m#\e[1;37m##\e[0;30;47m##\e[1;37m#####\e[0;30;47m##\e[1;37m##\e[0;30;47m###\e[1;37m###\e[0;30;47m###\e[1;37m####\e[0;30;47m##\e[1;37m###\e[0;30;47m##\e[1;37m####\e[33m#######\e[0;30;47m#\e[1;37m#####\e[0;30;47m#\e[1;33m#######\e[37m#\e[1;37;40m
+\e[0;1;37;47m##\e[0;30;47m############\e[1;37m##\e[0;30;47m###\e[1;37m##\e[0;30;47m####\e[1;37m###\e[0;30;47m####\e[1;37m###\e[0;30;47m####\e[1;37m#\e[0;30;47m###\e[1;37m#\e[0;30;47m#####\e[1;37m#\e[0;30;47m######\e[1;37m###\e[33m#####\e[30m#\e[0;30;47m#####\e[1m#\e[33m#####\e[37m###\e[1;37;40m
+
+\e[7CLinux Version 2.6.29, Compiled #1 SMP Mon May 4 09:51:54 EDT 2009
+\e[2CTwo 3200MHz Intel(R) Xeon(TM) Processors, 2048M RAM, 6934.38 Bogomips Total
+\e[37Cdomori\e[0m
+
diff --git a/exp-bbv/tests/amd64-linux/ll.vgtest b/exp-bbv/tests/amd64-linux/ll.vgtest
new file mode 100644 (file)
index 0000000..6031a58
--- /dev/null
@@ -0,0 +1,5 @@
+prog: ll
+vgopts: --interval-size=1000 --bb-out-file=ll.out.bb
+post:  cat ll.out.bb
+cleanup: rm ll.out.bb
+
diff --git a/exp-bbv/tests/amd64-linux/million.S b/exp-bbv/tests/amd64-linux/million.S
new file mode 100644 (file)
index 0000000..d72ee4b
--- /dev/null
@@ -0,0 +1,22 @@
+
+            # count for 1 million instructions
+            #   total is 2 + 1 + 499997*2 + 3
+            
+       .globl _start   
+_start:        
+       xor     %rcx,%rcx               # not needed, pads total to 1M
+       xor     %rax,%rax               # not needed, pads total to 1M
+       
+       mov     $499997,%rcx            # load loop counter
+test_loop:     
+       dec     %rcx                    # repeat count times
+       jnz     test_loop               # two instructions per iteration
+
+       #================================
+       # Exit
+       #================================
+exit:
+       xor     %rdi,%rdi               # we return 0
+       mov     $60,%rax                # put exit syscall number (60) in rax
+       syscall
+
diff --git a/exp-bbv/tests/amd64-linux/million.post.exp b/exp-bbv/tests/amd64-linux/million.post.exp
new file mode 100644 (file)
index 0000000..30bdd29
--- /dev/null
@@ -0,0 +1,21 @@
+T:1:5   :2:99996   
+T:2:100000   
+T:2:100000   
+T:2:100000   
+T:2:100000   
+T:2:100000   
+T:2:100000   
+T:2:100000   
+T:2:100000   
+
+
+# Thread 1
+#   Total intervals: 10 (Interval Size 100000)
+#   Total instructions: 1000000
+#   Total reps: 0
+#   Unique reps: 0
+#   Total fldcw instructions: 0
+
+F:1:400078:
+F:2:400085:
+F:3:40008a:
diff --git a/exp-bbv/tests/amd64-linux/million.stderr.exp b/exp-bbv/tests/amd64-linux/million.stderr.exp
new file mode 100644 (file)
index 0000000..adeb35d
--- /dev/null
@@ -0,0 +1,6 @@
+# Thread 1
+#   Total intervals: 10 (Interval Size 100000)
+#   Total instructions: 1000000
+#   Total reps: 0
+#   Unique reps: 0
+#   Total fldcw instructions: 0
diff --git a/exp-bbv/tests/amd64-linux/million.vgtest b/exp-bbv/tests/amd64-linux/million.vgtest
new file mode 100644 (file)
index 0000000..969a636
--- /dev/null
@@ -0,0 +1,5 @@
+prog: million 
+vgopts: --interval-size=100000 --bb-out-file=million.out.bb --pc-out-file=million.out.pc
+post:  cat million.out.bb million.out.pc
+cleanup: rm million.out.bb million.out.pc
+
diff --git a/exp-bbv/tests/amd64-linux/rep_prefix.S b/exp-bbv/tests/amd64-linux/rep_prefix.S
new file mode 100644 (file)
index 0000000..6fe8ac3
--- /dev/null
@@ -0,0 +1,347 @@
+#
+# rep, repe (repz) and repne (repnz) prefixed string instructions
+#   only count as one instruction, even though they repeat many times
+# This test makes sure the bbv plugin counts these instructions properly
+# The expected counts were validated against hw perf counters.
+#
+
+       .globl _start   
+_start:        
+       cld                             # clear DF so string ops auto-increment
+
+
+       #===============================================
+       # Some SSE2 instructions start with 0xf2 or 0xf3
+       # Check for them, to make sure our rep detection
+       #   handles things properly.
+       # We should check this on x86 too, but then we'd
+       #   have to check for SSE2 capability somehow?
+       #===============================================
+false_positives:
+
+       movdqu  %xmm1,%xmm2             # encoding begins with 0xf3
+       movdqu  %xmm2,%xmm1             # encoding begins with 0xf3
+       addsd   %xmm1,%xmm2             # encoding begins with 0xf2
+       pause                           # encoded as 0xf3 0x90
+
+       #===================================
+       # Check varied order of the size prefix
+       #   with the rep prefix.  Older binutils
+       #   did this one way, newer binutils the other
+       #===================================
+       
+size_prefix:
+       # 16-bit rep load, hand-assembled with both prefix orders
+       
+       mov     $8192, %rcx             # 8192 words = all 16384 bytes of buffer1
+       mov     $buffer1, %rsi          # set source
+       .byte 0x66, 0xf3, 0xad          # rep lodsw: size prefix 0x66, then rep 0xf3
+       
+       mov     $8192, %rcx
+       mov     $buffer1, %rsi          # set source
+       .byte 0xf3, 0x66, 0xad          # rep lodsw: rep 0xf3, then size prefix 0x66
+       
+       
+       
+
+
+       #===================================
+       # Load and Store Instructions
+       #===================================
+loadstore:
+       xor     %rax, %rax
+       mov     $0xd, %al               # rax = 0x0d (upper bits cleared by the xor)
+       
+       # test 8-bit store
+       
+       mov     $16384, %rcx
+       mov     $buffer1, %rdi          # set destination
+       rep     stosb                   # store 0x0d 16384 times, auto-increment
+       
+       # test 8-bit load
+       
+       mov     $16384, %rcx
+       mov     $buffer1, %rsi          # set source
+       rep     lodsb                   # load byte 16384 times, auto-increment
+
+       cmp     $0xd,%al                # NOTE: al already held 0x0d before the
+       jne     print_error             #   load, so a skipped load is not caught
+
+       # test 16-bit store
+       
+       mov     $0x020d,%ax             # store 0x020d
+       
+       mov     $8192, %rcx
+       mov     $buffer1, %rdi          # set destination
+       rep     stosw                   # store 8192 times, auto-increment
+       
+       # test 16-bit load
+       
+       mov     $8192, %rcx
+       mov     $buffer1, %rsi          # set source
+       rep     lodsw                   # load 8192 times, auto-increment
+
+       cmp     $0x020d,%ax             # if we loaded wrong value
+       jne     print_error             # print an error
+
+       # test 32-bit store
+       
+       mov     $0x0feb1378,%eax        # store 0x0feb1378
+       
+       mov     $4096, %rcx
+       mov     $buffer1, %rdi          # set destination
+       rep     stosl                   # store 4096 times, auto-increment
+       
+       # test 32-bit load
+       
+       mov     $4096, %rcx
+       mov     $buffer1, %rsi          # set source
+       rep     lodsl                   # load 4096 times, auto-increment
+
+       cmp     $0x0feb1378,%eax        # if we loaded wrong value
+       jne     print_error             # print an error
+       
+       # test 64-bit store
+       
+       mov     $0xfeb131978a5a5a5a,%rax        # 64-bit pattern to store
+                                               
+       mov     $2048, %rcx
+       mov     $buffer1, %rdi          # set destination
+       rep     stosq                   # store 2048 times, auto-increment
+       
+       # test 64-bit load
+       
+       mov     $2048, %rcx
+       mov     $buffer1, %rsi          # set source
+       rep     lodsq                   # load 2048 times, auto-increment
+
+       cmp     $0x8a5a5a5a,%eax        # checks only the low 32 bits of rax
+                                       # (upper half 0xfeb13197 is not verified)
+       jne     print_error             # print an error
+       
+
+       #=============================
+       # Move instructions
+       #   (the copied data is not verified afterwards;
+       #    only the rep instructions themselves matter here)
+       #=============================
+moves:
+       # test 8-bit move
+       
+       mov    $16384, %rcx             # 16384 bytes
+       mov    $buffer1, %rsi           # source
+       mov    $buffer2, %rdi           # destination
+       rep    movsb
+
+       # test 16-bit move
+       
+       mov    $8192, %rcx              # 8192 words, copied back the other way
+       mov    $buffer2, %rsi
+       mov    $buffer1, %rdi
+       rep    movsw
+
+       # test 32-bit move
+       
+       mov    $4096, %rcx              # 4096 dwords
+       mov    $buffer1, %rsi
+       mov    $buffer2, %rdi
+       rep    movsl    
+       
+       # test 64-bit move
+       
+       mov    $2048, %rcx              # 2048 qwords
+       mov    $buffer1, %rsi
+       mov    $buffer2, %rdi
+       rep    movsq            
+       
+
+       #==================================
+       # Compare equal instructions
+       #==================================
+compare_equal: 
+       # first fill both buffers with the same 0xa5 byte pattern
+       
+       mov     $0xa5a5a5a5,%eax
+       mov     $buffer1, %rdi
+       mov     $4096, %rcx             # 4096 dwords = 16384 bytes
+       rep     stosl
+       
+       mov     $0xa5a5a5a5,%eax
+       mov     $buffer2, %rdi
+       mov     $4096, %rcx
+       rep     stosl
+
+       # Each repe cmps below should run to the end with ZF still set;
+       # test 8-bit
+       
+       mov     $buffer1,%rsi
+       mov     $buffer2,%rdi
+       mov     $16384, %rcx
+       repe    cmpsb
+       jnz     print_error             # ZF clear means a mismatch was seen
+
+       # test 16-bit
+       
+       mov     $buffer1,%rsi
+       mov     $buffer2,%rdi
+       mov     $8192, %rcx
+       repe    cmpsw
+       jnz     print_error
+
+       # test 32-bit
+       
+       mov     $buffer1,%rsi
+       mov     $buffer2,%rdi
+       mov     $4096, %rcx
+       repe    cmpsl
+       jnz     print_error             
+       
+       # test 64-bit
+       
+       mov     $buffer1,%rsi
+       mov     $buffer2,%rdi
+       mov     $2048, %rcx
+       repe    cmpsq
+       jnz     print_error                     
+
+
+
+       #==================================
+       # Compare not equal instructions
+       #   (the repne cmps instructions are disabled below,
+       #    so only the register setup is executed)
+       #==================================
+compare_noteq: 
+       # change second buffer so it differs from buffer1 everywhere
+       
+       mov     $0x5a5a5a5a,%eax
+       mov     $buffer2, %rdi
+       mov     $4096, %rcx
+       rep     stosl
+       
+       # test 8-bit
+       
+       mov     $buffer1,%rsi
+       mov     $buffer2,%rdi
+       mov     $16384, %rcx
+#      repne   cmpsb             FIXME!  Not implemented in valgrind
+#      je      print_error
+
+       # test 16-bit
+       
+       mov     $buffer1,%rsi
+       mov     $buffer2,%rdi
+       mov     $8192, %rcx
+#      repne   cmpsw             FIXME!  Not implemented in valgrind
+#      je      print_error     
+
+       # test 32-bit
+       
+       mov     $buffer1,%rsi
+       mov     $buffer2,%rdi
+       mov     $4096, %rcx
+#      repne   cmpsl             FIXME!  Not implemented in valgrind
+#      je      print_error                     
+
+       # test 64-bit
+       
+       mov     $buffer1,%rsi
+       mov     $buffer2,%rdi
+       mov     $2048, %rcx
+#      repne   cmpsq             FIXME!  Not implemented in valgrind
+#      je      print_error                     
+
+       #====================================
+       # Check scan equal instruction
+       #   (buffer1 still holds the 0xa5 pattern written in compare_equal)
+       #====================================
+scan_eq:
+       # test 8-bit
+
+       mov     $0xa5,%al               # value that matches every byte
+       mov     $buffer1,%rdi
+       mov     $16384, %rcx
+       repe    scasb                   # should scan the whole buffer
+       jnz     print_error             # ZF clear means a mismatch was seen
+
+       # test 16-bit
+       
+       mov     $0xa5a5,%ax
+       mov     $buffer1,%rdi
+       mov     $8192, %rcx
+       repe    scasw
+       jnz     print_error     
+
+       # test 32-bit
+       
+       mov     $0xa5a5a5a5,%eax
+       mov     $buffer1,%rdi
+       mov     $4096, %rcx
+       repe    scasl
+       jnz     print_error             
+       
+       # test 64-bit
+       
+       mov     $0xa5a5a5a5a5a5a5a5,%rax
+       mov     $buffer1,%rdi
+       mov     $2048, %rcx
+       repe    scasq
+       jnz     print_error                     
+       
+
+       #====================================
+       # Check scan not-equal instruction
+       #   (buffer2 holds the 0x5a pattern written in compare_noteq)
+       #====================================
+
+       # test 8-bit
+scan_ne:
+       mov     $0xa5,%al               # value that matches no byte of buffer2
+       mov     $buffer2,%rdi
+       mov     $16384, %rcx
+       repne   scasb                   # should scan the whole buffer
+       jz      print_error             # ZF set means a match was found
+
+       # test 16-bit
+       
+       mov     $0xa5a5,%ax
+       mov     $buffer2,%rdi
+       mov     $8192, %rcx
+       repne   scasw
+       jz      print_error     
+       
+       # test 32-bit
+       
+       mov     $0xa5a5a5a5,%eax
+       mov     $buffer2,%rdi
+       mov     $4096, %rcx
+       repne   scasl
+       jz      print_error             
+       
+       # test 64-bit
+       
+       mov     $0xa5a5a5a5a5a5a5a5,%rax
+       mov     $buffer2,%rdi
+       mov     $2048, %rcx
+       repne   scasq
+       jz      print_error                     
+
+       jmp     exit                    # no error, skip to exit
+       
+print_error:
+       # Report a failed check, then fall through into exit (status stays 0)
+       mov     $1, %rax                # write syscall number (1)
+       mov     $1, %rdi                # fd 1 = stdout
+       mov     $error_string, %rsi     # string to print
+       mov     $16, %edx               # byte count of error_string, NUL excluded
+       syscall                         # call syscall
+
+       #================================
+       # Exit (reached on success and after print_error alike)
+       #================================
+exit:
+       mov     $60,%rax                # exit syscall number (60)
+       xor     %rdi,%rdi               # we return 0
+       syscall                         # and exit
+
+
+.data
+error_string:  .asciz "Error detected!\n"      # 16 characters plus the NUL
+
+.bss
+
+.lcomm buffer1,        16384           # scratch buffers for the string ops;
+.lcomm buffer2,        16384           #   every test covers all 16384 bytes
diff --git a/exp-bbv/tests/amd64-linux/rep_prefix.stderr.exp b/exp-bbv/tests/amd64-linux/rep_prefix.stderr.exp
new file mode 100644 (file)
index 0000000..2ca3548
--- /dev/null
@@ -0,0 +1,6 @@
+# Thread 1
+#   Total intervals: 0 (Interval Size 100000)
+#   Total instructions: 152
+#   Total reps: 165917
+#   Unique reps: 29
+#   Total fldcw instructions: 0
diff --git a/exp-bbv/tests/amd64-linux/rep_prefix.vgtest b/exp-bbv/tests/amd64-linux/rep_prefix.vgtest
new file mode 100644 (file)
index 0000000..bc89a1c
--- /dev/null
@@ -0,0 +1,4 @@
+prog: rep_prefix
+vgopts: --interval-size=100000 --bb-out-file=rep_prefix.out.bb
+cleanup: rm rep_prefix.out.bb
+
diff --git a/exp-bbv/tests/filter_bb b/exp-bbv/tests/filter_bb
new file mode 100644 (file)
index 0000000..ca9345f
--- /dev/null
@@ -0,0 +1,12 @@
+#! /bin/sh
+
+dir=`dirname "$0"`
+
+"$dir/../../tests/filter_stderr_basic"  |
+
+# Filter out the basic block numbers while keeping each
+# count: every ":<digits>:" field becomes one space.  The
+#  basic block number is non-deterministic on a
+#  multi-threaded benchmark
+
+sed 's/:[0-9]*:/ /g'
diff --git a/exp-bbv/tests/filter_stderr b/exp-bbv/tests/filter_stderr
new file mode 100644 (file)
index 0000000..15b6f6e
--- /dev/null
@@ -0,0 +1,13 @@
+#! /bin/sh
+
+dir=`dirname "$0"`
+
+"$dir/../../tests/filter_stderr_basic"  |
+
+# Remove non-empty lines that don't start with #
+sed '/^[^#]/d' |
+
+# Remove all blank lines
+sed '/^$/d'
+
+
diff --git a/exp-bbv/tests/logo.include b/exp-bbv/tests/logo.include
new file mode 100644 (file)
index 0000000..e5aac17
--- /dev/null
@@ -0,0 +1,6 @@
+.equ FREQUENT_CHAR,0
+.equ N,1024
+.equ F,64
+.equ THRESHOLD,2
+.equ P_BITS,10
+.equ POSITION_MASK,3
diff --git a/exp-bbv/tests/logo.lzss_new b/exp-bbv/tests/logo.lzss_new
new file mode 100644 (file)
index 0000000..626bf0e
--- /dev/null
@@ -0,0 +1,21 @@
+logo:
+       .byte   255,27,91,48,59,49,59,51,55
+       .byte   159,59,52,55,109,35,204,247,192,7,51
+       .byte   141,48,200,27,27,91,196,7,203,31,28,12,59
+       .byte   15,52,48,109,10,192,247,1,96,26,56,44,156
+       .byte   31,27,91,51,49,109,204,4,65,172,13,36
+       .byte   2,28,16,79,13,32,16,65,147,152,131,52,28,52,204,16
+       .byte   16,12,36,111,57,236,167,28,8,51,22,20,137,85,44,96
+       .byte   0,43,97,214,113,226,200,203,8,212,9,211,16,43,89,245,209
+       .byte   0,128,17,210,24,13,40,28,20,13,44,28,28,240,74,26,91
+       .byte   0,13,80,95,101,135,101,43,85,245,205,205,40,205,20,137,65
+       .byte   0,29,135,66,75,114,83,28,120,15,98,135,109,85,88,247,193
+       .byte   0,232,43,244,151,73,120,61,176,27,95,151,176,18,43,171,202
+       .byte   16,223,22,26,245,90,245,217,63,51,27,86,146,91,176,2
+       .byte   0,12,29,211,200,172,57,23,102,50,246,110,109,236,68,96,94
+       .byte   8,175,10,166,105,20,1,48,51,11,222,31,49,15,211,188
+       .byte   0,175,79,25,86,170,69,82,219,40,82,70,127,8,83,219,35
+       .byte   0,169,85,170,53,24,33,18,104,145,42,200,34,178,104,112,45
+       .byte   0,198,80,178,121,145,74,112,49,248,81,243,40,221,23,255,23
+       .byte   8,2,54,3,36,229,66,10
+logo_end:
diff --git a/exp-bbv/tests/ppc32-linux/Makefile.am b/exp-bbv/tests/ppc32-linux/Makefile.am
new file mode 100644 (file)
index 0000000..d022cf7
--- /dev/null
@@ -0,0 +1,22 @@
+include $(top_srcdir)/Makefile.tool-tests.am
+
+dist_noinst_SCRIPTS = filter_stderr
+
+check_PROGRAMS = \
+       million ll
+
+EXTRA_DIST = \
+          ll.stderr.exp \
+          ll.stdout.exp \
+          ll.post.exp \
+          ll.vgtest \
+          million.stderr.exp \
+          million.post.exp \
+          million.vgtest
+
+AM_CCASFLAGS += -ffreestanding
+
+LDFLAGS += -nostartfiles -nodefaultlibs
+
+ll_SOURCES = ll.S
+million_SOURCES = million.S
diff --git a/exp-bbv/tests/ppc32-linux/filter_stderr b/exp-bbv/tests/ppc32-linux/filter_stderr
new file mode 100644 (file)
index 0000000..1c07666
--- /dev/null
@@ -0,0 +1,5 @@
+#! /bin/sh
+
+../filter_stderr
+
+
diff --git a/exp-bbv/tests/ppc32-linux/ll.S b/exp-bbv/tests/ppc32-linux/ll.S
new file mode 100644 (file)
index 0000000..7621b95
--- /dev/null
@@ -0,0 +1,579 @@
+#
+#  linux_logo in ppc assembly language
+#    based on the code from ll_asm-0.36
+#
+#  By Vince Weaver <vince _at_ deater.net>
+#
+# Modified to remove non-deterministic system calls
+# And to avoid reading from /proc
+#
+
+# offsets into the uname_info table (laid out like the uname syscall result)
+.equ U_SYSNAME,0
+.equ U_NODENAME,65
+.equ U_RELEASE,65*2
+.equ U_VERSION,(65*3)
+.equ U_MACHINE,(65*4)
+.equ U_DOMAINNAME,65*5
+
+# offset of the total-RAM word inside sysinfo_buff
+.equ S_TOTALRAM,16
+
+# Syscall numbers (the commented-out ones are no longer issued)
+.equ SYSCALL_EXIT,     1
+#.equ SYSCALL_READ,     3
+.equ SYSCALL_WRITE,    4
+#.equ SYSCALL_OPEN,     5
+#.equ SYSCALL_CLOSE,    6
+#.equ SYSCALL_SYSINFO,116
+#.equ SYSCALL_UNAME,  122
+
+# File descriptors
+.equ STDIN, 0
+.equ STDOUT,1
+.equ STDERR,2
+# Register numbers: _start loads r25 = bss_begin and r26 = data_begin
+.equ BSS_BEGIN,25
+.equ DATA_BEGIN,26
+
+.include "../logo.include"
+
+       .globl _start   
+_start:        
+
+        #========================
+       # Initialization
+       #========================
+       
+
+#      eieio                           # coolest opcode of all time ;)
+                                       # not needed, but I had to put it here
+       # the hack loading BSS_BEGIN and DATA_BEGIN
+       # saves one instruction on any future load from memory
+       # as we can just do an addi rather than an lis;addi
+
+       lis     25,bss_begin@ha         # r25 (BSS_BEGIN) = address of bss_begin
+       addi    25,25,bss_begin@l
+       
+       lis     26,data_begin@ha        # r26 (DATA_BEGIN) = address of data_begin
+       addi    26,26,data_begin@l
+
+       addi    14,BSS_BEGIN,(out_buffer-bss_begin)
+                                       # r14 = the output buffer
+
+       addi    21,BSS_BEGIN,(text_buf-bss_begin)
+                                       # r21 = text_buf (LZSS history buffer)
+
+       mr      17,14                   # r17 keeps the out_buffer start for later
+
+        #=========================
+       # PRINT LOGO
+       #=========================
+
+# LZSS decompression algorithm implementation
+# by Stephan Walter 2002, based on LZSS.C by Haruhiko Okumura 1989
+# optimized some more by Vince Weaver
+
+
+       li      8,(N-F)                 # r8 = R, the write index into text_buf
+
+       addi    9,DATA_BEGIN,(logo-data_begin)-1
+                                       # logo_pointer (-1: lbzu pre-increments)
+
+       addi    12,DATA_BEGIN,(logo_end-data_begin)-1
+                                       # end of the logo
+
+
+       mr      16,17                   # r16 = output cursor (stbu pre-increments)
+
+decompression_loop:
+       lbzu    10,1(9)                 # load in a byte of 8 flag bits
+                                       # auto-update
+       mr      11,10                   # copy to 11
+       ori     11,11,0xff00            # re-load top as a hackish 
+                                       # 8-bit counter
+
+test_flags:
+       cmpw    0,12,9                  # have we reached the end?
+       ble     done_logo               # ! if so exit
+
+       andi.   13,11,0x1               # test the lowest flag bit
+       srawi   11,11,1                 # shift to the next flag (CR0 is kept)
+       
+       bne     0,discrete_char         # flag bit set: next byte is a literal
+
+offset_length:
+       lbzu    10,1(9)                 # low byte of the 16-bit match code
+       lbzu    24,1(9)                 # high byte
+       slwi    24,24,8
+       or      24,24,10                # r24 = high<<8 | low
+       
+       mr      10,24
+
+       srawi  15,10,P_BITS
+       addi   15,15,THRESHOLD+1 # r15 = (code >> P_BITS) + THRESHOLD + 1
+                                # = match length
+                                
+output_loop:
+       andi.  24,24,(POSITION_MASK<<8+0xff)    # gas: << binds tighter than +
+       lbzx   10,21,24                         # byte from text_buf[r24]
+       addi   24,24,1
+       
+store_byte:
+       stbu   10,1(16)                 # append the byte to the output
+       
+       stbx    10,21,8                 # and record it in text_buf[R]
+       addi    8,8,1
+       andi.   8,8,(N-1)               # R wraps around at N
+
+       addic.  15,15,-1                # one fewer byte left in this match
+       bne     0,output_loop
+       
+       andi.   13,11,0xff00            # counter bits left in this flag byte?
+       bne     test_flags
+       
+       b       decompression_loop      # all 8 flags used: fetch a new flag byte
+
+discrete_char:
+
+       lbzu    10,1(9)                 # the literal byte
+       li      15,1                    # length 1, so store_byte runs once
+
+       b       store_byte
+
+done_logo:
+
+       addi    4,17,1          # output starts at r17+1 (stbu pre-incremented)
+       bl      write_stdout    # and print the logo
+       
+
+        #==========================
+       # First Line
+       #==========================
+
+       # Each source address is passed minus one: strcat pre-increments
+       #==========================
+       # PRINT VERSION
+       #==========================
+       
+#      li      0,SYSCALL_UNAME         # uname syscall
+#      addi    3,BSS_BEGIN,(uname_info-bss_begin)              
+                                       # uname struct
+#      sc                              # do syscall
+
+
+       addi    16,DATA_BEGIN,(uname_info-data_begin)+U_SYSNAME@l-1     
+                                       # os-name from the uname_info table
+       bl      strcat
+       
+       addi    16,DATA_BEGIN,(ver_string-data_begin)-1
+                                       # source is " Version "
+       bl      strcat
+       
+       addi    16,DATA_BEGIN,(uname_info-data_begin)+U_RELEASE@l-1
+                                       # release string from the uname_info table
+       bl      strcat
+       
+       addi    16,DATA_BEGIN,(compiled_string-data_begin)-1
+                                       # source is ", Compiled "
+       bl      strcat
+
+       addi    16,DATA_BEGIN,(uname_info-data_begin)+U_VERSION-1
+                                       # compiled date
+       bl      strcat
+       
+       bl      center_and_print        # write it to screen
+       
+
+       #===============================
+       # Middle-Line
+       #===============================
+       
+       #=========
+       # (disabled) Load /proc/cpuinfo into buffer
+       #=========
+
+#      li      0,SYSCALL_OPEN          # open()
+#      addi    3,DATA_BEGIN,(cpuinfo-data_begin)               
+                                       # '/proc/cpuinfo'
+#      li      4,0                     # O_RDONLY <bits/fcntl.h>
+#      sc                              # syscall.  fd in r3.  
+                                       # we should check that r3>=0
+                                       
+#      mr      13,3                    # save fd in r13
+       
+#      li      0,SYSCALL_READ          # read
+#      addi    4,BSS_BEGIN,(disk_buffer-bss_begin)
+#      li      5,4096                  # 4096 is maximum size of proc file ;)
+#      sc      
+
+#      mr      3,13                    # restore fd
+#      li      0,6                     # close
+#      sc
+
+       #=============
+       # Number of CPUs
+       #=============
+       
+       mr      14,17                   # point output to out_buf
+
+       # Assume 1 CPU for now
+       # my iBook's /proc/cpuinfo does not have a "processor" line ???
+       
+       addi    16,DATA_BEGIN,(one-data_begin)-1
+       bl      strcat
+       
+       #=========
+       # MHz
+       #=========
+       
+       lis     20,('l'<<8)+'o'         # find 'lock ' and grab up to M
+       addi    20,20,('c'<<8)+'k'
+       li      23,'M'                  
+       bl      find_string
+   
+       addi    16,DATA_BEGIN,(megahertz-data_begin)-1
+                                       # print 'MHz '
+       bl      strcat
+   
+  
+       #=========
+       # Chip Name
+       #=========
+       
+       lis     20,('c'<<8)+'p'         # find 'cpu\t: ' and grab up to \n
+       addi    20,20,('u'<<8)+'\t'
+       li      23,'\n'
+       bl      find_string
+       
+       addi    16,DATA_BEGIN,(comma-data_begin)-1
+                                       # print ', '
+       bl      strcat
+       
+       #========
+       # RAM
+       #========
+       
+#      li      0,SYSCALL_SYSINFO       # sysinfo() syscall
+#      addi    3,BSS_BEGIN,(sysinfo_buff-bss_begin)
+                                       # sysinfo_buffer
+
+#      sc
+
+       lwz     4,(sysinfo_buff+S_TOTALRAM-data_begin)(DATA_BEGIN)
+                                       # load bytes of RAM (static table) into r4
+
+       srawi   4,4,20          # divide by 2^20 to get MB
+       li      5,0             # r5 = 0: num_to_ascii appends with strcat
+
+       bl      num_to_ascii
+
+       addi    16,DATA_BEGIN,(ram_comma-data_begin)-1
+                                       # print 'M RAM, '
+
+       bl      strcat
+       
+       #========
+       # Bogomips
+       #========
+       
+       lis     20,('m'<<8)+'i'         # find 'mips' and grab up to \n
+       addi    20,20,('p'<<8)+'s'
+       li      23,'\n'
+       bl      find_string
+      
+       addi    16,DATA_BEGIN,(bogo_total-data_begin)-1
+                                       # print "Bogomips Total"
+       bl      strcat
+
+       bl      center_and_print        # center it
+
+
+       #=================================
+       # Print Host Name
+       #=================================
+       
+       mr      14,17                   # restore out buffer
+       
+       addi    16,DATA_BEGIN,((uname_info-data_begin)+U_NODENAME)-1
+                                       # hostname from the uname_info table
+                                       
+       bl      strcat                          
+       
+       bl      center_and_print
+
+       #================================
+       # Exit
+       #================================
+exit:  
+        li      3,0            # 0 exit value
+       li      0,SYSCALL_EXIT  # put the exit syscall number in r0
+       sc                      # and exit
+
+
+
+
+       #=================================
+       # FIND_STRING 
+       #=================================
+       #   r23 is char to end at
+       #   r20 is the 4-char ascii string to look for
+       #   r14 points at output buffer
+       #   r13,r16,r21 are trashed
+
+find_string:
+               
+       addi    16,DATA_BEGIN,(disk_buffer-data_begin)-1        
+                                       # look in cpuinfo buffer
+                                       # -1 so we can use lwzu/lbzu
+       
+find_loop:
+       lwzu    13,1(16)                # load 32 bits, advancing one byte a time
+       cmpwi   13,0                    # whole word zero (4 NUL bytes): give up
+       beq     done
+       cmpw    13,20                   # compare with our 4 char string
+       bne     find_loop               # ! if no match, keep looping
+
+       
+                                       # ! if we get this far, we matched
+                                       
+       li      21,':'
+find_colon:
+       lbzu    13,1(16)                # repeat till we find colon
+       cmpwi   13,0                    # NUL before the colon: give up
+       beq     done
+       cmpw    13,21
+       bne     find_colon
+
+       addi    16,16,1                 # skip a char [should be space]
+       
+store_loop:     
+        lbzu   13,1(16)                # next byte of the value
+        cmpwi  13,0                    # NUL: return without terminating output
+        beq    done
+        cmpw   13,23                   # is it the end character?
+        beq    almost_done             # ! if so, finish
+        stbu   13,1(14)                # ! if not store and continue
+        b      store_loop
+        
+almost_done:    
+       li      13,0                    # NUL goes after the last stored byte;
+       stb     13,1(14)                #   r14 itself is left on that last byte
+
+done:
+       blr
+
+       #================================
+       # strcat
+       #================================
+       # r13 = temp (trashed)
+       # r16 = source address minus one
+               # r14 = destination minus one; on return, the byte before the NUL
+strcat:
+       lbzu    13,1(16)                # load a byte from [r16+1], advance r16
+       stbu    13,1(14)                # store a byte to [r14+1], advance r14
+       cmpwi   13,0                    # is it zero?
+       bne     strcat                  # ! if not loop
+       subi    14,14,1                 # step back so the next append lands on the NUL
+       blr                             # return
+
+       #==============================
+       # center_and_print
+       #==============================
+       # r14 is end of buffer
+       # r17 is start of buffer
+       # r29 = saved link register
+       # r4-r10, r19-r22, r30 trashed (r23 is overwritten here as well)
+       
+center_and_print:
+
+       mflr    29                      # back up return address
+
+       subf    5,17,14                 # r5 = r14 - r17, the length of
+                                       # the buffered text
+                                       
+       cmpwi   5,80                    # see if we are >80
+        bgt    done_center             # ! if so, bail
+
+       li      4,80                    # 80 column screen
+       subf    4,5,4                   # subtract strlen
+       srawi   23,4,1                  # divide by two
+
+       lis     4,escape@ha             # print the ESC [ prefix
+       addi    4,4,escape@l
+       bl      write_stdout
+
+       mr      4,23                    # the indent computed above
+       li      5,1                     # print to stdout
+       bl      num_to_ascii            # print number
+       
+       lis     4,c@ha                  # print the trailing C of the sequence
+       addi    4,4,c@l
+       bl      write_stdout
+
+
+done_center:   
+
+       addi    4,17,1                  # text starts at out_buffer+1
+       bl      write_stdout            # call write stdout
+
+       lis     4,linefeed@ha
+       addi    4,4,linefeed@l
+
+       mtlr    29                      # restore link register, then fall
+                                       # through into write_stdout and let
+                                       # it return for us
+
+
+
+       #================================
+       # WRITE_STDOUT
+       #================================
+       # r4 has the NUL-terminated string
+       # r0,r3,r4,r5,r6 trashed
+               
+write_stdout:
+       li      0,SYSCALL_WRITE         # write syscall
+       li      3,STDOUT                # stdout        
+       
+       li      5,0                     # string length counter
+strlen_loop:
+       lbzx    6,4,5                   # get byte from (r4+r5)
+               addi    5,5,1                   # increment counter
+       cmpi    0,6,0                   # is it zero?
+       bne     strlen_loop             # ! if not keep counting
+       addi    5,5,-1                  # do not count the NUL itself
+       sc                              # syscall (result is not checked)
+       
+       blr                             # return
+
+
+       ##############################
+       # Num to Ascii
+       ##############################
+       # num is in r4
+       # r5 =0 then strcat, otherwise stdout
+       # r5-r10,r19,r20,r21,r22,r30 trashed (r16 is overwritten too)
+
+num_to_ascii:
+
+       mflr    30                      # save the link register
+
+       addi    16,BSS_BEGIN,(num_to_ascii_end-bss_begin)
+                                       # the end of a backwards growing
+                                       # 10 byte long buffer.  
+                                       
+       li      20,10                   # we will divide by 10
+       mr      19,4                    # load in the value passed
+       
+div_by_10:
+       divw    21,19,20                # divide r19 by r20 put into r21 
+       
+       mullw   22,21,20                # find remainder.  1st q*divisor
+       subf    22,22,19                # then subtract from original = R
+       addi    22,22,0x30              # convert remainder to ascii
+       
+       stbu    22,-1(16)               # Store to backwards buffer
+       
+       mr      19,21                   # move Quotient as new dividend
+       cmpwi   19,0                    # was quotient zero?
+       bne     div_by_10               # ! if not keep dividing
+       
+write_out:
+       cmpwi   5,0                     # r5 != 0 means print to stdout
+       bne     stdout_num              
+
+       addi    16,16,-1                # back up one: strcat pre-increments
+       bl      strcat                  # and strcat it
+
+       mtlr    30                      # restore link register
+
+       blr                             # return
+       
+stdout_num:
+        mr     4,16                    # point to our buffer
+       mtlr    30                      # restore link register
+       b       write_stdout            # stdout will return for us
+
+
+#===========================================================================
+.data
+#===========================================================================
+
+
+data_begin:
+
+.include "../logo.lzss_new"
+
+ver_string:    .ascii  " Version \0"
+compiled_string:       .ascii  ", Compiled \0"
+megahertz:     .ascii  "MHz PPC \0"
+.equ space, ram_comma+6
+.equ comma, ram_comma+5
+linefeed:      .ascii  "\n\0"
+escape:                .ascii  "\033[\0"
+c:             .ascii  "C\0"
+ram_comma:     .ascii  "M RAM, \0"
+
+bogo_total:    .ascii  " Bogomips Total\0"
+
+default_colors:        .ascii  "\033[0m\n\n\0"
+
+cpuinfo:       .ascii  "/proc/cpuinfo\0"
+
+one:   .ascii  "One \0"
+
+disk_buffer:
+.ascii "processor      : 0\n"
+.ascii "cpu            : 745/755\n"
+.ascii "temperature    : 22-24 C (uncalibrated)\n"
+.ascii "clock          : 600.000000MHz\n"
+.ascii "revision       : 51.17 (pvr 0008 3311)\n"
+.ascii "bogomips       : 49.79\n"
+.ascii "timebase       : 24960000\n"
+.ascii "platform       : PowerMac\n"
+.ascii "model          : PowerBook4,1\n"
+.ascii "machine                : PowerBook4,1\n"
+.ascii "motherboard    : PowerBook4,1 MacRISC2 MacRISC Power Macintosh\n"
+.ascii "detected as    : 257 (iBook 2)\n"
+.ascii "pmac flags     : 0000001b\n"
+.ascii "L2 cache       : 256K unified\n"
+.ascii "pmac-generation        : NewWorld\n\0"
+
+uname_info:
+.ascii "Linux\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+.ascii "henparma\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+.ascii "2.6.29\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+.ascii "#1 Wed May 13 15:51:54 UTC 2009\0"
+.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+
+       
+sysinfo_buff:
+.long 0,0,0,0,512*1024*1024,0,0,0
+
+#============================================================================
+#.bss
+#============================================================================
+
+.lcomm bss_begin,0
+.lcomm num_to_ascii_buff,10
+.lcomm num_to_ascii_end,1
+.lcomm  text_buf, (N+F-1)      # These buffers must follow each other
+.lcomm out_buffer,16384
+
+
+
+
+
+
+
+
+
+
diff --git a/exp-bbv/tests/ppc32-linux/ll.post.exp b/exp-bbv/tests/ppc32-linux/ll.post.exp
new file mode 100644 (file)
index 0000000..28c9d41
--- /dev/null
@@ -0,0 +1,49 @@
+T:1:16   :8:10   :6:32   :2:56   :9:48   :10:666   :4:90   :5:36   :7:2   :3:45   
+T:8:5   :6:20   :2:34   :9:80   :10:775   :4:42   :5:22   :7:1   :3:21   
+T:8:5   :6:16   :2:27   :9:64   :10:824   :4:30   :5:18   :7:1   :3:15   
+T:8:5   :6:10   :2:18   :9:80   :10:865   :4:6   :5:12   :7:1   :3:3   
+T:8:5   :6:10   :2:18   :9:96   :10:858   :5:12   :7:1   
+T:8:5   :6:10   :2:18   :9:80   :10:865   :4:6   :5:12   :7:1   :3:3   
+T:6:6   :2:9   :9:36   :10:943   :5:6   
+T:8:5   :6:8   :2:15   :9:92   :10:869   :5:10   :7:1   
+T:6:14   :2:21   :9:112   :10:839   :5:14   
+T:8:5   :6:6   :2:12   :9:64   :10:902   :5:10   :7:1   
+T:8:5   :6:8   :2:15   :9:80   :10:883   :5:8   :7:1   
+T:6:8   :2:12   :9:64   :10:908   :5:8   
+T:6:6   :2:9   :9:48   :10:931   :5:6   
+T:8:5   :6:4   :2:9   :9:48   :10:927   :5:6   :7:1   
+T:6:6   :2:9   :9:48   :10:931   :5:6   
+T:8:5   :6:6   :2:12   :9:64   :10:904   :5:8   :7:1   
+T:6:2   :2:3   :9:16   :10:977   :5:2   
+T:8:5   :6:12   :2:21   :9:96   :10:842   :4:6   :5:14   :7:1   :3:3   
+T:6:6   :2:9   :9:48   :10:931   :5:6   
+T:6:6   :2:9   :9:48   :10:931   :5:6   
+T:8:5   :6:14   :2:24   :9:112   :10:819   :4:6   :5:16   :7:1   :3:3   
+T:8:5   :6:6   :2:12   :9:64   :10:904   :5:8   :7:1   
+T:6:6   :2:9   :9:48   :10:931   :5:6   
+T:8:5   :6:8   :2:15   :9:80   :10:881   :5:10   :7:1   
+T:8:5   :6:10   :2:18   :9:96   :10:858   :5:12   :7:1   
+T:6:10   :2:15   :9:80   :10:885   :5:10   
+T:8:5   :6:10   :2:15   :9:64   :10:470   :4:6   :5:12   :7:1   :3:3   :11:2   :12:7   :13:405   
+T:13:1000   
+T:13:1000   
+T:13:1000   
+T:13:1000   
+T:13:1000   
+T:13:1000   
+T:13:1000   
+T:13:1000   
+T:13:1000   
+T:13:1000   
+T:16:2   :19:2   :20:2   :21:2   :22:2   :23:1   :17:268   :18:10   :24:4   :25:6   :26:3   :30:3   :31:2   :12:28   :13:636   :14:8   :15:4   :27:12   :28:2   :29:3   
+T:33:3   :34:4   :46:2   :47:4   :48:2   :49:4   :53:2   :54:4   :35:8   :37:246   :36:168   :38:8   :40:27   :39:22   :41:8   :44:51   :42:38   :43:34   :45:6   :17:116   :18:10   :32:10   :13:183   :14:4   :15:2   :27:12   :50:16   :28:2   :51:2   :52:2   
+T:55:2   :56:1   :35:4   :37:381   :36:256   :38:4   :40:12   :39:10   :41:4   :44:15   :42:12   :43:10   :45:3   :17:64   :18:2   :24:4   :25:6   :26:3   :30:3   :31:2   :12:28   :13:148   :14:6   :15:3   :27:12   :28:2   :29:3   
+
+
+# Thread 1
+#   Total intervals: 40 (Interval Size 1000)
+#   Total instructions: 40330
+#   Total reps: 0
+#   Unique reps: 0
+#   Total fldcw instructions: 0
+
diff --git a/exp-bbv/tests/ppc32-linux/ll.stderr.exp b/exp-bbv/tests/ppc32-linux/ll.stderr.exp
new file mode 100644 (file)
index 0000000..60e953f
--- /dev/null
@@ -0,0 +1,6 @@
+# Thread 1
+#   Total intervals: 40 (Interval Size 1000)
+#   Total instructions: 40330
+#   Total reps: 0
+#   Unique reps: 0
+#   Total fldcw instructions: 0
diff --git a/exp-bbv/tests/ppc32-linux/ll.stdout.exp b/exp-bbv/tests/ppc32-linux/ll.stdout.exp
new file mode 100644 (file)
index 0000000..b296561
--- /dev/null
@@ -0,0 +1,16 @@
+\e[0;1;37;47m#################################################################\e[0;30;47m#####\e[1;37m#########\e[1;37;40m
+\e[0;1;37;47m################################################################\e[0;30;47m#######\e[1;37m########\e[1;37;40m
+\e[0;1;37;47m###################\e[31m#\e[37m############################################\e[0;30;47m##\e[1;37mO\e[0;30;47m#\e[1;37mO\e[0;30;47m##\e[1;37m########\e[1;37;40m
+\e[0;1;37;47m##\e[0;30;47m######\e[1;37m##########\e[31m##\e[0;30;47m#\e[1;37m###########################################\e[0;30;47m#\e[1;33m#####\e[0;30;47m#\e[1;37m########\e[1;37;40m
+\e[0;1;37;47m####\e[0;30;47m##\e[1;37m#############\e[0;30;47m#\e[1;37m##########################################\e[0;30;47m##\e[1;37m##\e[33m###\e[37m##\e[0;30;47m##\e[1;37m######\e[1;37;40m
+\e[0;1;37;47m####\e[0;30;47m##\e[1;37m#########\e[31m###\e[37m###\e[0;30;47m###\e[1;37m#\e[0;30;47m####\e[1;37m###\e[0;30;47m###\e[1;37m####\e[0;30;47m###\e[1;37m##\e[0;30;47m#####\e[1;37m#\e[0;30;47m######\e[1;37m#####\e[0;30;47m#\e[1;37m##########\e[0;30;47m##\e[1;37m#####\e[1;37;40m
+\e[0;1;37;47m####\e[0;30;47m##\e[1;37m########\e[31m#\e[37m##\e[31m#\e[0;30;47m#\e[1;37m###\e[0;30;47m###\e[1;37m####\e[0;30;47m##\e[1;37m##\e[0;30;47m##\e[1;37m#####\e[0;30;47m##\e[1;37m####\e[0;30;47m##\e[1;37m###\e[0;30;47m##\e[1;37m#######\e[0;30;47m#\e[1;37m############\e[0;30;47m##\e[1;37m####\e[1;37;40m
+\e[0;1;37;47m####\e[0;30;47m##\e[1;37m#######\e[31m#\e[37m###\e[31m#\e[0;30;47m#\e[1;37m###\e[0;30;47m##\e[1;37m#####\e[0;30;47m##\e[1;37m##\e[0;30;47m##\e[1;37m#####\e[0;30;47m##\e[1;37m######\e[0;30;47m###\e[1;37m#########\e[0;30;47m#\e[1;37m############\e[0;30;47m###\e[1;37m###\e[1;37;40m
+\e[0;1;37;47m####\e[0;30;47m##\e[1;37m##########\e[31m##\e[0;30;47m#\e[1;37m###\e[0;30;47m##\e[1;37m#####\e[0;30;47m##\e[1;37m##\e[0;30;47m##\e[1;37m#####\e[0;30;47m##\e[1;37m######\e[0;30;47m###\e[1;37m########\e[33m##\e[0;30;47m#\e[1;37m###########\e[0;30;47m##\e[1;33m#\e[37m###\e[1;37;40m
+\e[0;1;37;47m####\e[0;30;47m##\e[1;37m#######\e[0;30;47m#\e[1;37m#\e[31m##\e[0;30;47m#\e[1;37m####\e[0;30;47m##\e[1;37m#####\e[0;30;47m##\e[1;37m##\e[0;30;47m##\e[1;37m#####\e[0;30;47m##\e[1;37m#####\e[0;30;47m##\e[1;37m#\e[0;30;47m##\e[1;37m#####\e[33m######\e[0;30;47m#\e[1;37m#######\e[30m#\e[33m######\e[37m#\e[1;37;40m
+\e[0;1;37;47m####\e[0;30;47m##\e[1;37m######\e[0;30;47m##\e[1;37m#\e[31m##\e[0;30;47m#\e[1;37m#\e[0;30;47m#\e[1;37m##\e[0;30;47m##\e[1;37m#####\e[0;30;47m##\e[1;37m##\e[0;30;47m###\e[1;37m###\e[0;30;47m###\e[1;37m####\e[0;30;47m##\e[1;37m###\e[0;30;47m##\e[1;37m####\e[33m#######\e[0;30;47m#\e[1;37m#####\e[0;30;47m#\e[1;33m#######\e[37m#\e[1;37;40m
+\e[0;1;37;47m##\e[0;30;47m############\e[1;37m##\e[0;30;47m###\e[1;37m##\e[0;30;47m####\e[1;37m###\e[0;30;47m####\e[1;37m###\e[0;30;47m####\e[1;37m#\e[0;30;47m###\e[1;37m#\e[0;30;47m#####\e[1;37m#\e[0;30;47m######\e[1;37m###\e[33m#####\e[30m#\e[0;30;47m#####\e[1m#\e[33m#####\e[37m###\e[1;37;40m
+
+\e[9CLinux Version 2.6.29, Compiled #1 Wed May 13 15:51:54 UTC 2009
+\e[9COne 600.000000MHz PPC 745/755, 512M RAM, 49.79 Bogomips Total
+\e[36Chenparma
diff --git a/exp-bbv/tests/ppc32-linux/ll.vgtest b/exp-bbv/tests/ppc32-linux/ll.vgtest
new file mode 100644 (file)
index 0000000..6031a58
--- /dev/null
@@ -0,0 +1,5 @@
+prog: ll
+vgopts: --interval-size=1000 --bb-out-file=ll.out.bb
+post:  cat ll.out.bb
+cleanup: rm ll.out.bb
+
diff --git a/exp-bbv/tests/ppc32-linux/million.S b/exp-bbv/tests/ppc32-linux/million.S
new file mode 100644 (file)
index 0000000..e334e86
--- /dev/null
@@ -0,0 +1,23 @@
+
+            # count for 1 million instructions
+            #   total is 3 + 499997*2 + 3
+            
+       .globl _start   
+_start:        
+       nop                             # to give us an even million
+       lis     15,499997@ha            # load high 16-bits of counter
+       addi    15,15,499997@l          # load low 16-bits of counter
+test_loop:     
+       addic.  15,15,-1                # decrement counter             
+       bne     0,test_loop             # loop until zero
+
+       #================================
+       # Exit
+       #================================
+
+exit:
+        li      3,0             # 0 exit value
+       li      0,1             # put the exit syscall number (1) in r0
+       sc                      # and exit
+                       
+
diff --git a/exp-bbv/tests/ppc32-linux/million.post.exp b/exp-bbv/tests/ppc32-linux/million.post.exp
new file mode 100644 (file)
index 0000000..260eee6
--- /dev/null
@@ -0,0 +1,18 @@
+T:1:5   :2:99996   
+T:2:100000   
+T:2:100000   
+T:2:100000   
+T:2:100000   
+T:2:100000   
+T:2:100000   
+T:2:100000   
+T:2:100000   
+
+
+# Thread 1
+#   Total intervals: 10 (Interval Size 100000)
+#   Total instructions: 1000000
+#   Total reps: 0
+#   Unique reps: 0
+#   Total fldcw instructions: 0
+
diff --git a/exp-bbv/tests/ppc32-linux/million.stderr.exp b/exp-bbv/tests/ppc32-linux/million.stderr.exp
new file mode 100644 (file)
index 0000000..adeb35d
--- /dev/null
@@ -0,0 +1,6 @@
+# Thread 1
+#   Total intervals: 10 (Interval Size 100000)
+#   Total instructions: 1000000
+#   Total reps: 0
+#   Unique reps: 0
+#   Total fldcw instructions: 0
diff --git a/exp-bbv/tests/ppc32-linux/million.vgtest b/exp-bbv/tests/ppc32-linux/million.vgtest
new file mode 100644 (file)
index 0000000..c366a8b
--- /dev/null
@@ -0,0 +1,5 @@
+prog: million 
+vgopts: --interval-size=100000 --bb-out-file=million.out.bb
+post:  cat million.out.bb
+cleanup: rm million.out.bb
+
diff --git a/exp-bbv/tests/x86-linux/Makefile.am b/exp-bbv/tests/x86-linux/Makefile.am
new file mode 100644 (file)
index 0000000..f2971e1
--- /dev/null
@@ -0,0 +1,24 @@
+include $(top_srcdir)/Makefile.tool-tests.am
+
+dist_noinst_SCRIPTS = filter_stderr
+
+check_PROGRAMS = \
+       ll clone_test
+
+EXTRA_DIST = \
+          clone_test.stderr.exp \
+          clone_test.post.exp \
+          clone_test.vgtest \
+          ll.stderr.exp \
+          ll.stdout.exp \
+          ll.post.exp \
+          ll.vgtest
+
+AM_CCASFLAGS += -ffreestanding
+
+LDFLAGS += @FLAG_M32@ -static -nostartfiles -nodefaultlibs
+
+clone_test_SOURCES = clone_test.S
+ll_SOURCES = ll.S
+
+AM_CCASFLAGS += @FLAG_M32@
diff --git a/exp-bbv/tests/x86-linux/clone_test.S b/exp-bbv/tests/x86-linux/clone_test.S
new file mode 100644 (file)
index 0000000..c96204a
--- /dev/null
@@ -0,0 +1,95 @@
+            # count for ~1 million instructions thread 1
+            # count for ~2 million instructions thread 2
+            # count for an additional ~500 thousand instructions each before exit
+            
+       .globl _start   
+_start:        
+
+       #################################################
+        # 1000 cycles in initial thread                 #
+       #################################################
+       
+       xor     %eax,%eax
+       mov     $499,%ecx               # load counter
+initial_loop:  
+       dec     %ecx                    # repeat count times
+       jnz     initial_loop
+
+
+       #####################################################
+       # Spawn a thread!                                   #
+       #####################################################
+clone:
+       mov    $120,%eax                # clone syscall
+       
+       # Note, clone syscall is different than the glibc implementation
+       
+#      int clone (flags, stack_pointer,parent_tidptr,child_tidptr,tls)
+
+
+                                       # Flags in 
+                                       #/usr/include/bits/sched.h
+                                       # CLONE_THREAD 0x10000
+                                       # CLONE_SIGHAND 0x800
+                                       # CLONE_VM      0x100
+                                       # above must be called together
+                                       # Below required for Valgrind
+                                       # CLONE_FS       0x200
+                                       # CLONE_FILES    0x400
+
+       mov    $0x10f00,%ebx
+       
+
+       mov    $(new_stack+4096),%ecx                   # new stack
+
+       
+
+       mov    $0,%edx          # args (none)
+
+       int    $0x80
+       
+       cmp   $0,%eax           # are we in new thread?
+       jz    thread2           # if so, jump to thread2
+
+
+       ###############################################
+       # thread1                                     #
+       ###############################################
+
+thread1:
+
+       mov     $499997,%ecx            # load counter
+thread1_loop:  
+       dec     %ecx                    # repeat count times
+       jnz     thread1_loop
+
+       xor     %ebx,%ebx               # we return 0
+       jmp    exit
+       
+thread2:       
+       mov     $999997,%ecx            # load counter
+thread2_loop:  
+       dec     %ecx                    # repeat count times
+       jnz     thread2_loop    
+       
+       mov    $5,%ebx                  # we return 5
+       
+       
+       #================================
+       # Exit
+       #================================
+exit:
+
+       # count an additional ~500 thousand instructions (250000 iterations x 2)
+
+       mov     $250000,%ecx            # load counter
+exit_loop:     
+       dec     %ecx                    # repeat count times
+       jnz     exit_loop       
+
+actual_exit:
+       mov     $1,%eax         # put exit syscall number (1) in eax
+       int     $0x80
+
+.bss
+.lcomm new_stack,4096
diff --git a/exp-bbv/tests/x86-linux/clone_test.post.exp b/exp-bbv/tests/x86-linux/clone_test.post.exp
new file mode 100644 (file)
index 0000000..55bcf61
--- /dev/null
@@ -0,0 +1,58 @@
+T 4    996    5    2    3    98991   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 1001    2    3    98994   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+
+
+# Thread 1
+#   Total intervals: 15 (Interval Size 100000)
+#   Total instructions: 1501007
+#   Total reps: 0
+#   Unique reps: 0
+#   Total fldcw instructions: 0
+
+T 2    3    99996   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 99996    4   
+T 100000   
+T 100000   
+T 100000   
+T 100000   
+T 99998    2   
+
+
+# Thread 2
+#   Total intervals: 25 (Interval Size 100000)
+#   Total instructions: 2500001
+#   Total reps: 0
+#   Unique reps: 0
+#   Total fldcw instructions: 0
+
diff --git a/exp-bbv/tests/x86-linux/clone_test.stderr.exp b/exp-bbv/tests/x86-linux/clone_test.stderr.exp
new file mode 100644 (file)
index 0000000..6a917a2
--- /dev/null
@@ -0,0 +1,12 @@
+# Thread 1
+#   Total intervals: 15 (Interval Size 100000)
+#   Total instructions: 1501007
+#   Total reps: 0
+#   Unique reps: 0
+#   Total fldcw instructions: 0
+# Thread 2
+#   Total intervals: 25 (Interval Size 100000)
+#   Total instructions: 2500001
+#   Total reps: 0
+#   Unique reps: 0
+#   Total fldcw instructions: 0
diff --git a/exp-bbv/tests/x86-linux/clone_test.vgtest b/exp-bbv/tests/x86-linux/clone_test.vgtest
new file mode 100644 (file)
index 0000000..9f5cd4d
--- /dev/null
@@ -0,0 +1,5 @@
+prog: clone_test
+vgopts: --interval-size=100000 --bb-out-file=clone_test.out.bb --pc-out-file=clone_test.out.pc
+post:  cat clone_test.out.bb clone_test.out.bb.2 | ../filter_bb
+cleanup: rm -f clone_test.out.bb clone_test.out.bb.2 clone_test.out.pc
+
diff --git a/exp-bbv/tests/x86-linux/filter_stderr b/exp-bbv/tests/x86-linux/filter_stderr
new file mode 100644 (file)
index 0000000..1c07666
--- /dev/null
@@ -0,0 +1,5 @@
+#! /bin/sh
+
+../filter_stderr
+
+
diff --git a/exp-bbv/tests/x86-linux/ll.S b/exp-bbv/tests/x86-linux/ll.S
new file mode 100644 (file)
index 0000000..8958521
--- /dev/null
@@ -0,0 +1,608 @@
+#
+#  linux_logo in i386 assembly language
+#    based on the code from ll_asm-0.36
+#
+#  By Vince Weaver <vince _at_ deater.net>
+#
+# Modified to remove non-deterministic system calls
+# And to avoid reading from /proc
+#
+
+.include "../logo.include"
+
+# offsets into the results returned by the uname syscall
+.equ U_SYSNAME,0
+.equ U_NODENAME,65
+.equ U_RELEASE,65*2
+.equ U_VERSION,(65*3)
+.equ U_MACHINE,(65*4)
+.equ U_DOMAINNAME,65*5
+
+# offset into the results returned by the sysinfo syscall
+.equ S_TOTALRAM,16
+
+# Syscalls
+.equ SYSCALL_EXIT,     1
+.equ SYSCALL_WRITE,    4
+
+#
+.equ STDIN,0
+.equ STDOUT,1
+.equ STDERR,2
+
+       .globl _start   
+_start:        
+       #=========================
+       # PRINT LOGO
+       #=========================
+
+# LZSS decompression algorithm implementation
+# by Stephan Walter 2002, based on LZSS.C by Haruhiko Okumura 1989
+# optimized some more by Vince Weaver
+
+       # we used to fill the buffer with FREQUENT_CHAR
+       # but, that only gains us one byte of space in the lzss image.
+       # the lzss algorithm does automatic RLE... pretty clever
+       # so we compress with NUL as FREQUENT_CHAR and it is pre-done for us
+
+       mov     $(N-F), %bp             # R
+
+       mov     $logo, %esi             # %esi points to logo (for lodsb)
+
+       mov     $out_buffer, %edi       # point to out_buffer
+       push    %edi                    # save this value for later
+
+decompression_loop:    
+       lodsb                   # load in a byte
+
+       mov     $0xff, %bh      # re-load top as a hackish 8-bit counter
+       mov     %al, %bl        # move in the flags
+
+test_flags:
+       cmp     $logo_end, %esi # have we reached the end?
+       je      done_logo       # if so, exit
+
+       shr     $1, %ebx        # shift bottom bit into carry flag
+       jc      discrete_char   # if set, we jump to discrete char
+
+offset_length:
+       lodsw                   # get match_length and match_position
+       mov %eax,%edx           # copy to edx
+                               # no need to mask dx, as we do it
+                               # by default in output_loop
+       
+       shr $(P_BITS),%eax      
+       add $(THRESHOLD+1),%al
+       mov %al,%cl             # cl = (ax >> P_BITS) + THRESHOLD + 1
+                               #                       (=match_length)
+               
+output_loop:
+       and     $POSITION_MASK,%dh      # mask it
+       mov     text_buf(%edx), %al     # load byte from text_buf[]
+       inc     %edx                    # advance pointer in text_buf
+store_byte:    
+       stosb                           # store it
+       
+       mov     %al, text_buf(%ebp)     # store also to text_buf[r]
+       inc     %ebp                    # r++
+       and     $(N-1), %bp             # mask r
+
+       loop    output_loop             # repeat until k>j
+       
+       or      %bh,%bh                 # if 0 we shifted through 8 and must
+       jnz     test_flags              # re-load flags
+       
+       jmp     decompression_loop
+
+discrete_char:
+       lodsb                           # load a byte
+       inc     %ecx                    # we set ecx to one so byte
+                                       # will be output once
+                                       # (ecx is 0 after every "loop"; on first entry this relies on ecx being 0 at _start)
+                                       
+       jmp     store_byte              # and cleverly store it
+
+
+# end of LZSS code
+
+done_logo:
+
+       pop     %ebp                    # get out_buffer and keep in bp
+       mov     %ebp,%ecx               # move out_buffer to ecx
+
+       call    write_stdout            # print the logo
+
+       #
+       #  Setup
+       #
+setup:
+       mov     $strcat,%edx            # use edx as call pointer
+
+       
+       #==========================
+       # PRINT VERSION
+       #==========================
+       
+#      push    $SYSCALL_UNAME          # uname syscall
+#      pop     %eax                    # in 3 bytes    
+#      mov     $uname_info,%ebx        # uname struct
+#      int     $0x80                   # do syscall
+
+       mov     %ebp,%edi               # point %edi to out_buffer
+               
+       mov     $(uname_info+U_SYSNAME),%esi    # os-name from uname "Linux"
+       call    *%edx                   # call strcat
+
+       mov     $ver_string,%esi                # source is " Version "
+       call    *%edx                           # call strcat
+       push    %esi                            # save our .txt pointer
+       
+       mov     $(uname_info+U_RELEASE),%esi    # version from uname "2.4.1"
+       call    *%edx                           # call strcat
+       
+       pop     %esi                    # restore .txt pointer
+                                       # source is ", Compiled "
+       call    *%edx                   # call strcat
+       push    %esi                    # store for later
+
+       mov     $(uname_info+U_VERSION),%esi    # compiled date
+       call    *%edx                   # call strcat
+
+       mov     %ebp,%ecx               # move out_buffer to ecx
+
+       mov     $0xa,%ax                # store linefeed on end
+       stosw                           # and zero                        
+
+       call    *%edx                   # call strcat
+       
+       call    center_and_print        # center and print
+
+       #===============================
+       # Middle-Line
+       #===============================
+       
+       #=========
+       # Load /proc/cpuinfo into buffer
+       #=========
+
+       push    %edx                    # save call pointer
+
+#      push    $SYSCALL_OPEN           # load 5 [ open() ]
+#      pop     %eax                    # in 3 bytes
+       
+#      mov     $cpuinfo,%ebx           # '/proc/cpuinfo'
+#      xor     %ecx,%ecx               # 0 = O_RDONLY <bits/fcntl.h>
+#      cdq                             # clear edx in clever way
+#      int     $0x80                   # syscall.  fd in eax.  
+                                       # we should check that eax>=0
+                                       
+#      mov     %eax,%ebx               # save our fd
+       
+#      push    $SYSCALL_READ           # load 3 = read()
+#      pop     %eax                    # in 3 bytes
+       
+       mov     $disk_buffer,%ecx
+
+#      mov     $16,%dh                 # 4096 is maximum size of proc file #)
+                                       # we load sneakily by knowing
+                                       # 16<<8 = 4096. be sure edx clear
+
+
+#      int     $0x80
+
+#      push    $SYSCALL_CLOSE          # close (to be correct)
+#      pop     %eax
+#      int     $0x80                   
+
+       #=============
+       # Number of CPUs
+       #=============
+number_of_cpus:
+
+       xor     %ebx,%ebx               # chip count
+       
+                                       # $disk_buffer still in ecx
+bogo_loop:     
+       mov     (%ecx), %eax            # load 4 bytes into eax
+       inc     %ecx                    # increment pointer
+       
+       cmp     $0,%al                  # check for end of file
+       je      done_bogo
+       
+       cmp     $('o'<<24+'g'<<16+'o'<<8+'b'),%eax      
+                                       # "bogo" in little-endian
+                                       
+       jne     bogo_loop               # if not equal, keep going
+       
+       inc     %ebx                    # otherwise, we have a bogo
+       inc     %ebx                    # times two for future magic
+       jmp     bogo_loop
+
+done_bogo:
+       lea     one-6(%ebx,%ebx,2), %esi        
+                                       # Load into esi
+                                       # [one]+(num_cpus*6)
+                                       #
+                                       # the above multiplies by three
+                                       # esi = (ebx+(ebx*2))
+                                       # and we double-incremented ebx 
+                                       # earlier
+        
+       mov     %ebp,%edi               # move output buffer to edi
+
+       pop     %edx                    # restore call pointer
+       call    *%edx                   # copy it (call strcat)
+
+       mov     $' ',%al                # print a space
+       stosb
+
+       push %ebx                       # store cpu count
+       push %edx                       # store strcat pointer
+
+       #=========
+       # MHz
+       #=========
+print_mhz:
+       mov     $('z'<<24+'H'<<16+'M'<<8+' '),%ebx      
+                                       # find ' MHz' and grab up to .
+                                       # we are little endian
+       mov     $'.',%ah
+
+       # below is same as "sub $(strcat-find_string),%edx
+       # gas won't let us force the one-byte constant
+       .byte 0x83,0xEA,strcat-find_string   
+       
+       call    *%edx                   # call find string
+
+       mov     %ebx,%eax               # clever way to get MHz in, sadly
+       ror     $8,%eax                 # not any smaller than a mov
+       stosl                           
+
+       #=========
+       # Chip Name
+       #=========
+chip_name:     
+
+       # because of ugly newer cpuinfos from intel I had to hack this
+       # now we grab the first two words in the name field and use that
+       # it works on all recent Intel and AMD chips.  Older things
+       # might choke
+
+       mov     $('e'<<24+'m'<<16+'a'<<8+'n'),%ebx      
+                                       # find 'name\t: ' and grab up to \n
+                                       # we are little endian
+       mov     $' ',%ah
+       call    *%edx                   # print first word
+       stosb                           # store a space
+       call    skip_spaces             # print next word
+
+       pop     %edx
+       pop     %ebx                    # restore chip count
+       pop     %esi
+       
+       call    *%edx                   # ' Processor'
+       cmpb    $2,%bl  
+       jne     print_s
+       inc     %esi                    # if singular, skip the s
+print_s:       
+       call    *%edx                   # 's, '
+
+       push    %esi                    # save the values again for later
+       push    %edx
+       
+       #========
+       # RAM
+       #========
+       
+#      push    $SYSCALL_SYSINFO        # sysinfo() syscall
+#      pop     %eax    
+#      mov     $sysinfo_buff,%ebx      
+#      int     $0x80
+       
+       mov     (sysinfo_buff+S_TOTALRAM),%eax  # size in bytes of RAM
+       shr     $20,%eax                # divide by 1024*1024 to get M
+       adc     $0, %eax                # round 
+
+
+       call num_to_ascii
+       
+       pop  %edx                       # restore strcat pointer
+       
+       pop     %esi                    # print 'M RAM, '
+       call    *%edx                   # call strcat
+
+       push    %esi
+       
+
+       #========
+       # Bogomips
+       #========
+       
+       mov     $('s'<<24+'p'<<16+'i'<<8+'m'),%ebx              
+                                       # find 'mips\t: ' and grab up to \n
+       mov     $0xa,%ah
+       call    find_string
+
+       pop     %esi                    # bogo total follows RAM 
+
+       call    *%edx                   # call strcat
+
+       push    %esi
+
+       mov     %ebp,%ecx               # point ecx to out_buffer
+
+
+       call    center_and_print        # center and print
+
+       #=================================
+       # Print Host Name
+       #=================================
+
+       mov     %ebp,%edi                 # point to output_buffer
+       
+       mov     $(uname_info+U_NODENAME),%esi   # host name from uname()
+       call    *%edx                     # call strcat
+       
+                                       # ecx is unchanged
+       call    center_and_print        # center and print
+       
+       pop     %ecx                    # (.txt) pointer to default_colors
+       
+       call    write_stdout
+       
+
+       #================================
+       # Exit
+       #================================
+exit:
+       xor     %ebx,%ebx
+       xor     %eax,%eax
+       inc     %eax                    # put exit syscall number (1) in eax
+       int     $0x80                   # and exit
+
+
+       #=================================
+       # FIND_STRING 
+       #=================================
+       #   ah is char to end at
+       #   ebx is 4-char ascii string to look for
+       #   edi points at output buffer
+
+find_string:
+                                       
+       mov     $disk_buffer-1,%esi     # look in cpuinfo buffer
+find_loop:
+       inc     %esi
+       cmpb    $0, (%esi)              # are we at EOF?
+       je      done                    # if so, done
+
+       cmp     (%esi), %ebx            # do the strings match?
+       jne     find_loop               # if not, loop
+       
+                                       # ! if we get this far, we matched
+
+find_colon:                            
+       lodsb                           # repeat till we find colon
+       cmp     $0,%al                  # this is actually smaller code
+       je      done                    #   than an or ecx/repnz scasb
+       cmp     $':',%al
+       jne     find_colon
+
+
+skip_spaces:
+        lodsb                           # skip spaces
+       cmp     $0x20,%al               # Loser new intel chips have lots??
+        je      skip_spaces
+
+store_loop:     
+       cmp     $0,%al
+       je      done
+       cmp     %ah,%al                 # is it end string?
+       je      almost_done             # if so, finish
+       cmp     $'\n',%al               # also end if linefeed
+       je      almost_done
+       stosb                           # if not store and continue
+       lodsb                           # load value    
+       jmp     store_loop
+        
+almost_done:    
+
+       movb     $0, (%edi)             # replace last value with NUL 
+done:
+       ret
+
+
+       #================================
+       # strcat
+       #================================
+
+strcat:
+       lodsb                           # load a byte from [ds:esi]
+       stosb                           # store a byte to [es:edi]
+       cmp     $0,%al                  # is it zero?
+       jne     strcat                  # if not loop
+       dec     %edi                    # point to one less than null
+       ret                             # return
+
+       #==============================
+       # center_and_print
+       #==============================
+       # string to center in ecx
+
+center_and_print:
+       push    %edx
+       push    %ecx                    # save the string pointer
+       inc     %edi                    # move to a clear buffer
+       push    %edi                    # save for later
+
+       mov     $('['<<8+27),%ax        # we want to output ^[[
+       stosw
+
+       cdq                             # clear dx
+       
+str_loop2:                             # find end of string    
+       inc     %edx
+       cmpb    $0,(%ecx,%edx)          # repeat till we find zero
+       jne     str_loop2
+       
+       push    $81                     # one added to cheat, we don't
+                                       # count the trailing '\n'
+       pop     %eax
+       
+       cmp     %eax,%edx               # see if we are >=80
+       jl      not_too_big             # if so, don't center
+       push    $80
+       pop     %edx
+       
+not_too_big:                   
+       sub     %edx,%eax               # subtract size from 80
+       
+       shr     %eax                    # then divide by 2
+       
+       call    num_to_ascii            # print number of spaces
+       mov     $'C',%al                # tack a 'C' on the end
+                                       # ah is zero from num_to_ascii
+       stosw                           # store C and a NULL
+       pop  %ecx                       # pop the pointer to ^[[xC
+       
+       call write_stdout               # write to the screen
+       
+done_center:
+       pop  %ecx                       # restore string pointer
+                                       # and trickily print the real string
+
+       pop %edx
+
+       #================================
+       # WRITE_STDOUT
+       #================================
+       # ecx has string
+       # eax,ebx,ecx,edx trashed
+write_stdout:
+       push    %edx
+       push    $SYSCALL_WRITE          # put 4 in eax (write syscall)
+       pop     %eax                    # in 3 bytes of code
+       
+       cdq                             # clear edx
+       
+       xor     %ebx,%ebx               # put 1 in ebx (stdout)
+       inc     %ebx                    # in 3 bytes of code
+       
+                       # another way of doing this:    lea 1(%edx), %ebx
+
+str_loop1:
+       inc     %edx
+       cmpb    $0,(%ecx,%edx)          # repeat till zero
+       jne     str_loop1
+
+       int     $0x80                   # run the syscall
+       pop     %edx
+       ret
+
+       ##############################
+       # num_to_ascii
+       ##############################
+       # eax = value to print
+       # edi points to where we want it
+       
+num_to_ascii:
+       push    $10
+       pop     %ebx
+       xor     %ecx,%ecx       # clear ecx
+div_by_10:
+       cdq                     # clear edx (sign-extends eax; valid while eax < 2^31)
+       div     %ebx            # divide
+       push    %edx            # save for later
+       inc     %ecx            # add to length counter
+       or      %eax,%eax       # was Q zero?
+       jnz     div_by_10       # if not divide again
+       
+write_out:
+       pop     %eax            # restore in reverse order
+       add     $0x30, %al      # convert to ASCII
+       stosb                   # save digit
+       loop    write_out       # loop till done
+       ret
+
+#===========================================================================
+#      section .data
+#===========================================================================
+.data
+
+ver_string:    .ascii  " Version \0"
+compiled_string:       .ascii  ", Compiled \0"
+processor:             .ascii " Processor\0"
+s_comma:               .ascii "s, \0"
+ram_comma:     .ascii  "M RAM, \0"
+bogo_total:    .ascii  " Bogomips Total\n\0"
+
+default_colors:        .ascii "\033[0m\n\n\0"
+
+cpuinfo:       .ascii  "/proc/cpuinfo\0"
+
+
+one:   .ascii  "One\0\0\0"
+two:   .ascii  "Two\0\0\0"
+three: .ascii  "Three\0"
+four:  .ascii  "Four\0"
+
+.include       "../logo.lzss_new"
+
+disk_buffer:
+.ascii "processor      : 0\n"
+.ascii "vendor_id      : AuthenticAMD\n"
+.ascii "cpu family     : 6\n"
+.ascii "model          : 6\n"
+.ascii "model name     : AMD Athlon(tm) XP 2000+\n"
+.ascii "stepping       : 2\n"
+.ascii "cpu MHz                : 1665.267\n"
+.ascii "cache size     : 256 KB\n"
+.ascii "fdiv_bug       : no\n"
+.ascii "hlt_bug                : no\n"
+.ascii "f00f_bug       : no\n"
+.ascii "coma_bug       : no\n"
+.ascii "fpu            : yes\n"
+.ascii "fpu_exception  : yes\n"
+.ascii "cpuid level    : 1\n"
+.ascii "wp             : yes\n"
+.ascii "flags          : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 mmx fxsr sse syscall mmxext 3dnowext 3dnow up\n"
+.ascii "bogomips       : 3330.53\n"
+.ascii "clflush size   : 32\n"
+.ascii "power management: ts\n\0"
+
+uname_info:
+.ascii "Linux\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+
+.ascii "tobler\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+
+.ascii "2.6.29\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+
+.ascii "#1 SMP Mon May 4 09:51:54 EDT 2009\0\0"
+.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+
+.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+
+.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+.ascii "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+
+
+sysinfo_buff:
+.long 0,0,0,0,512*1024*1024,0,0,0,0
+.long 0,0,0,0,0,0,0,0,0
+
+#============================================================================
+#      section .bss
+#============================================================================
+.bss
+
+.lcomm  text_buf, (N+F-1)
+.lcomm out_buffer,16384
+
+
+
+
+
diff --git a/exp-bbv/tests/x86-linux/ll.post.exp b/exp-bbv/tests/x86-linux/ll.post.exp
new file mode 100644 (file)
index 0000000..6c1fd4d
--- /dev/null
@@ -0,0 +1,48 @@
+T:1:9   :7:10   :5:38   :2:44   :8:65   :9:663   :4:119   :6:2   :3:51   
+T:7:5   :5:16   :2:18   :8:52   :9:858   :4:35   :6:1   :3:15   
+T:7:5   :5:16   :2:18   :8:52   :9:858   :4:35   :6:1   :3:15   
+T:7:5   :5:14   :2:16   :8:91   :9:863   :4:7   :6:1   :3:3   
+T:7:5   :5:12   :2:14   :8:78   :9:880   :4:7   :6:1   :3:3   
+T:7:5   :5:6   :2:8   :8:52   :9:928   :6:1   
+T:7:5   :5:10   :2:11   :8:65   :9:908   :6:1   
+T:7:5   :5:14   :2:17   :8:117   :9:846   :6:1   
+T:5:8   :2:8   :8:52   :9:932   
+T:7:5   :5:8   :2:10   :8:65   :9:911   :6:1   
+T:5:8   :2:8   :8:52   :9:932   
+T:7:5   :5:6   :2:8   :8:52   :9:928   :6:1   
+T:5:6   :2:6   :8:39   :9:949   
+T:7:5   :5:6   :2:8   :8:52   :9:928   :6:1   
+T:5:4   :2:4   :8:26   :9:966   
+T:7:5   :5:12   :2:14   :8:78   :9:880   :4:7   :6:1   :3:3   
+T:5:6   :2:6   :8:39   :9:949   
+T:7:5   :5:8   :2:10   :8:65   :9:911   :6:1   
+T:7:5   :5:14   :2:16   :8:91   :9:863   :4:7   :6:1   :3:3   
+T:5:8   :2:8   :8:52   :9:932   
+T:7:5   :5:10   :2:12   :8:78   :9:894   :6:1   
+T:7:5   :5:10   :2:12   :8:75   :9:897   :6:1   
+T:5:12   :2:12   :8:81   :9:895   
+T:7:5   :5:8   :2:8   :8:39   :9:389   :4:7   :6:1   :3:3   :10:3   :11:9   :12:528   
+T:12:1000   
+T:12:1000   
+T:12:1000   
+T:12:1000   
+T:12:1000   
+T:12:1000   
+T:12:1000   
+T:15:4   :18:2   :19:3   :20:2   :21:3   :22:4   :16:283   :17:10   :12:686   :13:1   :14:2   
+T:23:1   :32:7   :34:352   :33:177   :16:1   :17:2   :24:10   :25:195   :26:4   :27:3   :30:4   :31:11   :11:9   :12:204   :13:2   :14:4   :28:9   :29:5   
+T:34:667   :33:333   
+T:34:665   :33:332   :35:3   
+T:34:128   :33:64   :36:4   :37:8   :49:6   :38:8   :40:407   :39:274   :41:21   :42:14   :43:6   :44:10   :45:10   :46:8   :47:12   :48:2   :16:16   :17:2   
+T:50:2   :51:4   :52:2   :53:2   :54:6   :56:3   :57:4   :38:4   :40:405   :39:272   :41:18   :42:12   :43:9   :44:30   :45:30   :46:26   :47:39   :48:4   :16:88   :17:6   :28:9   :55:12   :29:13   
+T:40:600   :39:400   
+T:58:2   :59:3   :40:352   :39:236   :41:18   :42:12   :43:6   :44:16   :45:16   :46:14   :47:21   :48:2   :16:68   :17:2   :24:10   :25:210   :26:4   :27:3   :28:5   
+
+
+# Thread 1
+#   Total intervals: 39 (Interval Size 1000)
+#   Total instructions: 39439
+#   Total reps: 0
+#   Unique reps: 0
+#   Total fldcw instructions: 0
+
diff --git a/exp-bbv/tests/x86-linux/ll.stderr.exp b/exp-bbv/tests/x86-linux/ll.stderr.exp
new file mode 100644 (file)
index 0000000..a78db79
--- /dev/null
@@ -0,0 +1,6 @@
+# Thread 1
+#   Total intervals: 39 (Interval Size 1000)
+#   Total instructions: 39439
+#   Total reps: 0
+#   Unique reps: 0
+#   Total fldcw instructions: 0
diff --git a/exp-bbv/tests/x86-linux/ll.stdout.exp b/exp-bbv/tests/x86-linux/ll.stdout.exp
new file mode 100644 (file)
index 0000000..42415bc
--- /dev/null
@@ -0,0 +1,17 @@
+\e[0;1;37;47m#################################################################\e[0;30;47m#####\e[1;37m#########\e[1;37;40m
+\e[0;1;37;47m################################################################\e[0;30;47m#######\e[1;37m########\e[1;37;40m
+\e[0;1;37;47m###################\e[31m#\e[37m############################################\e[0;30;47m##\e[1;37mO\e[0;30;47m#\e[1;37mO\e[0;30;47m##\e[1;37m########\e[1;37;40m
+\e[0;1;37;47m##\e[0;30;47m######\e[1;37m##########\e[31m##\e[0;30;47m#\e[1;37m###########################################\e[0;30;47m#\e[1;33m#####\e[0;30;47m#\e[1;37m########\e[1;37;40m
+\e[0;1;37;47m####\e[0;30;47m##\e[1;37m#############\e[0;30;47m#\e[1;37m##########################################\e[0;30;47m##\e[1;37m##\e[33m###\e[37m##\e[0;30;47m##\e[1;37m######\e[1;37;40m
+\e[0;1;37;47m####\e[0;30;47m##\e[1;37m#########\e[31m###\e[37m###\e[0;30;47m###\e[1;37m#\e[0;30;47m####\e[1;37m###\e[0;30;47m###\e[1;37m####\e[0;30;47m###\e[1;37m##\e[0;30;47m#####\e[1;37m#\e[0;30;47m######\e[1;37m#####\e[0;30;47m#\e[1;37m##########\e[0;30;47m##\e[1;37m#####\e[1;37;40m
+\e[0;1;37;47m####\e[0;30;47m##\e[1;37m########\e[31m#\e[37m##\e[31m#\e[0;30;47m#\e[1;37m###\e[0;30;47m###\e[1;37m####\e[0;30;47m##\e[1;37m##\e[0;30;47m##\e[1;37m#####\e[0;30;47m##\e[1;37m####\e[0;30;47m##\e[1;37m###\e[0;30;47m##\e[1;37m#######\e[0;30;47m#\e[1;37m############\e[0;30;47m##\e[1;37m####\e[1;37;40m
+\e[0;1;37;47m####\e[0;30;47m##\e[1;37m#######\e[31m#\e[37m###\e[31m#\e[0;30;47m#\e[1;37m###\e[0;30;47m##\e[1;37m#####\e[0;30;47m##\e[1;37m##\e[0;30;47m##\e[1;37m#####\e[0;30;47m##\e[1;37m######\e[0;30;47m###\e[1;37m#########\e[0;30;47m#\e[1;37m############\e[0;30;47m###\e[1;37m###\e[1;37;40m
+\e[0;1;37;47m####\e[0;30;47m##\e[1;37m##########\e[31m##\e[0;30;47m#\e[1;37m###\e[0;30;47m##\e[1;37m#####\e[0;30;47m##\e[1;37m##\e[0;30;47m##\e[1;37m#####\e[0;30;47m##\e[1;37m######\e[0;30;47m###\e[1;37m########\e[33m##\e[0;30;47m#\e[1;37m###########\e[0;30;47m##\e[1;33m#\e[37m###\e[1;37;40m
+\e[0;1;37;47m####\e[0;30;47m##\e[1;37m#######\e[0;30;47m#\e[1;37m#\e[31m##\e[0;30;47m#\e[1;37m####\e[0;30;47m##\e[1;37m#####\e[0;30;47m##\e[1;37m##\e[0;30;47m##\e[1;37m#####\e[0;30;47m##\e[1;37m#####\e[0;30;47m##\e[1;37m#\e[0;30;47m##\e[1;37m#####\e[33m######\e[0;30;47m#\e[1;37m#######\e[30m#\e[33m######\e[37m#\e[1;37;40m
+\e[0;1;37;47m####\e[0;30;47m##\e[1;37m######\e[0;30;47m##\e[1;37m#\e[31m##\e[0;30;47m#\e[1;37m#\e[0;30;47m#\e[1;37m##\e[0;30;47m##\e[1;37m#####\e[0;30;47m##\e[1;37m##\e[0;30;47m###\e[1;37m###\e[0;30;47m###\e[1;37m####\e[0;30;47m##\e[1;37m###\e[0;30;47m##\e[1;37m####\e[33m#######\e[0;30;47m#\e[1;37m#####\e[0;30;47m#\e[1;33m#######\e[37m#\e[1;37;40m
+\e[0;1;37;47m##\e[0;30;47m############\e[1;37m##\e[0;30;47m###\e[1;37m##\e[0;30;47m####\e[1;37m###\e[0;30;47m####\e[1;37m###\e[0;30;47m####\e[1;37m#\e[0;30;47m###\e[1;37m#\e[0;30;47m#####\e[1;37m#\e[0;30;47m######\e[1;37m###\e[33m#####\e[30m#\e[0;30;47m#####\e[1m#\e[33m#####\e[37m###\e[1;37;40m
+
+\e[7CLinux Version 2.6.29, Compiled #1 SMP Mon May 4 09:51:54 EDT 2009
+\e[5COne 1665MHz AMD Athlon(tm) Processor, 512M RAM, 3330.53 Bogomips Total
+\e[37Ctobler\e[0m
+
diff --git a/exp-bbv/tests/x86-linux/ll.vgtest b/exp-bbv/tests/x86-linux/ll.vgtest
new file mode 100644 (file)
index 0000000..6031a58
--- /dev/null
@@ -0,0 +1,5 @@
+prog: ll
+vgopts: --interval-size=1000 --bb-out-file=ll.out.bb
+post:  cat ll.out.bb
+cleanup: rm ll.out.bb
+
diff --git a/exp-bbv/tests/x86/Makefile.am b/exp-bbv/tests/x86/Makefile.am
new file mode 100644 (file)
index 0000000..3857aae
--- /dev/null
@@ -0,0 +1,28 @@
+include $(top_srcdir)/Makefile.tool-tests.am
+
+dist_noinst_SCRIPTS = filter_stderr
+
+check_PROGRAMS = \
+       million rep_prefix fldcw_check complex_rep
+
+EXTRA_DIST = \
+          complex_rep.stderr.exp \
+          complex_rep.vgtest \
+          fldcw_check.stderr.exp \
+          fldcw_check.vgtest \
+          million.stderr.exp \
+          million.post.exp \
+          million.vgtest \
+          rep_prefix.stderr.exp \
+          rep_prefix.vgtest 
+
+AM_CCASFLAGS += -ffreestanding
+
+LDFLAGS += @FLAG_M32@ -static -nostartfiles -nodefaultlibs
+
+complex_rep_SOURCES = complex_rep.S
+fldcw_check_SOURCES = fldcw_check.S
+million_SOURCES = million.S
+rep_prefix_SOURCES = rep_prefix.S
+
+AM_CCASFLAGS += @FLAG_M32@
diff --git a/exp-bbv/tests/x86/complex_rep.S b/exp-bbv/tests/x86/complex_rep.S
new file mode 100644 (file)
index 0000000..fca36e5
--- /dev/null
@@ -0,0 +1,62 @@
+# When trying (and failing) to instrument at the basic block level
+# I thought up a lot of corner-cases in the rep code.  This tries
+# to catch some of them
+
+# Performance counters give us 8207 insns
+#    11 + 8*1024 + 3 = 8206
+
+       .globl _start   
+_start:        
+       cld                             # clear DF so the string ops move forward
+
+       mov    $0xfeb1378,%eax          # value to store (stosb writes only the low byte, %al)
+
+       # test back-to-back rep stosb instructions
+
+       mov     $1024,%ecx
+       mov     $buffer1, %edi          # set destination
+       rep     stosb                   # store 1024 times, leaves ecx at 0
+       rep     stosb                   # should store 0 times (ecx still 0)
+       rep     stosb                   # should store 0 times (ecx still 0)
+
+       
+       # test stosb where cx is explicitly zeroed beforehand
+       
+       xor    %ecx,%ecx
+       mov    $buffer1, %edi           # set destination
+       rep    stosb                    # should not store at all
+       
+       # test rep inside of a loop
+       
+       mov    $1024, %ebx
+rep_loop:      
+
+       mov    $1024,%ecx
+       mov    $buffer1, %edi           # set destination
+       rep    stosb
+       
+       mov    $1024,%ecx
+       mov    $buffer1, %edi           # set destination
+       rep    stosb
+
+       dec    %ebx
+       jnz    rep_loop
+       
+       
+       #================================
+       # Exit
+       #================================
+exit:
+       mov     $1,%eax
+#ifdef VGO_darwin
+       pushl   $0
+#else  
+       xor     %ebx,%ebx               # we return 0
+#endif 
+       int     $0x80                   # and exit
+
+
+#.bss
+
+.lcomm buffer1,        16384
+
diff --git a/exp-bbv/tests/x86/complex_rep.stderr.exp b/exp-bbv/tests/x86/complex_rep.stderr.exp
new file mode 100644 (file)
index 0000000..ceabe14
--- /dev/null
@@ -0,0 +1,6 @@
+# Thread 1
+#   Total intervals: 0 (Interval Size 100000)
+#   Total instructions: 8206
+#   Total reps: 2100228
+#   Unique reps: 2052
+#   Total fldcw instructions: 0
diff --git a/exp-bbv/tests/x86/complex_rep.vgtest b/exp-bbv/tests/x86/complex_rep.vgtest
new file mode 100644 (file)
index 0000000..ef5ac30
--- /dev/null
@@ -0,0 +1,4 @@
+prog: complex_rep
+vgopts: --interval-size=100000 --bb-out-file=complex_rep.out.bb
+cleanup: rm complex_rep.out.bb
+
diff --git a/exp-bbv/tests/x86/filter_stderr b/exp-bbv/tests/x86/filter_stderr
new file mode 100644 (file)
index 0000000..1c07666
--- /dev/null
@@ -0,0 +1,5 @@
+#! /bin/sh
+
+../filter_stderr
+
+
diff --git a/exp-bbv/tests/x86/fldcw_check.S b/exp-bbv/tests/x86/fldcw_check.S
new file mode 100644 (file)
index 0000000..ef4c3a7
--- /dev/null
@@ -0,0 +1,130 @@
+
+.globl _start
+
+_start:
+        # This code tests for the fldcw "load floating point control word"
+       #   instruction.  On most x86 processors the retired_instruction
+       #   performance counter counts this as one instruction.  However,
+       #   on Pentium 4 systems it counts as two.  Therefore this can
+       #   affect BBV results on such a system.
+       # fldcw is most often used to set the rounding mode when doing
+       #   floating point to integer conversions
+       
+       # It is encoded as "d9 /5" which means
+       #   1101 1001 xx10 1yyy
+       # Where xx is the "mod" which will be 00, 01, or 10 indicating offset
+       #   and yyy is the register field
+
+
+
+        # these are instructions with similar encodings to fldcw
+       # that can cause false positives if the test isn't explicit enough
+similar:       
+        fld1                                   # d9 e8
+       fldl2t                          # d9 e9
+       fldl2e                          # d9 ea
+       fldpi                           # d9 eb
+       fldlg2                          # d9 ec
+       fldln2                          # d9 ed
+       fldz                            # d9 ee
+
+       # check some varied ways of calling fldcw
+
+
+       # offset on stack
+stack: 
+       sub     $4,%esp                 # allocate space on stack
+       fnstcw  2(%esp)         
+       fldcw   2(%esp)         
+       add     $4,%esp                 # restore stack
+       
+       # 32-bit register
+       
+       fnstcw  cw
+       mov     $cw,%eax
+       fldcw   0(%eax)                 # eax
+       mov     $cw,%ebx
+       fldcw   0(%ebx)                 # ebx
+       mov     $cw,%ecx        
+       fldcw   0(%ecx)                 # ecx
+       mov     $cw,%edx                 
+       fldcw   0(%edx)                 # edx
+       
+       # register + 8-bit offset
+eight_bit:     
+       mov     $cw,%eax
+       sub     $32,%eax
+       
+       fldcw   32(%eax)                # eax + 8 bit offset
+       mov     %eax,%ebx
+       fldcw   32(%ebx)                # ebx + 8 bit offset    
+       mov     %eax,%ecx
+       fldcw   32(%ecx)                # ecx + 8 bit offset            
+       mov     %eax,%edx
+       fldcw   32(%edx)                # edx + 8 bit offset
+       
+       # register + 32-bit offset
+thirtytwo_bit: 
+       mov     $cw,%eax
+       sub     $30000,%eax             # eax = cw - 30000, so 30000(%eax) addresses cw
+       
+       fldcw   30000(%eax)             # eax + 32 bit offset (30000 does not fit in 8 bits)
+       mov     %eax,%ebx
+       fldcw   30000(%ebx)             # ebx + 32 bit offset
+       mov     %eax,%ecx
+       fldcw   30000(%ecx)             # ecx + 32 bit offset
+       mov     %eax,%edx
+       fldcw   30000(%edx)             # edx + 32 bit offset
+
+       # check an fp/integer conversion
+       # in a loop to give a bigger count
+
+       mov     $1024,%ecx
+big_loop:
+
+       fldl    three                   # load value onto fp stack
+       fnstcw  saved_cw                # store control word to mem
+       movzwl  saved_cw, %eax          # load cw from mem, zero extending
+       movb    $12, %ah                # overwrite cw bits 8-15 with 0x0c: bits 10-11 set, "round to zero"
+       movw    %ax, cw                 # store back to memory
+       fldcw   cw                      # load new rounding mode
+       fistpl  result                  # pop fp stack, store value as integer to mem
+       fldcw   saved_cw                # restore old cw
+       
+       loop    big_loop                # loop to make the count more obvious
+
+       movl    result, %ebx            # sanity check to see if the
+       cmp     $3,%ebx                 # result is the expected one
+       je      exit                    # result is 3: skip the error message
+       
+print_error:
+       mov     $4,%eax                 # write syscall
+#ifdef VGO_darwin
+       pushl   $1
+       pushl   $error
+       pushl   $22
+#else  
+       mov     $1,%ebx                 # stdout
+       mov     $error,%ecx             # string        
+       mov     $22,%edx                # length of string
+#endif 
+       int     $0x80
+       
+exit:
+#ifdef VGO_darwin
+       pushl   result
+#else  
+       movl    result, %ebx            # load converted value
+#endif 
+       movl    $1,     %eax            # SYSCALL_EXIT
+       int     $0x80
+       
+
+
+.data
+saved_cw:      .long 0
+cw:    .long   0
+result: .long  0
+three: .long   0                       # a floating point 3.0
+       .long   1074266112
+error: .asciz  "Error!  Wrong result!\n"
diff --git a/exp-bbv/tests/x86/fldcw_check.stderr.exp b/exp-bbv/tests/x86/fldcw_check.stderr.exp
new file mode 100644 (file)
index 0000000..c1add90
--- /dev/null
@@ -0,0 +1,6 @@
+# Thread 1
+#   Total intervals: 0 (Interval Size 10000)
+#   Total instructions: 9261
+#   Total reps: 0
+#   Unique reps: 0
+#   Total fldcw instructions: 2061
diff --git a/exp-bbv/tests/x86/fldcw_check.vgtest b/exp-bbv/tests/x86/fldcw_check.vgtest
new file mode 100644 (file)
index 0000000..f9bbae9
--- /dev/null
@@ -0,0 +1,4 @@
+prog: fldcw_check
+vgopts: --interval-size=10000 --bb-out-file=fldcw_check.out.bb
+cleanup: rm fldcw_check.out.bb
+
diff --git a/exp-bbv/tests/x86/million.S b/exp-bbv/tests/x86/million.S
new file mode 100644 (file)
index 0000000..0d72b00
--- /dev/null
@@ -0,0 +1,33 @@
+               # many thanks to David Fang
+               # for providing an OSX 10.5 machine to test on
+
+            # count for 1 million instructions
+            #   total is 1 + 1 + 499997*2 + 4
+
+       .globl _start   
+_start:
+       xor     %ecx,%ecx               # not needed, pads total to 1M
+       mov     $499997,%ecx            # load counter
+test_loop:     
+       dec     %ecx                    # repeat count times
+       jnz     test_loop
+
+       #================================
+       # Exit
+       #================================
+
+       # syscall numbers in /usr/include/sys/syscall.h on OSX
+       #                 in arch/x86/include/asm/unistd_32.h on Linux
+       # disassemble on OSX with otool -tV
+exit:
+#ifdef VGO_darwin
+       pushl   $0                      # we return 0
+       xor     %eax,%eax
+       inc     %eax                    # put exit syscall number (1) in eax
+       int     $0x80                   # and exit
+#else  
+       xor     %ebx,%ebx               # we return 0
+       xor     %eax,%eax
+       inc     %eax                    # put exit syscall number (1) in eax
+       int     $0x80                   # and exit
+#endif
diff --git a/exp-bbv/tests/x86/million.post.exp b/exp-bbv/tests/x86/million.post.exp
new file mode 100644 (file)
index 0000000..6eb56fc
--- /dev/null
@@ -0,0 +1,18 @@
+T:1:4   :2:99997   
+T:2:100000   
+T:2:100000   
+T:2:100000   
+T:2:100000   
+T:2:100000   
+T:2:100000   
+T:2:100000   
+T:2:100000   
+
+
+# Thread 1
+#   Total intervals: 10 (Interval Size 100000)
+#   Total instructions: 1000000
+#   Total reps: 0
+#   Unique reps: 0
+#   Total fldcw instructions: 0
+
diff --git a/exp-bbv/tests/x86/million.stderr.exp b/exp-bbv/tests/x86/million.stderr.exp
new file mode 100644 (file)
index 0000000..adeb35d
--- /dev/null
@@ -0,0 +1,6 @@
+# Thread 1
+#   Total intervals: 10 (Interval Size 100000)
+#   Total instructions: 1000000
+#   Total reps: 0
+#   Unique reps: 0
+#   Total fldcw instructions: 0
diff --git a/exp-bbv/tests/x86/million.vgtest b/exp-bbv/tests/x86/million.vgtest
new file mode 100644 (file)
index 0000000..fc91c77
--- /dev/null
@@ -0,0 +1,5 @@
+prog: million 
+vgopts: --interval-size=100000 --bb-out-file=million.out.bb --pc-out-file=million.out.pc
+post:  cat million.out.bb
+cleanup: rm million.out.bb million.out.pc
+
diff --git a/exp-bbv/tests/x86/rep_prefix.S b/exp-bbv/tests/x86/rep_prefix.S
new file mode 100644 (file)
index 0000000..346248c
--- /dev/null
@@ -0,0 +1,280 @@
+#
+# rep, repe (repz) and repne (repnz) prefixed string instructions
+#   only count as one instruction, even though they repeat many times
+# This test makes sure the bbv plugin counts these instructions properly
+# The answer is validated to hw perf counters.
+#
+
+       .globl _start   
+_start:        
+       cld                             # we want these to happen forward
+
+       #===================================
+       # Check varied order of the size prefix
+       #   with the rep prefix.  Older binutils
+       #   did this one way, newer binutils the other
+       #===================================
+       
+size_prefix:
+       # test 16-bit load, hand-encoded so both prefix orders are exercised
+       
+       mov     $8192, %ecx
+       mov     $buffer1, %esi          # set source
+       .byte 0x66, 0xf3, 0xad          # rep lodsw: operand-size prefix (0x66) before rep (0xf3)
+       
+       mov     $8192, %ecx
+       mov     $buffer1, %esi          # set source
+       .byte 0xf3, 0x66, 0xad          # rep lodsw: rep (0xf3) before operand-size prefix (0x66)
+       
+       
+       
+
+       #===================================
+       # Load and Store Instructions
+       #===================================
+loadstore:
+       xor     %eax, %eax
+       mov     $0xd, %al               # set eax to d
+       
+       # test 8-bit store
+       
+       mov     $16384, %ecx
+       mov     $buffer1, %edi          # set destination
+       rep     stosb                   # store d 16384 times, auto-increment
+       
+       # test 8-bit load
+       
+       mov     $16384, %ecx
+       mov     $buffer1, %esi          # set source
+       rep     lodsb                   # load byte 16384 times, auto-increment
+
+       cmp     $0xd,%al                # if we loaded wrong value
+       jne     print_error             # print an error
+
+       # test 16-bit store
+       
+       mov     $0x020d,%ax             # store 0x020d
+       
+       mov     $8192, %ecx
+       mov     $buffer1, %edi          # set destination
+       rep     stosw                   # store 8192 times, auto-increment
+       
+       # test 16-bit load
+       
+       mov     $8192, %ecx
+       mov     $buffer1, %esi          # set source
+       rep     lodsw                   # load 8192 times, auto-increment
+
+       cmp     $0x020d,%ax             # if we loaded wrong value
+       jne     print_error             # print an error
+       
+       # test 32-bit store
+       
+       mov     $0x0feb1378,%eax        # store 0x0feb1378
+       
+       mov     $4096, %ecx
+       mov     $buffer1, %edi          # set destination
+       rep     stosl                   # store 4096 times, auto-increment
+       
+       # test 32-bit load
+       
+       mov     $4096, %ecx
+       mov     $buffer1, %esi          # set source
+       rep     lodsl                   # load 4096 times, auto-increment
+
+       cmp     $0x0feb1378,%eax        # if we loaded wrong value
+       jne     print_error             # print an error
+
+       #=============================
+       # Move instructions
+       #=============================
+moves:
+       # test 8-bit move
+       
+       mov    $16384, %ecx
+       mov    $buffer1, %esi
+       mov    $buffer2, %edi
+       rep    movsb
+       
+       # test 16-bit move
+       
+       mov    $8192, %ecx
+       mov    $buffer2, %esi
+       mov    $buffer1, %edi
+       rep    movsw
+       
+       # test 32-bit move
+       
+       mov    $4096, %ecx
+       mov    $buffer1, %esi
+       mov    $buffer2, %edi
+       rep    movsl    
+       
+       #==================================
+       # Compare equal instructions
+       #==================================
+compare_equal: 
+       # first set up the areas to compare
+       
+       mov     $0xa5a5a5a5,%eax
+       mov     $buffer1, %edi
+       mov     $4096, %ecx
+       rep     stosl
+       
+       mov     $0xa5a5a5a5,%eax
+       mov     $buffer2, %edi
+       mov     $4096, %ecx
+       rep     stosl
+       
+       # test 8-bit
+       
+       mov     $buffer1,%esi
+       mov     $buffer2,%edi
+       mov     $16384, %ecx
+       repe    cmpsb
+       jnz     print_error
+       
+       # test 16-bit
+       
+       mov     $buffer1,%esi
+       mov     $buffer2,%edi
+       mov     $8192, %ecx
+       repe    cmpsw
+       jnz     print_error     
+       
+       # test 32-bit
+       
+       mov     $buffer1,%esi
+       mov     $buffer2,%edi
+       mov     $4096, %ecx
+       repe    cmpsl
+       jnz     print_error             
+       
+       #==================================
+       # Compare not equal instructions
+       #==================================
+compare_noteq: 
+       # change second buffer
+       
+       mov     $0x5a5a5a5a,%eax
+       mov     $buffer2, %edi
+       mov     $4096, %ecx
+       rep     stosl
+       
+       # test 8-bit
+       
+       mov     $buffer1,%esi
+       mov     $buffer2,%edi
+       mov     $16384, %ecx
+       repne   cmpsb
+       je      print_error
+       
+       # test 16-bit
+       
+       mov     $buffer1,%esi
+       mov     $buffer2,%edi
+       mov     $8192, %ecx
+       repne   cmpsw
+       je      print_error     
+       
+       # test 32-bit
+       
+       mov     $buffer1,%esi
+       mov     $buffer2,%edi
+       mov     $4096, %ecx
+       repne   cmpsl
+       je      print_error                     
+       
+       #====================================
+       # Check scan equal instruction
+       #====================================
+
+       # test 8-bit
+
+       mov     $0xa5,%al
+       mov     $buffer1,%edi
+       mov     $16384, %ecx
+       repe    scasb
+       jnz     print_error
+       
+       # test 16-bit
+       
+       mov     $0xa5a5,%ax
+       mov     $buffer1,%edi
+       mov     $8192, %ecx
+       repe    scasw
+       jnz     print_error     
+       
+       # test 32-bit
+       
+       mov     $0xa5a5a5a5,%eax
+       mov     $buffer1,%edi
+       mov     $4096, %ecx
+       repe    scasl
+       jnz     print_error             
+
+       #====================================
+       # Check scan not-equal instruction
+       #====================================
+
+       # test 8-bit
+
+       mov     $0xa5,%al
+       mov     $buffer2,%edi
+       mov     $16384, %ecx
+       repne   scasb
+       jz      print_error
+       
+       # test 16-bit
+       
+       mov     $0xa5a5,%ax
+       mov     $buffer2,%edi
+       mov     $8192, %ecx
+       repne   scasw
+       jz      print_error     
+       
+       # test 32-bit
+       
+       mov     $0xa5a5a5a5,%eax
+       mov     $buffer2,%edi
+       mov     $4096, %ecx
+       repne   scasl
+       jz      print_error             
+
+       jmp     exit                    # no error, skip to exit
+       
+print_error:
+       # NOTE(review): BSD-style int $0x80 normally takes args pushed last-to-first plus one extra word on top; the Darwin pushes below are first-to-last - confirm
+       mov     $4, %eax                # Write syscall
+#ifdef VGO_darwin
+       pushl   $1                      # stdout
+       pushl   $error_string           # string to print
+       pushl   $16                     # strlen
+#else  
+       mov     $1, %ebx                # print to stdout
+       mov     $error_string, %ecx     # string to print
+       mov     $16, %edx               # strlen
+#endif 
+       int     $0x80                   # call syscall
+
+       #================================
+       # Exit
+       #================================
+exit:
+#ifdef VGO_darwin
+       pushl   $0                      # Darwin: exit status 0 is passed on the stack
+#else
+       xor     %ebx,%ebx               # Linux: exit status 0 is passed in ebx
+#endif 
+       xor     %eax,%eax
+       inc     %eax                    # put exit syscall number (1) in eax
+       int     $0x80                   # and exit
+
+
+.data
+error_string:  .asciz "Error detected!\n"
+
+#.bss
+
+.lcomm buffer1,        16384
+.lcomm buffer2,        16384
diff --git a/exp-bbv/tests/x86/rep_prefix.stderr.exp b/exp-bbv/tests/x86/rep_prefix.stderr.exp
new file mode 100644 (file)
index 0000000..e71e657
--- /dev/null
@@ -0,0 +1,6 @@
+# Thread 1
+#   Total intervals: 0 (Interval Size 100000)
+#   Total instructions: 124
+#   Total reps: 229402
+#   Unique reps: 26
+#   Total fldcw instructions: 0
diff --git a/exp-bbv/tests/x86/rep_prefix.vgtest b/exp-bbv/tests/x86/rep_prefix.vgtest
new file mode 100644 (file)
index 0000000..bc89a1c
--- /dev/null
@@ -0,0 +1,4 @@
+prog: rep_prefix
+vgopts: --interval-size=100000 --bb-out-file=rep_prefix.out.bb
+cleanup: rm rep_prefix.out.bb
+